Spaces:

shibing624
/

pycorrector

Running

App Files Files Community

shibing624 committed on Feb 14, 2022

Commit

55292dd

1 Parent(s): 2f789a3

add app

Browse files

Files changed (2) hide show

app.py +57 -4
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,7 +1,60 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()

 import gradio as gr
+import operator
+import torch
+from transformers import BertTokenizer, BertForMaskedLM
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+tokenizer = BertTokenizer.from_pretrained("shibing624/macbert4csc-base-chinese")
+model = BertForMaskedLM.from_pretrained("shibing624/macbert4csc-base-chinese")
+model.to(device)
+def ai_text(texts):
+    with torch.no_grad():
+        outputs = model(**tokenizer(texts, padding=True, return_tensors='pt').to(device))
+    def get_errors(corrected_text, origin_text):
+        sub_details = []
+        for i, ori_char in enumerate(origin_text):
+            if ori_char in [' ', '“', '”', '‘', '’', '琊', '\n', '…', '—', '擤']:
+                # add unk word
+                corrected_text = corrected_text[:i] + ori_char + corrected_text[i:]
+                continue
+            if i >= len(corrected_text):
+                continue
+            if ori_char != corrected_text[i]:
+                if ori_char.lower() == corrected_text[i]:
+                    # pass english upper char
+                    corrected_text = corrected_text[:i] + ori_char + corrected_text[i + 1:]
+                    continue
+                sub_details.append((ori_char, corrected_text[i], i, i + 1))
+        sub_details = sorted(sub_details, key=operator.itemgetter(2))
+        return corrected_text, sub_details
+    result = []
+    for ids, text in zip(outputs.logits, texts):
+        _text = tokenizer.decode(torch.argmax(ids, dim=-1), skip_special_tokens=True).replace(' ', '')
+        corrected_text = _text[:len(text)]
+        corrected_text, details = get_errors(corrected_text, text)
+        print(text, ' => ', corrected_text, details)
+        result.append((corrected_text, details))
+    print(result)
+    return result
+examples = [
+    ['真麻烦你了。希望你们好好的跳无'],
+    ['少先队员因该为老人让坐'],
+    ['机七学习是人工智能领遇最能体现智能的一个分知'],
+    ['今天心情很好',
+     '老是较书。'],
+    ['遇到一位很棒的奴生跟我聊天。'],
+    ['他的语说的很好，法语也不错'],
+    ['他法语说的很好，的语也不错'],
+    ['他们的吵翻很不错，再说他们做的咖喱鸡也好吃'],
+    ['不过在许多传统国家，女人向未得到平等'],
+]
+output_text = gr.outputs.Textbox()
+gr.Interface(ai_text, "textbox", output_text, title="Chinese Text Correction shibing624/macbert4csc-base-chinese",
+             description="Copy or input error Chinese text. Submit and the machine will correct text.", examples=examples).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ transformers