Spaces:

topdu
/

OpenOCR-Demo

Running

App Files Community

topdu committed on Dec 4, 2024

Commit

695a4a4

1 Parent(s): ac9bf47

update app

Browse files

Files changed (5) hide show

app.py +59 -14
configs/det/dbnet/repvit_db.yml +1 -1
opendet/postprocess/db_postprocess.py +6 -1
tools/infer_det.py +3 -2
tools/infer_e2e.py +11 -6

app.py CHANGED Viewed

@@ -1,3 +1,6 @@
 import os
 import gradio as gr  # gradio==4.20.0
@@ -20,10 +23,20 @@ font_path = './simfang.ttf'
 check_and_download_font(font_path)
-def main(input_image):
     img = input_image[:, :, ::-1]
     starttime = time.time()
-    results, time_dict, mask = text_sys(img_numpy=img, return_mask=True)
     elapse = time.time() - starttime
     save_pred = json.dumps(results[0], ensure_ascii=False)
     image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
@@ -35,10 +48,10 @@ def main(input_image):
         boxes,
         txts,
         scores,
-        drop_score=drop_score,
         font_path=font_path,
     )
-    mask = mask[0, 0, :, :] > 0.3
     return save_pred, elapse, draw_img, mask.astype('uint8') * 255
@@ -75,15 +88,6 @@ def find_file_in_current_dir_and_subdirs(file_name):
             return relative_path
-def predict1(input_image, Model_type, OCR_type):
-    if OCR_type == 'E2E':
-        return 11111, 'E2E', input_image
-    elif OCR_type == 'STR':
-        return 11111, 'STR', input_image
-    else:
-        return 11111, 'STD', input_image
 e2e_img_example = list_image_paths('./OCR_e2e_img')
 if __name__ == '__main__':
@@ -103,6 +107,45 @@ if __name__ == '__main__':
                                        label='Examples')
                 downstream = gr.Button('Run')
             with gr.Column(scale=1):
                 img_mask = gr.Image(label='mask',
                                     interactive=False,
@@ -116,7 +159,9 @@ if __name__ == '__main__':
             downstream.click(fn=main,
                              inputs=[
-                                 input_image,
                              ],
                              outputs=[
                                  output,

+# -*- encoding: utf-8 -*-
+# @Author: OpenOCR
+# @Contact: 784990967@qq.com
 import os
 import gradio as gr  # gradio==4.20.0
 check_and_download_font(font_path)
+def main(input_image,
+         rec_drop_score=0.01,
+         mask_thresh=0.3,
+         box_thresh=0.6,
+         unclip_ratio=1.5,
+         det_score_mode='slow'):
     img = input_image[:, :, ::-1]
     starttime = time.time()
+    results, time_dict, mask = text_sys(img_numpy=img,
+                                        return_mask=True,
+                                        thresh=mask_thresh,
+                                        box_thresh=box_thresh,
+                                        unclip_ratio=unclip_ratio,
+                                        score_mode=det_score_mode)
     elapse = time.time() - starttime
     save_pred = json.dumps(results[0], ensure_ascii=False)
     image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
         boxes,
         txts,
         scores,
+        drop_score=rec_drop_score,
         font_path=font_path,
     )
+    mask = mask[0, 0, :, :] > mask_thresh
     return save_pred, elapse, draw_img, mask.astype('uint8') * 255
             return relative_path
 e2e_img_example = list_image_paths('./OCR_e2e_img')
 if __name__ == '__main__':
                                        label='Examples')
                 downstream = gr.Button('Run')
+                with gr.Row():
+                    with gr.Column():
+                        rec_drop_score_slider = gr.Slider(
+                            0.0,
+                            1.0,
+                            value=0.01,
+                            step=0.01,
+                            label="Recognition Drop Score",
+                            info="Recognition confidence threshold, default value is 0.01. Recognition results and corresponding text boxes lower than this threshold are discarded.")
+                        mask_thresh_slider = gr.Slider(
+                            0.0,
+                            1.0,
+                            value=0.3,
+                            step=0.01,
+                            label="Mask Threshold",
+                            info="Mask threshold for binarizing masks, defaults to 0.3, turn it down if there is text truncation.")
+                    with gr.Column():
+                        box_thresh_slider = gr.Slider(
+                            0.0,
+                            1.0,
+                            value=0.6,
+                            step=0.01,
+                            label="Box Threshold",
+                            info="Text Box Confidence Threshold, default value is 0.6, turn it down if there is text being missed.")
+                        unclip_ratio_slider = gr.Slider(
+                            1.5,
+                            2.0,
+                            value=1.5,
+                            step=0.05,
+                            label="Unclip Ratio",
+                            info="Expansion factor for parsing text boxes, default value is 1.5. The larger the value, the larger the text box.")
+                det_score_mode_dropdown = gr.Dropdown(
+                    ["slow", "fast"],
+                    value="slow",
+                    label="Det Score Mode",
+                    info="The confidence calculation mode of the text box, the default is slow. Slow mode is slower but more accurate. Fast mode is faster but less accurate."
+                )
             with gr.Column(scale=1):
                 img_mask = gr.Image(label='mask',
                                     interactive=False,
             downstream.click(fn=main,
                              inputs=[
+                                 input_image, rec_drop_score_slider,
+                                 mask_thresh_slider, box_thresh_slider,
+                                 unclip_ratio_slider, det_score_mode_dropdown
                              ],
                              outputs=[
                                  output,

configs/det/dbnet/repvit_db.yml CHANGED Viewed

@@ -53,7 +53,7 @@ Architecture:
 PostProcess:
   name: DBPostProcess
   thresh: 0.3
-  box_thresh: 0.4
   max_candidates: 1000
   unclip_ratio: 1.5
   score_mode: 'slow'

 PostProcess:
   name: DBPostProcess
   thresh: 0.3
+  box_thresh: 0.6
   max_candidates: 1000
   unclip_ratio: 1.5
   score_mode: 'slow'

opendet/postprocess/db_postprocess.py CHANGED Viewed

@@ -208,7 +208,12 @@ class DBPostProcess(object):
         cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype('int32'), 1)
         return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
-    def __call__(self, outs_dict, shape_list):
         pred = outs_dict['maps']
         if isinstance(pred, torch.Tensor):
             pred = pred.detach().cpu().numpy()

         cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype('int32'), 1)
         return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
+    def __call__(self, outs_dict, shape_list, **kwargs):
+        self.thresh= kwargs.get('thresh', self.thresh)
+        self.box_thresh = kwargs.get('box_thresh', self.box_thresh)
+        self.unclip_ratio = kwargs.get('unclip_ratio', self.unclip_ratio)
+        self.box_type = kwargs.get('box_type', self.box_type)
+        self.score_mode = kwargs.get('score_mode', self.score_mode)
         pred = outs_dict['maps']
         if isinstance(pred, torch.Tensor):
             pred = pred.detach().cpu().numpy()

tools/infer_det.py CHANGED Viewed

@@ -353,7 +353,8 @@ class OpenDetector(object):
                  img_path=None,
                  img_numpy_list=None,
                  img_numpy=None,
-                 return_mask=False):
         """
         对输入图像进行处理，并返回处理结果。
@@ -400,7 +401,7 @@ class OpenDetector(object):
                 t_start = time.time()
                 preds = self.model(images)
                 t_cost = time.time() - t_start
-            post_result = self.post_process_class(preds, shape_list)
             info = {'boxes': post_result[0]['points'], 'elapse': t_cost}
             if return_mask:

                  img_path=None,
                  img_numpy_list=None,
                  img_numpy=None,
+                 return_mask=False,
+                 **kwargs):
         """
         对输入图像进行处理，并返回处理结果。
                 t_start = time.time()
                 preds = self.model(images)
                 t_cost = time.time() - t_start
+            post_result = self.post_process_class(preds, shape_list, **kwargs)
             info = {'boxes': post_result[0]['points'], 'elapse': t_cost}
             if return_mask:

tools/infer_e2e.py CHANGED Viewed

@@ -182,14 +182,15 @@ class OpenOCR(object):
                            ori_img,
                            crop_infer=False,
                            rec_batch_num=6,
-                           return_mask=False):
         start = time.time()
         if crop_infer:
             dt_boxes = self.text_detector.crop_infer(
                 img_numpy=img_numpy)[0]['boxes']
         else:
             det_res = self.text_detector(img_numpy=img_numpy,
-                                         return_mask=return_mask)[0]
             dt_boxes = det_res['boxes']
         # logger.info(dt_boxes)
         det_time_cost = time.time() - start
@@ -247,7 +248,8 @@ class OpenOCR(object):
                  img_numpy=None,
                  rec_batch_num=6,
                  crop_infer=False,
-                 return_mask=False):
         """
         img_path: str, optional, default=None
             Path to the directory containing images or the image filename.
@@ -278,13 +280,15 @@ class OpenOCR(object):
                         ori_img=ori_img,
                         crop_infer=crop_infer,
                         rec_batch_num=rec_batch_num,
-                        return_mask=return_mask)
                 else:
                     dt_boxes, rec_res, time_dict = self.infer_single_image(
                         img_numpy=img,
                         ori_img=ori_img,
                         crop_infer=crop_infer,
-                        rec_batch_num=rec_batch_num)
                 if dt_boxes is None:
                     results.append([])
                     time_dicts.append({})
@@ -324,7 +328,8 @@ class OpenOCR(object):
                     img_numpy=img_numpy,
                     ori_img=ori_img,
                     crop_infer=crop_infer,
-                    rec_batch_num=rec_batch_num)
                 if dt_boxes is None:
                     res_list.append([])
                     time_dicts.append({})

                            ori_img,
                            crop_infer=False,
                            rec_batch_num=6,
+                           return_mask=False,
+                           **kwargs):
         start = time.time()
         if crop_infer:
             dt_boxes = self.text_detector.crop_infer(
                 img_numpy=img_numpy)[0]['boxes']
         else:
             det_res = self.text_detector(img_numpy=img_numpy,
+                                         return_mask=return_mask, **kwargs)[0]
             dt_boxes = det_res['boxes']
         # logger.info(dt_boxes)
         det_time_cost = time.time() - start
                  img_numpy=None,
                  rec_batch_num=6,
                  crop_infer=False,
+                 return_mask=False,
+                 **kwargs):
         """
         img_path: str, optional, default=None
             Path to the directory containing images or the image filename.
                         ori_img=ori_img,
                         crop_infer=crop_infer,
                         rec_batch_num=rec_batch_num,
+                        return_mask=return_mask,
+                        **kwargs)
                 else:
                     dt_boxes, rec_res, time_dict = self.infer_single_image(
                         img_numpy=img,
                         ori_img=ori_img,
                         crop_infer=crop_infer,
+                        rec_batch_num=rec_batch_num,
+                        **kwargs)
                 if dt_boxes is None:
                     results.append([])
                     time_dicts.append({})
                     img_numpy=img_numpy,
                     ori_img=ori_img,
                     crop_infer=crop_infer,
+                    rec_batch_num=rec_batch_num,
+                    **kwargs)
                 if dt_boxes is None:
                     res_list.append([])
                     time_dicts.append({})