rtr46 committed on
Commit
893788d
·
verified ·
1 Parent(s): 1457119

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -19
app.py CHANGED
@@ -43,7 +43,7 @@ except Exception as e:
43
  raise gr.Error(f"failed to load models. please check space logs. error: {e}")
44
 
45
  # --- 2. ocr pipeline helper functions ---
46
- # these are the core processing functions adapted from meiki_ocr.py
47
 
48
  def preprocess_for_detection(image):
49
  h_orig, w_orig, _ = image.shape
@@ -122,17 +122,14 @@ def postprocess_recognition_results(raw_rec_outputs, valid_indices, crop_metadat
122
  if not is_overlap: accepted.append(cand)
123
  accepted.sort(key=lambda c: c['x_interval'][0])
124
  text = ''.join(c['char'] for c in accepted)
125
- full_results[valid_indices[i]] = {'text': text, 'chars': accepted}
 
 
126
  return full_results
127
 
128
  # --- 3. main gradio processing function ---
129
 
130
  def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
131
- """
132
- this function is called when the user clicks the 'run ocr' button.
133
- it takes the user's image and settings, runs the full pipeline,
134
- and returns the visualized image and the recognized text.
135
- """
136
  if input_image is None:
137
  raise gr.Error("please upload an image to process.")
138
 
@@ -142,7 +139,7 @@ def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
142
  text_boxes = postprocess_detection_results(det_raw, sx, sy, det_threshold)
143
 
144
  if not text_boxes:
145
- return input_image, "no text detected. try lowering the 'detection confidence' slider."
146
 
147
  # --- recognition stage ---
148
  rec_batch, valid_indices, crop_metadata = preprocess_for_recognition(input_image, text_boxes)
@@ -159,7 +156,7 @@ def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
159
  x1, y1, x2, y2 = char_info['bbox']
160
  cv2.rectangle(output_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
161
 
162
- return cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB), "\n".join(full_text)
163
 
164
  # --- 4. gradio interface definition ---
165
 
@@ -184,20 +181,24 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
184
 
185
  with gr.Column(scale=2):
186
  output_image = gr.Image(type="numpy", label="ocr result")
187
- output_text = gr.Textbox(label="recognized text", lines=10)
188
-
189
- gr.Examples(
190
- examples=[os.path.join(os.path.dirname(__file__), "example.jpg")],
191
- inputs=[input_image],
192
- outputs=[output_image, output_text],
193
- fn=lambda img: run_ocr_pipeline(img, 0.5, 0.1),
194
- cache_examples=True
195
- )
 
 
 
 
196
 
197
  run_button.click(
198
  fn=run_ocr_pipeline,
199
  inputs=[input_image, det_threshold, rec_threshold],
200
- outputs=[output_image, output_text]
201
  )
202
 
203
  # --- 5. launch the app ---
 
43
  raise gr.Error(f"failed to load models. please check space logs. error: {e}")
44
 
45
  # --- 2. ocr pipeline helper functions ---
46
+ # (these functions remain unchanged)
47
 
48
  def preprocess_for_detection(image):
49
  h_orig, w_orig, _ = image.shape
 
122
  if not is_overlap: accepted.append(cand)
123
  accepted.sort(key=lambda c: c['x_interval'][0])
124
  text = ''.join(c['char'] for c in accepted)
125
+ # remove interval before returning
126
+ final_chars = [{'char': c['char'], 'bbox': c['bbox'], 'conf': c['conf']} for c in accepted]
127
+ full_results[valid_indices[i]] = {'text': text, 'chars': final_chars}
128
  return full_results
129
 
130
  # --- 3. main gradio processing function ---
131
 
132
  def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
 
 
 
 
 
133
  if input_image is None:
134
  raise gr.Error("please upload an image to process.")
135
 
 
139
  text_boxes = postprocess_detection_results(det_raw, sx, sy, det_threshold)
140
 
141
  if not text_boxes:
142
+ return input_image, "no text detected. try lowering the 'detection confidence' slider.", None
143
 
144
  # --- recognition stage ---
145
  rec_batch, valid_indices, crop_metadata = preprocess_for_recognition(input_image, text_boxes)
 
156
  x1, y1, x2, y2 = char_info['bbox']
157
  cv2.rectangle(output_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
158
 
159
+ return output_image, "\n".join(full_text), results
160
 
161
  # --- 4. gradio interface definition ---
162
 
 
181
 
182
  with gr.Column(scale=2):
183
  output_image = gr.Image(type="numpy", label="ocr result")
184
+ output_text = gr.Textbox(label="recognized text", lines=5)
185
+ output_json = gr.Code(label="json output", language="json", lines=5)
186
+
187
+ # example image needs to be in the same folder as app.py
188
+ example_image_path = os.path.join(os.path.dirname(__file__), "ace-attorney.jpg")
189
+ if os.path.exists(example_image_path):
190
+ gr.Examples(
191
+ examples=[example_image_path],
192
+ inputs=[input_image],
193
+ fn=lambda img_path: run_ocr_pipeline(cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB), 0.5, 0.1),
194
+ outputs=[output_image, output_text, output_json],
195
+ cache_examples=True
196
+ )
197
 
198
  run_button.click(
199
  fn=run_ocr_pipeline,
200
  inputs=[input_image, det_threshold, rec_threshold],
201
+ outputs=[output_image, output_text, output_json]
202
  )
203
 
204
  # --- 5. launch the app ---