Update app.py
Browse files
app.py
CHANGED
|
@@ -43,7 +43,7 @@ except Exception as e:
|
|
| 43 |
raise gr.Error(f"failed to load models. please check space logs. error: {e}")
|
| 44 |
|
| 45 |
# --- 2. ocr pipeline helper functions ---
|
| 46 |
-
# these
|
| 47 |
|
| 48 |
def preprocess_for_detection(image):
|
| 49 |
h_orig, w_orig, _ = image.shape
|
|
@@ -122,17 +122,14 @@ def postprocess_recognition_results(raw_rec_outputs, valid_indices, crop_metadat
|
|
| 122 |
if not is_overlap: accepted.append(cand)
|
| 123 |
accepted.sort(key=lambda c: c['x_interval'][0])
|
| 124 |
text = ''.join(c['char'] for c in accepted)
|
| 125 |
-
|
|
|
|
|
|
|
| 126 |
return full_results
|
| 127 |
|
| 128 |
# --- 3. main gradio processing function ---
|
| 129 |
|
| 130 |
def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
|
| 131 |
-
"""
|
| 132 |
-
this function is called when the user clicks the 'run ocr' button.
|
| 133 |
-
it takes the user's image and settings, runs the full pipeline,
|
| 134 |
-
and returns the visualized image and the recognized text.
|
| 135 |
-
"""
|
| 136 |
if input_image is None:
|
| 137 |
raise gr.Error("please upload an image to process.")
|
| 138 |
|
|
@@ -142,7 +139,7 @@ def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
|
|
| 142 |
text_boxes = postprocess_detection_results(det_raw, sx, sy, det_threshold)
|
| 143 |
|
| 144 |
if not text_boxes:
|
| 145 |
-
return input_image, "no text detected. try lowering the 'detection confidence' slider."
|
| 146 |
|
| 147 |
# --- recognition stage ---
|
| 148 |
rec_batch, valid_indices, crop_metadata = preprocess_for_recognition(input_image, text_boxes)
|
|
@@ -159,7 +156,7 @@ def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
|
|
| 159 |
x1, y1, x2, y2 = char_info['bbox']
|
| 160 |
cv2.rectangle(output_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
| 161 |
|
| 162 |
-
return
|
| 163 |
|
| 164 |
# --- 4. gradio interface definition ---
|
| 165 |
|
|
@@ -184,20 +181,24 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 184 |
|
| 185 |
with gr.Column(scale=2):
|
| 186 |
output_image = gr.Image(type="numpy", label="ocr result")
|
| 187 |
-
output_text = gr.Textbox(label="recognized text", lines=
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
run_button.click(
|
| 198 |
fn=run_ocr_pipeline,
|
| 199 |
inputs=[input_image, det_threshold, rec_threshold],
|
| 200 |
-
outputs=[output_image, output_text]
|
| 201 |
)
|
| 202 |
|
| 203 |
# --- 5. launch the app ---
|
|
|
|
| 43 |
raise gr.Error(f"failed to load models. please check space logs. error: {e}")
|
| 44 |
|
| 45 |
# --- 2. ocr pipeline helper functions ---
|
| 46 |
+
# (these functions remain unchanged)
|
| 47 |
|
| 48 |
def preprocess_for_detection(image):
|
| 49 |
h_orig, w_orig, _ = image.shape
|
|
|
|
| 122 |
if not is_overlap: accepted.append(cand)
|
| 123 |
accepted.sort(key=lambda c: c['x_interval'][0])
|
| 124 |
text = ''.join(c['char'] for c in accepted)
|
| 125 |
+
# remove interval before returning
|
| 126 |
+
final_chars = [{'char': c['char'], 'bbox': c['bbox'], 'conf': c['conf']} for c in accepted]
|
| 127 |
+
full_results[valid_indices[i]] = {'text': text, 'chars': final_chars}
|
| 128 |
return full_results
|
| 129 |
|
| 130 |
# --- 3. main gradio processing function ---
|
| 131 |
|
| 132 |
def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
if input_image is None:
|
| 134 |
raise gr.Error("please upload an image to process.")
|
| 135 |
|
|
|
|
| 139 |
text_boxes = postprocess_detection_results(det_raw, sx, sy, det_threshold)
|
| 140 |
|
| 141 |
if not text_boxes:
|
| 142 |
+
return input_image, "no text detected. try lowering the 'detection confidence' slider.", None
|
| 143 |
|
| 144 |
# --- recognition stage ---
|
| 145 |
rec_batch, valid_indices, crop_metadata = preprocess_for_recognition(input_image, text_boxes)
|
|
|
|
| 156 |
x1, y1, x2, y2 = char_info['bbox']
|
| 157 |
cv2.rectangle(output_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
| 158 |
|
| 159 |
+
return output_image, "\n".join(full_text), results
|
| 160 |
|
| 161 |
# --- 4. gradio interface definition ---
|
| 162 |
|
|
|
|
| 181 |
|
| 182 |
with gr.Column(scale=2):
|
| 183 |
output_image = gr.Image(type="numpy", label="ocr result")
|
| 184 |
+
output_text = gr.Textbox(label="recognized text", lines=5)
|
| 185 |
+
output_json = gr.Code(label="json output", language="json", lines=5)
|
| 186 |
+
|
| 187 |
+
# example image needs to be in the same folder as app.py
|
| 188 |
+
example_image_path = os.path.join(os.path.dirname(__file__), "ace-attorney.jpg")
|
| 189 |
+
if os.path.exists(example_image_path):
|
| 190 |
+
gr.Examples(
|
| 191 |
+
examples=[example_image_path],
|
| 192 |
+
inputs=[input_image],
|
| 193 |
+
fn=lambda img_path: run_ocr_pipeline(cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB), 0.5, 0.1),
|
| 194 |
+
outputs=[output_image, output_text, output_json],
|
| 195 |
+
cache_examples=True
|
| 196 |
+
)
|
| 197 |
|
| 198 |
run_button.click(
|
| 199 |
fn=run_ocr_pipeline,
|
| 200 |
inputs=[input_image, det_threshold, rec_threshold],
|
| 201 |
+
outputs=[output_image, output_text, output_json]
|
| 202 |
)
|
| 203 |
|
| 204 |
# --- 5. launch the app ---
|