rtr46 committed on
Commit
893788d
·
verified ·
1 Parent(s): 1457119

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -19
app.py CHANGED
@@ -43,7 +43,7 @@ except Exception as e:
43
  raise gr.Error(f"failed to load models. please check space logs. error: {e}")
44
 
45
  # --- 2. ocr pipeline helper functions ---
46
- # these are the core processing functions adapted from meiki_ocr.py
47
 
48
  def preprocess_for_detection(image):
49
  h_orig, w_orig, _ = image.shape
@@ -122,17 +122,14 @@ def postprocess_recognition_results(raw_rec_outputs, valid_indices, crop_metadat
122
  if not is_overlap: accepted.append(cand)
123
  accepted.sort(key=lambda c: c['x_interval'][0])
124
  text = ''.join(c['char'] for c in accepted)
125
- full_results[valid_indices[i]] = {'text': text, 'chars': accepted}
 
 
126
  return full_results
127
 
128
  # --- 3. main gradio processing function ---
129
 
130
  def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
131
- """
132
- this function is called when the user clicks the 'run ocr' button.
133
- it takes the user's image and settings, runs the full pipeline,
134
- and returns the visualized image and the recognized text.
135
- """
136
  if input_image is None:
137
  raise gr.Error("please upload an image to process.")
138
 
@@ -142,7 +139,7 @@ def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
142
  text_boxes = postprocess_detection_results(det_raw, sx, sy, det_threshold)
143
 
144
  if not text_boxes:
145
- return input_image, "no text detected. try lowering the 'detection confidence' slider."
146
 
147
  # --- recognition stage ---
148
  rec_batch, valid_indices, crop_metadata = preprocess_for_recognition(input_image, text_boxes)
@@ -159,7 +156,7 @@ def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
159
  x1, y1, x2, y2 = char_info['bbox']
160
  cv2.rectangle(output_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
161
 
162
- return cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB), "\n".join(full_text)
163
 
164
  # --- 4. gradio interface definition ---
165
 
@@ -184,20 +181,24 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
184
 
185
  with gr.Column(scale=2):
186
  output_image = gr.Image(type="numpy", label="ocr result")
187
- output_text = gr.Textbox(label="recognized text", lines=10)
188
-
189
- gr.Examples(
190
- examples=[os.path.join(os.path.dirname(__file__), "example.jpg")],
191
- inputs=[input_image],
192
- outputs=[output_image, output_text],
193
- fn=lambda img: run_ocr_pipeline(img, 0.5, 0.1),
194
- cache_examples=True
195
- )
 
 
 
 
196
 
197
  run_button.click(
198
  fn=run_ocr_pipeline,
199
  inputs=[input_image, det_threshold, rec_threshold],
200
- outputs=[output_image, output_text]
201
  )
202
 
203
  # --- 5. launch the app ---
 
43
  raise gr.Error(f"failed to load models. please check space logs. error: {e}")
44
 
45
  # --- 2. ocr pipeline helper functions ---
46
+ # (these functions remain unchanged)
47
 
48
  def preprocess_for_detection(image):
49
  h_orig, w_orig, _ = image.shape
 
122
  if not is_overlap: accepted.append(cand)
123
  accepted.sort(key=lambda c: c['x_interval'][0])
124
  text = ''.join(c['char'] for c in accepted)
125
+ # remove interval before returning
126
+ final_chars = [{'char': c['char'], 'bbox': c['bbox'], 'conf': c['conf']} for c in accepted]
127
+ full_results[valid_indices[i]] = {'text': text, 'chars': final_chars}
128
  return full_results
129
 
130
  # --- 3. main gradio processing function ---
131
 
132
  def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
 
 
 
 
 
133
  if input_image is None:
134
  raise gr.Error("please upload an image to process.")
135
 
 
139
  text_boxes = postprocess_detection_results(det_raw, sx, sy, det_threshold)
140
 
141
  if not text_boxes:
142
+ return input_image, "no text detected. try lowering the 'detection confidence' slider.", None
143
 
144
  # --- recognition stage ---
145
  rec_batch, valid_indices, crop_metadata = preprocess_for_recognition(input_image, text_boxes)
 
156
  x1, y1, x2, y2 = char_info['bbox']
157
  cv2.rectangle(output_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
158
 
159
+ return output_image, "\n".join(full_text), results
160
 
161
  # --- 4. gradio interface definition ---
162
 
 
181
 
182
  with gr.Column(scale=2):
183
  output_image = gr.Image(type="numpy", label="ocr result")
184
+ output_text = gr.Textbox(label="recognized text", lines=5)
185
+ output_json = gr.Code(label="json output", language="json", lines=5)
186
+
187
+ # example image needs to be in the same folder as app.py
188
+ example_image_path = os.path.join(os.path.dirname(__file__), "ace-attorney.jpg")
189
+ if os.path.exists(example_image_path):
190
+ gr.Examples(
191
+ examples=[example_image_path],
192
+ inputs=[input_image],
193
+ fn=lambda img_path: run_ocr_pipeline(cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB), 0.5, 0.1),
194
+ outputs=[output_image, output_text, output_json],
195
+ cache_examples=True
196
+ )
197
 
198
  run_button.click(
199
  fn=run_ocr_pipeline,
200
  inputs=[input_image, det_threshold, rec_threshold],
201
+ outputs=[output_image, output_text, output_json]
202
  )
203
 
204
  # --- 5. launch the app ---