# meikiocr / app.py
# rtr46's picture
# rerevert
# d6bd879 verified
import gradio as gr
import numpy as np
import cv2
import json
import os
from meikiocr import MeikiOCR
print("Initializing meikiocr...")
# Load the OCR engine once at import time so every request reuses it.
# A gr.Error is only shown to the user when it is raised from inside an event
# handler; raising it here would merely abort the script before any UI exists.
# The failure is therefore recorded and reported by run_ocr_pipeline instead.
try:
    ocr = MeikiOCR(provider='CPUExecutionProvider')
except Exception as e:
    ocr = None
    # Keep the message as a string: the name `e` is unbound once this
    # except block ends.
    _ocr_init_error = str(e)
    print(f"Error initializing meikiocr: {e}")
else:
    _ocr_init_error = None
    print("meikiocr initialized successfully.")


def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
    """Run OCR on an uploaded image and format the results for the Gradio UI.

    Args:
        input_image: The uploaded image as a NumPy array, or ``None`` when
            nothing was uploaded.
        det_threshold: Forwarded to ``ocr.run_ocr`` as ``det_threshold``.
        rec_threshold: Forwarded to ``ocr.run_ocr`` as ``rec_threshold``.

    Returns:
        A 3-tuple ``(image, text, json_text)``:
        * ``image`` - a copy of the input with a green rectangle drawn around
          every character box, or the untouched input when nothing was found;
        * ``text`` - the non-empty line texts joined with newlines, or a hint
          message when nothing was found;
        * ``json_text`` - the raw results serialised as indented JSON, or an
          empty string when nothing was found.

    Raises:
        gr.Error: If the OCR models failed to load at startup, or if no image
            was supplied.
    """
    if ocr is None:
        # Surface the start-up failure where Gradio can actually display it.
        raise gr.Error(
            "Failed to load OCR models. Please check the space logs for details. "
            f"Error: {_ocr_init_error}"
        )
    if input_image is None:
        raise gr.Error("Please upload an image to process.")

    results = ocr.run_ocr(input_image, det_threshold=det_threshold, rec_threshold=rec_threshold)
    if not results:
        return input_image, "No text detected. Try lowering the 'Detection Confidence' slider.", ""

    # Draw on a copy so the caller's array is left unmodified.
    output_image = input_image.copy()
    full_text_lines = []

    for line_result in results:
        # Lines whose text is empty are left out of the text output.
        if line_result['text']:
            full_text_lines.append(line_result['text'])

        # Outline each character box in green, 2 px thick.
        for char_info in line_result['chars']:
            x1, y1, x2, y2 = char_info['bbox']
            cv2.rectangle(output_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    recognized_text = "\n".join(full_text_lines)
    # ensure_ascii=False keeps non-ASCII characters readable in the JSON panel.
    json_output = json.dumps(results, indent=2, ensure_ascii=False)
    return output_image, recognized_text, json_output
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page title and a short description of what the demo does.
    gr.Markdown("# meikiocr: Japanese Video Game OCR")
    gr.Markdown(
        "Upload a screenshot from a Japanese video game to see the high-accuracy OCR in action. "
        "The pipeline first detects text lines, then recognizes the characters in each line. "
        "Adjust the confidence sliders if text is missed or incorrectly detected."
    )

    with gr.Row():
        # Left column: image upload, the two confidence sliders and the run button.
        with gr.Column(scale=1):
            input_image = gr.Image(type="numpy", label="Upload Image")
            det_threshold = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.5,
                step=0.05,
                label="Detection Confidence",
            )
            rec_threshold = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.1,
                step=0.05,
                label="Recognition Confidence",
            )
            run_button = gr.Button("Run OCR", variant="primary")

        # Right column (twice as wide): annotated image, plain text and JSON.
        with gr.Column(scale=2):
            output_image = gr.Image(type="numpy", label="OCR Result")
            output_text = gr.Textbox(label="Recognized Text", lines=5)
            output_json = gr.Code(label="JSON Output", language="json", lines=5)

    # Examples only supply an image, so this wrapper pins the thresholds to
    # the same values the sliders start at (0.5 and 0.1).
    def process_example(img):
        return run_ocr_pipeline(img, det_threshold=0.5, rec_threshold=0.1)

    # Offer the bundled example image only when it sits next to this script.
    app_dir = os.path.dirname(__file__)
    example_image_path = os.path.join(app_dir, "example.jpg")
    if os.path.exists(example_image_path):
        gr.Examples(
            examples=[example_image_path],
            inputs=[input_image],
            outputs=[output_image, output_text, output_json],
            fn=process_example,
            cache_examples=True,
        )

    # Clicking the button runs the pipeline with the current slider values.
    run_button.click(
        fn=run_ocr_pipeline,
        inputs=[input_image, det_threshold, rec_threshold],
        outputs=[output_image, output_text, output_json],
    )

    # Footer pointing at the upstream repository.
    gr.Markdown(
        "\n"
        "---\n"
        "### Official GitHub Repository\n"
        "The full source code, documentation, and local command-line script for `meikiocr` are available on GitHub.\n"
        "**[github.com/rtr46/meikiocr](https://github.com/rtr46/meikiocr)**\n"
    )

demo.launch()