|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import cv2 |
|
|
import json |
|
|
import os |
|
|
from meikiocr import MeikiOCR |
|
|
|
|
|
# Build the shared OCR engine once at import time; the request handler below
# reads the module-level `ocr` name on every call.
print("Initializing meikiocr...")
try:
    # Only the constructor call sits inside the try body, so unrelated
    # failures are not misreported as a model-loading error.
    ocr = MeikiOCR(provider='CPUExecutionProvider')
except Exception as e:
    # Keep the name bound, log the reason, then abort start-up.
    ocr = None
    print(f"Error initializing meikiocr: {e}")
    # Chain explicitly so the original traceback is reported as the direct
    # cause of the start-up failure.
    raise gr.Error(f"Failed to load OCR models. Please check the space logs for details. Error: {e}") from e
else:
    print("meikiocr initialized successfully.")
|
|
|
|
|
def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
    """Run the OCR engine on an image and build the three UI outputs.

    Args:
        input_image: Image supplied by the caller, or ``None`` when nothing
            was uploaded. It must support ``.copy()`` and be drawable by
            ``cv2.rectangle``.
        det_threshold: Forwarded to ``ocr.run_ocr`` as ``det_threshold``.
        rec_threshold: Forwarded to ``ocr.run_ocr`` as ``rec_threshold``.

    Returns:
        A 3-tuple ``(image, text, json_text)``. When the engine returns
        nothing, the untouched input image, a hint message and an empty
        string. Otherwise a copy of the image with one rectangle per
        character box, the non-empty line texts joined by newlines, and the
        raw results serialised as indented JSON.

    Raises:
        gr.Error: If ``input_image`` is ``None``.
    """
    if input_image is None:
        raise gr.Error("Please upload an image to process.")

    ocr_lines = ocr.run_ocr(
        input_image,
        det_threshold=det_threshold,
        rec_threshold=rec_threshold,
    )
    if not ocr_lines:
        hint = "No text detected. Try lowering the 'Detection Confidence' slider."
        return input_image, hint, ""

    # Draw on a copy so the caller's image is left unmodified.
    annotated = input_image.copy()
    box_color = (0, 255, 0)
    box_thickness = 2

    texts = []
    for entry in ocr_lines:
        line_text = entry['text']
        # Lines with empty text are skipped in the text output, but their
        # character boxes (if any) are still drawn.
        if line_text:
            texts.append(line_text)

        for glyph in entry['chars']:
            left, top, right, bottom = glyph['bbox']
            cv2.rectangle(annotated, (left, top), (right, bottom), box_color, box_thickness)

    return (
        annotated,
        "\n".join(texts),
        # ensure_ascii=False keeps non-ASCII characters readable in the JSON.
        json.dumps(ocr_lines, indent=2, ensure_ascii=False),
    )
|
|
|
|
|
# Assemble the web UI: inputs in the left column, results in the right,
# followed by an optional example, the button wiring and a footer.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # --- Header -----------------------------------------------------------
    gr.Markdown("# meikiocr: Japanese Video Game OCR")
    gr.Markdown(
        "Upload a screenshot from a Japanese video game to see the high-accuracy OCR in action. "
        "The pipeline first detects text lines, then recognizes the characters in each line. "
        "Adjust the confidence sliders if text is missed or incorrectly detected."
    )

    with gr.Row():
        # --- Inputs -------------------------------------------------------
        with gr.Column(scale=1):
            input_image = gr.Image(
                type="numpy",
                label="Upload Image",
            )
            det_threshold = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.5,
                step=0.05,
                label="Detection Confidence",
            )
            rec_threshold = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.1,
                step=0.05,
                label="Recognition Confidence",
            )
            run_button = gr.Button("Run OCR", variant="primary")

        # --- Outputs ------------------------------------------------------
        with gr.Column(scale=2):
            output_image = gr.Image(
                type="numpy",
                label="OCR Result",
            )
            output_text = gr.Textbox(
                label="Recognized Text",
                lines=5,
            )
            output_json = gr.Code(
                label="JSON Output",
                language="json",
                lines=5,
            )

    def process_example(img):
        """Run the pipeline on an example image with fixed thresholds.

        The example widget only supplies the image, so the thresholds are
        pinned to 0.5 (detection) and 0.1 (recognition), the same values the
        sliders start at.
        """
        return run_ocr_pipeline(img, 0.5, 0.1)

    # The example is only offered when the bundled image sits next to this
    # file.
    example_image_path = os.path.join(os.path.dirname(__file__), "example.jpg")
    if os.path.exists(example_image_path):
        gr.Examples(
            examples=[example_image_path],
            inputs=[input_image],
            outputs=[output_image, output_text, output_json],
            fn=process_example,
            cache_examples=True,
        )

    # Button click: feed the image and both slider values to the pipeline and
    # send its three return values to the three output components.
    run_button.click(
        fn=run_ocr_pipeline,
        inputs=[input_image, det_threshold, rec_threshold],
        outputs=[output_image, output_text, output_json],
    )

    # --- Footer -----------------------------------------------------------
    gr.Markdown(
        """
        ---
        ### Official GitHub Repository
        The full source code, documentation, and local command-line script for `meikiocr` are available on GitHub.
        **[github.com/rtr46/meikiocr](https://github.com/rtr46/meikiocr)**
        """
    )
|
|
|
|
|
# Launch the app only when this file is executed as a script, so importing
# the module (for example from tests or tooling) does not launch it.
if __name__ == "__main__":
    demo.launch()