import gradio as gr
import numpy as np
import cv2
import json
import os
from meikiocr import MeikiOCR

print("Initializing meikiocr...")
try:
    ocr = MeikiOCR(provider='CPUExecutionProvider')
    print("meikiocr initialized successfully.")
except Exception as e:
    # Keep the app running so the failure can be reported inside the interface;
    # run_ocr_pipeline raises a gr.Error when ocr is None.
    ocr = None
    print(f"Error initializing meikiocr: {e}")

def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
    """
    Takes a user-uploaded image and confidence thresholds, runs the OCR pipeline,
    and returns the results formatted for the Gradio interface.
    """
    if ocr is None:
        raise gr.Error("OCR models failed to load. Please check the space logs for details.")
    if input_image is None:
        raise gr.Error("Please upload an image to process.")

    results = ocr.run_ocr(input_image, det_threshold=det_threshold, rec_threshold=rec_threshold)

    if not results:
        return input_image, "No text detected. Try lowering the 'Detection Confidence' slider.", ""

    # Prepare the outputs for Gradio
    output_image = input_image.copy()
    full_text_lines = []
    
    # Draw bounding boxes and collect text
    for line_result in results:
        if line_result['text']:
            full_text_lines.append(line_result['text'])
        # Draw a green rectangle for each recognized character
        for char_info in line_result['chars']:
            x1, y1, x2, y2 = char_info['bbox']
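            # Assumes bbox values are integer pixel coordinates; cast with int() if the library returns floats.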
            cv2.rectangle(output_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    
    # Format the full text and JSON output
    recognized_text = "\n".join(full_text_lines)
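    # Assumes the result dicts contain only JSON-serializable types (str, int, float, list).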
    json_output = json.dumps(results, indent=2, ensure_ascii=False)
    
    return output_image, recognized_text, json_output

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# meikiocr: Japanese Video Game OCR")
    gr.Markdown(
        "Upload a screenshot from a Japanese video game to see the high-accuracy OCR in action. "
        "The pipeline first detects text lines, then recognizes the characters in each line. "
        "Adjust the confidence sliders if text is missed or incorrectly detected."
    )
    
    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(type="numpy", label="Upload Image")
            det_threshold = gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.05, label="Detection Confidence")
            rec_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.05, label="Recognition Confidence")
            run_button = gr.Button("Run OCR", variant="primary")
            
        with gr.Column(scale=2):
            output_image = gr.Image(type="numpy", label="OCR Result")
            output_text = gr.Textbox(label="Recognized Text", lines=5)
            output_json = gr.Code(label="JSON Output", language="json", lines=5)

    # Wrapper so the example in gr.Examples runs the pipeline with the default thresholds.
    def process_example(img):
        return run_ocr_pipeline(img, 0.5, 0.1)

    # Load the example image if it exists
    example_image_path = os.path.join(os.path.dirname(__file__), "example.jpg")
    if os.path.exists(example_image_path):
        gr.Examples(
            examples=[example_image_path],
            inputs=[input_image],
            outputs=[output_image, output_text, output_json],
            fn=process_example,
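            # cache_examples=True precomputes the example's output at startup so clicking it returns instantly.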
            cache_examples=True
        )
            
    # Connect the button click to the main processing function
    run_button.click(
        fn=run_ocr_pipeline,
        inputs=[input_image, det_threshold, rec_threshold],
        outputs=[output_image, output_text, output_json]
    )

    gr.Markdown(
        """
        ---
        ### Official GitHub Repository
        The full source code, documentation, and local command-line script for `meikiocr` are available on GitHub.
        **[github.com/rtr46/meikiocr](https://github.com/rtr46/meikiocr)**
        """
    )

demo.launch()