Spaces:

rtr46
/

meikiocr

Running

App Files Files Community

meikiocr / app.py

rtr46

rerevert

d6bd879 verified about 1 month ago

raw

history blame contribute delete

3.96 kB

	import gradio as gr
	import numpy as np
	import cv2
	import json
	import os
	from meikiocr import MeikiOCR

	# Load the OCR engine once at import time so every request reuses the same instance.
	print("Initializing meikiocr...")
	try:
	    ocr = MeikiOCR(provider='CPUExecutionProvider')
	except Exception as e:
	    # Keep the name bound so later references see None rather than a NameError
	    # if this failure is intercepted (e.g. in an interactive session).
	    ocr = None
	    print(f"Error initializing meikiocr: {e}")
	    # NOTE: this runs at module scope, before any UI has been built, so the
	    # exception aborts startup (fail fast) rather than appearing inside the
	    # Gradio interface. The message therefore points at the space logs, and
	    # the original failure is chained so its traceback is preserved there.
	    raise gr.Error(f"Failed to load OCR models. Please check the space logs for details. Error: {e}") from e
	else:
	    print("meikiocr initialized successfully.")

	def run_ocr_pipeline(input_image, det_threshold, rec_threshold):
	    """
	    Run OCR on an uploaded image and build the three Gradio outputs.

	    Args:
	        input_image: The uploaded image, or None when nothing was uploaded.
	        det_threshold: Detection confidence threshold forwarded to the OCR engine.
	        rec_threshold: Recognition confidence threshold forwarded to the OCR engine.

	    Returns:
	        A tuple ``(image, text, json_text)``: a copy of the input with a box
	        drawn around every character, the non-empty line texts joined by
	        newlines, and the raw OCR results serialised as indented JSON. When
	        the engine returns nothing, the untouched input image, a hint
	        message and an empty string are returned instead.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    if input_image is None:
	        raise gr.Error("Please upload an image to process.")

	    ocr_lines = ocr.run_ocr(input_image, det_threshold=det_threshold, rec_threshold=rec_threshold)
	    if not ocr_lines:
	        return input_image, "No text detected. Try lowering the 'Detection Confidence' slider.", ""

	    # Annotate a copy so the caller's array is never modified.
	    annotated = input_image.copy()
	    box_color = (0, 255, 0)  # green
	    box_thickness = 2
	    for ocr_line in ocr_lines:
	        for char_entry in ocr_line['chars']:
	            left, top, right, bottom = char_entry['bbox']
	            cv2.rectangle(annotated, (left, top), (right, bottom), box_color, box_thickness)

	    # Lines whose text is empty are left out of the plain-text view only;
	    # the JSON view always contains the complete result list.
	    recognized_text = "\n".join(ocr_line['text'] for ocr_line in ocr_lines if ocr_line['text'])
	    json_output = json.dumps(ocr_lines, indent=2, ensure_ascii=False)

	    return annotated, recognized_text, json_output

	# Default confidence thresholds, shared by the sliders and the example
	# handler so the two can never drift apart.
	DEFAULT_DET_THRESHOLD = 0.5
	DEFAULT_REC_THRESHOLD = 0.1

	# Build the Gradio interface: inputs on the left, OCR results on the right.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# meikiocr: Japanese Video Game OCR")
	    gr.Markdown(
	        "Upload a screenshot from a Japanese video game to see the high-accuracy OCR in action. "
	        "The pipeline first detects text lines, then recognizes the characters in each line. "
	        "Adjust the confidence sliders if text is missed or incorrectly detected."
	    )

	    with gr.Row():
	        with gr.Column(scale=1):
	            input_image = gr.Image(type="numpy", label="Upload Image")
	            det_threshold = gr.Slider(minimum=0.1, maximum=1.0, value=DEFAULT_DET_THRESHOLD, step=0.05, label="Detection Confidence")
	            rec_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=DEFAULT_REC_THRESHOLD, step=0.05, label="Recognition Confidence")
	            run_button = gr.Button("Run OCR", variant="primary")

	        with gr.Column(scale=2):
	            output_image = gr.Image(type="numpy", label="OCR Result")
	            output_text = gr.Textbox(label="Recognized Text", lines=5)
	            output_json = gr.Code(label="JSON Output", language="json", lines=5)

	    def process_example(img):
	        """Run the OCR pipeline on an example image with the default thresholds."""
	        return run_ocr_pipeline(img, DEFAULT_DET_THRESHOLD, DEFAULT_REC_THRESHOLD)

	    # Offer the bundled example only when the file sits next to this script.
	    example_image_path = os.path.join(os.path.dirname(__file__), "example.jpg")
	    if os.path.exists(example_image_path):
	        gr.Examples(
	            examples=[example_image_path],
	            inputs=[input_image],
	            outputs=[output_image, output_text, output_json],
	            fn=process_example,
	            cache_examples=True
	        )

	    # Connect the button click to the main processing function.
	    run_button.click(
	        fn=run_ocr_pipeline,
	        inputs=[input_image, det_threshold, rec_threshold],
	        outputs=[output_image, output_text, output_json]
	    )

	    # The footer text is runtime content, so its lines are kept exactly as they were.
	    gr.Markdown(
	        """
	---
	### Official GitHub Repository
	The full source code, documentation, and local command-line script for `meikiocr` are available on GitHub.
	[github.com/rtr46/meikiocr](https://github.com/rtr46/meikiocr)
	"""
	    )

	# Start the server only when this file is executed as a script, so that
	# importing the module does not launch the app as a side effect.
	if __name__ == "__main__":
	    demo.launch()