|
|
import torch |
|
|
import cv2 |
|
|
import numpy as np |
|
|
import onnxruntime as ort |
|
|
import time |
|
|
|
|
|
|
|
|
# Size (in pixels) the input image is resized to before inference. Both
# values are also embedded in the model and output file names below.
INPUT_WIDTH = 320
INPUT_HEIGHT = 192

# ONNX model file to load; the name encodes the input resolution.
MODEL_PATH = f"meiki.text.detect.v0.1.{INPUT_WIDTH}x{INPUT_HEIGHT}.onnx"

# Image to run detection on (plain string: there is nothing to interpolate).
INPUT_IMAGE_PATH = "input.jpg"

# Where the annotated copy of the input image is written.
OUTPUT_IMAGE_PATH = f"output.{INPUT_WIDTH}x{INPUT_HEIGHT}.jpg"

# Detections are kept only when their score is strictly greater than this.
CONFIDENCE_THRESHOLD = 0.4
|
|
|
|
|
def resize(image: np.ndarray, w: int, h: int):
    """Stretch *image* to exactly ``w`` x ``h`` pixels.

    The aspect ratio is NOT preserved: the image is scaled independently
    along each axis so that it fills the requested size.

    Args:
        image: Image array whose first two axes are (height, width). Both
            2-D (grayscale) and 3-D (height, width, channels) arrays are
            accepted.
        w: Target width in pixels.
        h: Target height in pixels.

    Returns:
        A ``(resized_image, ratio_w, ratio_h)`` tuple where ``ratio_w`` is
        ``w / original_width`` and ``ratio_h`` is ``h / original_height``.
        Dividing a coordinate in the resized image by the matching ratio
        maps it back to the original image.
    """
    # Only the first two axes are needed; slicing (instead of unpacking three
    # values) keeps this working for 2-D grayscale arrays as well.
    original_height, original_width = image.shape[:2]

    ratio_w = w / original_width
    ratio_h = h / original_height

    # cv2.resize takes the destination size as (width, height).
    resized_image = cv2.resize(image, (w, h), interpolation=cv2.INTER_LINEAR)

    return resized_image, ratio_w, ratio_h
|
|
|
|
|
def main():
    """Run text detection on ``INPUT_IMAGE_PATH`` and save an annotated copy.

    Steps:
        1. Load the ONNX model at ``MODEL_PATH``.
        2. Read the input image and resize it to
           ``INPUT_WIDTH`` x ``INPUT_HEIGHT``.
        3. Build the two model inputs (image tensor and size tensor).
        4. Run inference 10 times, printing the wall-clock time of each run.
        5. Draw every box whose score is greater than
           ``CONFIDENCE_THRESHOLD`` onto a copy of the original image.
        6. Write the annotated image to ``OUTPUT_IMAGE_PATH``.

    Failures while loading the model, reading the image or writing the
    result are reported on stdout and end the function early.
    """
    # --- 1. Load the model --------------------------------------------------
    try:
        # Providers are listed in priority order; naming the CPU provider
        # after CUDA gives an explicit fallback when no usable GPU is present.
        session = ort.InferenceSession(
            MODEL_PATH,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        print("Session providers:", session.get_providers())
        print(f"Successfully loaded model: {MODEL_PATH}")
    except Exception as e:
        print(f"Error: Failed to load the ONNX model. Make sure '{MODEL_PATH}' exists.")
        print(f"Details: {e}")
        return

    # --- 2. Load the image --------------------------------------------------
    try:
        original_image = cv2.imread(INPUT_IMAGE_PATH)
    except Exception as e:
        print(f"Error: {e}")
        return
    # cv2.imread signals an unreadable/missing file by returning None.
    if original_image is None:
        print(f"Error: Image not found at '{INPUT_IMAGE_PATH}'")
        return
    print(f"Successfully loaded image: {INPUT_IMAGE_PATH}")

    # --- 3. Pre-process -----------------------------------------------------
    resized_image, ratio_w, ratio_h = resize(original_image, INPUT_WIDTH, INPUT_HEIGHT)

    # Scale pixel values to [0, 1] as float32.
    # NOTE(review): cv2.imread returns channels in BGR order and no colour
    # conversion is applied here - confirm this matches what the model expects.
    img_normalized = resized_image.astype(np.float32) / 255.0

    # (H, W, C) -> (C, H, W), then add a leading axis -> (1, C, H, W).
    img_transposed = np.transpose(img_normalized, (2, 0, 1))
    image_input_tensor = np.expand_dims(img_transposed, axis=0)

    # Second model input: a (1, 2) int64 array holding (width, height) of the
    # resized image, so box coordinates come back in resized-image pixels
    # (presumably - verify against the model's documentation).
    sizes_input_tensor = np.array([[INPUT_WIDTH, INPUT_HEIGHT]], dtype=np.int64)

    # The model's inputs are bound positionally: image first, sizes second.
    input_names = [inp.name for inp in session.get_inputs()]
    inputs = {
        input_names[0]: image_input_tensor,
        input_names[1]: sizes_input_tensor,
    }

    # --- 4. Inference (repeated to show per-run timing) ----------------------
    outputs = None
    for _ in range(10):
        start = time.perf_counter()
        outputs = session.run(None, inputs)
        print(f"runtime {time.perf_counter() - start}")

    # The model returns three outputs; the labels are not used below.
    _labels, boxes, scores = outputs

    # --- 5. Post-process ----------------------------------------------------
    # Index 0 selects the results for the single image that was submitted.
    boxes = boxes[0]
    scores = scores[0]

    print(f"Model returned {len(boxes)} boxes. Filtering with confidence > {CONFIDENCE_THRESHOLD}...")

    # Draw on a copy so the loaded image itself stays untouched.
    output_image = original_image.copy()

    confident_boxes_count = 0
    for box, score in zip(boxes, scores):
        if not score > CONFIDENCE_THRESHOLD:
            continue
        confident_boxes_count += 1

        x_min, y_min, x_max, y_max = box

        # Undo the resize scaling to get coordinates in the original image.
        top_left = (int(x_min / ratio_w), int(y_min / ratio_h))
        bottom_right = (int(x_max / ratio_w), int(y_max / ratio_h))

        # Green rectangle, 2 px thick.
        cv2.rectangle(output_image, top_left, bottom_right, (0, 255, 0), 2)

    print(f"Found {confident_boxes_count} confident boxes.")

    # --- 6. Save ------------------------------------------------------------
    # cv2.imwrite reports failure through its boolean return value, so check
    # it before claiming success.
    if not cv2.imwrite(OUTPUT_IMAGE_PATH, output_image):
        print(f"Error: Failed to save result to '{OUTPUT_IMAGE_PATH}'")
        return
    print(f"Successfully saved result to: {OUTPUT_IMAGE_PATH}")
|
|
|
|
|
|
|
|
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|