"""Gradio app that detects, classifies and crops STNK license documents.

An uploaded image is enhanced and resized, passed through a YOLO model
loaded from ``./best.pt``, and every detection at or above the confidence
threshold is cropped out and shown in a gallery next to a text summary.
"""

import os

import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageEnhance
from ultralytics import YOLO

# --- DOCUMENTATION STRINGS (English Only) ---

GUIDELINE_SETUP = """
## 1. Quick Start Guide: Setup and Run Instructions

This application uses a YOLO model to automatically detect, classify, and extract specific license registration documents (STNK).

1. **Preparation:** Ensure your image clearly shows the target license document.
2. **Upload:** Click the 'Upload License Image' box and select your image (JPG, PNG).
3. **Run:** Click the **"Detect Document"** button.
4. **Review:** The detected documents will appear in the 'Cropped Documents' gallery, and the 'Detection Result' box will show the classification and confidence score.
"""

GUIDELINE_INPUT = """
## 2. Expected Inputs and Preprocessing

| Input Field | Purpose | Requirement |
| :--- | :--- | :--- |
| **Upload License Image** | The image containing the license document you want to detect and classify. | Must be an image file (e.g., JPG, PNG). |

### Automatic Preprocessing Steps:
Before detection, the input image is automatically adjusted to enhance accuracy:

1. **Sharpness:** Increased sharpness by 2.0.
2. **Contrast:** Increased contrast by 1.5.
3. **Brightness:** Slightly reduced brightness by 0.8.
4. **Resizing:** The image is resized to a width of 448 pixels while maintaining its original aspect ratio.
"""

GUIDELINE_OUTPUT = """
## 3. Expected Outputs (Detection and Classification)

The application produces two outputs based on a successful detection:

1. **Cropped Documents (Gallery):**
    * This gallery displays only the regions of the image where a license document was confidently detected (Confidence > 80%).
    * If multiple documents are found, all cropped images will appear here.
2. **Detection Result (Textbox):**
    * A text summary listing each detected document, including its specific class name (e.g., 'STNK Class A'), and the model's confidence level (as a percentage).

### Failure Modes:
* If "No document detected" is returned, it means the model did not find a document with a confidence level of 80% or higher, or the image quality was too poor for detection.
"""

# --- CORE LOGIC ---

# Width (in pixels) every image is resized to before inference.
TARGET_WIDTH = 448
# Minimum detection confidence passed to the YOLO model.
CONF_THRESHOLD = 0.80

# Load YOLO model
# NOTE: Ensure 'best.pt' is available in the execution directory.
model_path = "./best.pt"
try:
    modelY = YOLO(model_path)
    modelY.to('cpu')
except Exception as e:
    # Deliberate best-effort: keep the UI importable and report the problem
    # to the user at detection time instead of crashing on startup.
    print(f"Error loading model: {e}")
    modelY = None


def preprocessing(image):
    """Enhance and resize a PIL image before detection.

    The image is converted to RGB if needed, sharpened (2.0), given more
    contrast (1.5), darkened slightly (0.8) and resized to ``TARGET_WIDTH``
    pixels wide with its aspect ratio preserved.

    Args:
        image: The PIL image to prepare.

    Returns:
        A new RGB PIL image that is ``TARGET_WIDTH`` pixels wide.
    """
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Enhancement steps
    image = ImageEnhance.Sharpness(image).enhance(2.0)
    image = ImageEnhance.Contrast(image).enhance(1.5)
    image = ImageEnhance.Brightness(image).enhance(0.8)

    # Resizing while preserving aspect ratio. Extremely wide images would
    # truncate to a height of 0, which cannot be resized to, so use at
    # least one pixel.
    aspect_ratio = image.height / image.width
    height = max(1, int(TARGET_WIDTH * aspect_ratio))
    return image.resize((TARGET_WIDTH, height))


def detect_and_crop_document(image):
    """Run YOLO on an image and crop out every detected document.

    Args:
        image: The (already preprocessed) PIL image to run detection on.

    Returns:
        A ``(cropped_images, predictions)`` tuple. ``cropped_images`` is a
        list of PIL images, one per detection with a non-empty crop, and
        ``predictions`` is the parallel list of summary strings. When the
        model failed to load, the result is ``([], ["Model not loaded."])``.
    """
    if modelY is None:
        return [], ["Model not loaded."]

    image_np = np.array(image)
    img_height, img_width = image_np.shape[:2]

    # Run inference with confidence threshold 0.80
    results = modelY(image_np, conf=CONF_THRESHOLD, device='cpu', verbose=False)

    cropped_images = []
    predictions = []
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            # Clamp to the image: a negative coordinate would otherwise be
            # treated by numpy as an index counted from the far edge and
            # silently yield the wrong (or an empty) region.
            x1 = min(max(x1, 0), img_width)
            x2 = min(max(x2, 0), img_width)
            y1 = min(max(y1, 0), img_height)
            y2 = min(max(y2, 0), img_height)

            # .item() converts the single-element tensors to Python scalars.
            conf = int(box.conf[0].item() * 100)
            cls = int(box.cls[0].item())
            class_name = modelY.names.get(cls, "Unknown").capitalize()

            cropped_image_np = image_np[y1:y2, x1:x2]
            # Check for valid crop size before converting to PIL
            if cropped_image_np.size > 0:
                cropped_images.append(Image.fromarray(cropped_image_np))
                predictions.append(
                    f"Detected: STNK {class_name} -- (Confidence: {conf}%)"
                )

    return cropped_images, predictions


def process_image(image):
    """Gradio callback: preprocess the upload, detect documents, format output.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.

    Returns:
        A ``(gallery_images, result_text)`` tuple for the gallery and the
        textbox.

    Raises:
        gr.Error: If no image was uploaded.
    """
    if image is None:
        raise gr.Error("Please upload an image.")

    preprocessed_image = preprocessing(image)
    cropped_images, predictions = detect_and_crop_document(preprocessed_image)

    if cropped_images:
        return cropped_images, '\n'.join(predictions)

    if predictions:
        # The detector returned a message without any crops (this is how it
        # reports that the model is not loaded); show that message rather
        # than the misleading "No document detected".
        return [], '\n'.join(predictions)

    # If no documents are detected with sufficient confidence
    return [], "No document detected (Confidence threshold not met or image is unclear)."


# --- GRADIO UI SETUP ---

# Define example paths (NOTE: Replace with actual paths if needed).
# Paths that do not exist are skipped so a checkout without the sample
# images still shows a working interface.
examples = [
    [path]
    for path in ("./licence2.jpg", "./licence.jpg")
    if os.path.exists(path)
]

with gr.Blocks(css=".gr-button {background-color: #4caf50; color: white; font-size: 16px; padding: 10px 20px; border-radius: 8px;}") as demo:
    gr.Markdown(
        """Upload an image and let the YOLO model detect and crop license documents automatically.
        """
    )

    # 1. GUIDELINES SECTION
    with gr.Accordion("User Guidelines and Documentation", open=False):
        gr.Markdown(GUIDELINE_SETUP)
        gr.Markdown("---")
        gr.Markdown(GUIDELINE_INPUT)
        gr.Markdown("---")
        gr.Markdown(GUIDELINE_OUTPUT)

    gr.Markdown("---")

    # 2. APPLICATION INTERFACE
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            input_image = gr.Image(type="pil", label="Upload License Image", interactive=True)
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Detect Document")
        with gr.Column(scale=2):
            output_image = gr.Gallery(label="Cropped Documents", interactive=False, object_fit="contain")
            output_text = gr.Textbox(label="Detection Result", interactive=False, lines=5)

    submit_btn.click(process_image, inputs=input_image, outputs=[output_image, output_text])
    # One return value per output component: gallery, textbox, input image.
    clear_btn.click(
        lambda: (None, "", None),
        outputs=[output_image, output_text, input_image],
        show_progress=False,
    )

    # 3. EXAMPLES SECTION
    if examples:
        gr.Markdown("---")
        gr.Markdown("## Sample Data for Testing")
        gr.Examples(
            examples=examples,
            inputs=input_image,
            outputs=[output_image, output_text],
            fn=process_image,
            cache_examples=False,
            label="Click to load and run a sample detection.",
        )

demo.queue()
demo.launch()