import gradio as gr
import numpy as np
from PIL import Image, ImageEnhance
from ultralytics import YOLO

# --- DOCUMENTATION STRINGS (English Only) ---
GUIDELINE_SETUP = """
## 1. Quick Start Guide: Setup and Run Instructions
This application uses a YOLO model to automatically detect, classify, and extract specific license registration documents (STNK).
1. **Preparation:** Ensure your image clearly shows the target license document.
2. **Upload:** Click the 'Upload License Image' box and select your image (JPG, PNG).
3. **Run:** Click the **"Detect Document"** button.
4. **Review:** The detected documents will appear in the 'Cropped Documents' gallery, and the 'Detection Result' box will show the classification and confidence score.
"""
GUIDELINE_INPUT = """
## 2. Expected Inputs and Preprocessing
| Input Field | Purpose | Requirement |
| :--- | :--- | :--- |
| **Upload License Image** | The image containing the license document you want to detect and classify. | Must be an image file (e.g., JPG, PNG). |
### Automatic Preprocessing Steps:
Before detection, the input image is automatically adjusted to enhance accuracy:
1. **Sharpness:** increased by a factor of 2.0.
2. **Contrast:** increased by a factor of 1.5.
3. **Brightness:** scaled by a factor of 0.8 (slightly darkened).
4. **Resizing:** the image is resized to a width of 448 pixels while maintaining its original aspect ratio.
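
For reference, these steps correspond to the following PIL calls in the `preprocessing` function of this app:

```python
from PIL import ImageEnhance

image = ImageEnhance.Sharpness(image).enhance(2.0)   # sharpen
image = ImageEnhance.Contrast(image).enhance(1.5)    # boost contrast
image = ImageEnhance.Brightness(image).enhance(0.8)  # slightly darken
image = image.resize((448, int(448 * image.height / image.width)))
```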
"""
GUIDELINE_OUTPUT = """
## 3. Expected Outputs (Detection and Classification)
The application produces two outputs based on a successful detection:
1. **Cropped Documents (Gallery):**
    * This gallery displays only the regions of the image where a license document was confidently detected (confidence of at least 80%).
    * If multiple documents are found, all cropped images will appear here.
2. **Detection Result (Textbox):**
    * A text summary listing each detected document, including its specific class name (e.g., 'STNK Class A') and the model's confidence level (as a percentage).
### Failure Modes:
* If "No document detected" is returned, it means the model did not find a document with a confidence level of 80% or higher, or the image quality was too poor for detection.
"""
# --- CORE LOGIC ---
# Load YOLO model
# NOTE: Ensure 'best.pt' is available in the execution directory.
model_path = "./best.pt"
try:
    modelY = YOLO(model_path)
    modelY.to('cpu')
except Exception as e:
    print(f"Error loading model: {e}")
    modelY = None

# Preprocessing function
def preprocessing(image):
    if image.mode != 'RGB':
        image = image.convert('RGB')
    # Enhancement steps
    image = ImageEnhance.Sharpness(image).enhance(2.0)
    image = ImageEnhance.Contrast(image).enhance(1.5)
    image = ImageEnhance.Brightness(image).enhance(0.8)
    # Resize to a fixed width while preserving the aspect ratio
    width = 448
    aspect_ratio = image.height / image.width
    height = int(width * aspect_ratio)
    return image.resize((width, height))

# YOLO document detection and cropping
def detect_and_crop_document(image):
    if modelY is None:
        return [], ["Model not loaded."]
    image_np = np.array(image)
    # Run inference with a confidence threshold of 0.80
    results = modelY(image_np, conf=0.80, device='cpu', verbose=False)
    cropped_images = []
    predictions = []
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = int(box.conf[0].item() * 100)  # Confidence as an integer percentage
            cls = int(box.cls[0].item())
            class_name = modelY.names.get(cls, "Unknown").capitalize()
            cropped_image_np = image_np[y1:y2, x1:x2]
            # Skip degenerate (empty) crops before converting to PIL
            if cropped_image_np.size > 0:
                cropped_image = Image.fromarray(cropped_image_np)
                cropped_images.append(cropped_image)
                predictions.append(f"Detected: STNK {class_name} -- (Confidence: {conf}%)")
    return cropped_images, predictions

# Gradio interface function
def process_image(image):
    if image is None:
        raise gr.Error("Please upload an image.")
    preprocessed_image = preprocessing(image)
    cropped_images, predictions = detect_and_crop_document(preprocessed_image)
    if cropped_images:
        return cropped_images, '\n'.join(predictions)
    if predictions:
        # Surface error messages (e.g., "Model not loaded.") instead of masking them
        return [], '\n'.join(predictions)
    # No detection met the confidence threshold
    return [], "No document detected (confidence threshold not met or image is unclear)."

# --- GRADIO UI SETUP ---
# Define example paths (NOTE: Replace with actual paths if needed)
examples = [
    ["./licence2.jpg"],
    ["./licence.jpg"],
]

with gr.Blocks(css=".gr-button {background-color: #4caf50; color: white; font-size: 16px; padding: 10px 20px; border-radius: 8px;}") as demo:
    gr.Markdown(
        """
<h1 style="color: #4caf50;">License Registration Classification</h1>
<p style="font-size: 18px;">Upload an image and let the YOLO model detect and crop license documents automatically.</p>
"""
    )

    # 1. GUIDELINES SECTION
    with gr.Accordion("User Guidelines and Documentation", open=False):
        gr.Markdown(GUIDELINE_SETUP)
        gr.Markdown("---")
        gr.Markdown(GUIDELINE_INPUT)
        gr.Markdown("---")
        gr.Markdown(GUIDELINE_OUTPUT)

    gr.Markdown("---")

    # 2. APPLICATION INTERFACE
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            input_image = gr.Image(type="pil", label="Upload License Image", interactive=True)
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Detect Document")
        with gr.Column(scale=2):
            output_image = gr.Gallery(label="Cropped Documents", interactive=False, object_fit="contain")
            output_text = gr.Textbox(label="Detection Result", interactive=False, lines=5)

    submit_btn.click(process_image, inputs=input_image, outputs=[output_image, output_text])
    # Reset all three components: gallery, textbox, and input image
    clear_btn.click(lambda: (None, "", None), outputs=[output_image, output_text, input_image], show_progress=False)
gr.Markdown("---")
# 3. EXAMPLES SECTION
gr.Markdown("## Sample Data for Testing")
gr.Examples(
examples=examples,
inputs=input_image,
outputs=[output_image, output_text],
fn=process_image,
cache_examples=False,
label="Click to load and run a sample detection.",
)
demo.queue()
demo.launch()
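
# To run locally (assuming this file is saved as app.py, with 'best.pt' and the
# sample images present in the working directory):
#   python app.py
# Gradio serves the interface at http://127.0.0.1:7860 by default.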