# ImageCA / app.py — image captioning demo (Hugging Face Space)
# Author: BoojithDharshan
# Initial commit: "Create app.py" (0b02ad1, verified)
import gradio as gr
from huggingface_hub import InferenceClient, auth_check
from deep_translator import GoogleTranslator
from PIL import Image
from gradio.themes import Base
import os
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
# Fetch the API token from environment variable.
# NOTE(review): if HF_API_TOKEN is unset this is None and the client falls
# back to anonymous access — gated models will then fail downstream.
hf_api_token = os.getenv("HF_API_TOKEN")
# Hugging Face Inference API client, shared by all requests in this app.
client = InferenceClient(token=hf_api_token)
# Supported languages for translation (codes understood by deep_translator).
# Maps the display name shown in the UI dropdown to its ISO 639-1 code;
# insertion order drives the dropdown ordering.
languages = dict(
    English="en",
    Hindi="hi",
    Tamil="ta",
    Spanish="es",
    French="fr",
    German="de",
    Bengali="bn",
    Telugu="te",
    Marathi="mr",
)
# Check token access to a Hub model repository.
def check_model_access(model_id="Salesforce/blip-image-captioning-large"):
    """Verify that the configured HF token can access ``model_id``.

    Args:
        model_id: Hub repository to probe. Defaults to the captioning model
            used by this app, so existing callers are unaffected.

    Returns:
        A human-readable status string; never raises.
    """
    try:
        auth_check(model_id, token=hf_api_token)
        return "Token has access to the model."
    except GatedRepoError:
        return "Error: Token does not have permission to access this gated repository."
    except RepositoryNotFoundError:
        return "Error: The repository was not found or you do not have access."
    except Exception as e:
        # Startup diagnostic boundary: report anything unexpected as text
        # instead of crashing the app before the UI launches.
        return f"Error checking access: {str(e)}"
# Print access check result (for debugging) — runs once at import time so the
# Space logs show immediately whether the token can reach the caption model.
print(check_model_access())
def generate_caption(image, target_language_name):
    """Caption an uploaded image and translate the caption if requested.

    Args:
        image: PIL image from the Gradio upload widget, or ``None`` when the
            user submits without uploading anything.
        target_language_name: Display name of the target language; expected to
            be a key of the module-level ``languages`` mapping.

    Returns:
        The caption (translated unless English was selected), or an
        ``"Error: ..."`` message describing what went wrong.
    """
    # Gradio passes None when no image was uploaded; fail with a clear
    # message instead of an AttributeError from image.save().
    if image is None:
        return "Error: Please upload an image."
    try:
        # Map the selected language name to its ISO code.
        target_language = languages.get(target_language_name)
        if not target_language:
            return f"Error: Selected language '{target_language_name}' is not supported. Please choose from: {list(languages.keys())}"
        # Serialize the PIL image to PNG bytes for the Inference API.
        from io import BytesIO
        buffer = BytesIO()
        image.save(buffer, format='PNG')
        image_bytes = buffer.getvalue()
        # Caption via the hosted BLIP model.
        result = client.image_to_text(
            image=image_bytes,
            model="Salesforce/blip-image-captioning-large"
        )
        # Extract the generated text from the ImageToTextOutput object.
        english_caption = result.generated_text
        # No translation needed when English is the target.
        if target_language == "en":
            return english_caption
        # Translate the English caption into the selected language.
        translator = GoogleTranslator(source='en', target=target_language)
        return translator.translate(english_caption)
    except Exception as e:
        # UI boundary: surface any network/API failure as text, not a crash.
        return f"Error: {str(e)}"
# Custom theme for the Gradio UI — tweaks the stock Default theme's palette,
# typography, and corner radius; applied to the Interface below.
custom_theme = gr.themes.Default(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="slate",
    text_size="lg",
    radius_size="md",
    font=[gr.themes.GoogleFont("Roboto"), "sans-serif"]  # Roboto with generic fallback
)
# Gradio interface
interface = gr.Interface(
fn=generate_caption,
inputs=[
gr.Image(type="pil", label="Upload an Image"),
gr.Dropdown(
choices=list(languages.keys()),
label="Select Language",
value="English"
)
],
outputs=gr.Textbox(label="Caption", lines=2, placeholder="Caption will appear here..."),
title="Image Caption Generator with Language Selection",
description="Upload an image and select a local language to get a caption.",
theme=custom_theme,
css="""
.gradio-container { max-width: 800px; margin: auto; }
h1 { text-align: center; color: #1E40AF; }
.label { font-weight: bold; }
input, output { border-radius: 8px; }
"""
)
interface.launch()