| import streamlit as st |
| import tensorflow as tf |
| from PIL import Image |
| import numpy as np |
| import json |
| from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input |
| from tensorflow.keras.preprocessing.image import img_to_array |
| from tensorflow.keras.preprocessing.text import Tokenizer,tokenizer_from_json |
| from tensorflow.keras.preprocessing.sequence import pad_sequences |
| from tensorflow.keras.models import Model |
|
|
| from keras.models import load_model |
|
|
| |
# Page configuration is issued before anything else so that it is the first
# Streamlit command executed on every script run.
st.set_page_config(page_title="Image Captioning App", layout="wide")

# Maximum caption length in tokens; used as the decoding step limit and as the
# padding length for the text input of the captioning model.
max_length = 35

# Streamlit re-executes this script on every widget interaction, so loading
# the networks at import time repeats that work on each rerun. Caching keeps
# one loaded copy alive. ``st.cache_resource`` is not present on older
# Streamlit releases; fall back to an identity decorator there, which simply
# keeps the previous (uncached) behaviour.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_caption_model():
    """Load the trained captioning network from ``image_caption.h5``."""
    return load_model('image_caption.h5')


@_cache_resource
def _load_tokenizer():
    """Rebuild the Keras tokenizer stored in ``tokenizer_config.json``."""
    with open('tokenizer_config.json', 'r', encoding='utf-8') as f:
        tokenizer_config = json.load(f)
    # ``tokenizer_from_json`` parses a JSON *string*. If the file holds the
    # configuration object directly (rather than a JSON-encoded string),
    # ``json.load`` yields a dict, so serialise it back before handing it on.
    if not isinstance(tokenizer_config, str):
        tokenizer_config = json.dumps(tokenizer_config)
    return tokenizer_from_json(tokenizer_config)


@_cache_resource
def _load_feature_extractor():
    """Build VGG16 truncated at its second-to-last layer."""
    base = VGG16()
    return Model(inputs=base.inputs, outputs=base.layers[-2].output)


model = _load_caption_model()
tokenizer = _load_tokenizer()
vgg_model = _load_feature_extractor()
|
|
|
|
| |
def preprocess_image(image):
    """Turn a PIL image into a single-item batch for the VGG16 extractor.

    The image is converted to RGB, resized to 224x224, turned into an array,
    given a leading batch axis and finally passed through
    ``preprocess_input``.
    """
    rgb = image.convert("RGB").resize((224, 224))
    pixels = img_to_array(rgb)
    # Prepend the batch dimension: (h, w, c) -> (1, h, w, c).
    batch = pixels.reshape((1,) + tuple(pixels.shape))
    return preprocess_input(batch)
|
|
| |
def predict(image):
    """Generate a caption for a PIL image.

    The image is preprocessed, run through the VGG16 feature extractor and
    decoded with ``predict_caption``.

    Returns:
        The decoded text with the leading ``startseq`` marker removed and the
        trailing ``endseq`` marker removed when it is present. The separator
        space that followed ``startseq`` is kept, so the return format for a
        normally terminated caption is the same as before.
    """
    image = preprocess_image(image)
    feature = vgg_model.predict(image, verbose=0)
    caption = predict_caption(model, feature, tokenizer, max_length)

    start_marker = "startseq"
    end_marker = " endseq"

    if caption.startswith(start_marker):
        caption = caption[len(start_marker):]
    # Strip the end marker only when the decoder actually emitted it. Decoding
    # can also stop at the length limit or on an index without a word; a
    # fixed-width slice would then cut off the last characters of real text.
    if caption.endswith(end_marker):
        caption = caption[:-len(end_marker)]
    return caption
|
|
def idx_word(integer, tok):
    """Return the word the tokenizer maps to index *integer*, or ``None``.

    Args:
        integer: Vocabulary index to look up (plain or NumPy integer).
        tok: Tokenizer-like object exposing ``word_index`` (word -> index)
            and, optionally, ``index_word`` (index -> word).

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    # Fast path: use the reverse mapping when the tokenizer provides one,
    # instead of scanning the whole vocabulary for every generated token.
    reverse = getattr(tok, "index_word", None)
    if reverse:
        word = reverse.get(integer)
        if word is not None:
            return word

    # Fallback keeps the original behaviour for tokenizer-like objects that
    # only carry ``word_index`` (or whose reverse map lacks the entry).
    for word, index in tok.word_index.items():
        if index == integer:
            return word
    return None
|
|
def predict_caption(model, image, tok, max_len):
    """Greedily decode a caption for the given image features.

    Decoding starts from ``startseq``. At each of at most ``max_len`` steps
    the text so far is tokenised and padded, the model is queried together
    with the image features, and the highest-scoring index is turned into a
    word. Decoding stops early when that index has no word or when the word
    is ``endseq``.

    Returns:
        The space-joined text, including ``startseq`` and, if it was
        produced, ``endseq``.
    """
    words = ["startseq"]
    for _ in range(max_len):
        encoded = tok.texts_to_sequences([" ".join(words)])[0]
        padded = pad_sequences([encoded], max_len)
        scores = model.predict([image, padded], verbose=0)
        next_word = idx_word(np.argmax(scores), tok)
        if next_word is None:
            break
        words.append(next_word)
        if next_word == 'endseq':
            break
    return " ".join(words)
|
|
| |
def main():
    """Render the page: upload an image, preview it and caption it on demand."""
    st.title("Image Captioning App")
    # The app produces captions, not class labels; the description says so.
    st.write("Upload an image and the app will generate a caption for it.")

    uploaded_image = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        # Nothing uploaded yet: only the title, description and uploader show.
        return

    image = Image.open(uploaded_image)
    st.image(image, caption='Uploaded Image', use_column_width=True)
    st.write("")

    if st.button("Generate Caption"):
        with st.spinner("Generating..."):
            predictions = predict(image)

        # ``predict`` may return the caption with surrounding separator
        # whitespace; normalise it here so the label spacing is explicit.
        st.write(f"Top Caption: {predictions.strip()}")


if __name__ == "__main__":
    main()
|
|