# File: internal-v0/ui/main_page.py
# Last commit by carlosh93 (d5ac27c): adding queue system, rotate
# transformations, and scroll to the top when clicking on the submit button.
# File size: 17.3 kB
import gradio as gr
from functools import partial
from gradio_modal import Modal
from data.lang2eng_map import lang2eng_mapping
from data.words_map import words_mapping
import datetime
from pyuca import Collator
def sort_with_pyuca(strings):
    """Return the items of *strings* as a new list in Unicode collation order.

    Sorting uses the sort keys produced by a ``pyuca.Collator`` rather than
    plain code-point comparison.

    Args:
        strings: Any iterable of strings (a list, ``dict.keys()``, ...).

    Returns:
        A new sorted list; the input is not modified.
    """
    # A pyuca Collator loads its collation key table when it is constructed,
    # and this helper is called many times while the page is built, so build
    # the collator once and keep it on the function object for later calls.
    collator = getattr(sort_with_pyuca, "_collator", None)
    if collator is None:
        collator = Collator()
        sort_with_pyuca._collator = collator
    return sorted(strings, key=collator.sort_key)
def build_main_page(concepts_dict, metadata_dict, local_storage):
    """Build the (initially hidden) main data-collection page.

    Args:
        concepts_dict: Nested mapping indexed as
            ``concepts_dict[country][language][category]``; each leaf is an
            iterable of concept names. The ``["USA"]["English"]`` entry
            supplies the category names used for the whole page.
        metadata_dict: Nested mapping in which
            ``metadata_dict["USA"]["English"]`` holds the paths of the
            ``"Task"`` and ``"Instructions"`` markdown files.
        local_storage: Object whose ``value`` unpacks into four items, the
            first two being the stored country and language.

    Returns:
        A dict mapping string names to the Gradio components created here,
        so that the caller can attach event handlers to them.
        ``"category_concept_dropdowns"`` is the list of the five
        additional-concept dropdowns, which are also exposed individually as
        ``"category_1_concepts"`` ... ``"category_5_concepts"``.

    Raises:
        OSError: If one of the markdown files cannot be read.
        KeyError / IndexError: If the expected dictionary entries are missing
            or fewer than five categories are defined.
    """
    default_country, default_lang = "USA", "English"
    try:
        country, lang, _, _ = local_storage.value
    except (TypeError, ValueError):
        # Nothing stored yet (e.g. value is None) or it has an unexpected shape.
        country, lang = default_country, default_lang
    else:
        # Both values are needed for the concepts_dict lookup below, so fall
        # back to the defaults as soon as either of them is missing.
        if not country or not lang:
            country, lang = default_country, default_lang

    # The page is first built with the USA/English texts and category names.
    with open(metadata_dict["USA"]["English"]["Task"], "r", encoding="utf-8") as f:
        task_text = f.read()
    with open(metadata_dict["USA"]["English"]["Instructions"], "r", encoding="utf-8") as f:
        inst_text = f.read()

    # Sorted once and shared by the main category dropdown and the
    # additional-concept dropdowns.
    categories_list = sort_with_pyuca(concepts_dict["USA"]["English"].keys())

    def create_category_dropdown(category, index):
        """Create the multi-select concept dropdown for one category.

        ``category`` is the (English) category key and ``index`` its 0-based
        position, which is used to derive the element id.
        """
        # Show the translated category name when a translation exists.
        if lang in words_mapping:
            display_category = words_mapping[lang].get(category, category)
        else:
            display_category = category
        # lang2eng_mapping gives the key used in concepts_dict; languages
        # without an entry are looked up under their own name.
        category_choices = concepts_dict[country][lang2eng_mapping.get(lang, lang)][category]
        return gr.Dropdown(
            choices=sort_with_pyuca(category_choices),
            interactive=True,
            label=display_category,
            allow_custom_value=True,
            elem_id=f"category_{index+1}_concepts_btn",
            multiselect=True,
            value=None,
        )

    with gr.Column(visible=False, elem_id="main_page") as main_ui_placeholder:
        gr.Markdown("# Wonders: Multicultural Images and Captions for Inclusive AI")
        gr.Markdown("Help us collect culturally rich images and captions to improve AI for everyone!")
        gr.Markdown("Choose a concept, upload an image showing it in a typical local situation, and write a detailed description about the image.<br> For more information, see the overviews below or click on 'Show instructions' for detailed guidelines.")
        gr.Markdown(
            " ### Notes\n"
            "- By participating in this task, you agree to donate images and captions under [CC BY license](https://creativecommons.org/licenses/by/4.0/), which will allow us to share the data with others to improve AI.\n"
            "- You will be able to **update or remove** any provided data at any time if you wish so.\n"
            '- Participants that provide more data, across more concepts and of high quality (see "Instructions" button) will be **rewarded** (see TODO).\n'
        )
        # Task overview text; kept hidden and exposed through the returned dict.
        intro_text_inp = gr.Markdown(task_text, visible=False)

        # Read-only session information plus the button to go back.
        with gr.Row(equal_height=True):
            country_inp = gr.Textbox(label="Country", elem_id="country_inp", interactive=False)
            language_inp = gr.Textbox(label="Language", elem_id="language_inp", interactive=False)
            username_inp = gr.Textbox(label="email (optional)", type="email", elem_id="username_inp", interactive=False)
            password_inp = gr.Textbox(label="password (optional)", type="password", elem_id="password_inp", interactive=False)
            exit_btn = gr.Button("⬅️ Change Language", elem_id="exit_btn", elem_classes=["compact-btn"])

        with gr.Row(equal_height=True):
            # Left column: main concept selection and the image.
            with gr.Column():
                # Main category and concept row - SINGLE SELECTION
                with gr.Group():
                    gr.Markdown("### Main Concept")
                    gr.Markdown(
                        "Select the main concept and category for which you want to share data.<br>\n"
                        "If you want to add a new concept, you can just type it in the same box (after selecting its category).\n"
                    )
                    with gr.Row():
                        category_btn = gr.Dropdown(choices=categories_list, interactive=True, label="Main Category",
                                                   allow_custom_value=False, elem_id="category_btn", multiselect=False, value=None)
                        # Starts empty; custom values are allowed so that new
                        # concepts can be typed in.
                        concept_btn = gr.Dropdown(choices=[], interactive=True, label="Main Concept",
                                                  allow_custom_value=True, elem_id="concept_btn", multiselect=False)
                with gr.Group():
                    gr.Markdown("### Image")
                    with gr.Accordion("📘 An image of the main concept in typical contexts (click to read more)", open=False):
                        gr.Markdown(
                            "An image where the *main concept is clearly visible*\n"
                            "- in typical contexts in your country, and\n"
                            "- with other objects/people when possible\n"
                            "Pictures taken by yourself are preferred.\n"
                            "If you want to use an image from the Web, directly add its URL below.\n"
                            "- Please use publicly-licensed images (e.g., from pexels.com or freepik.com)\n"
                            "For more details, click on “Show instructions” below.\n"
                        )
                    image_url_inp = gr.Textbox(label="Image URL (Optional, if not uploading an image)", type="text", elem_id="image_url_inp")
                    image_inp = gr.Image(label="Image", elem_id="image_inp", format="png", height=512, width=768)
                    with gr.Row():
                        hide_all_faces_btn = gr.Button("👀 Hide All Faces", elem_id="hide_all_faces_btn")
                        hide_faces_btn = gr.Button("👀 Hide Specific Faces", elem_id="hide_faces_btn")
                        unhide_faces_btn = gr.Button("👀 Unhide Faces", elem_id="unhide_faces_btn")
                    with gr.Row():
                        rotate_left_btn = gr.Button("↺ Rotate Left", elem_id="rotate_left_btn")
                        rotate_right_btn = gr.Button("↻ Rotate Right", elem_id="rotate_right_btn")
                        reset_image_btn = gr.Button("🔄 Reset Image", elem_id="reset_image_btn")

            # Right column: description, VLM dialogs and additional concepts.
            with gr.Column():
                with gr.Group():
                    gr.Markdown("### Description")
                    with gr.Accordion("📘 A detailed description (around 40 words covering who?, what? and where?) for the image (click to read more)", open=False):
                        gr.Markdown(
                            "A long image description (around 40 words) with extensive and detailed visual information.<br>\n"
                            "Descriptions must be *objective*: focus on how you would describe the image to someone who can’t see it, without your own opinions/speculations.\n"
                            "The text needs to *include the main concept* and describes the content of the image in detail by including:\n"
                            "- **Who?** The visual *appearance* and observable *emotions* (e.g., “is smiling”) of persons and animals.\n"
                            "- **What?** The *actions* performed in the image.\n"
                            "- **Where?** The *setting* of the image, including the size, color and relationships between objects.\n"
                            "For more details, click on “Show instructions” below.\n"
                        )
                    long_caption_inp = gr.Textbox(lines=6, label="Description", elem_id="long_caption_inp")
                    num_words_inp = gr.Textbox(lines=1, label="Number of words", elem_id="num_words", interactive=False, value=0)

                # Dialog offering an optional VLM-generated description.
                with Modal(visible=False, allow_user_close=False) as modal_vlm:
                    gr.Markdown("Would you like to see if a VLM can generate a culturally aware description for your uploaded concept?")
                    with gr.Row():
                        gen_button = gr.Button("Yes", variant="primary", elem_id="generate_answer_btn")
                        vlm_no_btn = gr.Button("No")
                        vlm_cancel_btn = gr.Button("Cancel")
                    # Other candidate models noted by the authors:
                    # "Qwen2.5-VL-7B", "InternVL3_5-8B", "Gemma3-4B"
                    vlm_model_dropdown = gr.Dropdown(
                        ["SmolVLM-500M"], value="SmolVLM-500M", multiselect=False, label="VLM Model", info="Select the VLM model to use for generating the description."
                    )
                    vlm_output = gr.Textbox(lines=6, label="Generated description", elem_id="vlm_output", interactive=False)
                    # Feedback widgets are created hidden.
                    vlm_feedback = gr.Radio(["Yes 👍", "No 👎"], label="Do you think the generated description is accurate within the cultural context of your country?", visible=False, elem_id="vlm_feedback", interactive=True)
                    vlm_done_btn = gr.Button("Complete Submission", visible=False)

                # Confirmation dialog for re-submitting after a VLM caption
                # has already been generated.
                with Modal(visible=False, allow_user_close=False) as modal_submit:
                    gr.Markdown("⚠️ You've already generated a caption for this image. An optional description with the VLM can only be generated once. Would you like to proceed and submit your modified data?")
                    with gr.Row():
                        submit_yes = gr.Button("Yes", variant="primary", elem_id="submit_confirm_yes")
                        submit_no = gr.Button("No", variant="stop", elem_id="submit_confirm_no")

                category_concept_dropdowns = []
                with gr.Group():
                    gr.Markdown("### Additional concepts (optional)")
                    gr.Markdown("Tag any other concept that is visible in the image.")
                    # Categories 1 and 2 share a row ...
                    with gr.Row():
                        for idx in (0, 1):
                            category_concept_dropdowns.append(
                                create_category_dropdown(categories_list[idx], idx)
                            )
                    # ... categories 3 to 5 are stacked below it.
                    for idx in (2, 3, 4):
                        category_concept_dropdowns.append(
                            create_category_dropdown(categories_list[idx], idx)
                        )

                # Instructions button and the dialog showing the instructions.
                with gr.Row(equal_height=True):
                    instruct_btn = gr.Button("📘 Show Instructions")
                    with Modal(visible=False) as modal:
                        intro_text_inst_inp = gr.Markdown(inst_text)

        # Action buttons and their status/confirmation dialogs.
        with gr.Column():
            with gr.Row(equal_height=True):
                clear_btn = gr.Button("Clear", variant="huggingface", elem_id="clear_btn")
                with Modal(visible=False, allow_user_close=False) as modal_saving:
                    modal_saving_text = gr.Markdown("⏳ Please wait while your submission is being saved.")
                with Modal(visible=False) as modal_data_saved:
                    modal_data_saved_text = gr.Markdown("Your data has been saved successfully. The data in the table below will be updated shortly. You can now close this window.")
                # Created disabled (interactive=False).
                submit_btn = gr.Button("Submit", variant="primary", interactive=False, elem_id="submit_btn")
                with Modal(visible=False) as modal_exclude_confirm:
                    gr.Markdown("## Are you sure you want to exclude this example?")
                    gr.Markdown("This action will permanently delete the example.")
                    with gr.Row():
                        cancel_exclude_btn = gr.Button("Cancel")
                        confirm_exclude_btn = gr.Button("Yes, delete", variant="stop")
                exclude_btn = gr.Button("Exclude Selected Example", variant="stop", visible=True)

        # Hidden bookkeeping fields. The timestamp's initial value is taken
        # once, when the page is built.
        with gr.Column():
            timestamp_btn = gr.Textbox(datetime.datetime.now(), label="Timestamp", visible=False, elem_id="timestamp_btn", interactive=False)
            exampleid_btn = gr.Textbox(label="ID", visible=False, elem_id="example_id", interactive=False)

    output_dict = {
        "main_ui_placeholder": main_ui_placeholder,
        "country_inp": country_inp,
        "language_inp": language_inp,
        "username_inp": username_inp,
        "password_inp": password_inp,
        "image_inp": image_inp,
        "image_url_inp": image_url_inp,
        "long_caption_inp": long_caption_inp,
        "num_words_inp": num_words_inp,
        "category_btn": category_btn,
        "concept_btn": concept_btn,
        "category_concept_dropdowns": category_concept_dropdowns,
        "category_1_concepts": category_concept_dropdowns[0],
        "category_2_concepts": category_concept_dropdowns[1],
        "category_3_concepts": category_concept_dropdowns[2],
        "category_4_concepts": category_concept_dropdowns[3],
        "category_5_concepts": category_concept_dropdowns[4],
        "instruct_btn": instruct_btn,
        "clear_btn": clear_btn,
        "submit_btn": submit_btn,
        "modal": modal,
        "modal_saving": modal_saving,
        "modal_data_saved": modal_data_saved,
        "timestamp_btn": timestamp_btn,
        "exampleid_btn": exampleid_btn,
        "exit_btn": exit_btn,
        "intro_text_inp": intro_text_inp,
        "intro_text_inst_inp": intro_text_inst_inp,
        "modal_saving_text": modal_saving_text,
        "modal_data_saved_text": modal_data_saved_text,
        "hide_faces_btn": hide_faces_btn,
        "hide_all_faces_btn": hide_all_faces_btn,
        "unhide_faces_btn": unhide_faces_btn,
        "rotate_left_btn": rotate_left_btn,
        "rotate_right_btn": rotate_right_btn,
        "reset_image_btn": reset_image_btn,
        "exclude_btn": exclude_btn,
        "modal_exclude_confirm": modal_exclude_confirm,
        "cancel_exclude_btn": cancel_exclude_btn,
        "confirm_exclude_btn": confirm_exclude_btn,
        "vlm_output": vlm_output,
        "gen_button": gen_button,
        "vlm_feedback": vlm_feedback,
        "modal_vlm": modal_vlm,
        "vlm_no_btn": vlm_no_btn,
        "vlm_done_btn": vlm_done_btn,
        "submit_yes": submit_yes,
        "submit_no": submit_no,
        "modal_submit": modal_submit,
        "vlm_cancel_btn": vlm_cancel_btn,
        "vlm_model_dropdown": vlm_model_dropdown,
    }
    return output_dict