Update app_dialogue.py
app_dialogue.py  CHANGED  (+44 -2)
@@ -115,7 +115,26 @@ def convert_to_rgb(filepath_or_pilimg):
 
     return temp_file_path # Return the path to the saved image
 
+def pil_to_markdown_im(image):
+    """
+    Convert a PIL image into markdown filled with the base64 string representation.
+    """
+    print(f"***** pil_to_markdown_im ******")
+    print(f"params: image is - {image}")
+    #if isinstance(image, PIL.Image.Image):
+    #img_b64_str = pil_to_base64(image)
+    #img_str = f'<img src="data:image/png;base64,{img_b64_str}" />'
+    #if path_or_url.startswith(("http://", "https://")):
+    #response = requests.get(image)
+    #image = Image.open(BytesIO(response.content))
+    # Generate a unique filename using UUID
+    filename = f"{uuid.uuid4()}.jpg"
+    local_path = f"{filename}"
+    image.save(local_path)
+    img_str = f""
+    return img_str
 
+
 def base64_to_pil(encoded_image):
     decoded_image = base64.b64decode(encoded_image)
     pil_image = Image.open(BytesIO(decoded_image))

@@ -322,37 +341,60 @@ def format_user_prompt_with_im_history_and_system_conditioning(
     Produces the resulting list that needs to go inside the processor.
     It handles the potential image box input, the history and the system conditionning.
     """
+    print(f"*********format_user_prompt_with_im_history_and_system_conditioning*********")
+    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_user_prompt_str is - {current_user_prompt_str} ")
+    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_image is - {current_image} ")
+    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param history is - {history} ")
+
     resulting_list = copy.deepcopy(SYSTEM_PROMPT)
 
     # Format history
     for turn in history:
+        print(f"inside for loop, turn is - {turn}")
         user_utterance, assistant_utterance = turn
+        print("calling split_str_on_im_markdown from inside for loop inside format_user_prompt_with_im_history_and_system_conditioning")
         splitted_user_utterance = split_str_on_im_markdown(user_utterance)
+        print(f"splitted_user_utterance from split_str_on_im_markdown is - {splitted_user_utterance} ")
         splitted_user_utterance = [
             im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
             for s in splitted_user_utterance
             if s != ""
         ]
+        print(f"splitted_user_utterance after im_markdown_to_pil() is - {splitted_user_utterance} ")
+
         if isinstance(splitted_user_utterance[0], str):
             resulting_list.append("\nUser: ")
         else:
             resulting_list.append("\nUser:")
+        print(f"resulting_list after if..else block is - {resulting_list}")
         resulting_list.extend(splitted_user_utterance)
+        print(f"resulting_list after extend is - {resulting_list}")
         resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
+        print(f"resulting_list after append is - {resulting_list}")
+
 
     # Format current input
     current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
+    print(f"current_user_prompt_str is - {current_user_prompt_str}")
+
     if current_image is None:
+        print("inside IF : current_image is NONE")
         if "<img src=data:image/png;base64" in current_user_prompt_str:
             raise ValueError("The UI does not support inputing via the text box an image in base64.")
         current_user_prompt_list = handle_manual_images_in_user_prompt(current_user_prompt_str)
+        print(f"current_user_prompt_list (or [user_prompt]/resulting_user_prompt((most likely this one)) from handle_manual_images_in_user_prompt ) is - {current_user_prompt_list}")
         resulting_list.append("\nUser: ")
+        print(f"resulting_list with append user - {resulting_list}")
         resulting_list.extend(current_user_prompt_list)
+        print(f"resulting_list after extend with current_user_prompt_list is - {resulting_list}")
         resulting_list.append("<end_of_utterance>\nAssistant:")
+        print(f"resulting_list after append with end_of_utteranceAssistant is - {resulting_list}")
         return resulting_list, current_user_prompt_list
     else:
+        print("inside ELSE : current_image is not NONE")
         # Choosing to put the image first when the image is inputted through the UI, but this is an arbiratrary choice.
-        resulting_list.extend(["\nUser:", current_image, f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"])
+        resulting_list.extend(["\nUser:", Image.open(current_image), f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"]) #current_image
+        print(f"final resulting_list passed on to calling function is - {resulting_list}")
        return resulting_list, [current_user_prompt_str]
 
 

@@ -535,7 +577,7 @@ with gr.Blocks(title="IDEFICS-Chat", theme=gr.themes.Base()) as demo:
     )
     processor, tokenizer, model = load_processor_tokenizer_model(model_selector.value)
 
-    imagebox = gr.Image(type="
+    imagebox = gr.Image(type="filepath", label="Image input")
 
     with gr.Accordion("Advanced parameters", open=False, visible=True) as parameter_row:
         max_new_tokens = gr.Slider(