Spaces:
Sleeping
Sleeping
Peiran
committed on
Commit
·
579bdeb
1
Parent(s):
6a51e6d
Ensure no duplicate evaluations: make submit idempotent, update pair_state to remove the evaluated pair, and clear the UI on completion; add graceful returns when there are no pairs
Browse files
app.py
CHANGED
|
@@ -379,6 +379,7 @@ def on_submit(
|
|
| 379 |
):
|
| 380 |
if not task_name:
|
| 381 |
return (
|
|
|
|
| 382 |
gr.update(value=0),
|
| 383 |
gr.update(value=""),
|
| 384 |
gr.update(value=None),
|
|
@@ -391,6 +392,7 @@ def on_submit(
|
|
| 391 |
|
| 392 |
if not pairs:
|
| 393 |
return (
|
|
|
|
| 394 |
gr.update(value=0, minimum=0, maximum=0, visible=False),
|
| 395 |
gr.update(value=""),
|
| 396 |
gr.update(value=None),
|
|
@@ -427,21 +429,40 @@ def on_submit(
|
|
| 427 |
"model2_semantic_functional_alignment_score": int(a_semantic_score),
|
| 428 |
"model2_overall_photorealism_score": int(a_overall_score),
|
| 429 |
}
|
|
|
|
| 430 |
row = _build_eval_row(pair, score_map)
|
| 431 |
-
ok_local = _append_local_persist_csv(task_name, row)
|
| 432 |
-
ok_hub, hub_msg = _upload_eval_record_to_dataset(task_name, row)
|
| 433 |
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
info += " Dataset upload " + ("succeeded" if ok_hub else "failed") + (f" ({hub_msg})" if hub_msg else "") + "."
|
| 438 |
|
| 439 |
-
if
|
| 440 |
-
|
|
|
|
| 441 |
header = _format_pair_header(pair)
|
| 442 |
a_path = pair["model2_path"] if pair.get("swap") else pair["model1_path"]
|
| 443 |
b_path = pair["model1_path"] if pair.get("swap") else pair["model2_path"]
|
| 444 |
return (
|
|
|
|
| 445 |
gr.update(value=next_index),
|
| 446 |
gr.update(value=header),
|
| 447 |
_resolve_image_path(pair["org_img"]),
|
|
@@ -449,18 +470,20 @@ def on_submit(
|
|
| 449 |
_resolve_image_path(b_path),
|
| 450 |
3, 3, 3, 3,
|
| 451 |
3, 3, 3, 3,
|
| 452 |
-
gr.update(value=info + f"
|
| 453 |
)
|
| 454 |
|
|
|
|
| 455 |
return (
|
| 456 |
-
|
| 457 |
-
gr.update(),
|
| 458 |
-
gr.update(),
|
| 459 |
-
gr.update(),
|
| 460 |
-
gr.update(),
|
|
|
|
| 461 |
3, 3, 3, 3,
|
| 462 |
3, 3, 3, 3,
|
| 463 |
-
gr.update(value=info + "
|
| 464 |
)
|
| 465 |
|
| 466 |
|
|
@@ -512,8 +535,8 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
|
|
| 512 |
b_semantic_input = gr.Slider(1, 5, value=3, step=1, label="B: Semantic/Functional Alignment")
|
| 513 |
b_overall_input = gr.Slider(1, 5, value=3, step=1, label="B: Overall Photorealism")
|
| 514 |
|
| 515 |
-
|
| 516 |
-
|
| 517 |
|
| 518 |
# Event bindings
|
| 519 |
task_selector.change(
|
|
@@ -574,6 +597,7 @@ with gr.Blocks(title="VisArena Human Evaluation") as demo:
|
|
| 574 |
b_overall_input,
|
| 575 |
],
|
| 576 |
outputs=[
|
|
|
|
| 577 |
index_slider,
|
| 578 |
pair_header,
|
| 579 |
orig_image,
|
|
|
|
| 379 |
):
|
| 380 |
if not task_name:
|
| 381 |
return (
|
| 382 |
+
pairs,
|
| 383 |
gr.update(value=0),
|
| 384 |
gr.update(value=""),
|
| 385 |
gr.update(value=None),
|
|
|
|
| 392 |
|
| 393 |
if not pairs:
|
| 394 |
return (
|
| 395 |
+
pairs,
|
| 396 |
gr.update(value=0, minimum=0, maximum=0, visible=False),
|
| 397 |
gr.update(value=""),
|
| 398 |
gr.update(value=None),
|
|
|
|
| 429 |
"model2_semantic_functional_alignment_score": int(a_semantic_score),
|
| 430 |
"model2_overall_photorealism_score": int(a_overall_score),
|
| 431 |
}
|
| 432 |
+
# Build record
|
| 433 |
row = _build_eval_row(pair, score_map)
|
|
|
|
|
|
|
| 434 |
|
| 435 |
+
# Idempotency: check if this pair already evaluated; if so, skip writing
|
| 436 |
+
done_keys = _read_existing_eval_keys(task_name)
|
| 437 |
+
eval_key = (pair["test_id"], frozenset({pair["model1_name"], pair["model2_name"]}), pair["org_img"])
|
| 438 |
+
if eval_key in done_keys:
|
| 439 |
+
ok_local = False
|
| 440 |
+
ok_hub, hub_msg = (False, "Skipped duplicate; already evaluated.")
|
| 441 |
+
info_prefix = "Skipped duplicate submission."
|
| 442 |
+
else:
|
| 443 |
+
ok_local = _append_local_persist_csv(task_name, row)
|
| 444 |
+
# add key locally for subsequent filtering in this call
|
| 445 |
+
if ok_local:
|
| 446 |
+
done_keys.add(eval_key)
|
| 447 |
+
ok_hub, hub_msg = _upload_eval_record_to_dataset(task_name, row)
|
| 448 |
+
info_prefix = "Saved evaluation."
|
| 449 |
+
|
| 450 |
+
# Recompute remaining pairs by filtering current state against done_keys
|
| 451 |
+
def key_of(p: Dict[str, str]):
|
| 452 |
+
return (p["test_id"], frozenset({p["model1_name"], p["model2_name"]}), p["org_img"])
|
| 453 |
+
remaining_pairs = [p for p in pairs if key_of(p) not in done_keys]
|
| 454 |
+
|
| 455 |
+
info = f"{info_prefix} Local persistence " + ("succeeded" if ok_local else "skipped/failed") + "."
|
| 456 |
info += " Dataset upload " + ("succeeded" if ok_hub else "failed") + (f" ({hub_msg})" if hub_msg else "") + "."
|
| 457 |
|
| 458 |
+
if remaining_pairs:
|
| 459 |
+
next_index = min(index, len(remaining_pairs) - 1)
|
| 460 |
+
pair = remaining_pairs[next_index]
|
| 461 |
header = _format_pair_header(pair)
|
| 462 |
a_path = pair["model2_path"] if pair.get("swap") else pair["model1_path"]
|
| 463 |
b_path = pair["model1_path"] if pair.get("swap") else pair["model2_path"]
|
| 464 |
return (
|
| 465 |
+
remaining_pairs,
|
| 466 |
gr.update(value=next_index),
|
| 467 |
gr.update(value=header),
|
| 468 |
_resolve_image_path(pair["org_img"]),
|
|
|
|
| 470 |
_resolve_image_path(b_path),
|
| 471 |
3, 3, 3, 3,
|
| 472 |
3, 3, 3, 3,
|
| 473 |
+
gr.update(value=info + f" Next pair ({next_index + 1}/{len(remaining_pairs)})."),
|
| 474 |
)
|
| 475 |
|
| 476 |
+
# No remaining pairs: clear UI, hide slider, and return updated empty state
|
| 477 |
return (
|
| 478 |
+
[],
|
| 479 |
+
gr.update(value=0, minimum=0, maximum=0, visible=False),
|
| 480 |
+
gr.update(value=""),
|
| 481 |
+
gr.update(value=None),
|
| 482 |
+
gr.update(value=None),
|
| 483 |
+
gr.update(value=None),
|
| 484 |
3, 3, 3, 3,
|
| 485 |
3, 3, 3, 3,
|
| 486 |
+
gr.update(value=info + " All pairs completed."),
|
| 487 |
)
|
| 488 |
|
| 489 |
|
|
|
|
| 535 |
b_semantic_input = gr.Slider(1, 5, value=3, step=1, label="B: Semantic/Functional Alignment")
|
| 536 |
b_overall_input = gr.Slider(1, 5, value=3, step=1, label="B: Overall Photorealism")
|
| 537 |
|
| 538 |
+
submit_button = gr.Button("Submit Evaluation", variant="primary")
|
| 539 |
+
feedback_box = gr.Markdown("")
|
| 540 |
|
| 541 |
# Event bindings
|
| 542 |
task_selector.change(
|
|
|
|
| 597 |
b_overall_input,
|
| 598 |
],
|
| 599 |
outputs=[
|
| 600 |
+
pair_state,
|
| 601 |
index_slider,
|
| 602 |
pair_header,
|
| 603 |
orig_image,
|