WeijianQi1999 committed on
Commit
d57548f
·
1 Parent(s): eb42cc9

update layout

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. requirements.txt +1 -2
app.py CHANGED
@@ -34,8 +34,8 @@ def get_dataframe_from_results(eval_path):
34
  else:
35
  df = df.sort_values(
36
  by=["Verified", "Average SR"],
37
- ascending=[False, False],
38
- kind="mergesort"
39
  )
40
 
41
  for col in ['Easy', 'Medium', 'Hard', 'Average SR']:
@@ -283,7 +283,7 @@ with demo:
283
  )
284
  gr.Markdown("### Visualization")
285
  gr.Markdown("This figure presents a fine-grained heatmap illustrating task-level completion across different agents. Each row corresponds to a specific agent, and each column represents a task (identified by its task ID). Blue bars indicate successful completions, while white spaces denote failures. Any agent: A task is considered successful if at least one agent is able to complete it. (This style of visualization is inspired by [HAL](https://hal.cs.princeton.edu/).)")
286
- fig = plot_heatmap_with_performance_bar("./human_label.json")
287
  gr.Plot(fig)
288
  gr.Markdown(EVALUATION_DETAILS)
289
 
 
34
  else:
35
  df = df.sort_values(
36
  by=["Verified", "Average SR"],
37
+ ascending=[False, False], # False 表示降序;Verified=True 会排到最上面
38
+ kind="mergesort" # 稳定排序,保证次序可预期
39
  )
40
 
41
  for col in ['Easy', 'Medium', 'Hard', 'Average SR']:
 
283
  )
284
  gr.Markdown("### Visualization")
285
  gr.Markdown("This figure presents a fine-grained heatmap illustrating task-level completion across different agents. Each row corresponds to a specific agent, and each column represents a task (identified by its task ID). Blue bars indicate successful completions, while white spaces denote failures. Any agent: A task is considered successful if at least one agent is able to complete it. (This style of visualization is inspired by [HAL](https://hal.cs.princeton.edu/).)")
286
+ fig = plot_heatmap_with_performance_bar("./human_label_071625.json")
287
  gr.Plot(fig)
288
  gr.Markdown(EVALUATION_DETAILS)
289
 
requirements.txt CHANGED
@@ -2,5 +2,4 @@ datasets
2
  gradio
3
  huggingface-hub
4
  numpy
5
- APScheduler
6
- plotly
 
2
  gradio
3
  huggingface-hub
4
  numpy
5
+ APScheduler