OpenRA-Bench / eval_stats.json
yxc20098's picture
win-speed bonus: record + reward how fast a model wins
35d7d47
{
"run_id": "20260519-162229",
"model": "qwen/qwen3.6-flash",
"truncated": false,
"resumed": 0,
"cost": {
"calls": 41,
"prompt_tokens": 332060,
"completion_tokens": 43651,
"usd": 0.0,
"max_usd": 0.0
},
"summary": {
"action-sequenced-execution:hard": {
"n": 1,
"win_rate": 0.0,
"composite_mean": 0.1773,
"composite_std": 0.0,
"perception_mean": 0.6844,
"reasoning_mean": 0.6737,
"action_mean": 1.0,
"objective_mean": 0.375,
"weakest_link_hist": {
"reasoning": 1
}
}
},
"overall": {
"n": 1,
"win_rate": 0.0,
"composite_mean": 0.1773,
"composite_std": 0.0,
"perception_mean": 0.6844,
"reasoning_mean": 0.6737,
"action_mean": 1.0,
"objective_mean": 0.375,
"weakest_link_hist": {
"reasoning": 1
}
},
"reward_vector_mean": {
"economy": 0.5,
"military": 0.0,
"territory": 0.5491,
"scouting": 0.6,
"objective": 0.375
},
"episodes": [
{
"cell": "action-sequenced-execution:hard",
"capability": "action",
"split": "public",
"seed": 1,
"outcome": "loss",
"composite": 0.1773,
"perception": 0.6844,
"reasoning": 0.6737,
"action": 1.0,
"weakest_link": "reasoning",
"objective_progress": 0.375,
"reward_vector": {
"economy": 0.5,
"military": 0.0,
"territory": 0.5491,
"scouting": 0.6,
"objective": 0.375
},
"turns": 41,
"notes": [
"objective not met (loss); weakest link: reasoning"
]
}
],
"skipped": []
}