Spaces:
Running
Running
| { | |
| "run_id": "20260519-162229", | |
| "model": "qwen/qwen3.6-flash", | |
| "truncated": false, | |
| "resumed": 0, | |
| "cost": { | |
| "calls": 41, | |
| "prompt_tokens": 332060, | |
| "completion_tokens": 43651, | |
| "usd": 0.0, | |
| "max_usd": 0.0 | |
| }, | |
| "summary": { | |
| "action-sequenced-execution:hard": { | |
| "n": 1, | |
| "win_rate": 0.0, | |
| "composite_mean": 0.1773, | |
| "composite_std": 0.0, | |
| "perception_mean": 0.6844, | |
| "reasoning_mean": 0.6737, | |
| "action_mean": 1.0, | |
| "objective_mean": 0.375, | |
| "weakest_link_hist": { | |
| "reasoning": 1 | |
| } | |
| } | |
| }, | |
| "overall": { | |
| "n": 1, | |
| "win_rate": 0.0, | |
| "composite_mean": 0.1773, | |
| "composite_std": 0.0, | |
| "perception_mean": 0.6844, | |
| "reasoning_mean": 0.6737, | |
| "action_mean": 1.0, | |
| "objective_mean": 0.375, | |
| "weakest_link_hist": { | |
| "reasoning": 1 | |
| } | |
| }, | |
| "reward_vector_mean": { | |
| "economy": 0.5, | |
| "military": 0.0, | |
| "territory": 0.5491, | |
| "scouting": 0.6, | |
| "objective": 0.375 | |
| }, | |
| "episodes": [ | |
| { | |
| "cell": "action-sequenced-execution:hard", | |
| "capability": "action", | |
| "split": "public", | |
| "seed": 1, | |
| "outcome": "loss", | |
| "composite": 0.1773, | |
| "perception": 0.6844, | |
| "reasoning": 0.6737, | |
| "action": 1.0, | |
| "weakest_link": "reasoning", | |
| "objective_progress": 0.375, | |
| "reward_vector": { | |
| "economy": 0.5, | |
| "military": 0.0, | |
| "territory": 0.5491, | |
| "scouting": 0.6, | |
| "objective": 0.375 | |
| }, | |
| "turns": 41, | |
| "notes": [ | |
| "objective not met (loss); weakest link: reasoning" | |
| ] | |
| } | |
| ], | |
| "skipped": [] | |
| } |