{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.256410256410255,
  "eval_steps": 5000,
  "global_step": 100000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.020512820512820513,
      "grad_norm": 1.4934550523757935,
      "learning_rate": 2e-05,
      "loss": 0.4065,
      "step": 200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.06214292347431183,
      "epoch": 0.020512820512820513,
      "step": 200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3083333373069763,
      "wm_acc_tail": 0.1875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.436601400375366
    },
    {
      "epoch": 0.041025641025641026,
      "grad_norm": 1.465198040008545,
      "learning_rate": 2e-05,
      "loss": 0.3808,
      "step": 400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.2184431552886963,
      "epoch": 0.041025641025641026,
      "step": 400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4125000238418579,
      "wm_acc_tail": 0.28125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.824479341506958
    },
    {
      "epoch": 0.06153846153846154,
      "grad_norm": 1.3252092599868774,
      "learning_rate": 2e-05,
      "loss": 0.3757,
      "step": 600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.06124761700630188,
      "epoch": 0.06153846153846154,
      "step": 600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.34166669845581055,
      "wm_acc_tail": 0.2421875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.270543098449707
    },
    {
      "epoch": 0.08205128205128205,
      "grad_norm": 1.5135141611099243,
      "learning_rate": 2e-05,
      "loss": 0.3724,
      "step": 800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.017732299864292145,
      "epoch": 0.08205128205128205,
      "step": 800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.42500001192092896,
      "wm_acc_tail": 0.34375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.8774924278259277
    },
    {
      "epoch": 0.10256410256410256,
      "grad_norm": 1.527764081954956,
      "learning_rate": 2e-05,
      "loss": 0.3716,
      "step": 1000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.019019341096282005,
      "epoch": 0.10256410256410256,
      "step": 1000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.30416667461395264,
      "wm_acc_tail": 0.2109375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.5648303031921387
    },
    {
      "epoch": 0.12307692307692308,
      "grad_norm": 1.3558701276779175,
      "learning_rate": 2e-05,
      "loss": 0.3713,
      "step": 1200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.034935202449560165,
      "epoch": 0.12307692307692308,
      "step": 1200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.2916666865348816,
      "wm_acc_tail": 0.1953125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.679027557373047
    },
    {
      "epoch": 0.14358974358974358,
      "grad_norm": 1.3953908681869507,
      "learning_rate": 2e-05,
      "loss": 0.3742,
      "step": 1400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03359450027346611,
      "epoch": 0.14358974358974358,
      "step": 1400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.31666669249534607,
      "wm_acc_tail": 0.2265625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.658151865005493
    },
    {
      "epoch": 0.1641025641025641,
      "grad_norm": 1.3837014436721802,
      "learning_rate": 2e-05,
      "loss": 0.3751,
      "step": 1600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.023025812581181526,
      "epoch": 0.1641025641025641,
      "step": 1600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.34583336114883423,
      "wm_acc_tail": 0.2734375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.2658565044403076
    },
    {
      "epoch": 0.18461538461538463,
      "grad_norm": 1.5721590518951416,
      "learning_rate": 2e-05,
      "loss": 0.3747,
      "step": 1800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.08445067703723907,
      "epoch": 0.18461538461538463,
      "step": 1800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.38333335518836975,
      "wm_acc_tail": 0.28125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.3911333084106445
    },
    {
      "epoch": 0.20512820512820512,
      "grad_norm": 1.32631516456604,
      "learning_rate": 2e-05,
      "loss": 0.3766,
      "step": 2000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.042738281190395355,
      "epoch": 0.20512820512820512,
      "step": 2000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3125000298023224,
      "wm_acc_tail": 0.2578125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.7338380813598633
    },
    {
      "epoch": 0.22564102564102564,
      "grad_norm": 1.3824011087417603,
      "learning_rate": 2e-05,
      "loss": 0.3777,
      "step": 2200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.06659181416034698,
      "epoch": 0.22564102564102564,
      "step": 2200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4166666865348816,
      "wm_acc_tail": 0.328125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.2330055236816406
    },
    {
      "epoch": 0.24615384615384617,
      "grad_norm": 1.423620581626892,
      "learning_rate": 2e-05,
      "loss": 0.3828,
      "step": 2400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.008443932048976421,
      "epoch": 0.24615384615384617,
      "step": 2400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.42500001192092896,
      "wm_acc_tail": 0.265625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.05609393119812
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 1.4979031085968018,
      "learning_rate": 2e-05,
      "loss": 0.3825,
      "step": 2600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.052634354680776596,
      "epoch": 0.26666666666666666,
      "step": 2600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.28333336114883423,
      "wm_acc_tail": 0.2109375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.6076059341430664
    },
    {
      "epoch": 0.28717948717948716,
      "grad_norm": 1.5777536630630493,
      "learning_rate": 2e-05,
      "loss": 0.3828,
      "step": 2800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.018900839611887932,
      "epoch": 0.28717948717948716,
      "step": 2800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.36250001192092896,
      "wm_acc_tail": 0.2421875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.3003084659576416
    },
    {
      "epoch": 0.3076923076923077,
      "grad_norm": 1.449562668800354,
      "learning_rate": 2e-05,
      "loss": 0.385,
      "step": 3000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.010468097403645515,
      "epoch": 0.3076923076923077,
      "step": 3000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.30000001192092896,
      "wm_acc_tail": 0.1953125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.636639356613159
    },
    {
      "epoch": 0.3282051282051282,
      "grad_norm": 1.583873987197876,
      "learning_rate": 2e-05,
      "loss": 0.3883,
      "step": 3200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.06589348614215851,
      "epoch": 0.3282051282051282,
      "step": 3200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.25,
      "wm_acc_tail": 0.1484375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 4.009855270385742
    },
    {
      "epoch": 0.3487179487179487,
      "grad_norm": 1.3733325004577637,
      "learning_rate": 2e-05,
      "loss": 0.3867,
      "step": 3400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0026404904201626778,
      "epoch": 0.3487179487179487,
      "step": 3400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.2875000238418579,
      "wm_acc_tail": 0.25,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.8700666427612305
    },
    {
      "epoch": 0.36923076923076925,
      "grad_norm": 1.4926812648773193,
      "learning_rate": 2e-05,
      "loss": 0.3897,
      "step": 3600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.039389658719301224,
      "epoch": 0.36923076923076925,
      "step": 3600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.36250001192092896,
      "wm_acc_tail": 0.296875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.6547391414642334
    },
    {
      "epoch": 0.38974358974358975,
      "grad_norm": 1.7389109134674072,
      "learning_rate": 2e-05,
      "loss": 0.3917,
      "step": 3800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.016246166080236435,
      "epoch": 0.38974358974358975,
      "step": 3800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3750000298023224,
      "wm_acc_tail": 0.2265625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.357604742050171
    },
    {
      "epoch": 0.41025641025641024,
      "grad_norm": 1.5250686407089233,
      "learning_rate": 2e-05,
      "loss": 0.3919,
      "step": 4000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.015929628163576126,
      "epoch": 0.41025641025641024,
      "step": 4000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3083333373069763,
      "wm_acc_tail": 0.1953125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.9595532417297363
    },
    {
      "epoch": 0.4307692307692308,
      "grad_norm": 1.4706652164459229,
      "learning_rate": 2e-05,
      "loss": 0.3941,
      "step": 4200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.027975566685199738,
      "epoch": 0.4307692307692308,
      "step": 4200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.37916669249534607,
      "wm_acc_tail": 0.265625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.4055233001708984
    },
    {
      "epoch": 0.4512820512820513,
      "grad_norm": 1.5600982904434204,
      "learning_rate": 2e-05,
      "loss": 0.3926,
      "step": 4400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0512637235224247,
      "epoch": 0.4512820512820513,
      "step": 4400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.40416669845581055,
      "wm_acc_tail": 0.3046875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.3677384853363037
    },
    {
      "epoch": 0.4717948717948718,
      "grad_norm": 1.441586971282959,
      "learning_rate": 2e-05,
      "loss": 0.3945,
      "step": 4600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.07077232003211975,
      "epoch": 0.4717948717948718,
      "step": 4600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3291666805744171,
      "wm_acc_tail": 0.21875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.9571967124938965
    },
    {
      "epoch": 0.49230769230769234,
      "grad_norm": 1.4141793251037598,
      "learning_rate": 2e-05,
      "loss": 0.3948,
      "step": 4800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.019603557884693146,
      "epoch": 0.49230769230769234,
      "step": 4800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4125000238418579,
      "wm_acc_tail": 0.328125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.2328951358795166
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 1.5332987308502197,
      "learning_rate": 2e-05,
      "loss": 0.3975,
      "step": 5000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.031281083822250366,
      "epoch": 0.5128205128205128,
      "step": 5000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3083333373069763,
      "wm_acc_tail": 0.1953125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.8760924339294434
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 1.4803385734558105,
      "learning_rate": 2e-05,
      "loss": 0.396,
      "step": 5200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.031858351081609726,
      "epoch": 0.5333333333333333,
      "step": 5200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.30000001192092896,
      "wm_acc_tail": 0.2109375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.7030813694000244
    },
    {
      "epoch": 0.5538461538461539,
      "grad_norm": 1.725152850151062,
      "learning_rate": 2e-05,
      "loss": 0.3938,
      "step": 5400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.13517172634601593,
      "epoch": 0.5538461538461539,
      "step": 5400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.38750001788139343,
      "wm_acc_tail": 0.21875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.290188789367676
    },
    {
      "epoch": 0.5743589743589743,
      "grad_norm": 1.4720208644866943,
      "learning_rate": 2e-05,
      "loss": 0.3967,
      "step": 5600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.08771564811468124,
      "epoch": 0.5743589743589743,
      "step": 5600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.40000003576278687,
      "wm_acc_tail": 0.2421875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.472694158554077
    },
    {
      "epoch": 0.5948717948717949,
      "grad_norm": 1.4724576473236084,
      "learning_rate": 2e-05,
      "loss": 0.3955,
      "step": 5800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.012787049636244774,
      "epoch": 0.5948717948717949,
      "step": 5800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.42500001192092896,
      "wm_acc_tail": 0.328125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.1797008514404297
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": 1.399617314338684,
      "learning_rate": 2e-05,
      "loss": 0.3949,
      "step": 6000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0538334958255291,
      "epoch": 0.6153846153846154,
      "step": 6000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3125000298023224,
      "wm_acc_tail": 0.234375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.8461050987243652
    },
    {
      "epoch": 0.6358974358974359,
      "grad_norm": 1.5025241374969482,
      "learning_rate": 2e-05,
      "loss": 0.3951,
      "step": 6200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04307379201054573,
      "epoch": 0.6358974358974359,
      "step": 6200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3583333492279053,
      "wm_acc_tail": 0.2578125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.6626486778259277
    },
    {
      "epoch": 0.6564102564102564,
      "grad_norm": 1.4680161476135254,
      "learning_rate": 2e-05,
      "loss": 0.3964,
      "step": 6400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04405893385410309,
      "epoch": 0.6564102564102564,
      "step": 6400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4833333492279053,
      "wm_acc_tail": 0.3671875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.9823522567749023
    },
    {
      "epoch": 0.676923076923077,
      "grad_norm": 1.4593451023101807,
      "learning_rate": 2e-05,
      "loss": 0.3976,
      "step": 6600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.019440408796072006,
      "epoch": 0.676923076923077,
      "step": 6600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.40000003576278687,
      "wm_acc_tail": 0.2734375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.331028461456299
    },
    {
      "epoch": 0.6974358974358974,
      "grad_norm": 1.3424919843673706,
      "learning_rate": 2e-05,
      "loss": 0.3963,
      "step": 6800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.014973487704992294,
      "epoch": 0.6974358974358974,
      "step": 6800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3958333432674408,
      "wm_acc_tail": 0.2890625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.390171527862549
    },
    {
      "epoch": 0.717948717948718,
      "grad_norm": 1.6031973361968994,
      "learning_rate": 2e-05,
      "loss": 0.3949,
      "step": 7000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.01609065756201744,
      "epoch": 0.717948717948718,
      "step": 7000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.42916667461395264,
      "wm_acc_tail": 0.2890625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.467106819152832
    },
    {
      "epoch": 0.7384615384615385,
      "grad_norm": 1.4650102853775024,
      "learning_rate": 2e-05,
      "loss": 0.3967,
      "step": 7200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.018057633191347122,
      "epoch": 0.7384615384615385,
      "step": 7200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.27916666865348816,
      "wm_acc_tail": 0.2109375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 4.2004008293151855
    },
    {
      "epoch": 0.7589743589743589,
      "grad_norm": 1.4718828201293945,
      "learning_rate": 2e-05,
      "loss": 0.3982,
      "step": 7400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.07578733563423157,
      "epoch": 0.7589743589743589,
      "step": 7400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.34166669845581055,
      "wm_acc_tail": 0.2421875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.6809661388397217
    },
    {
      "epoch": 0.7794871794871795,
      "grad_norm": 1.5644972324371338,
      "learning_rate": 2e-05,
      "loss": 0.3943,
      "step": 7600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.044367287307977676,
      "epoch": 0.7794871794871795,
      "step": 7600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.40000003576278687,
      "wm_acc_tail": 0.3046875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.2239198684692383
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3958021402359009,
      "learning_rate": 2e-05,
      "loss": 0.3962,
      "step": 7800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0015901032602414489,
      "epoch": 0.8,
      "step": 7800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.47083336114883423,
      "wm_acc_tail": 0.3359375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.1782445907592773
    },
    {
      "epoch": 0.8205128205128205,
      "grad_norm": 1.4607535600662231,
      "learning_rate": 2e-05,
      "loss": 0.3937,
      "step": 8000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.004034082870930433,
      "epoch": 0.8205128205128205,
      "step": 8000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.26250001788139343,
      "wm_acc_tail": 0.1953125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 4.135829925537109
    },
    {
      "epoch": 0.841025641025641,
      "grad_norm": 1.3551061153411865,
      "learning_rate": 2e-05,
      "loss": 0.3979,
      "step": 8200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04078478738665581,
      "epoch": 0.841025641025641,
      "step": 8200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.2958333492279053,
      "wm_acc_tail": 0.1640625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 4.049280166625977
    },
    {
      "epoch": 0.8615384615384616,
      "grad_norm": 1.3672192096710205,
      "learning_rate": 2e-05,
      "loss": 0.3934,
      "step": 8400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.07468579709529877,
      "epoch": 0.8615384615384616,
      "step": 8400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.2708333432674408,
      "wm_acc_tail": 0.203125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 4.340592861175537
    },
    {
      "epoch": 0.882051282051282,
      "grad_norm": 1.4376333951950073,
      "learning_rate": 2e-05,
      "loss": 0.3958,
      "step": 8600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.026692824438214302,
      "epoch": 0.882051282051282,
      "step": 8600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.34166669845581055,
      "wm_acc_tail": 0.2109375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 4.00715446472168
    },
    {
      "epoch": 0.9025641025641026,
      "grad_norm": 1.4733850955963135,
      "learning_rate": 2e-05,
      "loss": 0.3952,
      "step": 8800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.01416705921292305,
      "epoch": 0.9025641025641026,
      "step": 8800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.34583336114883423,
      "wm_acc_tail": 0.2109375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.828453779220581
    },
    {
      "epoch": 0.9230769230769231,
      "grad_norm": 1.3863928318023682,
      "learning_rate": 2e-05,
      "loss": 0.3931,
      "step": 9000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03890518844127655,
      "epoch": 0.9230769230769231,
      "step": 9000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.36666667461395264,
      "wm_acc_tail": 0.2578125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.6811275482177734
    },
    {
      "epoch": 0.9435897435897436,
      "grad_norm": 1.4648759365081787,
      "learning_rate": 2e-05,
      "loss": 0.3956,
      "step": 9200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03804709389805794,
      "epoch": 0.9435897435897436,
      "step": 9200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.46250003576278687,
      "wm_acc_tail": 0.3515625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.9158973693847656
    },
    {
      "epoch": 0.9641025641025641,
      "grad_norm": 1.3553239107131958,
      "learning_rate": 2e-05,
      "loss": 0.3927,
      "step": 9400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.06128055602312088,
      "epoch": 0.9641025641025641,
      "step": 9400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3916666805744171,
      "wm_acc_tail": 0.2890625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.5097744464874268
    },
    {
      "epoch": 0.9846153846153847,
      "grad_norm": 1.6811316013336182,
      "learning_rate": 2e-05,
      "loss": 0.3947,
      "step": 9600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.045378539711236954,
      "epoch": 0.9846153846153847,
      "step": 9600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.2750000059604645,
      "wm_acc_tail": 0.203125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.9966671466827393
    },
    {
      "epoch": 1.005128205128205,
      "grad_norm": 1.4799553155899048,
      "learning_rate": 2e-05,
      "loss": 0.3924,
      "step": 9800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04443782567977905,
      "epoch": 1.005128205128205,
      "step": 9800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3541666865348816,
      "wm_acc_tail": 0.25,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.8639302253723145
    },
    {
      "epoch": 1.0256410256410255,
      "grad_norm": 1.5605977773666382,
      "learning_rate": 2e-05,
      "loss": 0.3932,
      "step": 10000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.023311596363782883,
      "epoch": 1.0256410256410255,
      "step": 10000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3125000298023224,
      "wm_acc_tail": 0.2265625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 4.166300296783447
    },
    {
      "epoch": 1.0461538461538462,
      "grad_norm": 1.4259541034698486,
      "learning_rate": 2e-05,
      "loss": 0.3961,
      "step": 10200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.004396775737404823,
      "epoch": 1.0461538461538462,
      "step": 10200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.46250003576278687,
      "wm_acc_tail": 0.3828125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.9529929161071777
    },
    {
      "epoch": 1.0666666666666667,
      "grad_norm": 1.4113320112228394,
      "learning_rate": 2e-05,
      "loss": 0.3932,
      "step": 10400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.014129268005490303,
      "epoch": 1.0666666666666667,
      "step": 10400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.36250001192092896,
      "wm_acc_tail": 0.2421875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.424802303314209
    },
    {
      "epoch": 1.087179487179487,
      "grad_norm": 1.410915732383728,
      "learning_rate": 2e-05,
      "loss": 0.3923,
      "step": 10600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.013658558018505573,
      "epoch": 1.087179487179487,
      "step": 10600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3500000238418579,
      "wm_acc_tail": 0.21875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.731975555419922
    },
    {
      "epoch": 1.1076923076923078,
      "grad_norm": 1.5998386144638062,
      "learning_rate": 2e-05,
      "loss": 0.3915,
      "step": 10800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03611575812101364,
      "epoch": 1.1076923076923078,
      "step": 10800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3375000059604645,
      "wm_acc_tail": 0.2109375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.8375022411346436
    },
    {
      "epoch": 1.1282051282051282,
      "grad_norm": 1.6531355381011963,
      "learning_rate": 2e-05,
      "loss": 0.3904,
      "step": 11000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.042696237564086914,
      "epoch": 1.1282051282051282,
      "step": 11000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4333333671092987,
      "wm_acc_tail": 0.28125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.9178273677825928
    },
    {
      "epoch": 1.1487179487179486,
      "grad_norm": 1.4610941410064697,
      "learning_rate": 2e-05,
      "loss": 0.393,
      "step": 11200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.023624129593372345,
      "epoch": 1.1487179487179486,
      "step": 11200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.5666667222976685,
      "wm_acc_tail": 0.4921875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.4824001789093018
    },
    {
      "epoch": 1.1692307692307693,
      "grad_norm": 1.5470315217971802,
      "learning_rate": 2e-05,
      "loss": 0.394,
      "step": 11400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0320688895881176,
      "epoch": 1.1692307692307693,
      "step": 11400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4208333492279053,
      "wm_acc_tail": 0.34375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.2636866569519043
    },
    {
      "epoch": 1.1897435897435897,
      "grad_norm": 1.5323113203048706,
      "learning_rate": 2e-05,
      "loss": 0.3907,
      "step": 11600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.016900230199098587,
      "epoch": 1.1897435897435897,
      "step": 11600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.44583335518836975,
      "wm_acc_tail": 0.3359375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.1652252674102783
    },
    {
      "epoch": 1.2102564102564102,
      "grad_norm": 1.7143681049346924,
      "learning_rate": 2e-05,
      "loss": 0.3906,
      "step": 11800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.040420934557914734,
      "epoch": 1.2102564102564102,
      "step": 11800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.24166667461395264,
      "wm_acc_tail": 0.1875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 4.34954833984375
    },
    {
      "epoch": 1.2307692307692308,
      "grad_norm": 1.4635480642318726,
      "learning_rate": 2e-05,
      "loss": 0.3924,
      "step": 12000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.14603480696678162,
      "epoch": 1.2307692307692308,
      "step": 12000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3750000298023224,
      "wm_acc_tail": 0.25,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.718276262283325
    },
    {
      "epoch": 1.2512820512820513,
      "grad_norm": 1.4752073287963867,
      "learning_rate": 2e-05,
      "loss": 0.3908,
      "step": 12200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.028994087129831314,
      "epoch": 1.2512820512820513,
      "step": 12200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3583333492279053,
      "wm_acc_tail": 0.234375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.685457706451416
    },
    {
      "epoch": 1.2717948717948717,
      "grad_norm": 1.4292196035385132,
      "learning_rate": 2e-05,
      "loss": 0.3909,
      "step": 12400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0843985304236412,
      "epoch": 1.2717948717948717,
      "step": 12400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3291666805744171,
      "wm_acc_tail": 0.25,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.8521623611450195
    },
    {
      "epoch": 1.2923076923076924,
      "grad_norm": 1.498486876487732,
      "learning_rate": 2e-05,
      "loss": 0.3898,
      "step": 12600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.024806326255202293,
      "epoch": 1.2923076923076924,
      "step": 12600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.34166669845581055,
      "wm_acc_tail": 0.25,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.917541265487671
    },
    {
      "epoch": 1.3128205128205128,
      "grad_norm": 1.5838302373886108,
      "learning_rate": 2e-05,
      "loss": 0.3905,
      "step": 12800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03449104726314545,
      "epoch": 1.3128205128205128,
      "step": 12800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.48750001192092896,
      "wm_acc_tail": 0.3515625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.9455487728118896
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 1.851238489151001,
      "learning_rate": 2e-05,
      "loss": 0.3899,
      "step": 13000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.05558072403073311,
      "epoch": 1.3333333333333333,
      "step": 13000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.34583336114883423,
      "wm_acc_tail": 0.2578125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.8734242916107178
    },
    {
      "epoch": 1.353846153846154,
      "grad_norm": 1.5212973356246948,
      "learning_rate": 2e-05,
      "loss": 0.3903,
      "step": 13200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04731273278594017,
      "epoch": 1.353846153846154,
      "step": 13200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.44166669249534607,
      "wm_acc_tail": 0.28125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.0312440395355225
    },
    {
      "epoch": 1.3743589743589744,
      "grad_norm": 1.5241458415985107,
      "learning_rate": 2e-05,
      "loss": 0.3909,
      "step": 13400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03890714794397354,
      "epoch": 1.3743589743589744,
      "step": 13400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.37916669249534607,
      "wm_acc_tail": 0.265625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.621284246444702
    },
    {
      "epoch": 1.3948717948717948,
      "grad_norm": 1.4947129487991333,
      "learning_rate": 2e-05,
      "loss": 0.3927,
      "step": 13600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.024421326816082,
      "epoch": 1.3948717948717948,
      "step": 13600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3916666805744171,
      "wm_acc_tail": 0.25,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.4331119060516357
    },
    {
      "epoch": 1.4153846153846155,
      "grad_norm": 1.4279069900512695,
      "learning_rate": 2e-05,
      "loss": 0.3913,
      "step": 13800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.05231979116797447,
      "epoch": 1.4153846153846155,
      "step": 13800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3750000298023224,
      "wm_acc_tail": 0.2734375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.692835569381714
    },
    {
      "epoch": 1.435897435897436,
      "grad_norm": 1.418777585029602,
      "learning_rate": 2e-05,
      "loss": 0.3897,
      "step": 14000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.06311968713998795,
      "epoch": 1.435897435897436,
      "step": 14000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.31666669249534607,
      "wm_acc_tail": 0.1796875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.668109178543091
    },
    {
      "epoch": 1.4564102564102563,
      "grad_norm": 1.567391276359558,
      "learning_rate": 2e-05,
      "loss": 0.3903,
      "step": 14200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.040513865649700165,
      "epoch": 1.4564102564102563,
      "step": 14200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3333333432674408,
      "wm_acc_tail": 0.2421875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.835188865661621
    },
    {
      "epoch": 1.476923076923077,
      "grad_norm": 1.4088457822799683,
      "learning_rate": 2e-05,
      "loss": 0.3896,
      "step": 14400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.02080567367374897,
      "epoch": 1.476923076923077,
      "step": 14400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.44583335518836975,
      "wm_acc_tail": 0.34375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.044020652770996
    },
    {
      "epoch": 1.4974358974358974,
      "grad_norm": 1.4865601062774658,
      "learning_rate": 2e-05,
      "loss": 0.3898,
      "step": 14600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.017929682508111,
      "epoch": 1.4974358974358974,
      "step": 14600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.42500001192092896,
      "wm_acc_tail": 0.3515625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.576387882232666
    },
    {
      "epoch": 1.5179487179487179,
      "grad_norm": 1.4024782180786133,
      "learning_rate": 2e-05,
      "loss": 0.3893,
      "step": 14800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.01442943885922432,
      "epoch": 1.5179487179487179,
      "step": 14800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.42916667461395264,
      "wm_acc_tail": 0.2890625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.3609468936920166
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 1.5455652475357056,
      "learning_rate": 2e-05,
      "loss": 0.3904,
      "step": 15000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03161302208900452,
      "epoch": 1.5384615384615383,
      "step": 15000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.45000001788139343,
      "wm_acc_tail": 0.296875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.243866205215454
    },
    {
      "epoch": 1.558974358974359,
      "grad_norm": 1.4510728120803833,
      "learning_rate": 2e-05,
      "loss": 0.3889,
      "step": 15200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.013236227445304394,
      "epoch": 1.558974358974359,
      "step": 15200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3583333492279053,
      "wm_acc_tail": 0.2734375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.5351722240448
    },
    {
      "epoch": 1.5794871794871796,
      "grad_norm": 1.5527985095977783,
      "learning_rate": 2e-05,
      "loss": 0.3914,
      "step": 15400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.026610709726810455,
      "epoch": 1.5794871794871796,
      "step": 15400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.34166669845581055,
      "wm_acc_tail": 0.2578125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.6479239463806152
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.4501392841339111,
      "learning_rate": 2e-05,
      "loss": 0.3888,
      "step": 15600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03431869298219681,
      "epoch": 1.6,
      "step": 15600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.46250003576278687,
      "wm_acc_tail": 0.34375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.2219135761260986
    },
    {
      "epoch": 1.6205128205128205,
      "grad_norm": 1.492476224899292,
      "learning_rate": 2e-05,
      "loss": 0.3904,
      "step": 15800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03636428341269493,
      "epoch": 1.6205128205128205,
      "step": 15800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.40000003576278687,
      "wm_acc_tail": 0.296875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.4613499641418457
    },
    {
      "epoch": 1.641025641025641,
      "grad_norm": 1.4169890880584717,
      "learning_rate": 2e-05,
      "loss": 0.3901,
      "step": 16000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.038338519632816315,
      "epoch": 1.641025641025641,
      "step": 16000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4166666865348816,
      "wm_acc_tail": 0.328125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.543891668319702
    },
    {
      "epoch": 1.6615384615384614,
      "grad_norm": 1.5604560375213623,
      "learning_rate": 2e-05,
      "loss": 0.3901,
      "step": 16200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.038870152086019516,
      "epoch": 1.6615384615384614,
      "step": 16200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.2750000059604645,
      "wm_acc_tail": 0.1796875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 4.160886764526367
    },
    {
      "epoch": 1.682051282051282,
      "grad_norm": 1.4771243333816528,
      "learning_rate": 2e-05,
      "loss": 0.3917,
      "step": 16400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.009855646640062332,
      "epoch": 1.682051282051282,
      "step": 16400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3541666865348816,
      "wm_acc_tail": 0.28125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.4382901191711426
    },
    {
      "epoch": 1.7025641025641025,
      "grad_norm": 1.4382553100585938,
      "learning_rate": 2e-05,
      "loss": 0.3893,
      "step": 16600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03487122803926468,
      "epoch": 1.7025641025641025,
      "step": 16600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3541666865348816,
      "wm_acc_tail": 0.2265625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.767749786376953
    },
    {
      "epoch": 1.7230769230769232,
      "grad_norm": 1.342573881149292,
      "learning_rate": 2e-05,
      "loss": 0.3894,
      "step": 16800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.028576789423823357,
      "epoch": 1.7230769230769232,
      "step": 16800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.42500001192092896,
      "wm_acc_tail": 0.3203125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.4979097843170166
    },
    {
      "epoch": 1.7435897435897436,
      "grad_norm": 1.4764020442962646,
      "learning_rate": 2e-05,
      "loss": 0.3921,
      "step": 17000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.02335483394563198,
      "epoch": 1.7435897435897436,
      "step": 17000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3083333373069763,
      "wm_acc_tail": 0.2109375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.7024049758911133
    },
    {
      "epoch": 1.764102564102564,
      "grad_norm": 1.4894694089889526,
      "learning_rate": 2e-05,
      "loss": 0.3895,
      "step": 17200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04700682684779167,
      "epoch": 1.764102564102564,
      "step": 17200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.36666667461395264,
      "wm_acc_tail": 0.25,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.700472116470337
    },
    {
      "epoch": 1.7846153846153845,
      "grad_norm": 1.5218092203140259,
      "learning_rate": 2e-05,
      "loss": 0.3892,
      "step": 17400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0076140230521559715,
      "epoch": 1.7846153846153845,
      "step": 17400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3750000298023224,
      "wm_acc_tail": 0.265625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.5336365699768066
    },
    {
      "epoch": 1.8051282051282052,
      "grad_norm": 1.3163012266159058,
      "learning_rate": 2e-05,
      "loss": 0.3914,
      "step": 17600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.002477464731782675,
      "epoch": 1.8051282051282052,
      "step": 17600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.42916667461395264,
      "wm_acc_tail": 0.34375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.338789224624634
    },
    {
      "epoch": 1.8256410256410256,
      "grad_norm": 1.4332598447799683,
      "learning_rate": 2e-05,
      "loss": 0.3889,
      "step": 17800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.017103038728237152,
      "epoch": 1.8256410256410256,
      "step": 17800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.36666667461395264,
      "wm_acc_tail": 0.2734375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.7265193462371826
    },
    {
      "epoch": 1.8461538461538463,
      "grad_norm": 1.4816334247589111,
      "learning_rate": 2e-05,
      "loss": 0.3892,
      "step": 18000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.030678337439894676,
      "epoch": 1.8461538461538463,
      "step": 18000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3333333432674408,
      "wm_acc_tail": 0.234375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.7759010791778564
    },
    {
      "epoch": 1.8666666666666667,
      "grad_norm": 1.4025768041610718,
      "learning_rate": 2e-05,
      "loss": 0.3889,
      "step": 18200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.05686439201235771,
      "epoch": 1.8666666666666667,
      "step": 18200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.40833336114883423,
      "wm_acc_tail": 0.2890625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.4395804405212402
    },
    {
      "epoch": 1.8871794871794871,
      "grad_norm": 1.532806634902954,
      "learning_rate": 2e-05,
      "loss": 0.3887,
      "step": 18400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.025746779516339302,
      "epoch": 1.8871794871794871,
      "step": 18400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4166666865348816,
      "wm_acc_tail": 0.2734375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.2981905937194824
    },
    {
      "epoch": 1.9076923076923076,
      "grad_norm": 1.4978644847869873,
      "learning_rate": 2e-05,
      "loss": 0.3894,
      "step": 18600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.06122591719031334,
      "epoch": 1.9076923076923076,
      "step": 18600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4833333492279053,
      "wm_acc_tail": 0.34375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.06728196144104
    },
    {
      "epoch": 1.9282051282051282,
      "grad_norm": 1.4785903692245483,
      "learning_rate": 2e-05,
      "loss": 0.3885,
      "step": 18800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.09253613650798798,
      "epoch": 1.9282051282051282,
      "step": 18800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.40000003576278687,
      "wm_acc_tail": 0.296875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.430678367614746
    },
    {
      "epoch": 1.9487179487179487,
      "grad_norm": 1.4245176315307617,
      "learning_rate": 2e-05,
      "loss": 0.3907,
      "step": 19000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04644398018717766,
      "epoch": 1.9487179487179487,
      "step": 19000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.42500001192092896,
      "wm_acc_tail": 0.3125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.2258567810058594
    },
    {
      "epoch": 1.9692307692307693,
      "grad_norm": 1.4846798181533813,
      "learning_rate": 2e-05,
      "loss": 0.3895,
      "step": 19200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.013294296339154243,
      "epoch": 1.9692307692307693,
      "step": 19200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4125000238418579,
      "wm_acc_tail": 0.28125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.253403425216675
    },
    {
      "epoch": 1.9897435897435898,
      "grad_norm": 1.618064045906067,
      "learning_rate": 2e-05,
      "loss": 0.3933,
      "step": 19400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.07541658729314804,
      "epoch": 1.9897435897435898,
      "step": 19400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.38333335518836975,
      "wm_acc_tail": 0.25,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.1926443576812744
    },
    {
      "epoch": 2.01025641025641,
      "grad_norm": 1.4721879959106445,
      "learning_rate": 2e-05,
      "loss": 0.3879,
      "step": 19600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0018250009743496776,
      "epoch": 2.01025641025641,
      "step": 19600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3500000238418579,
      "wm_acc_tail": 0.2421875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.818826913833618
    },
    {
      "epoch": 2.0307692307692307,
      "grad_norm": 1.5306202173233032,
      "learning_rate": 2e-05,
      "loss": 0.3898,
      "step": 19800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.02344120293855667,
      "epoch": 2.0307692307692307,
      "step": 19800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.38750001788139343,
      "wm_acc_tail": 0.28125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.5652098655700684
    },
    {
      "epoch": 2.051282051282051,
      "grad_norm": 1.3842817544937134,
      "learning_rate": 2e-05,
      "loss": 0.3872,
      "step": 20000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04052623361349106,
      "epoch": 2.051282051282051,
      "step": 20000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3958333432674408,
      "wm_acc_tail": 0.2109375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.552062749862671
    },
    {
      "epoch": 2.071794871794872,
      "grad_norm": 1.463069200515747,
      "learning_rate": 2e-05,
      "loss": 0.3877,
      "step": 20200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.008825588971376419,
      "epoch": 2.071794871794872,
      "step": 20200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.48750001192092896,
      "wm_acc_tail": 0.390625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.910459518432617
    },
    {
      "epoch": 2.0923076923076924,
      "grad_norm": 1.5662769079208374,
      "learning_rate": 2e-05,
      "loss": 0.3878,
      "step": 20400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04462813213467598,
      "epoch": 2.0923076923076924,
      "step": 20400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3916666805744171,
      "wm_acc_tail": 0.3125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.577489137649536
    },
    {
      "epoch": 2.112820512820513,
      "grad_norm": 1.5395299196243286,
      "learning_rate": 2e-05,
      "loss": 0.3883,
      "step": 20600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.037117354571819305,
      "epoch": 2.112820512820513,
      "step": 20600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.46250003576278687,
      "wm_acc_tail": 0.359375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.181680679321289
    },
    {
      "epoch": 2.1333333333333333,
      "grad_norm": 1.5279722213745117,
      "learning_rate": 2e-05,
      "loss": 0.3885,
      "step": 20800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0014837193302810192,
      "epoch": 2.1333333333333333,
      "step": 20800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3958333432674408,
      "wm_acc_tail": 0.3125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.55937123298645
    },
    {
      "epoch": 2.1538461538461537,
      "grad_norm": 1.416802167892456,
      "learning_rate": 2e-05,
      "loss": 0.3895,
      "step": 21000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.028997544199228287,
      "epoch": 2.1538461538461537,
      "step": 21000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.38750001788139343,
      "wm_acc_tail": 0.28125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.1908884048461914
    },
    {
      "epoch": 2.174358974358974,
      "grad_norm": 1.3719505071640015,
      "learning_rate": 2e-05,
      "loss": 0.3875,
      "step": 21200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04018910974264145,
      "epoch": 2.174358974358974,
      "step": 21200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.38750001788139343,
      "wm_acc_tail": 0.2734375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.46065092086792
    },
    {
      "epoch": 2.194871794871795,
      "grad_norm": 1.4909734725952148,
      "learning_rate": 2e-05,
      "loss": 0.3897,
      "step": 21400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0389191210269928,
      "epoch": 2.194871794871795,
      "step": 21400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.42500001192092896,
      "wm_acc_tail": 0.3125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.346126079559326
    },
    {
      "epoch": 2.2153846153846155,
      "grad_norm": 1.4870017766952515,
      "learning_rate": 2e-05,
      "loss": 0.389,
      "step": 21600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.025184862315654755,
      "epoch": 2.2153846153846155,
      "step": 21600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.36666667461395264,
      "wm_acc_tail": 0.234375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.472446918487549
    },
    {
      "epoch": 2.235897435897436,
      "grad_norm": 1.5093843936920166,
      "learning_rate": 2e-05,
      "loss": 0.3895,
      "step": 21800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.05302753672003746,
      "epoch": 2.235897435897436,
      "step": 21800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.31666669249534607,
      "wm_acc_tail": 0.1796875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.915269613265991
    },
    {
      "epoch": 2.2564102564102564,
      "grad_norm": 1.4073779582977295,
      "learning_rate": 2e-05,
      "loss": 0.3876,
      "step": 22000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04351036250591278,
      "epoch": 2.2564102564102564,
      "step": 22000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3291666805744171,
      "wm_acc_tail": 0.203125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.965484380722046
    },
    {
      "epoch": 2.276923076923077,
      "grad_norm": 1.4587961435317993,
      "learning_rate": 2e-05,
      "loss": 0.3917,
      "step": 22200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.003597542643547058,
      "epoch": 2.276923076923077,
      "step": 22200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4375000298023224,
      "wm_acc_tail": 0.328125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.3197576999664307
    },
    {
      "epoch": 2.2974358974358973,
      "grad_norm": 1.556536078453064,
      "learning_rate": 2e-05,
      "loss": 0.3874,
      "step": 22400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0076848710887134075,
      "epoch": 2.2974358974358973,
      "step": 22400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3583333492279053,
      "wm_acc_tail": 0.2421875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.703307867050171
    },
    {
      "epoch": 2.3179487179487177,
      "grad_norm": 1.4983381032943726,
      "learning_rate": 2e-05,
      "loss": 0.3897,
      "step": 22600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.060980044305324554,
      "epoch": 2.3179487179487177,
      "step": 22600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3541666865348816,
      "wm_acc_tail": 0.2421875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.721877336502075
    },
    {
      "epoch": 2.3384615384615386,
      "grad_norm": 1.574175238609314,
      "learning_rate": 2e-05,
      "loss": 0.389,
      "step": 22800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.021354133263230324,
      "epoch": 2.3384615384615386,
      "step": 22800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.5041667222976685,
      "wm_acc_tail": 0.46875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.8068106174468994
    },
    {
      "epoch": 2.358974358974359,
      "grad_norm": 1.5010496377944946,
      "learning_rate": 2e-05,
      "loss": 0.3913,
      "step": 23000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0028512319549918175,
      "epoch": 2.358974358974359,
      "step": 23000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3958333432674408,
      "wm_acc_tail": 0.28125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.4476890563964844
    },
    {
      "epoch": 2.3794871794871795,
      "grad_norm": 1.4922162294387817,
      "learning_rate": 2e-05,
      "loss": 0.3877,
      "step": 23200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.017922472208738327,
      "epoch": 2.3794871794871795,
      "step": 23200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.48750001192092896,
      "wm_acc_tail": 0.3671875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 2.940507173538208
    },
    {
      "epoch": 2.4,
      "grad_norm": 1.5313700437545776,
      "learning_rate": 2e-05,
      "loss": 0.388,
      "step": 23400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.1170651838183403,
      "epoch": 2.4,
      "step": 23400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.44166669249534607,
      "wm_acc_tail": 0.2890625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.1523709297180176
    },
    {
      "epoch": 2.4205128205128204,
      "grad_norm": 1.5253169536590576,
      "learning_rate": 2e-05,
      "loss": 0.3877,
      "step": 23600
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.025437969714403152,
      "epoch": 2.4205128205128204,
      "step": 23600,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.34583336114883423,
      "wm_acc_tail": 0.265625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.8073008060455322
    },
    {
      "epoch": 2.4410256410256412,
      "grad_norm": 1.5971364974975586,
      "learning_rate": 2e-05,
      "loss": 0.3878,
      "step": 23800
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.04568465054035187,
      "epoch": 2.4410256410256412,
      "step": 23800,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4125000238418579,
      "wm_acc_tail": 0.3203125,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.2942323684692383
    },
    {
      "epoch": 2.4615384615384617,
      "grad_norm": 1.43277907371521,
      "learning_rate": 2e-05,
      "loss": 0.3885,
      "step": 24000
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.0314912348985672,
      "epoch": 2.4615384615384617,
      "step": 24000,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.4750000238418579,
      "wm_acc_tail": 0.359375,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.2301626205444336
    },
    {
      "epoch": 2.482051282051282,
      "grad_norm": 1.4152660369873047,
      "learning_rate": 2e-05,
      "loss": 0.3871,
      "step": 24200
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.05173131823539734,
      "epoch": 2.482051282051282,
      "step": 24200,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3583333492279053,
      "wm_acc_tail": 0.2265625,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.577099323272705
    },
    {
      "epoch": 2.5025641025641026,
      "grad_norm": 1.4160469770431519,
      "learning_rate": 2e-05,
      "loss": 0.3872,
      "step": 24400
    },
    {
      "action_learning_rate": 5e-05,
      "action_loss": 0.03394094109535217,
      "epoch": 2.5025641025641026,
      "step": 24400,
      "vit_learning_rate": 2e-05,
      "wm_acc_mean": 0.3333333432674408,
      "wm_acc_tail": 0.2421875,
      "wm_learning_rate": 5e-05,
      "wm_out_loss": 3.7894275188446045
    },
    {
      "epoch": 2.523076923076923,
      "grad_norm": 1.5736653804779053,
| "learning_rate": 2e-05, | |
| "loss": 0.387, | |
| "step": 24600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.009905572980642319, | |
| "epoch": 2.523076923076923, | |
| "step": 24600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3500000238418579, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.123368263244629 | |
| }, | |
| { | |
| "epoch": 2.5435897435897434, | |
| "grad_norm": 1.409644365310669, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3871, | |
| "step": 24800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.044106125831604004, | |
| "epoch": 2.5435897435897434, | |
| "step": 24800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5186243057250977 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 1.4453808069229126, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3895, | |
| "step": 25000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.028986696153879166, | |
| "epoch": 2.564102564102564, | |
| "step": 25000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42500001192092896, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.403463840484619 | |
| }, | |
| { | |
| "epoch": 2.5846153846153848, | |
| "grad_norm": 1.4683994054794312, | |
| "learning_rate": 2e-05, | |
| "loss": 0.387, | |
| "step": 25200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.003974465187638998, | |
| "epoch": 2.5846153846153848, | |
| "step": 25200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32500001788139343, | |
| "wm_acc_tail": 0.171875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7244718074798584 | |
| }, | |
| { | |
| "epoch": 2.605128205128205, | |
| "grad_norm": 1.493219256401062, | |
| "learning_rate": 2e-05, | |
| "loss": 0.389, | |
| "step": 25400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.021346906200051308, | |
| "epoch": 2.605128205128205, | |
| "step": 25400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.792936325073242 | |
| }, | |
| { | |
| "epoch": 2.6256410256410256, | |
| "grad_norm": 1.4262144565582275, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3879, | |
| "step": 25600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0015618682373315096, | |
| "epoch": 2.6256410256410256, | |
| "step": 25600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.1796875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5958924293518066 | |
| }, | |
| { | |
| "epoch": 2.646153846153846, | |
| "grad_norm": 1.4737701416015625, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3879, | |
| "step": 25800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0823127031326294, | |
| "epoch": 2.646153846153846, | |
| "step": 25800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.491666704416275, | |
| "wm_acc_tail": 0.4140625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.797745704650879 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 1.51614248752594, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3881, | |
| "step": 26000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.018522437661886215, | |
| "epoch": 2.6666666666666665, | |
| "step": 26000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4333333671092987, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2087600231170654 | |
| }, | |
| { | |
| "epoch": 2.6871794871794874, | |
| "grad_norm": 1.4669991731643677, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3919, | |
| "step": 26200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.032568737864494324, | |
| "epoch": 2.6871794871794874, | |
| "step": 26200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2750000059604645, | |
| "wm_acc_tail": 0.1953125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.328549861907959 | |
| }, | |
| { | |
| "epoch": 2.707692307692308, | |
| "grad_norm": 1.5245352983474731, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3879, | |
| "step": 26400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.037205372005701065, | |
| "epoch": 2.707692307692308, | |
| "step": 26400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4833333492279053, | |
| "wm_acc_tail": 0.375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.16813063621521 | |
| }, | |
| { | |
| "epoch": 2.7282051282051283, | |
| "grad_norm": 1.4143562316894531, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3924, | |
| "step": 26600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.003386417170986533, | |
| "epoch": 2.7282051282051283, | |
| "step": 26600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.46666669845581055, | |
| "wm_acc_tail": 0.3359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.7259981632232666 | |
| }, | |
| { | |
| "epoch": 2.7487179487179487, | |
| "grad_norm": 1.4961721897125244, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3881, | |
| "step": 26800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0256776362657547, | |
| "epoch": 2.7487179487179487, | |
| "step": 26800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5, | |
| "wm_acc_tail": 0.40625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.7547457218170166 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 1.6144366264343262, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 27000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04213555529713631, | |
| "epoch": 2.769230769230769, | |
| "step": 27000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2958333492279053, | |
| "wm_acc_tail": 0.203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.072260856628418 | |
| }, | |
| { | |
| "epoch": 2.7897435897435896, | |
| "grad_norm": 1.5208280086517334, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3899, | |
| "step": 27200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.07453599572181702, | |
| "epoch": 2.7897435897435896, | |
| "step": 27200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6939046382904053 | |
| }, | |
| { | |
| "epoch": 2.81025641025641, | |
| "grad_norm": 1.4335572719573975, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3876, | |
| "step": 27400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02030731551349163, | |
| "epoch": 2.81025641025641, | |
| "step": 27400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3333333432674408, | |
| "wm_acc_tail": 0.21875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8225529193878174 | |
| }, | |
| { | |
| "epoch": 2.830769230769231, | |
| "grad_norm": 1.3971728086471558, | |
| "learning_rate": 2e-05, | |
| "loss": 0.39, | |
| "step": 27600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.012988986447453499, | |
| "epoch": 2.830769230769231, | |
| "step": 27600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.21875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.80534291267395 | |
| }, | |
| { | |
| "epoch": 2.8512820512820514, | |
| "grad_norm": 1.467367172241211, | |
| "learning_rate": 2e-05, | |
| "loss": 0.388, | |
| "step": 27800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.00312808551825583, | |
| "epoch": 2.8512820512820514, | |
| "step": 27800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5492842197418213 | |
| }, | |
| { | |
| "epoch": 2.871794871794872, | |
| "grad_norm": 1.4223904609680176, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3869, | |
| "step": 28000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008621451444923878, | |
| "epoch": 2.871794871794872, | |
| "step": 28000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3541666865348816, | |
| "wm_acc_tail": 0.1953125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5689311027526855 | |
| }, | |
| { | |
| "epoch": 2.8923076923076922, | |
| "grad_norm": 1.5225142240524292, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3889, | |
| "step": 28200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.010706331580877304, | |
| "epoch": 2.8923076923076922, | |
| "step": 28200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34583336114883423, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8479461669921875 | |
| }, | |
| { | |
| "epoch": 2.9128205128205127, | |
| "grad_norm": 1.4230109453201294, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3874, | |
| "step": 28400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.007974586449563503, | |
| "epoch": 2.9128205128205127, | |
| "step": 28400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3583333492279053, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.747277021408081 | |
| }, | |
| { | |
| "epoch": 2.9333333333333336, | |
| "grad_norm": 1.5248308181762695, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3933, | |
| "step": 28600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02767806313931942, | |
| "epoch": 2.9333333333333336, | |
| "step": 28600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3583333492279053, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.854739189147949 | |
| }, | |
| { | |
| "epoch": 2.953846153846154, | |
| "grad_norm": 1.5826784372329712, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 28800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.007122544106096029, | |
| "epoch": 2.953846153846154, | |
| "step": 28800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.25833335518836975, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.294154167175293 | |
| }, | |
| { | |
| "epoch": 2.9743589743589745, | |
| "grad_norm": 1.4472224712371826, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3894, | |
| "step": 29000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.029514769092202187, | |
| "epoch": 2.9743589743589745, | |
| "step": 29000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3916666805744171, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4288177490234375 | |
| }, | |
| { | |
| "epoch": 2.994871794871795, | |
| "grad_norm": 1.4700934886932373, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3869, | |
| "step": 29200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.033029720187187195, | |
| "epoch": 2.994871794871795, | |
| "step": 29200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.29390287399292 | |
| }, | |
| { | |
| "epoch": 3.0153846153846153, | |
| "grad_norm": 1.4764951467514038, | |
| "learning_rate": 2e-05, | |
| "loss": 0.389, | |
| "step": 29400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.019703233614563942, | |
| "epoch": 3.0153846153846153, | |
| "step": 29400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3583333492279053, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.817373752593994 | |
| }, | |
| { | |
| "epoch": 3.0358974358974358, | |
| "grad_norm": 1.500429630279541, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3867, | |
| "step": 29600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.026412876322865486, | |
| "epoch": 3.0358974358974358, | |
| "step": 29600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.546070098876953 | |
| }, | |
| { | |
| "epoch": 3.056410256410256, | |
| "grad_norm": 1.3552403450012207, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3867, | |
| "step": 29800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.004673309158533812, | |
| "epoch": 3.056410256410256, | |
| "step": 29800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4166666865348816, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.336923837661743 | |
| }, | |
| { | |
| "epoch": 3.076923076923077, | |
| "grad_norm": 1.476533055305481, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3884, | |
| "step": 30000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.009490852244198322, | |
| "epoch": 3.076923076923077, | |
| "step": 30000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3916666805744171, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.405824899673462 | |
| }, | |
| { | |
| "epoch": 3.0974358974358975, | |
| "grad_norm": 1.6581186056137085, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3866, | |
| "step": 30200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03696921840310097, | |
| "epoch": 3.0974358974358975, | |
| "step": 30200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3500000238418579, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8570985794067383 | |
| }, | |
| { | |
| "epoch": 3.117948717948718, | |
| "grad_norm": 1.526413083076477, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3882, | |
| "step": 30400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0716283768415451, | |
| "epoch": 3.117948717948718, | |
| "step": 30400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4333333671092987, | |
| "wm_acc_tail": 0.328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.341503143310547 | |
| }, | |
| { | |
| "epoch": 3.1384615384615384, | |
| "grad_norm": 1.539967656135559, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3867, | |
| "step": 30600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008754679933190346, | |
| "epoch": 3.1384615384615384, | |
| "step": 30600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3541666865348816, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5535738468170166 | |
| }, | |
| { | |
| "epoch": 3.158974358974359, | |
| "grad_norm": 1.4273923635482788, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3884, | |
| "step": 30800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.060118481516838074, | |
| "epoch": 3.158974358974359, | |
| "step": 30800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.340306520462036 | |
| }, | |
| { | |
| "epoch": 3.1794871794871793, | |
| "grad_norm": 1.6495362520217896, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3874, | |
| "step": 31000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04556620493531227, | |
| "epoch": 3.1794871794871793, | |
| "step": 31000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3988497257232666 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 1.4728152751922607, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 31200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03349052369594574, | |
| "epoch": 3.2, | |
| "step": 31200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3583333492279053, | |
| "wm_acc_tail": 0.1953125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.725931167602539 | |
| }, | |
| { | |
| "epoch": 3.2205128205128206, | |
| "grad_norm": 1.3835036754608154, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3874, | |
| "step": 31400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.006235354579985142, | |
| "epoch": 3.2205128205128206, | |
| "step": 31400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4125000238418579, | |
| "wm_acc_tail": 0.328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2483482360839844 | |
| }, | |
| { | |
| "epoch": 3.241025641025641, | |
| "grad_norm": 1.4785337448120117, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 31600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.00963902659714222, | |
| "epoch": 3.241025641025641, | |
| "step": 31600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3429524898529053 | |
| }, | |
| { | |
| "epoch": 3.2615384615384615, | |
| "grad_norm": 1.4489798545837402, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3891, | |
| "step": 31800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02218327671289444, | |
| "epoch": 3.2615384615384615, | |
| "step": 31800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4166666865348816, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.39813494682312 | |
| }, | |
| { | |
| "epoch": 3.282051282051282, | |
| "grad_norm": 1.5042344331741333, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3868, | |
| "step": 32000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.10154395550489426, | |
| "epoch": 3.282051282051282, | |
| "step": 32000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.512002468109131 | |
| }, | |
| { | |
| "epoch": 3.3025641025641024, | |
| "grad_norm": 1.3888541460037231, | |
| "learning_rate": 2e-05, | |
| "loss": 0.386, | |
| "step": 32200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.060953158885240555, | |
| "epoch": 3.3025641025641024, | |
| "step": 32200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6787185668945312 | |
| }, | |
| { | |
| "epoch": 3.3230769230769233, | |
| "grad_norm": 1.477118968963623, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3864, | |
| "step": 32400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.07284487783908844, | |
| "epoch": 3.3230769230769233, | |
| "step": 32400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34583336114883423, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6856086254119873 | |
| }, | |
| { | |
| "epoch": 3.3435897435897437, | |
| "grad_norm": 1.4373570680618286, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3899, | |
| "step": 32600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.035309016704559326, | |
| "epoch": 3.3435897435897437, | |
| "step": 32600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9572913646698 | |
| }, | |
| { | |
| "epoch": 3.364102564102564, | |
| "grad_norm": 1.602277398109436, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3894, | |
| "step": 32800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.004685917869210243, | |
| "epoch": 3.364102564102564, | |
| "step": 32800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4882845878601074 | |
| }, | |
| { | |
| "epoch": 3.3846153846153846, | |
| "grad_norm": 1.5022590160369873, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 33000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.014063969254493713, | |
| "epoch": 3.3846153846153846, | |
| "step": 33000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3958333432674408, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6028807163238525 | |
| }, | |
| { | |
| "epoch": 3.405128205128205, | |
| "grad_norm": 1.3358067274093628, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3867, | |
| "step": 33200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05115222558379173, | |
| "epoch": 3.405128205128205, | |
| "step": 33200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3541666865348816, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.729491710662842 | |
| }, | |
| { | |
| "epoch": 3.4256410256410255, | |
| "grad_norm": 1.5712002515792847, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3861, | |
| "step": 33400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.040494609624147415, | |
| "epoch": 3.4256410256410255, | |
| "step": 33400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36666667461395264, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.904383897781372 | |
| }, | |
| { | |
| "epoch": 3.4461538461538463, | |
| "grad_norm": 1.9585435390472412, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3879, | |
| "step": 33600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.020133905112743378, | |
| "epoch": 3.4461538461538463, | |
| "step": 33600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3916666805744171, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5860111713409424 | |
| }, | |
| { | |
| "epoch": 3.466666666666667, | |
| "grad_norm": 1.4978489875793457, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3869, | |
| "step": 33800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03127191960811615, | |
| "epoch": 3.466666666666667, | |
| "step": 33800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3541769981384277 | |
| }, | |
| { | |
| "epoch": 3.4871794871794872, | |
| "grad_norm": 1.4658176898956299, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3864, | |
| "step": 34000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04670090973377228, | |
| "epoch": 3.4871794871794872, | |
| "step": 34000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3500000238418579, | |
| "wm_acc_tail": 0.203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7610206604003906 | |
| }, | |
| { | |
| "epoch": 3.5076923076923077, | |
| "grad_norm": 1.5037034749984741, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3893, | |
| "step": 34200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.039000004529953, | |
| "epoch": 3.5076923076923077, | |
| "step": 34200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3916666805744171, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.585411787033081 | |
| }, | |
| { | |
| "epoch": 3.528205128205128, | |
| "grad_norm": 1.3355331420898438, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3883, | |
| "step": 34400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03782371059060097, | |
| "epoch": 3.528205128205128, | |
| "step": 34400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2119288444519043 | |
| }, | |
| { | |
| "epoch": 3.5487179487179485, | |
| "grad_norm": 1.5419424772262573, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3863, | |
| "step": 34600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008601857349276543, | |
| "epoch": 3.5487179487179485, | |
| "step": 34600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32500001788139343, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7497758865356445 | |
| }, | |
| { | |
| "epoch": 3.569230769230769, | |
| "grad_norm": 1.4731394052505493, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3863, | |
| "step": 34800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.020187292248010635, | |
| "epoch": 3.569230769230769, | |
| "step": 34800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2750000059604645, | |
| "wm_acc_tail": 0.15625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.3170905113220215 | |
| }, | |
| { | |
| "epoch": 3.58974358974359, | |
| "grad_norm": 1.5813066959381104, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3869, | |
| "step": 35000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0018536999123170972, | |
| "epoch": 3.58974358974359, | |
| "step": 35000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2916666865348816, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.0756449699401855 | |
| }, | |
| { | |
| "epoch": 3.6102564102564103, | |
| "grad_norm": 1.5061304569244385, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3859, | |
| "step": 35200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.024228887632489204, | |
| "epoch": 3.6102564102564103, | |
| "step": 35200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44166669249534607, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.286659002304077 | |
| }, | |
| { | |
| "epoch": 3.6307692307692307, | |
| "grad_norm": 1.790723443031311, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3894, | |
| "step": 35400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.023501893505454063, | |
| "epoch": 3.6307692307692307, | |
| "step": 35400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3916666805744171, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7225608825683594 | |
| }, | |
| { | |
| "epoch": 3.651282051282051, | |
| "grad_norm": 1.5373083353042603, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3869, | |
| "step": 35600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05589212849736214, | |
| "epoch": 3.651282051282051, | |
| "step": 35600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7108023166656494 | |
| }, | |
| { | |
| "epoch": 3.6717948717948716, | |
| "grad_norm": 1.5005486011505127, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3891, | |
| "step": 35800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.022814037278294563, | |
| "epoch": 3.6717948717948716, | |
| "step": 35800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.1875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7163655757904053 | |
| }, | |
| { | |
| "epoch": 3.6923076923076925, | |
| "grad_norm": 1.471091628074646, | |
| "learning_rate": 2e-05, | |
| "loss": 0.389, | |
| "step": 36000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04794114828109741, | |
| "epoch": 3.6923076923076925, | |
| "step": 36000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2954134941101074 | |
| }, | |
| { | |
| "epoch": 3.712820512820513, | |
| "grad_norm": 1.6404759883880615, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 36200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.026450395584106445, | |
| "epoch": 3.712820512820513, | |
| "step": 36200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3500000238418579, | |
| "wm_acc_tail": 0.328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9437618255615234 | |
| }, | |
| { | |
| "epoch": 3.7333333333333334, | |
| "grad_norm": 1.4991207122802734, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3873, | |
| "step": 36400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04349401965737343, | |
| "epoch": 3.7333333333333334, | |
| "step": 36400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3739945888519287 | |
| }, | |
| { | |
| "epoch": 3.753846153846154, | |
| "grad_norm": 1.4097429513931274, | |
| "learning_rate": 2e-05, | |
| "loss": 0.387, | |
| "step": 36600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04169745370745659, | |
| "epoch": 3.753846153846154, | |
| "step": 36600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44583335518836975, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2198851108551025 | |
| }, | |
| { | |
| "epoch": 3.7743589743589743, | |
| "grad_norm": 1.5394887924194336, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 36800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0014998462283983827, | |
| "epoch": 3.7743589743589743, | |
| "step": 36800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.30416667461395264, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.24046516418457 | |
| }, | |
| { | |
| "epoch": 3.7948717948717947, | |
| "grad_norm": 1.4933193922042847, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3897, | |
| "step": 37000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.013612525537610054, | |
| "epoch": 3.7948717948717947, | |
| "step": 37000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34583336114883423, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5747547149658203 | |
| }, | |
| { | |
| "epoch": 3.815384615384615, | |
| "grad_norm": 1.4842512607574463, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3871, | |
| "step": 37200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.011224096640944481, | |
| "epoch": 3.815384615384615, | |
| "step": 37200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3541666865348816, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.890993595123291 | |
| }, | |
| { | |
| "epoch": 3.835897435897436, | |
| "grad_norm": 1.4974702596664429, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3868, | |
| "step": 37400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.028456171974539757, | |
| "epoch": 3.835897435897436, | |
| "step": 37400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40416669845581055, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.444819211959839 | |
| }, | |
| { | |
| "epoch": 3.8564102564102565, | |
| "grad_norm": 1.4589213132858276, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3871, | |
| "step": 37600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0070193978026509285, | |
| "epoch": 3.8564102564102565, | |
| "step": 37600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2916666865348816, | |
| "wm_acc_tail": 0.1796875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.018898963928223 | |
| }, | |
| { | |
| "epoch": 3.876923076923077, | |
| "grad_norm": 1.4236966371536255, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3889, | |
| "step": 37800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06821958720684052, | |
| "epoch": 3.876923076923077, | |
| "step": 37800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5083333849906921, | |
| "wm_acc_tail": 0.3671875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.929452657699585 | |
| }, | |
| { | |
| "epoch": 3.8974358974358974, | |
| "grad_norm": 1.4124774932861328, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3868, | |
| "step": 38000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0013521455693989992, | |
| "epoch": 3.8974358974358974, | |
| "step": 38000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3708333671092987, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.393043279647827 | |
| }, | |
| { | |
| "epoch": 3.917948717948718, | |
| "grad_norm": 1.4662126302719116, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3865, | |
| "step": 38200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02276427112519741, | |
| "epoch": 3.917948717948718, | |
| "step": 38200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6165518760681152 | |
| }, | |
| { | |
| "epoch": 3.9384615384615387, | |
| "grad_norm": 1.65989089012146, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3865, | |
| "step": 38400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.027003858238458633, | |
| "epoch": 3.9384615384615387, | |
| "step": 38400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38333335518836975, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4005722999572754 | |
| }, | |
| { | |
| "epoch": 3.958974358974359, | |
| "grad_norm": 1.434751033782959, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3864, | |
| "step": 38600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03400566056370735, | |
| "epoch": 3.958974358974359, | |
| "step": 38600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2708333432674408, | |
| "wm_acc_tail": 0.1953125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.300734043121338 | |
| }, | |
| { | |
| "epoch": 3.9794871794871796, | |
| "grad_norm": 1.603958249092102, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3891, | |
| "step": 38800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.051449522376060486, | |
| "epoch": 3.9794871794871796, | |
| "step": 38800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5019376277923584 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.515657663345337, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3906, | |
| "step": 39000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.004214930813759565, | |
| "epoch": 4.0, | |
| "step": 39000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44583335518836975, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.0174567699432373 | |
| }, | |
| { | |
| "epoch": 4.02051282051282, | |
| "grad_norm": 1.540717601776123, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3864, | |
| "step": 39200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.07484902441501617, | |
| "epoch": 4.02051282051282, | |
| "step": 39200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4080257415771484 | |
| }, | |
| { | |
| "epoch": 4.041025641025641, | |
| "grad_norm": 1.6538184881210327, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3863, | |
| "step": 39400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02911394275724888, | |
| "epoch": 4.041025641025641, | |
| "step": 39400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32083335518836975, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8047070503234863 | |
| }, | |
| { | |
| "epoch": 4.061538461538461, | |
| "grad_norm": 1.4197715520858765, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 39600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.030046191066503525, | |
| "epoch": 4.061538461538461, | |
| "step": 39600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38333335518836975, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.478719711303711 | |
| }, | |
| { | |
| "epoch": 4.082051282051282, | |
| "grad_norm": 1.4359487295150757, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3864, | |
| "step": 39800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.022685207426548004, | |
| "epoch": 4.082051282051282, | |
| "step": 39800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.598498821258545 | |
| }, | |
| { | |
| "epoch": 4.102564102564102, | |
| "grad_norm": 1.4681591987609863, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3866, | |
| "step": 40000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0063376594334840775, | |
| "epoch": 4.102564102564102, | |
| "step": 40000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44166669249534607, | |
| "wm_acc_tail": 0.3671875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.0440094470977783 | |
| }, | |
| { | |
| "epoch": 4.123076923076923, | |
| "grad_norm": 1.4982995986938477, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3881, | |
| "step": 40200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.024779850617051125, | |
| "epoch": 4.123076923076923, | |
| "step": 40200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4166666865348816, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3038814067840576 | |
| }, | |
| { | |
| "epoch": 4.143589743589744, | |
| "grad_norm": 1.530997633934021, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 40400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.027216836810112, | |
| "epoch": 4.143589743589744, | |
| "step": 40400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32083335518836975, | |
| "wm_acc_tail": 0.1875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6868953704833984 | |
| }, | |
| { | |
| "epoch": 4.164102564102564, | |
| "grad_norm": 1.7809888124465942, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3884, | |
| "step": 40600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.007897686213254929, | |
| "epoch": 4.164102564102564, | |
| "step": 40600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.48750001192092896, | |
| "wm_acc_tail": 0.375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.9763967990875244 | |
| }, | |
| { | |
| "epoch": 4.184615384615385, | |
| "grad_norm": 1.6376887559890747, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3863, | |
| "step": 40800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.014669639058411121, | |
| "epoch": 4.184615384615385, | |
| "step": 40800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4958333671092987, | |
| "wm_acc_tail": 0.3828125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.0125534534454346 | |
| }, | |
| { | |
| "epoch": 4.205128205128205, | |
| "grad_norm": 1.4559520483016968, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 41000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01763276755809784, | |
| "epoch": 4.205128205128205, | |
| "step": 41000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3958333432674408, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5036072731018066 | |
| }, | |
| { | |
| "epoch": 4.225641025641026, | |
| "grad_norm": 1.5217077732086182, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3884, | |
| "step": 41200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.035533029586076736, | |
| "epoch": 4.225641025641026, | |
| "step": 41200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.46666669845581055, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.016638994216919 | |
| }, | |
| { | |
| "epoch": 4.246153846153846, | |
| "grad_norm": 1.489271879196167, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3859, | |
| "step": 41400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04831657186150551, | |
| "epoch": 4.246153846153846, | |
| "step": 41400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.581559419631958 | |
| }, | |
| { | |
| "epoch": 4.266666666666667, | |
| "grad_norm": 1.4234576225280762, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3896, | |
| "step": 41600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.07114691287279129, | |
| "epoch": 4.266666666666667, | |
| "step": 41600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5250000357627869, | |
| "wm_acc_tail": 0.40625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.825859308242798 | |
| }, | |
| { | |
| "epoch": 4.287179487179487, | |
| "grad_norm": 1.4943255186080933, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 41800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03429649770259857, | |
| "epoch": 4.287179487179487, | |
| "step": 41800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3708333671092987, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7633705139160156 | |
| }, | |
| { | |
| "epoch": 4.3076923076923075, | |
| "grad_norm": 1.4343676567077637, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 42000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02486131712794304, | |
| "epoch": 4.3076923076923075, | |
| "step": 42000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4712772369384766 | |
| }, | |
| { | |
| "epoch": 4.328205128205128, | |
| "grad_norm": 1.5647730827331543, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 42200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.012715376913547516, | |
| "epoch": 4.328205128205128, | |
| "step": 42200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4791666865348816, | |
| "wm_acc_tail": 0.40625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.095299482345581 | |
| }, | |
| { | |
| "epoch": 4.348717948717948, | |
| "grad_norm": 1.3705652952194214, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3866, | |
| "step": 42400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.001977815292775631, | |
| "epoch": 4.348717948717948, | |
| "step": 42400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42500001192092896, | |
| "wm_acc_tail": 0.3359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4747111797332764 | |
| }, | |
| { | |
| "epoch": 4.36923076923077, | |
| "grad_norm": 1.4510146379470825, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3903, | |
| "step": 42600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008497975766658783, | |
| "epoch": 4.36923076923077, | |
| "step": 42600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3583333492279053, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.603215456008911 | |
| }, | |
| { | |
| "epoch": 4.38974358974359, | |
| "grad_norm": 1.3965692520141602, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3864, | |
| "step": 42800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.10778119415044785, | |
| "epoch": 4.38974358974359, | |
| "step": 42800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4125000238418579, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.417860984802246 | |
| }, | |
| { | |
| "epoch": 4.410256410256411, | |
| "grad_norm": 1.4570536613464355, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 43000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03583671525120735, | |
| "epoch": 4.410256410256411, | |
| "step": 43000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3708333671092987, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.511754035949707 | |
| }, | |
| { | |
| "epoch": 4.430769230769231, | |
| "grad_norm": 1.4476046562194824, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 43200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04623754322528839, | |
| "epoch": 4.430769230769231, | |
| "step": 43200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4333333671092987, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1269614696502686 | |
| }, | |
| { | |
| "epoch": 4.4512820512820515, | |
| "grad_norm": 1.5305196046829224, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3879, | |
| "step": 43400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04036559537053108, | |
| "epoch": 4.4512820512820515, | |
| "step": 43400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3541666865348816, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5765433311462402 | |
| }, | |
| { | |
| "epoch": 4.471794871794872, | |
| "grad_norm": 1.5796819925308228, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3879, | |
| "step": 43600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.014430545270442963, | |
| "epoch": 4.471794871794872, | |
| "step": 43600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.26250001788139343, | |
| "wm_acc_tail": 0.1640625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.2396721839904785 | |
| }, | |
| { | |
| "epoch": 4.492307692307692, | |
| "grad_norm": 1.5786937475204468, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3861, | |
| "step": 43800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0653475821018219, | |
| "epoch": 4.492307692307692, | |
| "step": 43800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4750000238418579, | |
| "wm_acc_tail": 0.359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.016564130783081 | |
| }, | |
| { | |
| "epoch": 4.512820512820513, | |
| "grad_norm": 1.5266082286834717, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 44000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.023241586983203888, | |
| "epoch": 4.512820512820513, | |
| "step": 44000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3125000298023224, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9328858852386475 | |
| }, | |
| { | |
| "epoch": 4.533333333333333, | |
| "grad_norm": 1.493490219116211, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3882, | |
| "step": 44200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03869625926017761, | |
| "epoch": 4.533333333333333, | |
| "step": 44200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3583333492279053, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.587139129638672 | |
| }, | |
| { | |
| "epoch": 4.553846153846154, | |
| "grad_norm": 1.3878897428512573, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 44400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.004450260661542416, | |
| "epoch": 4.553846153846154, | |
| "step": 44400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.46250003576278687, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1333746910095215 | |
| }, | |
| { | |
| "epoch": 4.574358974358974, | |
| "grad_norm": 1.4746562242507935, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 44600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.018214711919426918, | |
| "epoch": 4.574358974358974, | |
| "step": 44600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32500001788139343, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9523847103118896 | |
| }, | |
| { | |
| "epoch": 4.5948717948717945, | |
| "grad_norm": 1.4711662530899048, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 44800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.020461026579141617, | |
| "epoch": 4.5948717948717945, | |
| "step": 44800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2750000059604645, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.130517482757568 | |
| }, | |
| { | |
| "epoch": 4.615384615384615, | |
| "grad_norm": 1.8539690971374512, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3863, | |
| "step": 45000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.040325827896595, | |
| "epoch": 4.615384615384615, | |
| "step": 45000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3291666805744171, | |
| "wm_acc_tail": 0.21875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8965537548065186 | |
| }, | |
| { | |
| "epoch": 4.635897435897435, | |
| "grad_norm": 1.5801750421524048, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3903, | |
| "step": 45200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0499042272567749, | |
| "epoch": 4.635897435897435, | |
| "step": 45200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3958333432674408, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5105934143066406 | |
| }, | |
| { | |
| "epoch": 4.656410256410257, | |
| "grad_norm": 1.501552700996399, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3881, | |
| "step": 45400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.019361576065421104, | |
| "epoch": 4.656410256410257, | |
| "step": 45400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.587190866470337 | |
| }, | |
| { | |
| "epoch": 4.676923076923077, | |
| "grad_norm": 1.6324543952941895, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 45600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.053294774144887924, | |
| "epoch": 4.676923076923077, | |
| "step": 45600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3500000238418579, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.905782699584961 | |
| }, | |
| { | |
| "epoch": 4.697435897435898, | |
| "grad_norm": 1.6176339387893677, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3866, | |
| "step": 45800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.00848244410008192, | |
| "epoch": 4.697435897435898, | |
| "step": 45800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.557039499282837 | |
| }, | |
| { | |
| "epoch": 4.717948717948718, | |
| "grad_norm": 1.4094936847686768, | |
| "learning_rate": 2e-05, | |
| "loss": 0.388, | |
| "step": 46000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02305266633629799, | |
| "epoch": 4.717948717948718, | |
| "step": 46000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34583336114883423, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.87615966796875 | |
| }, | |
| { | |
| "epoch": 4.7384615384615385, | |
| "grad_norm": 1.415177345275879, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3869, | |
| "step": 46200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.00399048300459981, | |
| "epoch": 4.7384615384615385, | |
| "step": 46200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42500001192092896, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.151414632797241 | |
| }, | |
| { | |
| "epoch": 4.758974358974359, | |
| "grad_norm": 1.4603790044784546, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3865, | |
| "step": 46400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02078969217836857, | |
| "epoch": 4.758974358974359, | |
| "step": 46400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4528462886810303 | |
| }, | |
| { | |
| "epoch": 4.779487179487179, | |
| "grad_norm": 1.5804147720336914, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 46600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.009785688482224941, | |
| "epoch": 4.779487179487179, | |
| "step": 46600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44583335518836975, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.071739673614502 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 1.5307948589324951, | |
| "learning_rate": 2e-05, | |
| "loss": 0.387, | |
| "step": 46800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.07876758277416229, | |
| "epoch": 4.8, | |
| "step": 46800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4375000298023224, | |
| "wm_acc_tail": 0.328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1185576915740967 | |
| }, | |
| { | |
| "epoch": 4.82051282051282, | |
| "grad_norm": 1.5309840440750122, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3904, | |
| "step": 47000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01908997818827629, | |
| "epoch": 4.82051282051282, | |
| "step": 47000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.531825542449951 | |
| }, | |
| { | |
| "epoch": 4.841025641025641, | |
| "grad_norm": 1.5262001752853394, | |
| "learning_rate": 2e-05, | |
| "loss": 0.387, | |
| "step": 47200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03207289054989815, | |
| "epoch": 4.841025641025641, | |
| "step": 47200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4541666805744171, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.288022041320801 | |
| }, | |
| { | |
| "epoch": 4.861538461538462, | |
| "grad_norm": 1.4241925477981567, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3884, | |
| "step": 47400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.017524540424346924, | |
| "epoch": 4.861538461538462, | |
| "step": 47400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4208333492279053, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.308858871459961 | |
| }, | |
| { | |
| "epoch": 4.8820512820512825, | |
| "grad_norm": 1.5966178178787231, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3885, | |
| "step": 47600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.054292645305395126, | |
| "epoch": 4.8820512820512825, | |
| "step": 47600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32083335518836975, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.902402400970459 | |
| }, | |
| { | |
| "epoch": 4.902564102564103, | |
| "grad_norm": 1.5068988800048828, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3882, | |
| "step": 47800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.022446684539318085, | |
| "epoch": 4.902564102564103, | |
| "step": 47800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3958333432674408, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5160224437713623 | |
| }, | |
| { | |
| "epoch": 4.923076923076923, | |
| "grad_norm": 1.684203863143921, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3866, | |
| "step": 48000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.012435570359230042, | |
| "epoch": 4.923076923076923, | |
| "step": 48000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.759791135787964 | |
| }, | |
| { | |
| "epoch": 4.943589743589744, | |
| "grad_norm": 2.0323543548583984, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3891, | |
| "step": 48200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.014341471716761589, | |
| "epoch": 4.943589743589744, | |
| "step": 48200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36666667461395264, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8117034435272217 | |
| }, | |
| { | |
| "epoch": 4.964102564102564, | |
| "grad_norm": 1.610014796257019, | |
| "learning_rate": 2e-05, | |
| "loss": 0.389, | |
| "step": 48400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.010367088951170444, | |
| "epoch": 4.964102564102564, | |
| "step": 48400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2458333522081375, | |
| "wm_acc_tail": 0.1796875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.102650165557861 | |
| }, | |
| { | |
| "epoch": 4.984615384615385, | |
| "grad_norm": 1.5982123613357544, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3887, | |
| "step": 48600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04053257778286934, | |
| "epoch": 4.984615384615385, | |
| "step": 48600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.430593967437744 | |
| }, | |
| { | |
| "epoch": 5.005128205128205, | |
| "grad_norm": 1.3401280641555786, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3867, | |
| "step": 48800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.010357022285461426, | |
| "epoch": 5.005128205128205, | |
| "step": 48800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7139079570770264 | |
| }, | |
| { | |
| "epoch": 5.0256410256410255, | |
| "grad_norm": 1.5741493701934814, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3878, | |
| "step": 49000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04704487323760986, | |
| "epoch": 5.0256410256410255, | |
| "step": 49000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32500001788139343, | |
| "wm_acc_tail": 0.1796875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.026562690734863 | |
| }, | |
| { | |
| "epoch": 5.046153846153846, | |
| "grad_norm": 1.5934900045394897, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 49200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02490474469959736, | |
| "epoch": 5.046153846153846, | |
| "step": 49200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5291666984558105, | |
| "wm_acc_tail": 0.40625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.6822569370269775 | |
| }, | |
| { | |
| "epoch": 5.066666666666666, | |
| "grad_norm": 1.624564528465271, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 49400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.003930829465389252, | |
| "epoch": 5.066666666666666, | |
| "step": 49400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5833333730697632, | |
| "wm_acc_tail": 0.4296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.5207793712615967 | |
| }, | |
| { | |
| "epoch": 5.087179487179487, | |
| "grad_norm": 1.3889561891555786, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3859, | |
| "step": 49600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04863318055868149, | |
| "epoch": 5.087179487179487, | |
| "step": 49600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3500000238418579, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5588364601135254 | |
| }, | |
| { | |
| "epoch": 5.107692307692307, | |
| "grad_norm": 1.5344611406326294, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 49800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04580392688512802, | |
| "epoch": 5.107692307692307, | |
| "step": 49800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3958333432674408, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.597409725189209 | |
| }, | |
| { | |
| "epoch": 5.128205128205128, | |
| "grad_norm": 1.672608494758606, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3861, | |
| "step": 50000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01383890025317669, | |
| "epoch": 5.128205128205128, | |
| "step": 50000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.322786569595337 | |
| }, | |
| { | |
| "epoch": 5.148717948717949, | |
| "grad_norm": 1.4397544860839844, | |
| "learning_rate": 2e-05, | |
| "loss": 0.387, | |
| "step": 50200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02704683691263199, | |
| "epoch": 5.148717948717949, | |
| "step": 50200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.755046844482422 | |
| }, | |
| { | |
| "epoch": 5.1692307692307695, | |
| "grad_norm": 1.4592448472976685, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 50400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.004319323692470789, | |
| "epoch": 5.1692307692307695, | |
| "step": 50400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.383592128753662 | |
| }, | |
| { | |
| "epoch": 5.18974358974359, | |
| "grad_norm": 1.4306566715240479, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 50600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.052446089684963226, | |
| "epoch": 5.18974358974359, | |
| "step": 50600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5333333611488342, | |
| "wm_acc_tail": 0.375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.6345105171203613 | |
| }, | |
| { | |
| "epoch": 5.21025641025641, | |
| "grad_norm": 1.5413750410079956, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3876, | |
| "step": 50800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.040215667337179184, | |
| "epoch": 5.21025641025641, | |
| "step": 50800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42500001192092896, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2291622161865234 | |
| }, | |
| { | |
| "epoch": 5.230769230769231, | |
| "grad_norm": 1.5162241458892822, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 51000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02652158960700035, | |
| "epoch": 5.230769230769231, | |
| "step": 51000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44166669249534607, | |
| "wm_acc_tail": 0.3359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1024301052093506 | |
| }, | |
| { | |
| "epoch": 5.251282051282051, | |
| "grad_norm": 1.4429219961166382, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 51200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.014661935158073902, | |
| "epoch": 5.251282051282051, | |
| "step": 51200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32083335518836975, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8339622020721436 | |
| }, | |
| { | |
| "epoch": 5.271794871794872, | |
| "grad_norm": 1.7731890678405762, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3854, | |
| "step": 51400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.039028067141771317, | |
| "epoch": 5.271794871794872, | |
| "step": 51400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4541666805744171, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.226318120956421 | |
| }, | |
| { | |
| "epoch": 5.292307692307692, | |
| "grad_norm": 1.5176918506622314, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3873, | |
| "step": 51600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06967823207378387, | |
| "epoch": 5.292307692307692, | |
| "step": 51600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9553639888763428 | |
| }, | |
| { | |
| "epoch": 5.312820512820513, | |
| "grad_norm": 1.7715626955032349, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 51800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05109746754169464, | |
| "epoch": 5.312820512820513, | |
| "step": 51800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3083333373069763, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9271154403686523 | |
| }, | |
| { | |
| "epoch": 5.333333333333333, | |
| "grad_norm": 1.587107539176941, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3876, | |
| "step": 52000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05523735284805298, | |
| "epoch": 5.333333333333333, | |
| "step": 52000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.49139666557312 | |
| }, | |
| { | |
| "epoch": 5.3538461538461535, | |
| "grad_norm": 1.7316102981567383, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 52200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.006700905971229076, | |
| "epoch": 5.3538461538461535, | |
| "step": 52200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2916666865348816, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.222589015960693 | |
| }, | |
| { | |
| "epoch": 5.374358974358975, | |
| "grad_norm": 1.5056092739105225, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3915, | |
| "step": 52400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05272316187620163, | |
| "epoch": 5.374358974358975, | |
| "step": 52400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.28333336114883423, | |
| "wm_acc_tail": 0.1875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.114068984985352 | |
| }, | |
| { | |
| "epoch": 5.394871794871795, | |
| "grad_norm": 1.5428037643432617, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3888, | |
| "step": 52600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.052870891988277435, | |
| "epoch": 5.394871794871795, | |
| "step": 52600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.25833335518836975, | |
| "wm_acc_tail": 0.15625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.260002136230469 | |
| }, | |
| { | |
| "epoch": 5.415384615384616, | |
| "grad_norm": 1.5930677652359009, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3868, | |
| "step": 52800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05991308391094208, | |
| "epoch": 5.415384615384616, | |
| "step": 52800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3291666805744171, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.0723676681518555 | |
| }, | |
| { | |
| "epoch": 5.435897435897436, | |
| "grad_norm": 1.5060288906097412, | |
| "learning_rate": 2e-05, | |
| "loss": 0.39, | |
| "step": 53000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008106578141450882, | |
| "epoch": 5.435897435897436, | |
| "step": 53000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2458333522081375, | |
| "wm_acc_tail": 0.1328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.200095176696777 | |
| }, | |
| { | |
| "epoch": 5.456410256410257, | |
| "grad_norm": 1.5505714416503906, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3854, | |
| "step": 53200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04732687026262283, | |
| "epoch": 5.456410256410257, | |
| "step": 53200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.491666704416275, | |
| "wm_acc_tail": 0.3828125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.008178472518921 | |
| }, | |
| { | |
| "epoch": 5.476923076923077, | |
| "grad_norm": 1.5118067264556885, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3865, | |
| "step": 53400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.007295343093574047, | |
| "epoch": 5.476923076923077, | |
| "step": 53400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44166669249534607, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.190182685852051 | |
| }, | |
| { | |
| "epoch": 5.4974358974358974, | |
| "grad_norm": 1.4463170766830444, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 53600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04257812723517418, | |
| "epoch": 5.4974358974358974, | |
| "step": 53600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.45000001788139343, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.178072452545166 | |
| }, | |
| { | |
| "epoch": 5.517948717948718, | |
| "grad_norm": 1.4655468463897705, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 53800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.055367615073919296, | |
| "epoch": 5.517948717948718, | |
| "step": 53800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3125000298023224, | |
| "wm_acc_tail": 0.203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.879100799560547 | |
| }, | |
| { | |
| "epoch": 5.538461538461538, | |
| "grad_norm": 1.572830319404602, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 54000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01620059832930565, | |
| "epoch": 5.538461538461538, | |
| "step": 54000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7262613773345947 | |
| }, | |
| { | |
| "epoch": 5.558974358974359, | |
| "grad_norm": 1.501634955406189, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3872, | |
| "step": 54200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04939816892147064, | |
| "epoch": 5.558974358974359, | |
| "step": 54200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3958333432674408, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.654742956161499 | |
| }, | |
| { | |
| "epoch": 5.579487179487179, | |
| "grad_norm": 1.7280412912368774, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3874, | |
| "step": 54400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.011417646892368793, | |
| "epoch": 5.579487179487179, | |
| "step": 54400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3958333432674408, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.504270076751709 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 1.7620073556900024, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3883, | |
| "step": 54600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04106645658612251, | |
| "epoch": 5.6, | |
| "step": 54600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36666667461395264, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.889505624771118 | |
| }, | |
| { | |
| "epoch": 5.62051282051282, | |
| "grad_norm": 1.435755729675293, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3903, | |
| "step": 54800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.019297143444418907, | |
| "epoch": 5.62051282051282, | |
| "step": 54800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44583335518836975, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.213240146636963 | |
| }, | |
| { | |
| "epoch": 5.641025641025641, | |
| "grad_norm": 1.5367580652236938, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3863, | |
| "step": 55000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01854798011481762, | |
| "epoch": 5.641025641025641, | |
| "step": 55000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.675200939178467 | |
| }, | |
| { | |
| "epoch": 5.661538461538462, | |
| "grad_norm": 1.61268150806427, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3864, | |
| "step": 55200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.010234334506094456, | |
| "epoch": 5.661538461538462, | |
| "step": 55200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6159310340881348 | |
| }, | |
| { | |
| "epoch": 5.682051282051282, | |
| "grad_norm": 1.5882552862167358, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3859, | |
| "step": 55400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.007443170994520187, | |
| "epoch": 5.682051282051282, | |
| "step": 55400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8028922080993652 | |
| }, | |
| { | |
| "epoch": 5.702564102564103, | |
| "grad_norm": 1.4263242483139038, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3882, | |
| "step": 55600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.009146138094365597, | |
| "epoch": 5.702564102564103, | |
| "step": 55600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5, | |
| "wm_acc_tail": 0.3828125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.9359819889068604 | |
| }, | |
| { | |
| "epoch": 5.723076923076923, | |
| "grad_norm": 1.6595350503921509, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3884, | |
| "step": 55800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.027344558387994766, | |
| "epoch": 5.723076923076923, | |
| "step": 55800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5083333849906921, | |
| "wm_acc_tail": 0.375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.048349380493164 | |
| }, | |
| { | |
| "epoch": 5.743589743589744, | |
| "grad_norm": 1.8234341144561768, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3867, | |
| "step": 56000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04182230681180954, | |
| "epoch": 5.743589743589744, | |
| "step": 56000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3291666805744171, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.042160987854004 | |
| }, | |
| { | |
| "epoch": 5.764102564102564, | |
| "grad_norm": 1.7104073762893677, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 56200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01444804947823286, | |
| "epoch": 5.764102564102564, | |
| "step": 56200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.47083336114883423, | |
| "wm_acc_tail": 0.3359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.257399559020996 | |
| }, | |
| { | |
| "epoch": 5.7846153846153845, | |
| "grad_norm": 1.5385661125183105, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 56400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.049989283084869385, | |
| "epoch": 5.7846153846153845, | |
| "step": 56400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4166666865348816, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3600668907165527 | |
| }, | |
| { | |
| "epoch": 5.805128205128205, | |
| "grad_norm": 1.5447169542312622, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3897, | |
| "step": 56600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.052676066756248474, | |
| "epoch": 5.805128205128205, | |
| "step": 56600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3541666865348816, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.579963445663452 | |
| }, | |
| { | |
| "epoch": 5.825641025641025, | |
| "grad_norm": 1.539596438407898, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3865, | |
| "step": 56800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03848004713654518, | |
| "epoch": 5.825641025641025, | |
| "step": 56800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9125478267669678 | |
| }, | |
| { | |
| "epoch": 5.846153846153846, | |
| "grad_norm": 1.5190871953964233, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3876, | |
| "step": 57000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01663527451455593, | |
| "epoch": 5.846153846153846, | |
| "step": 57000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.45000001788139343, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1401031017303467 | |
| }, | |
| { | |
| "epoch": 5.866666666666667, | |
| "grad_norm": 1.549738883972168, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3859, | |
| "step": 57200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.002755908528342843, | |
| "epoch": 5.866666666666667, | |
| "step": 57200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4125000238418579, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.387282371520996 | |
| }, | |
| { | |
| "epoch": 5.887179487179488, | |
| "grad_norm": 1.847240924835205, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3863, | |
| "step": 57400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01088266633450985, | |
| "epoch": 5.887179487179488, | |
| "step": 57400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4166666865348816, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3762354850769043 | |
| }, | |
| { | |
| "epoch": 5.907692307692308, | |
| "grad_norm": 1.4735372066497803, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3861, | |
| "step": 57600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03350949287414551, | |
| "epoch": 5.907692307692308, | |
| "step": 57600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4744575023651123 | |
| }, | |
| { | |
| "epoch": 5.9282051282051285, | |
| "grad_norm": 1.5345823764801025, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3902, | |
| "step": 57800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0014021102106198668, | |
| "epoch": 5.9282051282051285, | |
| "step": 57800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4750000238418579, | |
| "wm_acc_tail": 0.375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.0774242877960205 | |
| }, | |
| { | |
| "epoch": 5.948717948717949, | |
| "grad_norm": 1.4983795881271362, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3864, | |
| "step": 58000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.017278311774134636, | |
| "epoch": 5.948717948717949, | |
| "step": 58000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3708333671092987, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.360539674758911 | |
| }, | |
| { | |
| "epoch": 5.969230769230769, | |
| "grad_norm": 1.5648690462112427, | |
| "learning_rate": 2e-05, | |
| "loss": 0.39, | |
| "step": 58200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.049857039004564285, | |
| "epoch": 5.969230769230769, | |
| "step": 58200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42500001192092896, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.449976682662964 | |
| }, | |
| { | |
| "epoch": 5.98974358974359, | |
| "grad_norm": 1.456133484840393, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3863, | |
| "step": 58400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02349213883280754, | |
| "epoch": 5.98974358974359, | |
| "step": 58400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2666666805744171, | |
| "wm_acc_tail": 0.1875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.241814613342285 | |
| }, | |
| { | |
| "epoch": 6.01025641025641, | |
| "grad_norm": 1.8524450063705444, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 58600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02423018030822277, | |
| "epoch": 6.01025641025641, | |
| "step": 58600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3916666805744171, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3679964542388916 | |
| }, | |
| { | |
| "epoch": 6.030769230769231, | |
| "grad_norm": 1.547088623046875, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 58800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03751825541257858, | |
| "epoch": 6.030769230769231, | |
| "step": 58800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3958333432674408, | |
| "wm_acc_tail": 0.328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.385275363922119 | |
| }, | |
| { | |
| "epoch": 6.051282051282051, | |
| "grad_norm": 1.5215375423431396, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 59000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01584138534963131, | |
| "epoch": 6.051282051282051, | |
| "step": 59000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4166666865348816, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.719261407852173 | |
| }, | |
| { | |
| "epoch": 6.0717948717948715, | |
| "grad_norm": 1.505509376525879, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3854, | |
| "step": 59200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02194974757730961, | |
| "epoch": 6.0717948717948715, | |
| "step": 59200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42500001192092896, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.338383436203003 | |
| }, | |
| { | |
| "epoch": 6.092307692307692, | |
| "grad_norm": 1.549731969833374, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3865, | |
| "step": 59400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.017771070823073387, | |
| "epoch": 6.092307692307692, | |
| "step": 59400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.46666669845581055, | |
| "wm_acc_tail": 0.3671875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.9475672245025635 | |
| }, | |
| { | |
| "epoch": 6.112820512820512, | |
| "grad_norm": 1.589035153388977, | |
| "learning_rate": 2e-05, | |
| "loss": 0.386, | |
| "step": 59600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0407552495598793, | |
| "epoch": 6.112820512820512, | |
| "step": 59600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40416669845581055, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4610705375671387 | |
| }, | |
| { | |
| "epoch": 6.133333333333334, | |
| "grad_norm": 1.586647391319275, | |
| "learning_rate": 2e-05, | |
| "loss": 0.386, | |
| "step": 59800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.019300801679491997, | |
| "epoch": 6.133333333333334, | |
| "step": 59800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2875000238418579, | |
| "wm_acc_tail": 0.1953125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.105323791503906 | |
| }, | |
| { | |
| "epoch": 6.153846153846154, | |
| "grad_norm": 1.6962783336639404, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 60000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03685312718153, | |
| "epoch": 6.153846153846154, | |
| "step": 60000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5583333373069763, | |
| "wm_acc_tail": 0.3984375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.4907143115997314 | |
| }, | |
| { | |
| "epoch": 6.174358974358975, | |
| "grad_norm": 1.4902276992797852, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 60200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.020909996703267097, | |
| "epoch": 6.174358974358975, | |
| "step": 60200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.319096565246582 | |
| }, | |
| { | |
| "epoch": 6.194871794871795, | |
| "grad_norm": 1.5638924837112427, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3874, | |
| "step": 60400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.017304861918091774, | |
| "epoch": 6.194871794871795, | |
| "step": 60400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.26250001788139343, | |
| "wm_acc_tail": 0.140625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.336837291717529 | |
| }, | |
| { | |
| "epoch": 6.2153846153846155, | |
| "grad_norm": 1.4089213609695435, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3844, | |
| "step": 60600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06848035752773285, | |
| "epoch": 6.2153846153846155, | |
| "step": 60600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4958333671092987, | |
| "wm_acc_tail": 0.375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.0000624656677246 | |
| }, | |
| { | |
| "epoch": 6.235897435897436, | |
| "grad_norm": 1.5539191961288452, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3854, | |
| "step": 60800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.027483763173222542, | |
| "epoch": 6.235897435897436, | |
| "step": 60800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44166669249534607, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2022814750671387 | |
| }, | |
| { | |
| "epoch": 6.256410256410256, | |
| "grad_norm": 1.5159022808074951, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3877, | |
| "step": 61000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.018918905407190323, | |
| "epoch": 6.256410256410256, | |
| "step": 61000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4333333671092987, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.532113552093506 | |
| }, | |
| { | |
| "epoch": 6.276923076923077, | |
| "grad_norm": 1.7098314762115479, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 61200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008750401437282562, | |
| "epoch": 6.276923076923077, | |
| "step": 61200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.491666704416275, | |
| "wm_acc_tail": 0.390625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.925954818725586 | |
| }, | |
| { | |
| "epoch": 6.297435897435897, | |
| "grad_norm": 1.67826247215271, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 61400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02675863914191723, | |
| "epoch": 6.297435897435897, | |
| "step": 61400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3583333492279053, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7245612144470215 | |
| }, | |
| { | |
| "epoch": 6.317948717948718, | |
| "grad_norm": 1.5546609163284302, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 61600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.026509325951337814, | |
| "epoch": 6.317948717948718, | |
| "step": 61600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3125000298023224, | |
| "wm_acc_tail": 0.1875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.003115653991699 | |
| }, | |
| { | |
| "epoch": 6.338461538461538, | |
| "grad_norm": 1.479357123374939, | |
| "learning_rate": 2e-05, | |
| "loss": 0.385, | |
| "step": 61800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.031075866892933846, | |
| "epoch": 6.338461538461538, | |
| "step": 61800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40416669845581055, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2954213619232178 | |
| }, | |
| { | |
| "epoch": 6.358974358974359, | |
| "grad_norm": 1.7480745315551758, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 62000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.045431774109601974, | |
| "epoch": 6.358974358974359, | |
| "step": 62000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3083333373069763, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9454991817474365 | |
| }, | |
| { | |
| "epoch": 6.37948717948718, | |
| "grad_norm": 1.7372509241104126, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 62200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03187049180269241, | |
| "epoch": 6.37948717948718, | |
| "step": 62200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.27916666865348816, | |
| "wm_acc_tail": 0.1953125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.205851078033447 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 1.657104253768921, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 62400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.022336388006806374, | |
| "epoch": 6.4, | |
| "step": 62400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2750000059604645, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.043581008911133 | |
| }, | |
| { | |
| "epoch": 6.420512820512821, | |
| "grad_norm": 1.7278469800949097, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3877, | |
| "step": 62600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.020370513200759888, | |
| "epoch": 6.420512820512821, | |
| "step": 62600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38333335518836975, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.854361057281494 | |
| }, | |
| { | |
| "epoch": 6.441025641025641, | |
| "grad_norm": 1.7679606676101685, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3893, | |
| "step": 62800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.053616002202034, | |
| "epoch": 6.441025641025641, | |
| "step": 62800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4333333671092987, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.505981206893921 | |
| }, | |
| { | |
| "epoch": 6.461538461538462, | |
| "grad_norm": 1.608422040939331, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 63000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06787025183439255, | |
| "epoch": 6.461538461538462, | |
| "step": 63000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4750000238418579, | |
| "wm_acc_tail": 0.375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1202709674835205 | |
| }, | |
| { | |
| "epoch": 6.482051282051282, | |
| "grad_norm": 1.6202107667922974, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3854, | |
| "step": 63200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.013225387781858444, | |
| "epoch": 6.482051282051282, | |
| "step": 63200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3291666805744171, | |
| "wm_acc_tail": 0.15625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.969465494155884 | |
| }, | |
| { | |
| "epoch": 6.5025641025641026, | |
| "grad_norm": 1.5373591184616089, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3852, | |
| "step": 63400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.051493387669324875, | |
| "epoch": 6.5025641025641026, | |
| "step": 63400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.286562204360962 | |
| }, | |
| { | |
| "epoch": 6.523076923076923, | |
| "grad_norm": 1.6284937858581543, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3852, | |
| "step": 63600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.019318677484989166, | |
| "epoch": 6.523076923076923, | |
| "step": 63600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40416669845581055, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.480285406112671 | |
| }, | |
| { | |
| "epoch": 6.543589743589743, | |
| "grad_norm": 1.5001331567764282, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3852, | |
| "step": 63800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.025429822504520416, | |
| "epoch": 6.543589743589743, | |
| "step": 63800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2958333492279053, | |
| "wm_acc_tail": 0.203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9839584827423096 | |
| }, | |
| { | |
| "epoch": 6.564102564102564, | |
| "grad_norm": 1.562241554260254, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3852, | |
| "step": 64000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.031849928200244904, | |
| "epoch": 6.564102564102564, | |
| "step": 64000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3500000238418579, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.583045244216919 | |
| }, | |
| { | |
| "epoch": 6.584615384615384, | |
| "grad_norm": 1.513997197151184, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3893, | |
| "step": 64200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03714115545153618, | |
| "epoch": 6.584615384615384, | |
| "step": 64200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3291666805744171, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.757371425628662 | |
| }, | |
| { | |
| "epoch": 6.605128205128205, | |
| "grad_norm": 1.469828724861145, | |
| "learning_rate": 2e-05, | |
| "loss": 0.387, | |
| "step": 64400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06254711002111435, | |
| "epoch": 6.605128205128205, | |
| "step": 64400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3916666805744171, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5831172466278076 | |
| }, | |
| { | |
| "epoch": 6.625641025641025, | |
| "grad_norm": 1.6540462970733643, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3865, | |
| "step": 64600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.049855247139930725, | |
| "epoch": 6.625641025641025, | |
| "step": 64600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9955742359161377 | |
| }, | |
| { | |
| "epoch": 6.6461538461538465, | |
| "grad_norm": 1.5540004968643188, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3878, | |
| "step": 64800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.10088969022035599, | |
| "epoch": 6.6461538461538465, | |
| "step": 64800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8232319355010986 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "grad_norm": 1.4553481340408325, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3883, | |
| "step": 65000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05756954476237297, | |
| "epoch": 6.666666666666667, | |
| "step": 65000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40833336114883423, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.501070499420166 | |
| }, | |
| { | |
| "epoch": 6.687179487179487, | |
| "grad_norm": 1.5228793621063232, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3878, | |
| "step": 65200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04212230443954468, | |
| "epoch": 6.687179487179487, | |
| "step": 65200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40833336114883423, | |
| "wm_acc_tail": 0.3359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.362852096557617 | |
| }, | |
| { | |
| "epoch": 6.707692307692308, | |
| "grad_norm": 1.4791803359985352, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3878, | |
| "step": 65400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03619470074772835, | |
| "epoch": 6.707692307692308, | |
| "step": 65400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32083335518836975, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9605600833892822 | |
| }, | |
| { | |
| "epoch": 6.728205128205128, | |
| "grad_norm": 1.7693204879760742, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3881, | |
| "step": 65600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.016947759315371513, | |
| "epoch": 6.728205128205128, | |
| "step": 65600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42500001192092896, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.295398712158203 | |
| }, | |
| { | |
| "epoch": 6.748717948717949, | |
| "grad_norm": 1.529558539390564, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3854, | |
| "step": 65800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008063782937824726, | |
| "epoch": 6.748717948717949, | |
| "step": 65800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2666666805744171, | |
| "wm_acc_tail": 0.21875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9668362140655518 | |
| }, | |
| { | |
| "epoch": 6.769230769230769, | |
| "grad_norm": 1.5409899950027466, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3877, | |
| "step": 66000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.021163687109947205, | |
| "epoch": 6.769230769230769, | |
| "step": 66000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40833336114883423, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.245103120803833 | |
| }, | |
| { | |
| "epoch": 6.78974358974359, | |
| "grad_norm": 1.5285154581069946, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 66200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.014751858077943325, | |
| "epoch": 6.78974358974359, | |
| "step": 66200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4791666865348816, | |
| "wm_acc_tail": 0.3671875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.9529457092285156 | |
| }, | |
| { | |
| "epoch": 6.81025641025641, | |
| "grad_norm": 1.7057801485061646, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3883, | |
| "step": 66400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008380528539419174, | |
| "epoch": 6.81025641025641, | |
| "step": 66400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.27916666865348816, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.152902126312256 | |
| }, | |
| { | |
| "epoch": 6.8307692307692305, | |
| "grad_norm": 1.6728711128234863, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 66600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03793511167168617, | |
| "epoch": 6.8307692307692305, | |
| "step": 66600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4166666865348816, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1793134212493896 | |
| }, | |
| { | |
| "epoch": 6.851282051282051, | |
| "grad_norm": 1.695508360862732, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 66800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.043301355093717575, | |
| "epoch": 6.851282051282051, | |
| "step": 66800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6939327716827393 | |
| }, | |
| { | |
| "epoch": 6.871794871794872, | |
| "grad_norm": 1.677759051322937, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3859, | |
| "step": 67000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04182637855410576, | |
| "epoch": 6.871794871794872, | |
| "step": 67000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32083335518836975, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.772810220718384 | |
| }, | |
| { | |
| "epoch": 6.892307692307693, | |
| "grad_norm": 1.5913299322128296, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 67200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06855608522891998, | |
| "epoch": 6.892307692307693, | |
| "step": 67200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3541666865348816, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.539832592010498 | |
| }, | |
| { | |
| "epoch": 6.912820512820513, | |
| "grad_norm": 1.5180904865264893, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3866, | |
| "step": 67400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.057179685682058334, | |
| "epoch": 6.912820512820513, | |
| "step": 67400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7593939304351807 | |
| }, | |
| { | |
| "epoch": 6.933333333333334, | |
| "grad_norm": 1.6407569646835327, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3865, | |
| "step": 67600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04871804639697075, | |
| "epoch": 6.933333333333334, | |
| "step": 67600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3125000298023224, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6753456592559814 | |
| }, | |
| { | |
| "epoch": 6.953846153846154, | |
| "grad_norm": 1.4626140594482422, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 67800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.025291886180639267, | |
| "epoch": 6.953846153846154, | |
| "step": 67800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.703162908554077 | |
| }, | |
| { | |
| "epoch": 6.9743589743589745, | |
| "grad_norm": 1.4282091856002808, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3854, | |
| "step": 68000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.015486420132219791, | |
| "epoch": 6.9743589743589745, | |
| "step": 68000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3125000298023224, | |
| "wm_acc_tail": 0.1953125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.791802406311035 | |
| }, | |
| { | |
| "epoch": 6.994871794871795, | |
| "grad_norm": 2.0221214294433594, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3854, | |
| "step": 68200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.037816066294908524, | |
| "epoch": 6.994871794871795, | |
| "step": 68200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4541666805744171, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.143450975418091 | |
| }, | |
| { | |
| "epoch": 7.015384615384615, | |
| "grad_norm": 1.879718542098999, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 68400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03278129920363426, | |
| "epoch": 7.015384615384615, | |
| "step": 68400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.46250003576278687, | |
| "wm_acc_tail": 0.3515625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2468111515045166 | |
| }, | |
| { | |
| "epoch": 7.035897435897436, | |
| "grad_norm": 1.6184027194976807, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 68600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.006634172052145004, | |
| "epoch": 7.035897435897436, | |
| "step": 68600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32500001788139343, | |
| "wm_acc_tail": 0.21875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7409698963165283 | |
| }, | |
| { | |
| "epoch": 7.056410256410256, | |
| "grad_norm": 1.8298183679580688, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3894, | |
| "step": 68800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04778463765978813, | |
| "epoch": 7.056410256410256, | |
| "step": 68800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.46250003576278687, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.9611756801605225 | |
| }, | |
| { | |
| "epoch": 7.076923076923077, | |
| "grad_norm": 1.535378336906433, | |
| "learning_rate": 2e-05, | |
| "loss": 0.386, | |
| "step": 69000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.07201196253299713, | |
| "epoch": 7.076923076923077, | |
| "step": 69000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.52687668800354 | |
| }, | |
| { | |
| "epoch": 7.097435897435897, | |
| "grad_norm": 1.6324633359909058, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3895, | |
| "step": 69200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.023695409297943115, | |
| "epoch": 7.097435897435897, | |
| "step": 69200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3500000238418579, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.65922474861145 | |
| }, | |
| { | |
| "epoch": 7.1179487179487175, | |
| "grad_norm": 1.566884994506836, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 69400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03259052336215973, | |
| "epoch": 7.1179487179487175, | |
| "step": 69400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5, | |
| "wm_acc_tail": 0.375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.9456732273101807 | |
| }, | |
| { | |
| "epoch": 7.138461538461539, | |
| "grad_norm": 1.823432207107544, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3843, | |
| "step": 69600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02876487746834755, | |
| "epoch": 7.138461538461539, | |
| "step": 69600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4377105236053467 | |
| }, | |
| { | |
| "epoch": 7.158974358974359, | |
| "grad_norm": 1.5202486515045166, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 69800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.017299501225352287, | |
| "epoch": 7.158974358974359, | |
| "step": 69800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4583333432674408, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.172873020172119 | |
| }, | |
| { | |
| "epoch": 7.17948717948718, | |
| "grad_norm": 1.8749486207962036, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 70000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06780079007148743, | |
| "epoch": 7.17948717948718, | |
| "step": 70000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.206528425216675 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "grad_norm": 1.655782699584961, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 70200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02349303849041462, | |
| "epoch": 7.2, | |
| "step": 70200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.30000001192092896, | |
| "wm_acc_tail": 0.1484375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9709131717681885 | |
| }, | |
| { | |
| "epoch": 7.220512820512821, | |
| "grad_norm": 1.5597331523895264, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 70400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06380563229322433, | |
| "epoch": 7.220512820512821, | |
| "step": 70400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4833333492279053, | |
| "wm_acc_tail": 0.3828125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.9376752376556396 | |
| }, | |
| { | |
| "epoch": 7.241025641025641, | |
| "grad_norm": 1.6546941995620728, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3847, | |
| "step": 70600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0575292743742466, | |
| "epoch": 7.241025641025641, | |
| "step": 70600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4125000238418579, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5138397216796875 | |
| }, | |
| { | |
| "epoch": 7.2615384615384615, | |
| "grad_norm": 1.5616551637649536, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3845, | |
| "step": 70800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0457751639187336, | |
| "epoch": 7.2615384615384615, | |
| "step": 70800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.716745376586914 | |
| }, | |
| { | |
| "epoch": 7.282051282051282, | |
| "grad_norm": 1.5881015062332153, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3846, | |
| "step": 71000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04407665878534317, | |
| "epoch": 7.282051282051282, | |
| "step": 71000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3945817947387695 | |
| }, | |
| { | |
| "epoch": 7.302564102564102, | |
| "grad_norm": 1.5914266109466553, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 71200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02100374922156334, | |
| "epoch": 7.302564102564102, | |
| "step": 71200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2958333492279053, | |
| "wm_acc_tail": 0.15625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.091380596160889 | |
| }, | |
| { | |
| "epoch": 7.323076923076923, | |
| "grad_norm": 1.6502960920333862, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 71400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04237911105155945, | |
| "epoch": 7.323076923076923, | |
| "step": 71400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.314310073852539 | |
| }, | |
| { | |
| "epoch": 7.343589743589743, | |
| "grad_norm": 1.5422579050064087, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 71600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06033250689506531, | |
| "epoch": 7.343589743589743, | |
| "step": 71600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.661388397216797 | |
| }, | |
| { | |
| "epoch": 7.364102564102564, | |
| "grad_norm": 1.6338279247283936, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 71800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.001942570903338492, | |
| "epoch": 7.364102564102564, | |
| "step": 71800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.745222568511963 | |
| }, | |
| { | |
| "epoch": 7.384615384615385, | |
| "grad_norm": 1.7342321872711182, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 72000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.025969162583351135, | |
| "epoch": 7.384615384615385, | |
| "step": 72000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3958333432674408, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5345499515533447 | |
| }, | |
| { | |
| "epoch": 7.4051282051282055, | |
| "grad_norm": 1.6571812629699707, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3849, | |
| "step": 72200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.009137322194874287, | |
| "epoch": 7.4051282051282055, | |
| "step": 72200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.31666669249534607, | |
| "wm_acc_tail": 0.15625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9135258197784424 | |
| }, | |
| { | |
| "epoch": 7.425641025641026, | |
| "grad_norm": 1.5881911516189575, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3921, | |
| "step": 72400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0008579132263548672, | |
| "epoch": 7.425641025641026, | |
| "step": 72400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.27916666865348816, | |
| "wm_acc_tail": 0.1875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.291309833526611 | |
| }, | |
| { | |
| "epoch": 7.446153846153846, | |
| "grad_norm": 1.7501994371414185, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3869, | |
| "step": 72600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.036843620240688324, | |
| "epoch": 7.446153846153846, | |
| "step": 72600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.46666669845581055, | |
| "wm_acc_tail": 0.359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1197257041931152 | |
| }, | |
| { | |
| "epoch": 7.466666666666667, | |
| "grad_norm": 1.7451938390731812, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3846, | |
| "step": 72800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.011395081877708435, | |
| "epoch": 7.466666666666667, | |
| "step": 72800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4166666865348816, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4286627769470215 | |
| }, | |
| { | |
| "epoch": 7.487179487179487, | |
| "grad_norm": 1.5333466529846191, | |
| "learning_rate": 2e-05, | |
| "loss": 0.387, | |
| "step": 73000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008320590481162071, | |
| "epoch": 7.487179487179487, | |
| "step": 73000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3958333432674408, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.667175531387329 | |
| }, | |
| { | |
| "epoch": 7.507692307692308, | |
| "grad_norm": 1.5736767053604126, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 73200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03791148588061333, | |
| "epoch": 7.507692307692308, | |
| "step": 73200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.1875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9075472354888916 | |
| }, | |
| { | |
| "epoch": 7.528205128205128, | |
| "grad_norm": 1.5854053497314453, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3878, | |
| "step": 73400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.029326096177101135, | |
| "epoch": 7.528205128205128, | |
| "step": 73400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4125000238418579, | |
| "wm_acc_tail": 0.3515625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4457318782806396 | |
| }, | |
| { | |
| "epoch": 7.5487179487179485, | |
| "grad_norm": 1.688315987586975, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3842, | |
| "step": 73600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03858431428670883, | |
| "epoch": 7.5487179487179485, | |
| "step": 73600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3333333432674408, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6623852252960205 | |
| }, | |
| { | |
| "epoch": 7.569230769230769, | |
| "grad_norm": 1.4782124757766724, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 73800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.011838916689157486, | |
| "epoch": 7.569230769230769, | |
| "step": 73800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.692974090576172 | |
| }, | |
| { | |
| "epoch": 7.589743589743589, | |
| "grad_norm": 2.0963611602783203, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3849, | |
| "step": 74000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.024848317727446556, | |
| "epoch": 7.589743589743589, | |
| "step": 74000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40833336114883423, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4331865310668945 | |
| }, | |
| { | |
| "epoch": 7.61025641025641, | |
| "grad_norm": 1.504183292388916, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 74200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06636366248130798, | |
| "epoch": 7.61025641025641, | |
| "step": 74200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.621044397354126 | |
| }, | |
| { | |
| "epoch": 7.63076923076923, | |
| "grad_norm": 1.5864686965942383, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 74400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01880892924964428, | |
| "epoch": 7.63076923076923, | |
| "step": 74400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44166669249534607, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2179043292999268 | |
| }, | |
| { | |
| "epoch": 7.651282051282052, | |
| "grad_norm": 1.6286088228225708, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3854, | |
| "step": 74600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03265329450368881, | |
| "epoch": 7.651282051282052, | |
| "step": 74600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7157068252563477 | |
| }, | |
| { | |
| "epoch": 7.671794871794872, | |
| "grad_norm": 1.5542349815368652, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3873, | |
| "step": 74800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.002137470757588744, | |
| "epoch": 7.671794871794872, | |
| "step": 74800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4125000238418579, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.419750213623047 | |
| }, | |
| { | |
| "epoch": 7.6923076923076925, | |
| "grad_norm": 1.765474557876587, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 75000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0269240140914917, | |
| "epoch": 7.6923076923076925, | |
| "step": 75000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.647392511367798 | |
| }, | |
| { | |
| "epoch": 7.712820512820513, | |
| "grad_norm": 1.5655930042266846, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3877, | |
| "step": 75200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.032510556280612946, | |
| "epoch": 7.712820512820513, | |
| "step": 75200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2916666865348816, | |
| "wm_acc_tail": 0.1875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.019177436828613 | |
| }, | |
| { | |
| "epoch": 7.733333333333333, | |
| "grad_norm": 1.6071889400482178, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3877, | |
| "step": 75400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.022565068677067757, | |
| "epoch": 7.733333333333333, | |
| "step": 75400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40416669845581055, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.284686803817749 | |
| }, | |
| { | |
| "epoch": 7.753846153846154, | |
| "grad_norm": 1.6705830097198486, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 75600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0017536316299811006, | |
| "epoch": 7.753846153846154, | |
| "step": 75600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.270529270172119 | |
| }, | |
| { | |
| "epoch": 7.774358974358974, | |
| "grad_norm": 1.5537898540496826, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3898, | |
| "step": 75800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.015203636139631271, | |
| "epoch": 7.774358974358974, | |
| "step": 75800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42500001192092896, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4316771030426025 | |
| }, | |
| { | |
| "epoch": 7.794871794871795, | |
| "grad_norm": 1.6828925609588623, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 76000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0030918011907488108, | |
| "epoch": 7.794871794871795, | |
| "step": 76000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.27916666865348816, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.161820888519287 | |
| }, | |
| { | |
| "epoch": 7.815384615384615, | |
| "grad_norm": 1.6492630243301392, | |
| "learning_rate": 2e-05, | |
| "loss": 0.39, | |
| "step": 76200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03650224953889847, | |
| "epoch": 7.815384615384615, | |
| "step": 76200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32500001788139343, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8538737297058105 | |
| }, | |
| { | |
| "epoch": 7.835897435897436, | |
| "grad_norm": 1.652735710144043, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3879, | |
| "step": 76400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.017196496948599815, | |
| "epoch": 7.835897435897436, | |
| "step": 76400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3333333432674408, | |
| "wm_acc_tail": 0.21875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8718252182006836 | |
| }, | |
| { | |
| "epoch": 7.856410256410256, | |
| "grad_norm": 1.6604610681533813, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3953, | |
| "step": 76600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.010073775425553322, | |
| "epoch": 7.856410256410256, | |
| "step": 76600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4125000238418579, | |
| "wm_acc_tail": 0.3359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.478922128677368 | |
| }, | |
| { | |
| "epoch": 7.876923076923077, | |
| "grad_norm": 1.5518853664398193, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 76800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.012821176089346409, | |
| "epoch": 7.876923076923077, | |
| "step": 76800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.31666669249534607, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9874823093414307 | |
| }, | |
| { | |
| "epoch": 7.897435897435898, | |
| "grad_norm": 1.5574307441711426, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3885, | |
| "step": 77000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.014334427192807198, | |
| "epoch": 7.897435897435898, | |
| "step": 77000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4541666805744171, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.099032163619995 | |
| }, | |
| { | |
| "epoch": 7.917948717948718, | |
| "grad_norm": 1.7604780197143555, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3914, | |
| "step": 77200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.021761490032076836, | |
| "epoch": 7.917948717948718, | |
| "step": 77200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3541666865348816, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.714301347732544 | |
| }, | |
| { | |
| "epoch": 7.938461538461539, | |
| "grad_norm": 1.7585182189941406, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3867, | |
| "step": 77400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05323059484362602, | |
| "epoch": 7.938461538461539, | |
| "step": 77400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2958333492279053, | |
| "wm_acc_tail": 0.21875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.067575931549072 | |
| }, | |
| { | |
| "epoch": 7.958974358974359, | |
| "grad_norm": 1.579727053642273, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 77600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03464706614613533, | |
| "epoch": 7.958974358974359, | |
| "step": 77600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3344321250915527 | |
| }, | |
| { | |
| "epoch": 7.97948717948718, | |
| "grad_norm": 1.6596037149429321, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 77800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0019494693260639906, | |
| "epoch": 7.97948717948718, | |
| "step": 77800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44583335518836975, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.299652099609375 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 1.8035117387771606, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 78000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0147392051294446, | |
| "epoch": 8.0, | |
| "step": 78000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.31666669249534607, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9413981437683105 | |
| }, | |
| { | |
| "epoch": 8.02051282051282, | |
| "grad_norm": 1.5986641645431519, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3849, | |
| "step": 78200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.047565262764692307, | |
| "epoch": 8.02051282051282, | |
| "step": 78200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.3515625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4234087467193604 | |
| }, | |
| { | |
| "epoch": 8.04102564102564, | |
| "grad_norm": 1.5270459651947021, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3877, | |
| "step": 78400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01918724551796913, | |
| "epoch": 8.04102564102564, | |
| "step": 78400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.391367197036743 | |
| }, | |
| { | |
| "epoch": 8.061538461538461, | |
| "grad_norm": 1.9521206617355347, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3846, | |
| "step": 78600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.021097484976053238, | |
| "epoch": 8.061538461538461, | |
| "step": 78600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3708333671092987, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4204821586608887 | |
| }, | |
| { | |
| "epoch": 8.082051282051282, | |
| "grad_norm": 1.6571987867355347, | |
| "learning_rate": 2e-05, | |
| "loss": 0.385, | |
| "step": 78800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.024147879332304, | |
| "epoch": 8.082051282051282, | |
| "step": 78800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40833336114883423, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4055848121643066 | |
| }, | |
| { | |
| "epoch": 8.102564102564102, | |
| "grad_norm": 1.7489327192306519, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3864, | |
| "step": 79000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.1068626269698143, | |
| "epoch": 8.102564102564102, | |
| "step": 79000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3541666865348816, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5626184940338135 | |
| }, | |
| { | |
| "epoch": 8.123076923076923, | |
| "grad_norm": 1.489255428314209, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3866, | |
| "step": 79200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.07043063640594482, | |
| "epoch": 8.123076923076923, | |
| "step": 79200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5874361991882324 | |
| }, | |
| { | |
| "epoch": 8.143589743589743, | |
| "grad_norm": 1.4816105365753174, | |
| "learning_rate": 2e-05, | |
| "loss": 0.385, | |
| "step": 79400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.015999089926481247, | |
| "epoch": 8.143589743589743, | |
| "step": 79400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.275242805480957 | |
| }, | |
| { | |
| "epoch": 8.164102564102564, | |
| "grad_norm": 2.031759023666382, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 79600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.017515093088150024, | |
| "epoch": 8.164102564102564, | |
| "step": 79600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4541666805744171, | |
| "wm_acc_tail": 0.3359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2041189670562744 | |
| }, | |
| { | |
| "epoch": 8.184615384615384, | |
| "grad_norm": 1.6426056623458862, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 79800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.019731685519218445, | |
| "epoch": 8.184615384615384, | |
| "step": 79800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.185087203979492 | |
| }, | |
| { | |
| "epoch": 8.205128205128204, | |
| "grad_norm": 1.580042839050293, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3872, | |
| "step": 80000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.043625276535749435, | |
| "epoch": 8.205128205128204, | |
| "step": 80000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3916666805744171, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2930240631103516 | |
| }, | |
| { | |
| "epoch": 8.225641025641025, | |
| "grad_norm": 1.500888466835022, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3911, | |
| "step": 80200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05004902556538582, | |
| "epoch": 8.225641025641025, | |
| "step": 80200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.25833335518836975, | |
| "wm_acc_tail": 0.1484375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.661126136779785 | |
| }, | |
| { | |
| "epoch": 8.246153846153845, | |
| "grad_norm": 1.771716833114624, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3849, | |
| "step": 80400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03320280835032463, | |
| "epoch": 8.246153846153845, | |
| "step": 80400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40416669845581055, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.472299814224243 | |
| }, | |
| { | |
| "epoch": 8.266666666666667, | |
| "grad_norm": 2.1364688873291016, | |
| "learning_rate": 2e-05, | |
| "loss": 0.389, | |
| "step": 80600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0752418264746666, | |
| "epoch": 8.266666666666667, | |
| "step": 80600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1271276473999023 | |
| }, | |
| { | |
| "epoch": 8.287179487179488, | |
| "grad_norm": 1.6009913682937622, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 80800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0186256542801857, | |
| "epoch": 8.287179487179488, | |
| "step": 80800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3083333373069763, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8596951961517334 | |
| }, | |
| { | |
| "epoch": 8.307692307692308, | |
| "grad_norm": 1.5789217948913574, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3866, | |
| "step": 81000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06927180290222168, | |
| "epoch": 8.307692307692308, | |
| "step": 81000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.30000001192092896, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.179666996002197 | |
| }, | |
| { | |
| "epoch": 8.328205128205129, | |
| "grad_norm": 1.454155683517456, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3874, | |
| "step": 81200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.11113943159580231, | |
| "epoch": 8.328205128205129, | |
| "step": 81200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36666667461395264, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.744417428970337 | |
| }, | |
| { | |
| "epoch": 8.34871794871795, | |
| "grad_norm": 1.5741214752197266, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3869, | |
| "step": 81400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03502166271209717, | |
| "epoch": 8.34871794871795, | |
| "step": 81400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4375000298023224, | |
| "wm_acc_tail": 0.3359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6756207942962646 | |
| }, | |
| { | |
| "epoch": 8.36923076923077, | |
| "grad_norm": 1.5735766887664795, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3869, | |
| "step": 81600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.012843900360167027, | |
| "epoch": 8.36923076923077, | |
| "step": 81600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3083333373069763, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.080127239227295 | |
| }, | |
| { | |
| "epoch": 8.38974358974359, | |
| "grad_norm": 1.9374159574508667, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3867, | |
| "step": 81800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.031983666121959686, | |
| "epoch": 8.38974358974359, | |
| "step": 81800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32500001788139343, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.918639659881592 | |
| }, | |
| { | |
| "epoch": 8.41025641025641, | |
| "grad_norm": 1.5713447332382202, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3844, | |
| "step": 82000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05561190843582153, | |
| "epoch": 8.41025641025641, | |
| "step": 82000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40833336114883423, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6664786338806152 | |
| }, | |
| { | |
| "epoch": 8.430769230769231, | |
| "grad_norm": 1.5956305265426636, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3873, | |
| "step": 82200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03269462659955025, | |
| "epoch": 8.430769230769231, | |
| "step": 82200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5805511474609375 | |
| }, | |
| { | |
| "epoch": 8.451282051282051, | |
| "grad_norm": 1.6951425075531006, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 82400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0530354380607605, | |
| "epoch": 8.451282051282051, | |
| "step": 82400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4208333492279053, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4973604679107666 | |
| }, | |
| { | |
| "epoch": 8.471794871794872, | |
| "grad_norm": 1.617085576057434, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3849, | |
| "step": 82600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0148295434191823, | |
| "epoch": 8.471794871794872, | |
| "step": 82600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44166669249534607, | |
| "wm_acc_tail": 0.359375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3280575275421143 | |
| }, | |
| { | |
| "epoch": 8.492307692307692, | |
| "grad_norm": 1.4985271692276, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3847, | |
| "step": 82800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04492203891277313, | |
| "epoch": 8.492307692307692, | |
| "step": 82800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3083333373069763, | |
| "wm_acc_tail": 0.1796875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.852931499481201 | |
| }, | |
| { | |
| "epoch": 8.512820512820513, | |
| "grad_norm": 1.6087768077850342, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3845, | |
| "step": 83000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.011407393962144852, | |
| "epoch": 8.512820512820513, | |
| "step": 83000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4958333671092987, | |
| "wm_acc_tail": 0.3671875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.101167678833008 | |
| }, | |
| { | |
| "epoch": 8.533333333333333, | |
| "grad_norm": 1.6026486158370972, | |
| "learning_rate": 2e-05, | |
| "loss": 0.385, | |
| "step": 83200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.042812809348106384, | |
| "epoch": 8.533333333333333, | |
| "step": 83200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.834040641784668 | |
| }, | |
| { | |
| "epoch": 8.553846153846154, | |
| "grad_norm": 1.568938970565796, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3871, | |
| "step": 83400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03828920051455498, | |
| "epoch": 8.553846153846154, | |
| "step": 83400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3125000298023224, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.085087776184082 | |
| }, | |
| { | |
| "epoch": 8.574358974358974, | |
| "grad_norm": 1.599661111831665, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3844, | |
| "step": 83600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008683944121003151, | |
| "epoch": 8.574358974358974, | |
| "step": 83600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2666666805744171, | |
| "wm_acc_tail": 0.140625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.122366905212402 | |
| }, | |
| { | |
| "epoch": 8.594871794871795, | |
| "grad_norm": 1.676993489265442, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3852, | |
| "step": 83800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02966560237109661, | |
| "epoch": 8.594871794871795, | |
| "step": 83800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9873692989349365 | |
| }, | |
| { | |
| "epoch": 8.615384615384615, | |
| "grad_norm": 1.715393304824829, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3879, | |
| "step": 84000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01917995698750019, | |
| "epoch": 8.615384615384615, | |
| "step": 84000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3583333492279053, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.892207384109497 | |
| }, | |
| { | |
| "epoch": 8.635897435897435, | |
| "grad_norm": 1.5663189888000488, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3876, | |
| "step": 84200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.031542129814624786, | |
| "epoch": 8.635897435897435, | |
| "step": 84200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7427568435668945 | |
| }, | |
| { | |
| "epoch": 8.656410256410256, | |
| "grad_norm": 1.6801159381866455, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 84400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01534717995673418, | |
| "epoch": 8.656410256410256, | |
| "step": 84400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5, | |
| "wm_acc_tail": 0.3515625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.0198466777801514 | |
| }, | |
| { | |
| "epoch": 8.676923076923076, | |
| "grad_norm": 1.6139174699783325, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 84600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.00903190579265356, | |
| "epoch": 8.676923076923076, | |
| "step": 84600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.31666669249534607, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.390871524810791 | |
| }, | |
| { | |
| "epoch": 8.697435897435897, | |
| "grad_norm": 1.7826905250549316, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3876, | |
| "step": 84800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05558520555496216, | |
| "epoch": 8.697435897435897, | |
| "step": 84800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8052608966827393 | |
| }, | |
| { | |
| "epoch": 8.717948717948717, | |
| "grad_norm": 1.611828327178955, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3896, | |
| "step": 85000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.011729295365512371, | |
| "epoch": 8.717948717948717, | |
| "step": 85000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.688883066177368 | |
| }, | |
| { | |
| "epoch": 8.73846153846154, | |
| "grad_norm": 1.9572718143463135, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 85200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.042601849883794785, | |
| "epoch": 8.73846153846154, | |
| "step": 85200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.31666669249534607, | |
| "wm_acc_tail": 0.1953125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.0067548751831055 | |
| }, | |
| { | |
| "epoch": 8.75897435897436, | |
| "grad_norm": 1.4977508783340454, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3861, | |
| "step": 85400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03745841607451439, | |
| "epoch": 8.75897435897436, | |
| "step": 85400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.983603000640869 | |
| }, | |
| { | |
| "epoch": 8.77948717948718, | |
| "grad_norm": 1.6066875457763672, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 85600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06546357274055481, | |
| "epoch": 8.77948717948718, | |
| "step": 85600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36666667461395264, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.3791723251342773 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "grad_norm": 1.5910160541534424, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 85800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04273126646876335, | |
| "epoch": 8.8, | |
| "step": 85800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3333333432674408, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8508048057556152 | |
| }, | |
| { | |
| "epoch": 8.820512820512821, | |
| "grad_norm": 1.41476309299469, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3861, | |
| "step": 86000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0688759908080101, | |
| "epoch": 8.820512820512821, | |
| "step": 86000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.23750001192092896, | |
| "wm_acc_tail": 0.1171875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.5171427726745605 | |
| }, | |
| { | |
| "epoch": 8.841025641025642, | |
| "grad_norm": 1.730489730834961, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 86200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04532737284898758, | |
| "epoch": 8.841025641025642, | |
| "step": 86200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44583335518836975, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1906230449676514 | |
| }, | |
| { | |
| "epoch": 8.861538461538462, | |
| "grad_norm": 1.5530635118484497, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 86400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0751701220870018, | |
| "epoch": 8.861538461538462, | |
| "step": 86400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2958333492279053, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.938629627227783 | |
| }, | |
| { | |
| "epoch": 8.882051282051282, | |
| "grad_norm": 2.0141303539276123, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3859, | |
| "step": 86600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.029315650463104248, | |
| "epoch": 8.882051282051282, | |
| "step": 86600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3291666805744171, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7676546573638916 | |
| }, | |
| { | |
| "epoch": 8.902564102564103, | |
| "grad_norm": 1.692972183227539, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 86800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03632832318544388, | |
| "epoch": 8.902564102564103, | |
| "step": 86800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.30416667461395264, | |
| "wm_acc_tail": 0.203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9708869457244873 | |
| }, | |
| { | |
| "epoch": 8.923076923076923, | |
| "grad_norm": 2.065300941467285, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3881, | |
| "step": 87000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04341608285903931, | |
| "epoch": 8.923076923076923, | |
| "step": 87000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4833333492279053, | |
| "wm_acc_tail": 0.3671875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1396496295928955 | |
| }, | |
| { | |
| "epoch": 8.943589743589744, | |
| "grad_norm": 1.6035358905792236, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3879, | |
| "step": 87200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.061604198068380356, | |
| "epoch": 8.943589743589744, | |
| "step": 87200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3333333432674408, | |
| "wm_acc_tail": 0.203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9299278259277344 | |
| }, | |
| { | |
| "epoch": 8.964102564102564, | |
| "grad_norm": 1.6987371444702148, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3876, | |
| "step": 87400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0016496545867994428, | |
| "epoch": 8.964102564102564, | |
| "step": 87400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.662815570831299 | |
| }, | |
| { | |
| "epoch": 8.984615384615385, | |
| "grad_norm": 1.5681344270706177, | |
| "learning_rate": 2e-05, | |
| "loss": 0.386, | |
| "step": 87600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02579469420015812, | |
| "epoch": 8.984615384615385, | |
| "step": 87600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3375000059604645, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.757530927658081 | |
| }, | |
| { | |
| "epoch": 9.005128205128205, | |
| "grad_norm": 1.523836374282837, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 87800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008759725838899612, | |
| "epoch": 9.005128205128205, | |
| "step": 87800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34583336114883423, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6175360679626465 | |
| }, | |
| { | |
| "epoch": 9.025641025641026, | |
| "grad_norm": 1.9110239744186401, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3843, | |
| "step": 88000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05376777797937393, | |
| "epoch": 9.025641025641026, | |
| "step": 88000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4958333671092987, | |
| "wm_acc_tail": 0.390625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.164174795150757 | |
| }, | |
| { | |
| "epoch": 9.046153846153846, | |
| "grad_norm": 1.696542739868164, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3892, | |
| "step": 88200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.035584066063165665, | |
| "epoch": 9.046153846153846, | |
| "step": 88200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2916666865348816, | |
| "wm_acc_tail": 0.1953125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.25823450088501 | |
| }, | |
| { | |
| "epoch": 9.066666666666666, | |
| "grad_norm": 1.7346086502075195, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 88400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.026685476303100586, | |
| "epoch": 9.066666666666666, | |
| "step": 88400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4637415409088135 | |
| }, | |
| { | |
| "epoch": 9.087179487179487, | |
| "grad_norm": 1.6763969659805298, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3847, | |
| "step": 88600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.003407008945941925, | |
| "epoch": 9.087179487179487, | |
| "step": 88600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36666667461395264, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.647148370742798 | |
| }, | |
| { | |
| "epoch": 9.107692307692307, | |
| "grad_norm": 1.6156846284866333, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 88800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03535706549882889, | |
| "epoch": 9.107692307692307, | |
| "step": 88800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36666667461395264, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.599299192428589 | |
| }, | |
| { | |
| "epoch": 9.128205128205128, | |
| "grad_norm": 1.5090279579162598, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 89000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.032918427139520645, | |
| "epoch": 9.128205128205128, | |
| "step": 89000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6074137687683105 | |
| }, | |
| { | |
| "epoch": 9.148717948717948, | |
| "grad_norm": 2.4588117599487305, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 89200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02214999310672283, | |
| "epoch": 9.148717948717948, | |
| "step": 89200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5125000476837158, | |
| "wm_acc_tail": 0.40625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.015423059463501 | |
| }, | |
| { | |
| "epoch": 9.169230769230769, | |
| "grad_norm": 1.7402222156524658, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 89400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.016100866720080376, | |
| "epoch": 9.169230769230769, | |
| "step": 89400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6048927307128906 | |
| }, | |
| { | |
| "epoch": 9.189743589743589, | |
| "grad_norm": 1.5109972953796387, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 89600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.015733519569039345, | |
| "epoch": 9.189743589743589, | |
| "step": 89600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34583336114883423, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.079684257507324 | |
| }, | |
| { | |
| "epoch": 9.21025641025641, | |
| "grad_norm": 1.6004693508148193, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 89800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05729304626584053, | |
| "epoch": 9.21025641025641, | |
| "step": 89800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6178343296051025 | |
| }, | |
| { | |
| "epoch": 9.23076923076923, | |
| "grad_norm": 2.0117807388305664, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 90000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06802148371934891, | |
| "epoch": 9.23076923076923, | |
| "step": 90000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40833336114883423, | |
| "wm_acc_tail": 0.2421875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.465695381164551 | |
| }, | |
| { | |
| "epoch": 9.25128205128205, | |
| "grad_norm": 1.797462821006775, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3896, | |
| "step": 90200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03545604646205902, | |
| "epoch": 9.25128205128205, | |
| "step": 90200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5541666746139526, | |
| "wm_acc_tail": 0.4375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.621760129928589 | |
| }, | |
| { | |
| "epoch": 9.271794871794873, | |
| "grad_norm": 1.829601764678955, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 90400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.04079307243227959, | |
| "epoch": 9.271794871794873, | |
| "step": 90400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3125000298023224, | |
| "wm_acc_tail": 0.1875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.929374933242798 | |
| }, | |
| { | |
| "epoch": 9.292307692307693, | |
| "grad_norm": 1.5879427194595337, | |
| "learning_rate": 2e-05, | |
| "loss": 0.389, | |
| "step": 90600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.029541900381445885, | |
| "epoch": 9.292307692307693, | |
| "step": 90600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5486197471618652 | |
| }, | |
| { | |
| "epoch": 9.312820512820513, | |
| "grad_norm": 1.6390506029129028, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3849, | |
| "step": 90800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.047224000096321106, | |
| "epoch": 9.312820512820513, | |
| "step": 90800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4750000238418579, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.1843974590301514 | |
| }, | |
| { | |
| "epoch": 9.333333333333334, | |
| "grad_norm": 1.659643530845642, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3897, | |
| "step": 91000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05992817133665085, | |
| "epoch": 9.333333333333334, | |
| "step": 91000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4655826091766357 | |
| }, | |
| { | |
| "epoch": 9.353846153846154, | |
| "grad_norm": 1.6733235120773315, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3845, | |
| "step": 91200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.011832883581519127, | |
| "epoch": 9.353846153846154, | |
| "step": 91200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.31666669249534607, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.224056243896484 | |
| }, | |
| { | |
| "epoch": 9.374358974358975, | |
| "grad_norm": 1.636035442352295, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3849, | |
| "step": 91400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.021136382594704628, | |
| "epoch": 9.374358974358975, | |
| "step": 91400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40416669845581055, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7372372150421143 | |
| }, | |
| { | |
| "epoch": 9.394871794871795, | |
| "grad_norm": 1.6149243116378784, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3863, | |
| "step": 91600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.00504174642264843, | |
| "epoch": 9.394871794871795, | |
| "step": 91600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34583336114883423, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.89072585105896 | |
| }, | |
| { | |
| "epoch": 9.415384615384616, | |
| "grad_norm": 1.7013003826141357, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3845, | |
| "step": 91800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01108713448047638, | |
| "epoch": 9.415384615384616, | |
| "step": 91800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.339104652404785 | |
| }, | |
| { | |
| "epoch": 9.435897435897436, | |
| "grad_norm": 1.7523878812789917, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3861, | |
| "step": 92000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.019201140850782394, | |
| "epoch": 9.435897435897436, | |
| "step": 92000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36250001192092896, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.872222661972046 | |
| }, | |
| { | |
| "epoch": 9.456410256410257, | |
| "grad_norm": 1.6118862628936768, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3871, | |
| "step": 92200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.018610605970025063, | |
| "epoch": 9.456410256410257, | |
| "step": 92200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3916666805744171, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4951095581054688 | |
| }, | |
| { | |
| "epoch": 9.476923076923077, | |
| "grad_norm": 1.57500159740448, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 92400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01652495190501213, | |
| "epoch": 9.476923076923077, | |
| "step": 92400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.652731418609619 | |
| }, | |
| { | |
| "epoch": 9.497435897435897, | |
| "grad_norm": 1.8554848432540894, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 92600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03488713502883911, | |
| "epoch": 9.497435897435897, | |
| "step": 92600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.26250001788139343, | |
| "wm_acc_tail": 0.1328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.179281711578369 | |
| }, | |
| { | |
| "epoch": 9.517948717948718, | |
| "grad_norm": 1.5718894004821777, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3841, | |
| "step": 92800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0030382887925952673, | |
| "epoch": 9.517948717948718, | |
| "step": 92800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.320451498031616 | |
| }, | |
| { | |
| "epoch": 9.538461538461538, | |
| "grad_norm": 1.7305283546447754, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3859, | |
| "step": 93000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06717889755964279, | |
| "epoch": 9.538461538461538, | |
| "step": 93000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.45000001788139343, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2118139266967773 | |
| }, | |
| { | |
| "epoch": 9.558974358974359, | |
| "grad_norm": 1.771408200263977, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3842, | |
| "step": 93200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01965961419045925, | |
| "epoch": 9.558974358974359, | |
| "step": 93200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.15625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.788145065307617 | |
| }, | |
| { | |
| "epoch": 9.57948717948718, | |
| "grad_norm": 1.7519792318344116, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 93400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.003925821278244257, | |
| "epoch": 9.57948717948718, | |
| "step": 93400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32500001788139343, | |
| "wm_acc_tail": 0.171875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.771604537963867 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "grad_norm": 1.6967359781265259, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 93600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.058353714644908905, | |
| "epoch": 9.6, | |
| "step": 93600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3583333492279053, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7426536083221436 | |
| }, | |
| { | |
| "epoch": 9.62051282051282, | |
| "grad_norm": 1.4712340831756592, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3883, | |
| "step": 93800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.016383500769734383, | |
| "epoch": 9.62051282051282, | |
| "step": 93800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.3125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.462956428527832 | |
| }, | |
| { | |
| "epoch": 9.64102564102564, | |
| "grad_norm": 1.6195602416992188, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3899, | |
| "step": 94000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.002548337448388338, | |
| "epoch": 9.64102564102564, | |
| "step": 94000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6327197551727295 | |
| }, | |
| { | |
| "epoch": 9.661538461538461, | |
| "grad_norm": 1.5548760890960693, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3882, | |
| "step": 94200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.014035292901098728, | |
| "epoch": 9.661538461538461, | |
| "step": 94200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34583336114883423, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.664316415786743 | |
| }, | |
| { | |
| "epoch": 9.682051282051281, | |
| "grad_norm": 1.607347846031189, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 94400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.043002329766750336, | |
| "epoch": 9.682051282051281, | |
| "step": 94400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.45000001788139343, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.0430703163146973 | |
| }, | |
| { | |
| "epoch": 9.702564102564102, | |
| "grad_norm": 1.6007635593414307, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3862, | |
| "step": 94600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0019076463067904115, | |
| "epoch": 9.702564102564102, | |
| "step": 94600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.44166669249534607, | |
| "wm_acc_tail": 0.34375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4640378952026367 | |
| }, | |
| { | |
| "epoch": 9.723076923076922, | |
| "grad_norm": 1.7263671159744263, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3897, | |
| "step": 94800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.008671373128890991, | |
| "epoch": 9.723076923076922, | |
| "step": 94800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.7310550212860107 | |
| }, | |
| { | |
| "epoch": 9.743589743589745, | |
| "grad_norm": 1.6239341497421265, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3867, | |
| "step": 95000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.023917516693472862, | |
| "epoch": 9.743589743589745, | |
| "step": 95000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.36666667461395264, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6633312702178955 | |
| }, | |
| { | |
| "epoch": 9.764102564102565, | |
| "grad_norm": 1.8152554035186768, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 95200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.006927983835339546, | |
| "epoch": 9.764102564102565, | |
| "step": 95200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.5, | |
| "wm_acc_tail": 0.390625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 2.922508716583252 | |
| }, | |
| { | |
| "epoch": 9.784615384615385, | |
| "grad_norm": 1.6458438634872437, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 95400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.002104040700942278, | |
| "epoch": 9.784615384615385, | |
| "step": 95400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3541666865348816, | |
| "wm_acc_tail": 0.265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.9006776809692383 | |
| }, | |
| { | |
| "epoch": 9.805128205128206, | |
| "grad_norm": 1.6357539892196655, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3853, | |
| "step": 95600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.025963347405195236, | |
| "epoch": 9.805128205128206, | |
| "step": 95600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4375000298023224, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.220663547515869 | |
| }, | |
| { | |
| "epoch": 9.825641025641026, | |
| "grad_norm": 1.7315382957458496, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3875, | |
| "step": 95800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.027422921732068062, | |
| "epoch": 9.825641025641026, | |
| "step": 95800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32500001788139343, | |
| "wm_acc_tail": 0.1953125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6027278900146484 | |
| }, | |
| { | |
| "epoch": 9.846153846153847, | |
| "grad_norm": 1.8027147054672241, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3884, | |
| "step": 96000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0280799251049757, | |
| "epoch": 9.846153846153847, | |
| "step": 96000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.6957623958587646 | |
| }, | |
| { | |
| "epoch": 9.866666666666667, | |
| "grad_norm": 1.9386242628097534, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3854, | |
| "step": 96200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02363528124988079, | |
| "epoch": 9.866666666666667, | |
| "step": 96200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2708333432674408, | |
| "wm_acc_tail": 0.171875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.377966403961182 | |
| }, | |
| { | |
| "epoch": 9.887179487179488, | |
| "grad_norm": 1.6836209297180176, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 96400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.06412398815155029, | |
| "epoch": 9.887179487179488, | |
| "step": 96400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.31666669249534607, | |
| "wm_acc_tail": 0.203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.730404853820801 | |
| }, | |
| { | |
| "epoch": 9.907692307692308, | |
| "grad_norm": 1.7490516901016235, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3855, | |
| "step": 96600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05375662073493004, | |
| "epoch": 9.907692307692308, | |
| "step": 96600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34583336114883423, | |
| "wm_acc_tail": 0.25, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8690290451049805 | |
| }, | |
| { | |
| "epoch": 9.928205128205128, | |
| "grad_norm": 1.6176785230636597, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 96800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.009080393239855766, | |
| "epoch": 9.928205128205128, | |
| "step": 96800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.37916669249534607, | |
| "wm_acc_tail": 0.21875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.525524377822876 | |
| }, | |
| { | |
| "epoch": 9.948717948717949, | |
| "grad_norm": 1.6435387134552002, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3858, | |
| "step": 97000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.02577665075659752, | |
| "epoch": 9.948717948717949, | |
| "step": 97000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.42916667461395264, | |
| "wm_acc_tail": 0.3046875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2681663036346436 | |
| }, | |
| { | |
| "epoch": 9.96923076923077, | |
| "grad_norm": 2.0464680194854736, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 97200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.017398443073034286, | |
| "epoch": 9.96923076923077, | |
| "step": 97200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2458333522081375, | |
| "wm_acc_tail": 0.2109375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.613058567047119 | |
| }, | |
| { | |
| "epoch": 9.98974358974359, | |
| "grad_norm": 2.0473198890686035, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 97400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.0027580142486840487, | |
| "epoch": 9.98974358974359, | |
| "step": 97400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.32083335518836975, | |
| "wm_acc_tail": 0.2265625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.142900466918945 | |
| }, | |
| { | |
| "epoch": 10.01025641025641, | |
| "grad_norm": 1.5655300617218018, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3856, | |
| "step": 97600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.002354752505198121, | |
| "epoch": 10.01025641025641, | |
| "step": 97600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40000003576278687, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.627025604248047 | |
| }, | |
| { | |
| "epoch": 10.03076923076923, | |
| "grad_norm": 1.5845980644226074, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3892, | |
| "step": 97800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.007005937863141298, | |
| "epoch": 10.03076923076923, | |
| "step": 97800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4333333671092987, | |
| "wm_acc_tail": 0.3203125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.422518014907837 | |
| }, | |
| { | |
| "epoch": 10.051282051282051, | |
| "grad_norm": 1.7210978269577026, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 98000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03867940232157707, | |
| "epoch": 10.051282051282051, | |
| "step": 98000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3750000298023224, | |
| "wm_acc_tail": 0.234375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.5429861545562744 | |
| }, | |
| { | |
| "epoch": 10.071794871794872, | |
| "grad_norm": 1.652026891708374, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3859, | |
| "step": 98200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.11132987588644028, | |
| "epoch": 10.071794871794872, | |
| "step": 98200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3916666805744171, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.632103204727173 | |
| }, | |
| { | |
| "epoch": 10.092307692307692, | |
| "grad_norm": 1.8058871030807495, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3852, | |
| "step": 98400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.018086377531290054, | |
| "epoch": 10.092307692307692, | |
| "step": 98400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.3500000238418579, | |
| "wm_acc_tail": 0.2734375, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.8160758018493652 | |
| }, | |
| { | |
| "epoch": 10.112820512820512, | |
| "grad_norm": 1.7162744998931885, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3857, | |
| "step": 98600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.056672602891922, | |
| "epoch": 10.112820512820512, | |
| "step": 98600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.38750001788139343, | |
| "wm_acc_tail": 0.296875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.543635129928589 | |
| }, | |
| { | |
| "epoch": 10.133333333333333, | |
| "grad_norm": 1.6360325813293457, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3871, | |
| "step": 98800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.05071864277124405, | |
| "epoch": 10.133333333333333, | |
| "step": 98800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.40833336114883423, | |
| "wm_acc_tail": 0.28125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.4632625579833984 | |
| }, | |
| { | |
| "epoch": 10.153846153846153, | |
| "grad_norm": 1.8891276121139526, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 99000 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.01021594274789095, | |
| "epoch": 10.153846153846153, | |
| "step": 99000, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.25833335518836975, | |
| "wm_acc_tail": 0.15625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 4.3719892501831055 | |
| }, | |
| { | |
| "epoch": 10.174358974358974, | |
| "grad_norm": 1.579563856124878, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3848, | |
| "step": 99200 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.010247575119137764, | |
| "epoch": 10.174358974358974, | |
| "step": 99200, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.4166666865348816, | |
| "wm_acc_tail": 0.328125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.2909698486328125 | |
| }, | |
| { | |
| "epoch": 10.194871794871794, | |
| "grad_norm": 1.7077383995056152, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 99400 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.019578522071242332, | |
| "epoch": 10.194871794871794, | |
| "step": 99400, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.45000001788139343, | |
| "wm_acc_tail": 0.2890625, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.078817367553711 | |
| }, | |
| { | |
| "epoch": 10.215384615384615, | |
| "grad_norm": 1.6362652778625488, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3851, | |
| "step": 99600 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.03737019747495651, | |
| "epoch": 10.215384615384615, | |
| "step": 99600, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.2958333492279053, | |
| "wm_acc_tail": 0.21875, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.850930690765381 | |
| }, | |
| { | |
| "epoch": 10.235897435897435, | |
| "grad_norm": 1.753855586051941, | |
| "learning_rate": 2e-05, | |
| "loss": 0.387, | |
| "step": 99800 | |
| }, | |
| { | |
| "action_learning_rate": 5e-05, | |
| "action_loss": 0.023653678596019745, | |
| "epoch": 10.235897435897435, | |
| "step": 99800, | |
| "vit_learning_rate": 2e-05, | |
| "wm_acc_mean": 0.34166669845581055, | |
| "wm_acc_tail": 0.2578125, | |
| "wm_learning_rate": 5e-05, | |
| "wm_out_loss": 3.716953754425049 | |
| }, | |
| { | |
| "epoch": 10.256410256410255, | |
| "grad_norm": 1.6769886016845703, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3843, | |
| "step": 100000 | |
| } | |
| ], | |
| "logging_steps": 200, | |
| "max_steps": 487500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |