{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 24500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004081632653061225, "grad_norm": 0.31208381056785583, "learning_rate": 5e-05, "loss": 4.7857, "step": 1 }, { "epoch": 0.00816326530612245, "grad_norm": 0.21349389851093292, "learning_rate": 4.999795918367347e-05, "loss": 4.9279, "step": 2 }, { "epoch": 0.012244897959183673, "grad_norm": 0.21641631424427032, "learning_rate": 4.999591836734694e-05, "loss": 4.9662, "step": 3 }, { "epoch": 0.0163265306122449, "grad_norm": 0.3751494884490967, "learning_rate": 4.999387755102041e-05, "loss": 4.7939, "step": 4 }, { "epoch": 0.02040816326530612, "grad_norm": 0.2543966770172119, "learning_rate": 4.999183673469388e-05, "loss": 4.5941, "step": 5 }, { "epoch": 0.024489795918367346, "grad_norm": 0.4132165014743805, "learning_rate": 4.998979591836735e-05, "loss": 4.7243, "step": 6 }, { "epoch": 0.02857142857142857, "grad_norm": 0.22350740432739258, "learning_rate": 4.998775510204082e-05, "loss": 4.6492, "step": 7 }, { "epoch": 0.0326530612244898, "grad_norm": 0.5093794465065002, "learning_rate": 4.9985714285714286e-05, "loss": 4.5845, "step": 8 }, { "epoch": 0.036734693877551024, "grad_norm": 0.2958655059337616, "learning_rate": 4.998367346938776e-05, "loss": 4.7989, "step": 9 }, { "epoch": 0.04081632653061224, "grad_norm": 0.6151564717292786, "learning_rate": 4.998163265306123e-05, "loss": 4.4993, "step": 10 }, { "epoch": 0.044897959183673466, "grad_norm": 0.3947940468788147, "learning_rate": 4.9979591836734694e-05, "loss": 4.5567, "step": 11 }, { "epoch": 0.04897959183673469, "grad_norm": 0.476544052362442, "learning_rate": 4.997755102040816e-05, "loss": 4.366, "step": 12 }, { "epoch": 0.053061224489795916, "grad_norm": 0.3369215726852417, "learning_rate": 4.9975510204081635e-05, "loss": 4.8564, "step": 13 }, { "epoch": 0.05714285714285714, "grad_norm": 0.28561851382255554, "learning_rate": 4.997346938775511e-05, "loss": 4.5655, "step": 14 }, { "epoch": 0.061224489795918366, "grad_norm": 0.37364667654037476, "learning_rate": 4.9971428571428576e-05, "loss": 4.7699, "step": 15 }, { "epoch": 0.0653061224489796, "grad_norm": 0.5654757022857666, "learning_rate": 4.996938775510204e-05, "loss": 4.4729, "step": 16 }, { "epoch": 0.06938775510204082, "grad_norm": 0.5995123982429504, "learning_rate": 4.996734693877551e-05, "loss": 4.8811, "step": 17 }, { "epoch": 0.07346938775510205, "grad_norm": 0.3607248067855835, "learning_rate": 4.996530612244898e-05, "loss": 4.8549, "step": 18 }, { "epoch": 0.07755102040816327, "grad_norm": 0.4240165650844574, "learning_rate": 4.996326530612245e-05, "loss": 4.6699, "step": 19 }, { "epoch": 0.08163265306122448, "grad_norm": 0.4819459915161133, "learning_rate": 4.996122448979592e-05, "loss": 4.8953, "step": 20 }, { "epoch": 0.08571428571428572, "grad_norm": 0.6769137382507324, "learning_rate": 4.995918367346939e-05, "loss": 4.5051, "step": 21 }, { "epoch": 0.08979591836734693, "grad_norm": 0.4839104413986206, "learning_rate": 4.995714285714286e-05, "loss": 5.0078, "step": 22 }, { "epoch": 0.09387755102040816, "grad_norm": 0.5650255680084229, "learning_rate": 4.9955102040816327e-05, "loss": 4.924, "step": 23 }, { "epoch": 0.09795918367346938, "grad_norm": 0.42539268732070923, "learning_rate": 4.99530612244898e-05, "loss": 4.7486, "step": 24 }, { "epoch": 0.10204081632653061, "grad_norm": 0.6210854053497314, "learning_rate": 4.995102040816327e-05, "loss": 4.4576, "step": 25 }, { "epoch": 0.10612244897959183, "grad_norm": 0.6120904684066772, "learning_rate": 4.9948979591836735e-05, "loss": 4.6915, "step": 26 }, { "epoch": 0.11020408163265306, "grad_norm": 0.7727541923522949, "learning_rate": 4.99469387755102e-05, "loss": 4.6198, "step": 27 }, { "epoch": 0.11428571428571428, "grad_norm": 0.5692687034606934, "learning_rate": 4.9944897959183676e-05, "loss": 5.0633, "step": 28 }, { "epoch": 0.11836734693877551, "grad_norm": 0.4118441343307495, "learning_rate": 4.994285714285715e-05, "loss": 4.4882, "step": 29 }, { "epoch": 0.12244897959183673, "grad_norm": 0.4685518443584442, "learning_rate": 4.994081632653062e-05, "loss": 4.7623, "step": 30 }, { "epoch": 0.12653061224489795, "grad_norm": 0.4789682626724243, "learning_rate": 4.9938775510204084e-05, "loss": 4.636, "step": 31 }, { "epoch": 0.1306122448979592, "grad_norm": 0.6034049391746521, "learning_rate": 4.993673469387755e-05, "loss": 4.437, "step": 32 }, { "epoch": 0.1346938775510204, "grad_norm": 0.4463157057762146, "learning_rate": 4.9934693877551025e-05, "loss": 4.7459, "step": 33 }, { "epoch": 0.13877551020408163, "grad_norm": 0.6769412755966187, "learning_rate": 4.993265306122449e-05, "loss": 4.5756, "step": 34 }, { "epoch": 0.14285714285714285, "grad_norm": 0.6828305125236511, "learning_rate": 4.993061224489796e-05, "loss": 4.6674, "step": 35 }, { "epoch": 0.1469387755102041, "grad_norm": 0.6295450329780579, "learning_rate": 4.992857142857143e-05, "loss": 4.7476, "step": 36 }, { "epoch": 0.1510204081632653, "grad_norm": 0.5933013558387756, "learning_rate": 4.99265306122449e-05, "loss": 4.4846, "step": 37 }, { "epoch": 0.15510204081632653, "grad_norm": NaN, "learning_rate": 4.9924489795918374e-05, "loss": 4.3515, "step": 38 }, { "epoch": 0.15918367346938775, "grad_norm": 0.4724745452404022, "learning_rate": 4.9924489795918374e-05, "loss": 4.8426, "step": 39 }, { "epoch": 0.16326530612244897, "grad_norm": 0.4767000079154968, "learning_rate": 4.992244897959184e-05, "loss": 4.5094, "step": 40 }, { "epoch": 0.1673469387755102, "grad_norm": 0.678774893283844, "learning_rate": 4.992040816326531e-05, "loss": 4.7565, "step": 41 }, { "epoch": 0.17142857142857143, "grad_norm": 0.5045970678329468, "learning_rate": 4.9918367346938776e-05, "loss": 4.7045, "step": 42 }, { "epoch": 0.17551020408163265, "grad_norm": 0.6203688383102417, "learning_rate": 4.991632653061224e-05, "loss": 4.7477, "step": 43 }, { "epoch": 0.17959183673469387, "grad_norm": 1.189394235610962, "learning_rate": 4.9914285714285717e-05, "loss": 4.4428, "step": 44 }, { "epoch": 0.1836734693877551, "grad_norm": 0.4641496539115906, "learning_rate": 4.991224489795919e-05, "loss": 4.7861, "step": 45 }, { "epoch": 0.18775510204081633, "grad_norm": 0.7707638740539551, "learning_rate": 4.991020408163266e-05, "loss": 4.4325, "step": 46 }, { "epoch": 0.19183673469387755, "grad_norm": 0.644920825958252, "learning_rate": 4.9908163265306125e-05, "loss": 4.4997, "step": 47 }, { "epoch": 0.19591836734693877, "grad_norm": 0.53870689868927, "learning_rate": 4.990612244897959e-05, "loss": 4.5768, "step": 48 }, { "epoch": 0.2, "grad_norm": 0.7646830677986145, "learning_rate": 4.9904081632653066e-05, "loss": 4.6744, "step": 49 }, { "epoch": 0.20408163265306123, "grad_norm": 0.9022605419158936, "learning_rate": 4.990204081632653e-05, "loss": 4.6348, "step": 50 }, { "epoch": 0.20816326530612245, "grad_norm": 0.6827958822250366, "learning_rate": 4.99e-05, "loss": 4.5237, "step": 51 }, { "epoch": 0.21224489795918366, "grad_norm": 0.7681111693382263, "learning_rate": 4.9897959183673474e-05, "loss": 4.6252, "step": 52 }, { "epoch": 0.2163265306122449, "grad_norm": 0.5817428827285767, "learning_rate": 4.989591836734694e-05, "loss": 4.6253, "step": 53 }, { "epoch": 0.22040816326530613, "grad_norm": 0.6264687776565552, "learning_rate": 4.9893877551020415e-05, "loss": 4.5482, "step": 54 }, { "epoch": 0.22448979591836735, "grad_norm": 0.731473445892334, "learning_rate": 4.989183673469388e-05, "loss": 4.6882, "step": 55 }, { "epoch": 0.22857142857142856, "grad_norm": 1.0228747129440308, "learning_rate": 4.988979591836735e-05, "loss": 4.4391, "step": 56 }, { "epoch": 0.23265306122448978, "grad_norm": 0.5823572874069214, "learning_rate": 4.9887755102040816e-05, "loss": 4.4429, "step": 57 }, { "epoch": 0.23673469387755103, "grad_norm": 0.6926144361495972, "learning_rate": 4.9885714285714283e-05, "loss": 4.762, "step": 58 }, { "epoch": 0.24081632653061225, "grad_norm": 0.8256344795227051, "learning_rate": 4.988367346938776e-05, "loss": 4.4058, "step": 59 }, { "epoch": 0.24489795918367346, "grad_norm": 1.2436455488204956, "learning_rate": 4.9881632653061224e-05, "loss": 4.1535, "step": 60 }, { "epoch": 0.24897959183673468, "grad_norm": 0.7361056208610535, "learning_rate": 4.98795918367347e-05, "loss": 4.5739, "step": 61 }, { "epoch": 0.2530612244897959, "grad_norm": 0.8051048517227173, "learning_rate": 4.9877551020408165e-05, "loss": 4.5774, "step": 62 }, { "epoch": 0.2571428571428571, "grad_norm": 0.569408655166626, "learning_rate": 4.987551020408163e-05, "loss": 4.7661, "step": 63 }, { "epoch": 0.2612244897959184, "grad_norm": 1.1339480876922607, "learning_rate": 4.9873469387755106e-05, "loss": 4.56, "step": 64 }, { "epoch": 0.2653061224489796, "grad_norm": 1.1837830543518066, "learning_rate": 4.9871428571428574e-05, "loss": 4.5944, "step": 65 }, { "epoch": 0.2693877551020408, "grad_norm": 0.7772983312606812, "learning_rate": 4.986938775510204e-05, "loss": 4.7844, "step": 66 }, { "epoch": 0.27346938775510204, "grad_norm": 0.9051035642623901, "learning_rate": 4.986734693877551e-05, "loss": 4.3414, "step": 67 }, { "epoch": 0.27755102040816326, "grad_norm": 0.8353317975997925, "learning_rate": 4.986530612244898e-05, "loss": 4.403, "step": 68 }, { "epoch": 0.2816326530612245, "grad_norm": 0.9380103349685669, "learning_rate": 4.9863265306122456e-05, "loss": 4.3104, "step": 69 }, { "epoch": 0.2857142857142857, "grad_norm": 0.8733192086219788, "learning_rate": 4.986122448979592e-05, "loss": 4.3923, "step": 70 }, { "epoch": 0.2897959183673469, "grad_norm": 1.1246919631958008, "learning_rate": 4.985918367346939e-05, "loss": 4.2634, "step": 71 }, { "epoch": 0.2938775510204082, "grad_norm": 1.0740638971328735, "learning_rate": 4.985714285714286e-05, "loss": 4.2678, "step": 72 }, { "epoch": 0.2979591836734694, "grad_norm": 0.6237021684646606, "learning_rate": 4.985510204081633e-05, "loss": 4.5072, "step": 73 }, { "epoch": 0.3020408163265306, "grad_norm": 1.6200965642929077, "learning_rate": 4.98530612244898e-05, "loss": 3.8762, "step": 74 }, { "epoch": 0.30612244897959184, "grad_norm": 1.1674776077270508, "learning_rate": 4.9851020408163265e-05, "loss": 4.438, "step": 75 }, { "epoch": 0.31020408163265306, "grad_norm": 1.0216920375823975, "learning_rate": 4.984897959183674e-05, "loss": 4.3544, "step": 76 }, { "epoch": 0.3142857142857143, "grad_norm": 1.117874026298523, "learning_rate": 4.9846938775510206e-05, "loss": 4.3104, "step": 77 }, { "epoch": 0.3183673469387755, "grad_norm": 1.9295778274536133, "learning_rate": 4.984489795918368e-05, "loss": 4.0092, "step": 78 }, { "epoch": 0.3224489795918367, "grad_norm": 1.3168115615844727, "learning_rate": 4.984285714285715e-05, "loss": 4.3592, "step": 79 }, { "epoch": 0.32653061224489793, "grad_norm": 0.8556247353553772, "learning_rate": 4.9840816326530614e-05, "loss": 4.3987, "step": 80 }, { "epoch": 0.3306122448979592, "grad_norm": 1.06153404712677, "learning_rate": 4.983877551020408e-05, "loss": 4.3472, "step": 81 }, { "epoch": 0.3346938775510204, "grad_norm": 1.0523443222045898, "learning_rate": 4.983673469387755e-05, "loss": 4.4255, "step": 82 }, { "epoch": 0.33877551020408164, "grad_norm": 0.7657639384269714, "learning_rate": 4.983469387755102e-05, "loss": 4.4417, "step": 83 }, { "epoch": 0.34285714285714286, "grad_norm": 0.863507866859436, "learning_rate": 4.9832653061224496e-05, "loss": 4.4696, "step": 84 }, { "epoch": 0.3469387755102041, "grad_norm": 1.0855858325958252, "learning_rate": 4.9830612244897964e-05, "loss": 4.383, "step": 85 }, { "epoch": 0.3510204081632653, "grad_norm": 1.230837345123291, "learning_rate": 4.982857142857143e-05, "loss": 4.3612, "step": 86 }, { "epoch": 0.3551020408163265, "grad_norm": 1.2586658000946045, "learning_rate": 4.98265306122449e-05, "loss": 4.3103, "step": 87 }, { "epoch": 0.35918367346938773, "grad_norm": 0.9537893533706665, "learning_rate": 4.982448979591837e-05, "loss": 4.3089, "step": 88 }, { "epoch": 0.363265306122449, "grad_norm": 1.1135846376419067, "learning_rate": 4.982244897959184e-05, "loss": 4.1239, "step": 89 }, { "epoch": 0.3673469387755102, "grad_norm": 1.1306291818618774, "learning_rate": 4.9820408163265306e-05, "loss": 4.2765, "step": 90 }, { "epoch": 0.37142857142857144, "grad_norm": 0.8709103465080261, "learning_rate": 4.981836734693878e-05, "loss": 4.3445, "step": 91 }, { "epoch": 0.37551020408163266, "grad_norm": 1.0635451078414917, "learning_rate": 4.981632653061225e-05, "loss": 4.5691, "step": 92 }, { "epoch": 0.3795918367346939, "grad_norm": 1.4896990060806274, "learning_rate": 4.981428571428572e-05, "loss": 4.2151, "step": 93 }, { "epoch": 0.3836734693877551, "grad_norm": 0.7532121539115906, "learning_rate": 4.981224489795919e-05, "loss": 4.4396, "step": 94 }, { "epoch": 0.3877551020408163, "grad_norm": 1.5947719812393188, "learning_rate": 4.9810204081632655e-05, "loss": 4.2164, "step": 95 }, { "epoch": 0.39183673469387753, "grad_norm": 0.8807844519615173, "learning_rate": 4.980816326530612e-05, "loss": 4.5486, "step": 96 }, { "epoch": 0.39591836734693875, "grad_norm": 1.7952898740768433, "learning_rate": 4.980612244897959e-05, "loss": 4.2697, "step": 97 }, { "epoch": 0.4, "grad_norm": 1.3350169658660889, "learning_rate": 4.980408163265306e-05, "loss": 4.3054, "step": 98 }, { "epoch": 0.40408163265306124, "grad_norm": 1.244295358657837, "learning_rate": 4.980204081632654e-05, "loss": 4.4519, "step": 99 }, { "epoch": 0.40816326530612246, "grad_norm": 1.5392001867294312, "learning_rate": 4.9800000000000004e-05, "loss": 4.2585, "step": 100 }, { "epoch": 0.4122448979591837, "grad_norm": 0.9577623605728149, "learning_rate": 4.979795918367347e-05, "loss": 4.5015, "step": 101 }, { "epoch": 0.4163265306122449, "grad_norm": 1.4797390699386597, "learning_rate": 4.979591836734694e-05, "loss": 4.2146, "step": 102 }, { "epoch": 0.4204081632653061, "grad_norm": 0.8325679302215576, "learning_rate": 4.979387755102041e-05, "loss": 4.4612, "step": 103 }, { "epoch": 0.42448979591836733, "grad_norm": 1.4537171125411987, "learning_rate": 4.979183673469388e-05, "loss": 4.4503, "step": 104 }, { "epoch": 0.42857142857142855, "grad_norm": 0.7264580726623535, "learning_rate": 4.978979591836735e-05, "loss": 4.3666, "step": 105 }, { "epoch": 0.4326530612244898, "grad_norm": 0.7286907434463501, "learning_rate": 4.978775510204082e-05, "loss": 4.4137, "step": 106 }, { "epoch": 0.43673469387755104, "grad_norm": 1.1407803297042847, "learning_rate": 4.978571428571429e-05, "loss": 4.5346, "step": 107 }, { "epoch": 0.44081632653061226, "grad_norm": 1.2859892845153809, "learning_rate": 4.978367346938776e-05, "loss": 3.6008, "step": 108 }, { "epoch": 0.4448979591836735, "grad_norm": 1.8057981729507446, "learning_rate": 4.978163265306123e-05, "loss": 4.1331, "step": 109 }, { "epoch": 0.4489795918367347, "grad_norm": 1.249578595161438, "learning_rate": 4.9779591836734696e-05, "loss": 4.1813, "step": 110 }, { "epoch": 0.4530612244897959, "grad_norm": 0.8711186051368713, "learning_rate": 4.977755102040816e-05, "loss": 4.2218, "step": 111 }, { "epoch": 0.45714285714285713, "grad_norm": 1.1513988971710205, "learning_rate": 4.977551020408163e-05, "loss": 4.5265, "step": 112 }, { "epoch": 0.46122448979591835, "grad_norm": 1.850088357925415, "learning_rate": 4.9773469387755104e-05, "loss": 4.123, "step": 113 }, { "epoch": 0.46530612244897956, "grad_norm": 1.101750135421753, "learning_rate": 4.977142857142857e-05, "loss": 4.4297, "step": 114 }, { "epoch": 0.46938775510204084, "grad_norm": 1.0823191404342651, "learning_rate": 4.9769387755102045e-05, "loss": 4.4288, "step": 115 }, { "epoch": 0.47346938775510206, "grad_norm": 0.9625919461250305, "learning_rate": 4.976734693877551e-05, "loss": 4.3031, "step": 116 }, { "epoch": 0.4775510204081633, "grad_norm": 1.2763785123825073, "learning_rate": 4.9765306122448986e-05, "loss": 4.3641, "step": 117 }, { "epoch": 0.4816326530612245, "grad_norm": 2.5417606830596924, "learning_rate": 4.976326530612245e-05, "loss": 3.8383, "step": 118 }, { "epoch": 0.4857142857142857, "grad_norm": 0.8448789715766907, "learning_rate": 4.976122448979592e-05, "loss": 4.4557, "step": 119 }, { "epoch": 0.4897959183673469, "grad_norm": 1.09085214138031, "learning_rate": 4.975918367346939e-05, "loss": 4.0239, "step": 120 }, { "epoch": 0.49387755102040815, "grad_norm": 1.3846369981765747, "learning_rate": 4.9757142857142855e-05, "loss": 4.4807, "step": 121 }, { "epoch": 0.49795918367346936, "grad_norm": 1.6815881729125977, "learning_rate": 4.975510204081633e-05, "loss": 4.0543, "step": 122 }, { "epoch": 0.5020408163265306, "grad_norm": 1.1176117658615112, "learning_rate": 4.97530612244898e-05, "loss": 4.234, "step": 123 }, { "epoch": 0.5061224489795918, "grad_norm": 1.3535709381103516, "learning_rate": 4.975102040816327e-05, "loss": 4.3014, "step": 124 }, { "epoch": 0.5102040816326531, "grad_norm": 1.0094329118728638, "learning_rate": 4.974897959183674e-05, "loss": 4.4673, "step": 125 }, { "epoch": 0.5142857142857142, "grad_norm": 1.174735188484192, "learning_rate": 4.9746938775510204e-05, "loss": 4.6155, "step": 126 }, { "epoch": 0.5183673469387755, "grad_norm": 1.549323320388794, "learning_rate": 4.974489795918368e-05, "loss": 4.414, "step": 127 }, { "epoch": 0.5224489795918368, "grad_norm": 0.7437114119529724, "learning_rate": 4.9742857142857145e-05, "loss": 4.2863, "step": 128 }, { "epoch": 0.5265306122448979, "grad_norm": 1.5887006521224976, "learning_rate": 4.974081632653061e-05, "loss": 4.4498, "step": 129 }, { "epoch": 0.5306122448979592, "grad_norm": 1.0564626455307007, "learning_rate": 4.9738775510204086e-05, "loss": 4.3391, "step": 130 }, { "epoch": 0.5346938775510204, "grad_norm": 1.158301591873169, "learning_rate": 4.973673469387755e-05, "loss": 4.3735, "step": 131 }, { "epoch": 0.5387755102040817, "grad_norm": 0.9639889001846313, "learning_rate": 4.973469387755103e-05, "loss": 4.2004, "step": 132 }, { "epoch": 0.5428571428571428, "grad_norm": 0.9946554899215698, "learning_rate": 4.9732653061224494e-05, "loss": 4.4955, "step": 133 }, { "epoch": 0.5469387755102041, "grad_norm": 1.7640198469161987, "learning_rate": 4.973061224489796e-05, "loss": 3.9995, "step": 134 }, { "epoch": 0.5510204081632653, "grad_norm": 1.5254324674606323, "learning_rate": 4.972857142857143e-05, "loss": 4.4405, "step": 135 }, { "epoch": 0.5551020408163265, "grad_norm": 1.232525110244751, "learning_rate": 4.9726530612244895e-05, "loss": 4.1242, "step": 136 }, { "epoch": 0.5591836734693878, "grad_norm": 1.0171490907669067, "learning_rate": 4.972448979591837e-05, "loss": 4.443, "step": 137 }, { "epoch": 0.563265306122449, "grad_norm": 1.6044353246688843, "learning_rate": 4.972244897959184e-05, "loss": 4.3044, "step": 138 }, { "epoch": 0.5673469387755102, "grad_norm": 1.046557068824768, "learning_rate": 4.972040816326531e-05, "loss": 4.2124, "step": 139 }, { "epoch": 0.5714285714285714, "grad_norm": 1.1873852014541626, "learning_rate": 4.971836734693878e-05, "loss": 4.4393, "step": 140 }, { "epoch": 0.5755102040816327, "grad_norm": 2.4648683071136475, "learning_rate": 4.9716326530612245e-05, "loss": 3.6782, "step": 141 }, { "epoch": 0.5795918367346938, "grad_norm": 1.7819944620132446, "learning_rate": 4.971428571428572e-05, "loss": 3.8311, "step": 142 }, { "epoch": 0.5836734693877551, "grad_norm": 1.312848448753357, "learning_rate": 4.9712244897959186e-05, "loss": 4.3572, "step": 143 }, { "epoch": 0.5877551020408164, "grad_norm": 1.4655852317810059, "learning_rate": 4.971020408163265e-05, "loss": 4.1511, "step": 144 }, { "epoch": 0.5918367346938775, "grad_norm": 1.738085150718689, "learning_rate": 4.9708163265306127e-05, "loss": 4.1384, "step": 145 }, { "epoch": 0.5959183673469388, "grad_norm": 1.5898183584213257, "learning_rate": 4.9706122448979594e-05, "loss": 4.0627, "step": 146 }, { "epoch": 0.6, "grad_norm": 1.4708333015441895, "learning_rate": 4.970408163265307e-05, "loss": 4.1278, "step": 147 }, { "epoch": 0.6040816326530613, "grad_norm": 1.6426666975021362, "learning_rate": 4.9702040816326535e-05, "loss": 4.1859, "step": 148 }, { "epoch": 0.6081632653061224, "grad_norm": 1.3246554136276245, "learning_rate": 4.97e-05, "loss": 3.9394, "step": 149 }, { "epoch": 0.6122448979591837, "grad_norm": 1.1911392211914062, "learning_rate": 4.969795918367347e-05, "loss": 4.3372, "step": 150 }, { "epoch": 0.6163265306122448, "grad_norm": 1.3935261964797974, "learning_rate": 4.9695918367346936e-05, "loss": 4.1562, "step": 151 }, { "epoch": 0.6204081632653061, "grad_norm": 1.2537792921066284, "learning_rate": 4.969387755102041e-05, "loss": 4.264, "step": 152 }, { "epoch": 0.6244897959183674, "grad_norm": 1.4883792400360107, "learning_rate": 4.9691836734693884e-05, "loss": 4.2794, "step": 153 }, { "epoch": 0.6285714285714286, "grad_norm": 1.1379858255386353, "learning_rate": 4.968979591836735e-05, "loss": 4.2866, "step": 154 }, { "epoch": 0.6326530612244898, "grad_norm": 1.3454419374465942, "learning_rate": 4.968775510204082e-05, "loss": 4.5469, "step": 155 }, { "epoch": 0.636734693877551, "grad_norm": 1.184134840965271, "learning_rate": 4.9685714285714285e-05, "loss": 4.3111, "step": 156 }, { "epoch": 0.6408163265306123, "grad_norm": 1.4783999919891357, "learning_rate": 4.968367346938776e-05, "loss": 4.0221, "step": 157 }, { "epoch": 0.6448979591836734, "grad_norm": 1.7919408082962036, "learning_rate": 4.9681632653061226e-05, "loss": 4.2913, "step": 158 }, { "epoch": 0.6489795918367347, "grad_norm": 1.4281623363494873, "learning_rate": 4.9679591836734693e-05, "loss": 4.2702, "step": 159 }, { "epoch": 0.6530612244897959, "grad_norm": 2.146357774734497, "learning_rate": 4.967755102040817e-05, "loss": 3.6621, "step": 160 }, { "epoch": 0.6571428571428571, "grad_norm": 1.4628933668136597, "learning_rate": 4.9675510204081634e-05, "loss": 4.2975, "step": 161 }, { "epoch": 0.6612244897959184, "grad_norm": 1.0909277200698853, "learning_rate": 4.967346938775511e-05, "loss": 4.1836, "step": 162 }, { "epoch": 0.6653061224489796, "grad_norm": 1.7163093090057373, "learning_rate": 4.9671428571428576e-05, "loss": 4.0824, "step": 163 }, { "epoch": 0.6693877551020408, "grad_norm": 1.1291508674621582, "learning_rate": 4.966938775510204e-05, "loss": 4.2714, "step": 164 }, { "epoch": 0.673469387755102, "grad_norm": 1.6153613328933716, "learning_rate": 4.966734693877551e-05, "loss": 4.2479, "step": 165 }, { "epoch": 0.6775510204081633, "grad_norm": 2.098374128341675, "learning_rate": 4.9665306122448984e-05, "loss": 4.0386, "step": 166 }, { "epoch": 0.6816326530612244, "grad_norm": 1.1595077514648438, "learning_rate": 4.966326530612245e-05, "loss": 4.4503, "step": 167 }, { "epoch": 0.6857142857142857, "grad_norm": 2.61529541015625, "learning_rate": 4.966122448979592e-05, "loss": 4.0058, "step": 168 }, { "epoch": 0.689795918367347, "grad_norm": 1.3801040649414062, "learning_rate": 4.965918367346939e-05, "loss": 4.524, "step": 169 }, { "epoch": 0.6938775510204082, "grad_norm": 1.5195716619491577, "learning_rate": 4.965714285714286e-05, "loss": 4.4514, "step": 170 }, { "epoch": 0.6979591836734694, "grad_norm": 1.8861066102981567, "learning_rate": 4.965510204081633e-05, "loss": 4.0576, "step": 171 }, { "epoch": 0.7020408163265306, "grad_norm": 1.173241376876831, "learning_rate": 4.96530612244898e-05, "loss": 4.4175, "step": 172 }, { "epoch": 0.7061224489795919, "grad_norm": 1.4320660829544067, "learning_rate": 4.965102040816327e-05, "loss": 4.1949, "step": 173 }, { "epoch": 0.710204081632653, "grad_norm": 1.1446641683578491, "learning_rate": 4.9648979591836734e-05, "loss": 4.1698, "step": 174 }, { "epoch": 0.7142857142857143, "grad_norm": 2.1873323917388916, "learning_rate": 4.96469387755102e-05, "loss": 4.0192, "step": 175 }, { "epoch": 0.7183673469387755, "grad_norm": 1.5609369277954102, "learning_rate": 4.9644897959183675e-05, "loss": 4.1932, "step": 176 }, { "epoch": 0.7224489795918367, "grad_norm": 1.0381983518600464, "learning_rate": 4.964285714285715e-05, "loss": 4.2827, "step": 177 }, { "epoch": 0.726530612244898, "grad_norm": 2.1597723960876465, "learning_rate": 4.9640816326530616e-05, "loss": 4.0081, "step": 178 }, { "epoch": 0.7306122448979592, "grad_norm": 1.4268780946731567, "learning_rate": 4.9638775510204083e-05, "loss": 3.9976, "step": 179 }, { "epoch": 0.7346938775510204, "grad_norm": 2.5165250301361084, "learning_rate": 4.963673469387755e-05, "loss": 3.7326, "step": 180 }, { "epoch": 0.7387755102040816, "grad_norm": 1.0820257663726807, "learning_rate": 4.9634693877551024e-05, "loss": 4.1023, "step": 181 }, { "epoch": 0.7428571428571429, "grad_norm": 1.408072829246521, "learning_rate": 4.963265306122449e-05, "loss": 4.402, "step": 182 }, { "epoch": 0.746938775510204, "grad_norm": 1.5106821060180664, "learning_rate": 4.963061224489796e-05, "loss": 4.3067, "step": 183 }, { "epoch": 0.7510204081632653, "grad_norm": 1.7217953205108643, "learning_rate": 4.962857142857143e-05, "loss": 4.247, "step": 184 }, { "epoch": 0.7551020408163265, "grad_norm": 1.07102370262146, "learning_rate": 4.96265306122449e-05, "loss": 4.5174, "step": 185 }, { "epoch": 0.7591836734693878, "grad_norm": 1.4602986574172974, "learning_rate": 4.9624489795918374e-05, "loss": 4.4873, "step": 186 }, { "epoch": 0.763265306122449, "grad_norm": 1.5682032108306885, "learning_rate": 4.962244897959184e-05, "loss": 4.066, "step": 187 }, { "epoch": 0.7673469387755102, "grad_norm": 1.0258070230484009, "learning_rate": 4.962040816326531e-05, "loss": 4.3638, "step": 188 }, { "epoch": 0.7714285714285715, "grad_norm": 1.6832032203674316, "learning_rate": 4.9618367346938775e-05, "loss": 4.2403, "step": 189 }, { "epoch": 0.7755102040816326, "grad_norm": 1.2022207975387573, "learning_rate": 4.961632653061224e-05, "loss": 4.1655, "step": 190 }, { "epoch": 0.7795918367346939, "grad_norm": 1.4628525972366333, "learning_rate": 4.9614285714285716e-05, "loss": 4.5498, "step": 191 }, { "epoch": 0.7836734693877551, "grad_norm": 1.0907748937606812, "learning_rate": 4.961224489795919e-05, "loss": 4.249, "step": 192 }, { "epoch": 0.7877551020408163, "grad_norm": 1.2278015613555908, "learning_rate": 4.961020408163266e-05, "loss": 4.1296, "step": 193 }, { "epoch": 0.7918367346938775, "grad_norm": 4.94681978225708, "learning_rate": 4.9608163265306124e-05, "loss": 2.7808, "step": 194 }, { "epoch": 0.7959183673469388, "grad_norm": 1.7330247163772583, "learning_rate": 4.960612244897959e-05, "loss": 4.1373, "step": 195 }, { "epoch": 0.8, "grad_norm": 1.398681402206421, "learning_rate": 4.9604081632653065e-05, "loss": 4.4175, "step": 196 }, { "epoch": 0.8040816326530612, "grad_norm": 2.389338493347168, "learning_rate": 4.960204081632653e-05, "loss": 3.8027, "step": 197 }, { "epoch": 0.8081632653061225, "grad_norm": 1.9688960313796997, "learning_rate": 4.96e-05, "loss": 4.1644, "step": 198 }, { "epoch": 0.8122448979591836, "grad_norm": 2.1935484409332275, "learning_rate": 4.959795918367347e-05, "loss": 4.4628, "step": 199 }, { "epoch": 0.8163265306122449, "grad_norm": 1.700683832168579, "learning_rate": 4.959591836734694e-05, "loss": 4.3383, "step": 200 }, { "epoch": 0.8204081632653061, "grad_norm": 1.94700288772583, "learning_rate": 4.9593877551020414e-05, "loss": 3.7817, "step": 201 }, { "epoch": 0.8244897959183674, "grad_norm": 1.8284486532211304, "learning_rate": 4.959183673469388e-05, "loss": 4.4248, "step": 202 }, { "epoch": 0.8285714285714286, "grad_norm": 1.349188208580017, "learning_rate": 4.958979591836735e-05, "loss": 4.1292, "step": 203 }, { "epoch": 0.8326530612244898, "grad_norm": 1.0778595209121704, "learning_rate": 4.9587755102040816e-05, "loss": 4.5371, "step": 204 }, { "epoch": 0.8367346938775511, "grad_norm": 1.0254387855529785, "learning_rate": 4.958571428571428e-05, "loss": 4.3755, "step": 205 }, { "epoch": 0.8408163265306122, "grad_norm": 1.2443711757659912, "learning_rate": 4.958367346938776e-05, "loss": 4.2722, "step": 206 }, { "epoch": 0.8448979591836735, "grad_norm": 1.7367180585861206, "learning_rate": 4.958163265306123e-05, "loss": 3.9733, "step": 207 }, { "epoch": 0.8489795918367347, "grad_norm": 1.348734974861145, "learning_rate": 4.95795918367347e-05, "loss": 4.446, "step": 208 }, { "epoch": 0.8530612244897959, "grad_norm": 2.081690788269043, "learning_rate": 4.9577551020408165e-05, "loss": 3.8294, "step": 209 }, { "epoch": 0.8571428571428571, "grad_norm": 1.4527875185012817, "learning_rate": 4.957551020408164e-05, "loss": 4.1855, "step": 210 }, { "epoch": 0.8612244897959184, "grad_norm": 1.6517484188079834, "learning_rate": 4.9573469387755106e-05, "loss": 4.2467, "step": 211 }, { "epoch": 0.8653061224489796, "grad_norm": 1.9387691020965576, "learning_rate": 4.957142857142857e-05, "loss": 3.8162, "step": 212 }, { "epoch": 0.8693877551020408, "grad_norm": 1.2454255819320679, "learning_rate": 4.956938775510204e-05, "loss": 4.1176, "step": 213 }, { "epoch": 0.8734693877551021, "grad_norm": 2.08258056640625, "learning_rate": 4.956734693877551e-05, "loss": 4.2503, "step": 214 }, { "epoch": 0.8775510204081632, "grad_norm": 2.2561392784118652, "learning_rate": 4.956530612244898e-05, "loss": 4.1175, "step": 215 }, { "epoch": 0.8816326530612245, "grad_norm": 1.0317918062210083, "learning_rate": 4.9563265306122455e-05, "loss": 4.4871, "step": 216 }, { "epoch": 0.8857142857142857, "grad_norm": 1.2974133491516113, "learning_rate": 4.956122448979592e-05, "loss": 4.2455, "step": 217 }, { "epoch": 0.889795918367347, "grad_norm": 1.5318362712860107, "learning_rate": 4.955918367346939e-05, "loss": 4.2882, "step": 218 }, { "epoch": 0.8938775510204081, "grad_norm": 1.6586138010025024, "learning_rate": 4.9557142857142857e-05, "loss": 4.1567, "step": 219 }, { "epoch": 0.8979591836734694, "grad_norm": 1.0005639791488647, "learning_rate": 4.955510204081633e-05, "loss": 4.3838, "step": 220 }, { "epoch": 0.9020408163265307, "grad_norm": 1.1785041093826294, "learning_rate": 4.95530612244898e-05, "loss": 4.2643, "step": 221 }, { "epoch": 0.9061224489795918, "grad_norm": 2.7668910026550293, "learning_rate": 4.9551020408163265e-05, "loss": 4.0727, "step": 222 }, { "epoch": 0.9102040816326531, "grad_norm": 1.4554626941680908, "learning_rate": 4.954897959183674e-05, "loss": 4.4101, "step": 223 }, { "epoch": 0.9142857142857143, "grad_norm": 1.1457873582839966, "learning_rate": 4.9546938775510206e-05, "loss": 4.4943, "step": 224 }, { "epoch": 0.9183673469387755, "grad_norm": 2.4995527267456055, "learning_rate": 4.954489795918368e-05, "loss": 3.9915, "step": 225 }, { "epoch": 0.9224489795918367, "grad_norm": 2.209364891052246, "learning_rate": 4.954285714285715e-05, "loss": 4.0883, "step": 226 }, { "epoch": 0.926530612244898, "grad_norm": 1.402743935585022, "learning_rate": 4.9540816326530614e-05, "loss": 4.1974, "step": 227 }, { "epoch": 0.9306122448979591, "grad_norm": 1.2660311460494995, "learning_rate": 4.953877551020408e-05, "loss": 4.1531, "step": 228 }, { "epoch": 0.9346938775510204, "grad_norm": 1.9203327894210815, "learning_rate": 4.953673469387755e-05, "loss": 4.0245, "step": 229 }, { "epoch": 0.9387755102040817, "grad_norm": 0.9297183156013489, "learning_rate": 4.953469387755102e-05, "loss": 4.5446, "step": 230 }, { "epoch": 0.9428571428571428, "grad_norm": 1.5598822832107544, "learning_rate": 4.9532653061224496e-05, "loss": 4.2826, "step": 231 }, { "epoch": 0.9469387755102041, "grad_norm": 2.193864345550537, "learning_rate": 4.953061224489796e-05, "loss": 4.2414, "step": 232 }, { "epoch": 0.9510204081632653, "grad_norm": 1.3961427211761475, "learning_rate": 4.952857142857143e-05, "loss": 4.0677, "step": 233 }, { "epoch": 0.9551020408163265, "grad_norm": 1.5126782655715942, "learning_rate": 4.95265306122449e-05, "loss": 4.3597, "step": 234 }, { "epoch": 0.9591836734693877, "grad_norm": 1.1713650226593018, "learning_rate": 4.952448979591837e-05, "loss": 4.0724, "step": 235 }, { "epoch": 0.963265306122449, "grad_norm": 1.3646550178527832, "learning_rate": 4.952244897959184e-05, "loss": 4.3955, "step": 236 }, { "epoch": 0.9673469387755103, "grad_norm": 1.9064581394195557, "learning_rate": 4.9520408163265305e-05, "loss": 4.3061, "step": 237 }, { "epoch": 0.9714285714285714, "grad_norm": 1.7987868785858154, "learning_rate": 4.951836734693878e-05, "loss": 4.262, "step": 238 }, { "epoch": 0.9755102040816327, "grad_norm": 1.2431975603103638, "learning_rate": 4.9516326530612246e-05, "loss": 4.1708, "step": 239 }, { "epoch": 0.9795918367346939, "grad_norm": 1.3197264671325684, "learning_rate": 4.951428571428572e-05, "loss": 4.0569, "step": 240 }, { "epoch": 0.9836734693877551, "grad_norm": 2.7940282821655273, "learning_rate": 4.951224489795919e-05, "loss": 4.0586, "step": 241 }, { "epoch": 0.9877551020408163, "grad_norm": 1.8068604469299316, "learning_rate": 4.9510204081632655e-05, "loss": 3.5492, "step": 242 }, { "epoch": 0.9918367346938776, "grad_norm": 1.4273451566696167, "learning_rate": 4.950816326530612e-05, "loss": 4.0677, "step": 243 }, { "epoch": 0.9959183673469387, "grad_norm": 1.296491026878357, "learning_rate": 4.950612244897959e-05, "loss": 4.4846, "step": 244 }, { "epoch": 1.0, "grad_norm": 1.217117428779602, "learning_rate": 4.950408163265306e-05, "loss": 4.1667, "step": 245 }, { "epoch": 1.0040816326530613, "grad_norm": 1.7318886518478394, "learning_rate": 4.950204081632654e-05, "loss": 4.1506, "step": 246 }, { "epoch": 1.0081632653061225, "grad_norm": 1.3467750549316406, "learning_rate": 4.9500000000000004e-05, "loss": 4.4059, "step": 247 }, { "epoch": 1.0122448979591836, "grad_norm": 1.2588825225830078, "learning_rate": 4.949795918367347e-05, "loss": 4.6053, "step": 248 }, { "epoch": 1.0163265306122449, "grad_norm": 1.2650394439697266, "learning_rate": 4.949591836734694e-05, "loss": 4.3627, "step": 249 }, { "epoch": 1.0204081632653061, "grad_norm": 1.3907620906829834, "learning_rate": 4.949387755102041e-05, "loss": 4.1098, "step": 250 }, { "epoch": 1.0244897959183674, "grad_norm": 2.2943613529205322, "learning_rate": 4.949183673469388e-05, "loss": 4.015, "step": 251 }, { "epoch": 1.0285714285714285, "grad_norm": 1.473616361618042, "learning_rate": 4.9489795918367346e-05, "loss": 4.2064, "step": 252 }, { "epoch": 1.0326530612244897, "grad_norm": 2.887422800064087, "learning_rate": 4.948775510204082e-05, "loss": 3.8144, "step": 253 }, { "epoch": 1.036734693877551, "grad_norm": 1.18984055519104, "learning_rate": 4.9485714285714294e-05, "loss": 4.4431, "step": 254 }, { "epoch": 1.0408163265306123, "grad_norm": 1.4181402921676636, "learning_rate": 4.948367346938776e-05, "loss": 4.2367, "step": 255 }, { "epoch": 1.0448979591836736, "grad_norm": 1.2459412813186646, "learning_rate": 4.948163265306123e-05, "loss": 4.0364, "step": 256 }, { "epoch": 1.0489795918367346, "grad_norm": 1.3870526552200317, "learning_rate": 4.9479591836734695e-05, "loss": 4.1149, "step": 257 }, { "epoch": 1.0530612244897959, "grad_norm": 1.9406521320343018, "learning_rate": 4.947755102040816e-05, "loss": 4.0327, "step": 258 }, { "epoch": 1.0571428571428572, "grad_norm": 1.3811335563659668, "learning_rate": 4.9475510204081636e-05, "loss": 4.1732, "step": 259 }, { "epoch": 1.0612244897959184, "grad_norm": 2.383148431777954, "learning_rate": 4.9473469387755104e-05, "loss": 3.7104, "step": 260 }, { "epoch": 1.0653061224489795, "grad_norm": 2.7949836254119873, "learning_rate": 4.947142857142858e-05, "loss": 3.6441, "step": 261 }, { "epoch": 1.0693877551020408, "grad_norm": 2.0354666709899902, "learning_rate": 4.9469387755102045e-05, "loss": 4.3452, "step": 262 }, { "epoch": 1.073469387755102, "grad_norm": 1.6764564514160156, "learning_rate": 4.946734693877551e-05, "loss": 3.8283, "step": 263 }, { "epoch": 1.0775510204081633, "grad_norm": 1.4978727102279663, "learning_rate": 4.9465306122448986e-05, "loss": 3.9989, "step": 264 }, { "epoch": 1.0816326530612246, "grad_norm": 1.603790521621704, "learning_rate": 4.946326530612245e-05, "loss": 4.3359, "step": 265 }, { "epoch": 1.0857142857142856, "grad_norm": 1.5427533388137817, "learning_rate": 4.946122448979592e-05, "loss": 4.4608, "step": 266 }, { "epoch": 1.089795918367347, "grad_norm": 2.5881834030151367, "learning_rate": 4.945918367346939e-05, "loss": 3.8567, "step": 267 }, { "epoch": 1.0938775510204082, "grad_norm": 1.4072855710983276, "learning_rate": 4.9457142857142854e-05, "loss": 4.1094, "step": 268 }, { "epoch": 1.0979591836734695, "grad_norm": 1.1109029054641724, "learning_rate": 4.945510204081633e-05, "loss": 4.2328, "step": 269 }, { "epoch": 1.1020408163265305, "grad_norm": 2.3614375591278076, "learning_rate": 4.94530612244898e-05, "loss": 4.2392, "step": 270 }, { "epoch": 1.1061224489795918, "grad_norm": 1.819162368774414, "learning_rate": 4.945102040816327e-05, "loss": 4.1211, "step": 271 }, { "epoch": 1.110204081632653, "grad_norm": 2.3971340656280518, "learning_rate": 4.9448979591836736e-05, "loss": 4.0622, "step": 272 }, { "epoch": 1.1142857142857143, "grad_norm": 1.3867722749710083, "learning_rate": 4.94469387755102e-05, "loss": 4.3388, "step": 273 }, { "epoch": 1.1183673469387756, "grad_norm": 1.4345884323120117, "learning_rate": 4.944489795918368e-05, "loss": 4.2172, "step": 274 }, { "epoch": 1.1224489795918366, "grad_norm": 1.9774521589279175, "learning_rate": 4.9442857142857144e-05, "loss": 4.1231, "step": 275 }, { "epoch": 1.126530612244898, "grad_norm": 2.2007596492767334, "learning_rate": 4.944081632653061e-05, "loss": 4.0664, "step": 276 }, { "epoch": 1.1306122448979592, "grad_norm": 2.254870653152466, "learning_rate": 4.9438775510204085e-05, "loss": 4.1027, "step": 277 }, { "epoch": 1.1346938775510205, "grad_norm": 2.863422155380249, "learning_rate": 4.943673469387755e-05, "loss": 3.6599, "step": 278 }, { "epoch": 1.1387755102040815, "grad_norm": 1.6436034440994263, "learning_rate": 4.9434693877551026e-05, "loss": 4.223, "step": 279 }, { "epoch": 1.1428571428571428, "grad_norm": 2.5637035369873047, "learning_rate": 4.9432653061224493e-05, "loss": 3.9147, "step": 280 }, { "epoch": 1.146938775510204, "grad_norm": 1.2647186517715454, "learning_rate": 4.943061224489796e-05, "loss": 4.0919, "step": 281 }, { "epoch": 1.1510204081632653, "grad_norm": 2.039769411087036, "learning_rate": 4.942857142857143e-05, "loss": 3.6884, "step": 282 }, { "epoch": 1.1551020408163266, "grad_norm": 1.1950795650482178, "learning_rate": 4.9426530612244895e-05, "loss": 4.31, "step": 283 }, { "epoch": 1.1591836734693877, "grad_norm": 1.5811231136322021, "learning_rate": 4.942448979591837e-05, "loss": 4.2602, "step": 284 }, { "epoch": 1.163265306122449, "grad_norm": 2.4926788806915283, "learning_rate": 4.942244897959184e-05, "loss": 3.9992, "step": 285 }, { "epoch": 1.1673469387755102, "grad_norm": 1.2263604402542114, "learning_rate": 4.942040816326531e-05, "loss": 4.5059, "step": 286 }, { "epoch": 1.1714285714285715, "grad_norm": 1.84638249874115, "learning_rate": 4.941836734693878e-05, "loss": 4.31, "step": 287 }, { "epoch": 1.1755102040816325, "grad_norm": 1.2600041627883911, "learning_rate": 4.9416326530612244e-05, "loss": 4.4787, "step": 288 }, { "epoch": 1.1795918367346938, "grad_norm": 1.9881263971328735, "learning_rate": 4.941428571428572e-05, "loss": 3.8282, "step": 289 }, { "epoch": 1.183673469387755, "grad_norm": 1.7270735502243042, "learning_rate": 4.9412244897959185e-05, "loss": 4.2142, "step": 290 }, { "epoch": 1.1877551020408164, "grad_norm": 1.426923394203186, "learning_rate": 4.941020408163265e-05, "loss": 4.065, "step": 291 }, { "epoch": 1.1918367346938776, "grad_norm": 2.116428852081299, "learning_rate": 4.9408163265306126e-05, "loss": 3.9403, "step": 292 }, { "epoch": 1.1959183673469387, "grad_norm": 1.7209304571151733, "learning_rate": 4.940612244897959e-05, "loss": 4.0551, "step": 293 }, { "epoch": 1.2, "grad_norm": 1.1720383167266846, "learning_rate": 4.940408163265307e-05, "loss": 4.2763, "step": 294 }, { "epoch": 1.2040816326530612, "grad_norm": 2.149209499359131, "learning_rate": 4.9402040816326534e-05, "loss": 4.2315, "step": 295 }, { "epoch": 1.2081632653061225, "grad_norm": 2.5329036712646484, "learning_rate": 4.94e-05, "loss": 3.8584, "step": 296 }, { "epoch": 1.2122448979591836, "grad_norm": 1.554153323173523, "learning_rate": 4.939795918367347e-05, "loss": 4.1764, "step": 297 }, { "epoch": 1.2163265306122448, "grad_norm": 2.6440656185150146, "learning_rate": 4.939591836734694e-05, "loss": 4.3395, "step": 298 }, { "epoch": 1.220408163265306, "grad_norm": 1.9424339532852173, "learning_rate": 4.939387755102041e-05, "loss": 4.3597, "step": 299 }, { "epoch": 1.2244897959183674, "grad_norm": 2.245913505554199, "learning_rate": 4.9391836734693883e-05, "loss": 4.1245, "step": 300 }, { "epoch": 1.2285714285714286, "grad_norm": 2.157371759414673, "learning_rate": 4.938979591836735e-05, "loss": 4.0708, "step": 301 }, { "epoch": 1.2326530612244897, "grad_norm": 1.3916500806808472, "learning_rate": 4.938775510204082e-05, "loss": 4.0141, "step": 302 }, { "epoch": 1.236734693877551, "grad_norm": 1.3122526407241821, "learning_rate": 4.938571428571429e-05, "loss": 4.1014, "step": 303 }, { "epoch": 1.2408163265306122, "grad_norm": 2.0791866779327393, "learning_rate": 4.938367346938776e-05, "loss": 4.0955, "step": 304 }, { "epoch": 1.2448979591836735, "grad_norm": 2.5015666484832764, "learning_rate": 4.9381632653061226e-05, "loss": 3.7886, "step": 305 }, { "epoch": 1.2489795918367346, "grad_norm": 1.589040994644165, "learning_rate": 4.937959183673469e-05, "loss": 4.0593, "step": 306 }, { "epoch": 1.2530612244897958, "grad_norm": 3.15635347366333, "learning_rate": 4.937755102040817e-05, "loss": 3.4831, "step": 307 }, { "epoch": 1.2571428571428571, "grad_norm": 2.6833086013793945, "learning_rate": 4.937551020408164e-05, "loss": 3.8797, "step": 308 }, { "epoch": 1.2612244897959184, "grad_norm": 1.4496700763702393, "learning_rate": 4.937346938775511e-05, "loss": 4.3043, "step": 309 }, { "epoch": 1.2653061224489797, "grad_norm": 2.0160603523254395, "learning_rate": 4.9371428571428575e-05, "loss": 4.058, "step": 310 }, { "epoch": 1.269387755102041, "grad_norm": 1.5909013748168945, "learning_rate": 4.936938775510204e-05, "loss": 4.0198, "step": 311 }, { "epoch": 1.273469387755102, "grad_norm": 1.8569542169570923, "learning_rate": 4.936734693877551e-05, "loss": 4.0653, "step": 312 }, { "epoch": 1.2775510204081633, "grad_norm": 2.7685141563415527, "learning_rate": 4.936530612244898e-05, "loss": 3.726, "step": 313 }, { "epoch": 1.2816326530612245, "grad_norm": 2.1171460151672363, "learning_rate": 4.936326530612245e-05, "loss": 3.8237, "step": 314 }, { "epoch": 1.2857142857142856, "grad_norm": 1.4291337728500366, "learning_rate": 4.9361224489795924e-05, "loss": 4.3037, "step": 315 }, { "epoch": 1.2897959183673469, "grad_norm": 1.7282572984695435, "learning_rate": 4.935918367346939e-05, "loss": 4.0825, "step": 316 }, { "epoch": 1.2938775510204081, "grad_norm": 2.174750804901123, "learning_rate": 4.935714285714286e-05, "loss": 4.0235, "step": 317 }, { "epoch": 1.2979591836734694, "grad_norm": 1.541850209236145, "learning_rate": 4.935510204081633e-05, "loss": 4.2266, "step": 318 }, { "epoch": 1.3020408163265307, "grad_norm": 1.7309280633926392, "learning_rate": 4.93530612244898e-05, "loss": 3.8705, "step": 319 }, { "epoch": 1.306122448979592, "grad_norm": 1.3872014284133911, "learning_rate": 4.9351020408163267e-05, "loss": 4.1114, "step": 320 }, { "epoch": 1.310204081632653, "grad_norm": 3.8115780353546143, "learning_rate": 4.9348979591836734e-05, "loss": 3.3457, "step": 321 }, { "epoch": 1.3142857142857143, "grad_norm": 1.733047604560852, "learning_rate": 4.93469387755102e-05, "loss": 4.0128, "step": 322 }, { "epoch": 1.3183673469387756, "grad_norm": 3.776658535003662, "learning_rate": 4.9344897959183675e-05, "loss": 3.9493, "step": 323 }, { "epoch": 1.3224489795918366, "grad_norm": 1.6025097370147705, "learning_rate": 4.934285714285715e-05, "loss": 4.3979, "step": 324 }, { "epoch": 1.3265306122448979, "grad_norm": 1.8050333261489868, "learning_rate": 4.9340816326530616e-05, "loss": 4.211, "step": 325 }, { "epoch": 1.3306122448979592, "grad_norm": 1.9944652318954468, "learning_rate": 4.933877551020408e-05, "loss": 3.9981, "step": 326 }, { "epoch": 1.3346938775510204, "grad_norm": 1.954699158668518, "learning_rate": 4.933673469387755e-05, "loss": 3.599, "step": 327 }, { "epoch": 1.3387755102040817, "grad_norm": 2.5667917728424072, "learning_rate": 4.9334693877551024e-05, "loss": 3.9778, "step": 328 }, { "epoch": 1.342857142857143, "grad_norm": 1.3636200428009033, "learning_rate": 4.933265306122449e-05, "loss": 4.3161, "step": 329 }, { "epoch": 1.346938775510204, "grad_norm": 4.086418628692627, "learning_rate": 4.933061224489796e-05, "loss": 3.5731, "step": 330 }, { "epoch": 1.3510204081632653, "grad_norm": 1.6994091272354126, "learning_rate": 4.932857142857143e-05, "loss": 4.018, "step": 331 }, { "epoch": 1.3551020408163266, "grad_norm": 1.5542407035827637, "learning_rate": 4.93265306122449e-05, "loss": 4.1876, "step": 332 }, { "epoch": 1.3591836734693876, "grad_norm": 1.3689703941345215, "learning_rate": 4.932448979591837e-05, "loss": 4.3096, "step": 333 }, { "epoch": 1.363265306122449, "grad_norm": 1.8447339534759521, "learning_rate": 4.932244897959184e-05, "loss": 4.3484, "step": 334 }, { "epoch": 1.3673469387755102, "grad_norm": 2.100076198577881, "learning_rate": 4.932040816326531e-05, "loss": 4.2984, "step": 335 }, { "epoch": 1.3714285714285714, "grad_norm": 1.5814971923828125, "learning_rate": 4.9318367346938774e-05, "loss": 4.1512, "step": 336 }, { "epoch": 1.3755102040816327, "grad_norm": 3.7132272720336914, "learning_rate": 4.931632653061224e-05, "loss": 3.7697, "step": 337 }, { "epoch": 1.379591836734694, "grad_norm": 1.1147047281265259, "learning_rate": 4.9314285714285716e-05, "loss": 4.1535, "step": 338 }, { "epoch": 1.383673469387755, "grad_norm": 1.9044201374053955, "learning_rate": 4.931224489795919e-05, "loss": 4.1931, "step": 339 }, { "epoch": 1.3877551020408163, "grad_norm": 2.0986642837524414, "learning_rate": 4.9310204081632657e-05, "loss": 4.1674, "step": 340 }, { "epoch": 1.3918367346938776, "grad_norm": 1.407275676727295, "learning_rate": 4.9308163265306124e-05, "loss": 4.0149, "step": 341 }, { "epoch": 1.3959183673469386, "grad_norm": 2.3383495807647705, "learning_rate": 4.930612244897959e-05, "loss": 4.3014, "step": 342 }, { "epoch": 1.4, "grad_norm": 2.0628459453582764, "learning_rate": 4.9304081632653065e-05, "loss": 3.4493, "step": 343 }, { "epoch": 1.4040816326530612, "grad_norm": 2.986344814300537, "learning_rate": 4.930204081632653e-05, "loss": 3.7192, "step": 344 }, { "epoch": 1.4081632653061225, "grad_norm": 1.944872260093689, "learning_rate": 4.93e-05, "loss": 4.4585, "step": 345 }, { "epoch": 1.4122448979591837, "grad_norm": 2.6270134449005127, "learning_rate": 4.929795918367347e-05, "loss": 3.7829, "step": 346 }, { "epoch": 1.416326530612245, "grad_norm": 2.0975446701049805, "learning_rate": 4.929591836734695e-05, "loss": 3.9572, "step": 347 }, { "epoch": 1.420408163265306, "grad_norm": 2.508650541305542, "learning_rate": 4.9293877551020414e-05, "loss": 4.0997, "step": 348 }, { "epoch": 1.4244897959183673, "grad_norm": 3.529205083847046, "learning_rate": 4.929183673469388e-05, "loss": 3.3324, "step": 349 }, { "epoch": 1.4285714285714286, "grad_norm": 2.1780683994293213, "learning_rate": 4.928979591836735e-05, "loss": 4.1288, "step": 350 }, { "epoch": 1.4326530612244899, "grad_norm": 1.934523344039917, "learning_rate": 4.9287755102040815e-05, "loss": 4.0462, "step": 351 }, { "epoch": 1.436734693877551, "grad_norm": 2.061483383178711, "learning_rate": 4.928571428571429e-05, "loss": 3.9701, "step": 352 }, { "epoch": 1.4408163265306122, "grad_norm": 1.4854933023452759, "learning_rate": 4.9283673469387756e-05, "loss": 4.096, "step": 353 }, { "epoch": 1.4448979591836735, "grad_norm": 1.8180322647094727, "learning_rate": 4.928163265306123e-05, "loss": 4.0539, "step": 354 }, { "epoch": 1.4489795918367347, "grad_norm": 2.695537805557251, "learning_rate": 4.92795918367347e-05, "loss": 3.569, "step": 355 }, { "epoch": 1.453061224489796, "grad_norm": 2.302316904067993, "learning_rate": 4.9277551020408164e-05, "loss": 3.6129, "step": 356 }, { "epoch": 1.457142857142857, "grad_norm": 2.323418378829956, "learning_rate": 4.927551020408164e-05, "loss": 3.8956, "step": 357 }, { "epoch": 1.4612244897959183, "grad_norm": 2.113978862762451, "learning_rate": 4.9273469387755105e-05, "loss": 3.8511, "step": 358 }, { "epoch": 1.4653061224489796, "grad_norm": 1.9458290338516235, "learning_rate": 4.927142857142857e-05, "loss": 3.9588, "step": 359 }, { "epoch": 1.469387755102041, "grad_norm": 1.980067491531372, "learning_rate": 4.926938775510204e-05, "loss": 3.9428, "step": 360 }, { "epoch": 1.473469387755102, "grad_norm": 2.7212142944335938, "learning_rate": 4.9267346938775514e-05, "loss": 3.82, "step": 361 }, { "epoch": 1.4775510204081632, "grad_norm": 1.349443793296814, "learning_rate": 4.926530612244899e-05, "loss": 4.4512, "step": 362 }, { "epoch": 1.4816326530612245, "grad_norm": 2.097014904022217, "learning_rate": 4.9263265306122455e-05, "loss": 3.8332, "step": 363 }, { "epoch": 1.4857142857142858, "grad_norm": 2.4298958778381348, "learning_rate": 4.926122448979592e-05, "loss": 3.8785, "step": 364 }, { "epoch": 1.489795918367347, "grad_norm": 2.3293492794036865, "learning_rate": 4.925918367346939e-05, "loss": 3.9794, "step": 365 }, { "epoch": 1.493877551020408, "grad_norm": 1.4444608688354492, "learning_rate": 4.9257142857142856e-05, "loss": 4.3237, "step": 366 }, { "epoch": 1.4979591836734694, "grad_norm": 1.1569204330444336, "learning_rate": 4.925510204081633e-05, "loss": 4.2963, "step": 367 }, { "epoch": 1.5020408163265306, "grad_norm": 1.328134298324585, "learning_rate": 4.92530612244898e-05, "loss": 4.4036, "step": 368 }, { "epoch": 1.5061224489795917, "grad_norm": 2.0049564838409424, "learning_rate": 4.925102040816327e-05, "loss": 4.1494, "step": 369 }, { "epoch": 1.510204081632653, "grad_norm": 1.622894525527954, "learning_rate": 4.924897959183674e-05, "loss": 4.2895, "step": 370 }, { "epoch": 1.5142857142857142, "grad_norm": 2.512248992919922, "learning_rate": 4.9246938775510205e-05, "loss": 3.8193, "step": 371 }, { "epoch": 1.5183673469387755, "grad_norm": 3.0595366954803467, "learning_rate": 4.924489795918368e-05, "loss": 3.4255, "step": 372 }, { "epoch": 1.5224489795918368, "grad_norm": 1.8143991231918335, "learning_rate": 4.9242857142857146e-05, "loss": 4.1585, "step": 373 }, { "epoch": 1.526530612244898, "grad_norm": 2.107198476791382, "learning_rate": 4.924081632653061e-05, "loss": 4.2618, "step": 374 }, { "epoch": 1.5306122448979593, "grad_norm": 1.6206976175308228, "learning_rate": 4.923877551020408e-05, "loss": 4.1335, "step": 375 }, { "epoch": 1.5346938775510204, "grad_norm": 2.1220099925994873, "learning_rate": 4.923673469387755e-05, "loss": 4.0505, "step": 376 }, { "epoch": 1.5387755102040817, "grad_norm": 3.130084753036499, "learning_rate": 4.923469387755102e-05, "loss": 3.8627, "step": 377 }, { "epoch": 1.5428571428571427, "grad_norm": 1.3648008108139038, "learning_rate": 4.9232653061224495e-05, "loss": 4.2539, "step": 378 }, { "epoch": 1.546938775510204, "grad_norm": 1.659600019454956, "learning_rate": 4.923061224489796e-05, "loss": 3.9672, "step": 379 }, { "epoch": 1.5510204081632653, "grad_norm": 1.1960277557373047, "learning_rate": 4.922857142857143e-05, "loss": 4.4827, "step": 380 }, { "epoch": 1.5551020408163265, "grad_norm": 2.1807801723480225, "learning_rate": 4.92265306122449e-05, "loss": 4.1224, "step": 381 }, { "epoch": 1.5591836734693878, "grad_norm": 1.5804678201675415, "learning_rate": 4.922448979591837e-05, "loss": 4.0969, "step": 382 }, { "epoch": 1.563265306122449, "grad_norm": 2.617016553878784, "learning_rate": 4.922244897959184e-05, "loss": 3.8825, "step": 383 }, { "epoch": 1.5673469387755103, "grad_norm": 2.309892416000366, "learning_rate": 4.9220408163265305e-05, "loss": 4.1585, "step": 384 }, { "epoch": 1.5714285714285714, "grad_norm": 1.7406953573226929, "learning_rate": 4.921836734693878e-05, "loss": 4.3226, "step": 385 }, { "epoch": 1.5755102040816327, "grad_norm": 1.3825101852416992, "learning_rate": 4.9216326530612246e-05, "loss": 4.1656, "step": 386 }, { "epoch": 1.5795918367346937, "grad_norm": 1.2288719415664673, "learning_rate": 4.921428571428572e-05, "loss": 4.2457, "step": 387 }, { "epoch": 1.583673469387755, "grad_norm": 1.4200166463851929, "learning_rate": 4.921224489795919e-05, "loss": 4.4169, "step": 388 }, { "epoch": 1.5877551020408163, "grad_norm": 1.5452178716659546, "learning_rate": 4.9210204081632654e-05, "loss": 4.0425, "step": 389 }, { "epoch": 1.5918367346938775, "grad_norm": 2.349039077758789, "learning_rate": 4.920816326530612e-05, "loss": 3.8136, "step": 390 }, { "epoch": 1.5959183673469388, "grad_norm": 3.240506649017334, "learning_rate": 4.9206122448979595e-05, "loss": 3.9847, "step": 391 }, { "epoch": 1.6, "grad_norm": 1.6027522087097168, "learning_rate": 4.920408163265306e-05, "loss": 3.8129, "step": 392 }, { "epoch": 1.6040816326530614, "grad_norm": 2.5088295936584473, "learning_rate": 4.9202040816326536e-05, "loss": 3.6792, "step": 393 }, { "epoch": 1.6081632653061224, "grad_norm": 1.8181660175323486, "learning_rate": 4.92e-05, "loss": 4.189, "step": 394 }, { "epoch": 1.6122448979591837, "grad_norm": 1.8467906713485718, "learning_rate": 4.919795918367347e-05, "loss": 4.2596, "step": 395 }, { "epoch": 1.6163265306122447, "grad_norm": 2.6677327156066895, "learning_rate": 4.9195918367346944e-05, "loss": 3.7386, "step": 396 }, { "epoch": 1.620408163265306, "grad_norm": 1.792243242263794, "learning_rate": 4.919387755102041e-05, "loss": 4.0538, "step": 397 }, { "epoch": 1.6244897959183673, "grad_norm": 2.19669246673584, "learning_rate": 4.919183673469388e-05, "loss": 3.9942, "step": 398 }, { "epoch": 1.6285714285714286, "grad_norm": 1.637571096420288, "learning_rate": 4.9189795918367346e-05, "loss": 4.2537, "step": 399 }, { "epoch": 1.6326530612244898, "grad_norm": 1.6697475910186768, "learning_rate": 4.918775510204082e-05, "loss": 4.269, "step": 400 }, { "epoch": 1.636734693877551, "grad_norm": 2.2958102226257324, "learning_rate": 4.9185714285714293e-05, "loss": 4.1549, "step": 401 }, { "epoch": 1.6408163265306124, "grad_norm": 2.3954217433929443, "learning_rate": 4.918367346938776e-05, "loss": 3.8372, "step": 402 }, { "epoch": 1.6448979591836734, "grad_norm": 1.8756088018417358, "learning_rate": 4.918163265306123e-05, "loss": 4.4003, "step": 403 }, { "epoch": 1.6489795918367347, "grad_norm": 1.4228523969650269, "learning_rate": 4.9179591836734695e-05, "loss": 4.4226, "step": 404 }, { "epoch": 1.6530612244897958, "grad_norm": 1.6013604402542114, "learning_rate": 4.917755102040816e-05, "loss": 4.2847, "step": 405 }, { "epoch": 1.657142857142857, "grad_norm": 1.226184606552124, "learning_rate": 4.9175510204081636e-05, "loss": 4.3884, "step": 406 }, { "epoch": 1.6612244897959183, "grad_norm": 2.688842296600342, "learning_rate": 4.91734693877551e-05, "loss": 4.14, "step": 407 }, { "epoch": 1.6653061224489796, "grad_norm": 1.5360801219940186, "learning_rate": 4.917142857142858e-05, "loss": 4.02, "step": 408 }, { "epoch": 1.6693877551020408, "grad_norm": 2.3719053268432617, "learning_rate": 4.9169387755102044e-05, "loss": 3.8159, "step": 409 }, { "epoch": 1.6734693877551021, "grad_norm": 2.330063819885254, "learning_rate": 4.916734693877551e-05, "loss": 3.9054, "step": 410 }, { "epoch": 1.6775510204081634, "grad_norm": 2.818535327911377, "learning_rate": 4.9165306122448985e-05, "loss": 3.7335, "step": 411 }, { "epoch": 1.6816326530612244, "grad_norm": 1.996985912322998, "learning_rate": 4.916326530612245e-05, "loss": 4.1958, "step": 412 }, { "epoch": 1.6857142857142857, "grad_norm": 1.8436214923858643, "learning_rate": 4.916122448979592e-05, "loss": 4.0389, "step": 413 }, { "epoch": 1.689795918367347, "grad_norm": 1.5692106485366821, "learning_rate": 4.9159183673469386e-05, "loss": 4.0223, "step": 414 }, { "epoch": 1.693877551020408, "grad_norm": 1.945883870124817, "learning_rate": 4.915714285714286e-05, "loss": 4.3475, "step": 415 }, { "epoch": 1.6979591836734693, "grad_norm": 1.6550970077514648, "learning_rate": 4.9155102040816334e-05, "loss": 4.254, "step": 416 }, { "epoch": 1.7020408163265306, "grad_norm": 1.4021434783935547, "learning_rate": 4.91530612244898e-05, "loss": 4.1759, "step": 417 }, { "epoch": 1.7061224489795919, "grad_norm": 3.820578098297119, "learning_rate": 4.915102040816327e-05, "loss": 3.4438, "step": 418 }, { "epoch": 1.7102040816326531, "grad_norm": 1.4824951887130737, "learning_rate": 4.9148979591836736e-05, "loss": 4.0912, "step": 419 }, { "epoch": 1.7142857142857144, "grad_norm": 2.5240607261657715, "learning_rate": 4.91469387755102e-05, "loss": 4.0671, "step": 420 }, { "epoch": 1.7183673469387755, "grad_norm": 2.0331220626831055, "learning_rate": 4.914489795918368e-05, "loss": 3.9309, "step": 421 }, { "epoch": 1.7224489795918367, "grad_norm": 1.6453821659088135, "learning_rate": 4.9142857142857144e-05, "loss": 4.2847, "step": 422 }, { "epoch": 1.726530612244898, "grad_norm": 1.6307530403137207, "learning_rate": 4.914081632653061e-05, "loss": 4.132, "step": 423 }, { "epoch": 1.730612244897959, "grad_norm": 1.6472804546356201, "learning_rate": 4.9138775510204085e-05, "loss": 3.982, "step": 424 }, { "epoch": 1.7346938775510203, "grad_norm": 2.291857957839966, "learning_rate": 4.913673469387755e-05, "loss": 3.9669, "step": 425 }, { "epoch": 1.7387755102040816, "grad_norm": 1.6785979270935059, "learning_rate": 4.9134693877551026e-05, "loss": 4.1622, "step": 426 }, { "epoch": 1.7428571428571429, "grad_norm": 1.6722347736358643, "learning_rate": 4.913265306122449e-05, "loss": 4.3516, "step": 427 }, { "epoch": 1.7469387755102042, "grad_norm": 1.6274893283843994, "learning_rate": 4.913061224489796e-05, "loss": 4.0917, "step": 428 }, { "epoch": 1.7510204081632654, "grad_norm": 1.2978583574295044, "learning_rate": 4.912857142857143e-05, "loss": 4.2593, "step": 429 }, { "epoch": 1.7551020408163265, "grad_norm": 1.865187168121338, "learning_rate": 4.9126530612244894e-05, "loss": 4.3041, "step": 430 }, { "epoch": 1.7591836734693878, "grad_norm": 2.326470136642456, "learning_rate": 4.912448979591837e-05, "loss": 4.1847, "step": 431 }, { "epoch": 1.763265306122449, "grad_norm": 4.048488140106201, "learning_rate": 4.912244897959184e-05, "loss": 3.4263, "step": 432 }, { "epoch": 1.76734693877551, "grad_norm": 3.281461477279663, "learning_rate": 4.912040816326531e-05, "loss": 3.7527, "step": 433 }, { "epoch": 1.7714285714285714, "grad_norm": 2.1861627101898193, "learning_rate": 4.9118367346938776e-05, "loss": 4.0938, "step": 434 }, { "epoch": 1.7755102040816326, "grad_norm": 2.0684077739715576, "learning_rate": 4.911632653061225e-05, "loss": 4.1738, "step": 435 }, { "epoch": 1.779591836734694, "grad_norm": 2.2376298904418945, "learning_rate": 4.911428571428572e-05, "loss": 3.9355, "step": 436 }, { "epoch": 1.7836734693877552, "grad_norm": 2.2719552516937256, "learning_rate": 4.9112244897959185e-05, "loss": 4.2767, "step": 437 }, { "epoch": 1.7877551020408164, "grad_norm": 3.2865514755249023, "learning_rate": 4.911020408163265e-05, "loss": 3.6417, "step": 438 }, { "epoch": 1.7918367346938775, "grad_norm": 1.5681953430175781, "learning_rate": 4.9108163265306126e-05, "loss": 4.0527, "step": 439 }, { "epoch": 1.7959183673469388, "grad_norm": 2.2869420051574707, "learning_rate": 4.91061224489796e-05, "loss": 4.0947, "step": 440 }, { "epoch": 1.8, "grad_norm": 2.6968371868133545, "learning_rate": 4.9104081632653067e-05, "loss": 3.5891, "step": 441 }, { "epoch": 1.804081632653061, "grad_norm": 2.8543050289154053, "learning_rate": 4.9102040816326534e-05, "loss": 3.9549, "step": 442 }, { "epoch": 1.8081632653061224, "grad_norm": 2.1635921001434326, "learning_rate": 4.91e-05, "loss": 4.0013, "step": 443 }, { "epoch": 1.8122448979591836, "grad_norm": 2.80142879486084, "learning_rate": 4.909795918367347e-05, "loss": 4.0564, "step": 444 }, { "epoch": 1.816326530612245, "grad_norm": 2.409456491470337, "learning_rate": 4.909591836734694e-05, "loss": 4.14, "step": 445 }, { "epoch": 1.8204081632653062, "grad_norm": 3.3332901000976562, "learning_rate": 4.909387755102041e-05, "loss": 3.868, "step": 446 }, { "epoch": 1.8244897959183675, "grad_norm": 1.858022689819336, "learning_rate": 4.909183673469388e-05, "loss": 4.1376, "step": 447 }, { "epoch": 1.8285714285714287, "grad_norm": 2.9506022930145264, "learning_rate": 4.908979591836735e-05, "loss": 3.8297, "step": 448 }, { "epoch": 1.8326530612244898, "grad_norm": 3.623953342437744, "learning_rate": 4.908775510204082e-05, "loss": 3.8931, "step": 449 }, { "epoch": 1.836734693877551, "grad_norm": 2.686211109161377, "learning_rate": 4.908571428571429e-05, "loss": 3.9587, "step": 450 }, { "epoch": 1.8408163265306121, "grad_norm": 2.1395184993743896, "learning_rate": 4.908367346938776e-05, "loss": 4.0361, "step": 451 }, { "epoch": 1.8448979591836734, "grad_norm": 1.6948202848434448, "learning_rate": 4.9081632653061225e-05, "loss": 4.0355, "step": 452 }, { "epoch": 1.8489795918367347, "grad_norm": 3.0452160835266113, "learning_rate": 4.907959183673469e-05, "loss": 3.7335, "step": 453 }, { "epoch": 1.853061224489796, "grad_norm": 1.4993005990982056, "learning_rate": 4.9077551020408166e-05, "loss": 3.9614, "step": 454 }, { "epoch": 1.8571428571428572, "grad_norm": 2.5340726375579834, "learning_rate": 4.907551020408164e-05, "loss": 3.9558, "step": 455 }, { "epoch": 1.8612244897959185, "grad_norm": 2.106754779815674, "learning_rate": 4.907346938775511e-05, "loss": 4.0085, "step": 456 }, { "epoch": 1.8653061224489798, "grad_norm": 2.7748935222625732, "learning_rate": 4.9071428571428574e-05, "loss": 4.2323, "step": 457 }, { "epoch": 1.8693877551020408, "grad_norm": 2.2485105991363525, "learning_rate": 4.906938775510204e-05, "loss": 4.3265, "step": 458 }, { "epoch": 1.873469387755102, "grad_norm": 3.2582807540893555, "learning_rate": 4.906734693877551e-05, "loss": 3.3666, "step": 459 }, { "epoch": 1.8775510204081631, "grad_norm": 1.4354760646820068, "learning_rate": 4.906530612244898e-05, "loss": 4.1379, "step": 460 }, { "epoch": 1.8816326530612244, "grad_norm": 2.38828706741333, "learning_rate": 4.906326530612245e-05, "loss": 3.8881, "step": 461 }, { "epoch": 1.8857142857142857, "grad_norm": 3.394500970840454, "learning_rate": 4.9061224489795924e-05, "loss": 3.4268, "step": 462 }, { "epoch": 1.889795918367347, "grad_norm": 1.719940423965454, "learning_rate": 4.905918367346939e-05, "loss": 4.1648, "step": 463 }, { "epoch": 1.8938775510204082, "grad_norm": 2.0150842666625977, "learning_rate": 4.905714285714286e-05, "loss": 4.2308, "step": 464 }, { "epoch": 1.8979591836734695, "grad_norm": 2.460956335067749, "learning_rate": 4.905510204081633e-05, "loss": 3.8507, "step": 465 }, { "epoch": 1.9020408163265308, "grad_norm": 1.8841853141784668, "learning_rate": 4.90530612244898e-05, "loss": 4.3181, "step": 466 }, { "epoch": 1.9061224489795918, "grad_norm": 4.206210136413574, "learning_rate": 4.9051020408163266e-05, "loss": 3.5219, "step": 467 }, { "epoch": 1.910204081632653, "grad_norm": 1.2955243587493896, "learning_rate": 4.904897959183673e-05, "loss": 4.4395, "step": 468 }, { "epoch": 1.9142857142857141, "grad_norm": 2.055816411972046, "learning_rate": 4.904693877551021e-05, "loss": 4.1408, "step": 469 }, { "epoch": 1.9183673469387754, "grad_norm": 4.837037563323975, "learning_rate": 4.904489795918368e-05, "loss": 2.2998, "step": 470 }, { "epoch": 1.9224489795918367, "grad_norm": 2.390538454055786, "learning_rate": 4.904285714285715e-05, "loss": 4.1098, "step": 471 }, { "epoch": 1.926530612244898, "grad_norm": 2.758477210998535, "learning_rate": 4.9040816326530615e-05, "loss": 3.9599, "step": 472 }, { "epoch": 1.9306122448979592, "grad_norm": 2.04646372795105, "learning_rate": 4.903877551020408e-05, "loss": 4.0497, "step": 473 }, { "epoch": 1.9346938775510205, "grad_norm": 2.365776538848877, "learning_rate": 4.903673469387755e-05, "loss": 4.0815, "step": 474 }, { "epoch": 1.9387755102040818, "grad_norm": 2.105419635772705, "learning_rate": 4.9034693877551023e-05, "loss": 4.1199, "step": 475 }, { "epoch": 1.9428571428571428, "grad_norm": 1.703406810760498, "learning_rate": 4.903265306122449e-05, "loss": 4.268, "step": 476 }, { "epoch": 1.9469387755102041, "grad_norm": 2.145047903060913, "learning_rate": 4.903061224489796e-05, "loss": 3.9785, "step": 477 }, { "epoch": 1.9510204081632652, "grad_norm": 1.4927170276641846, "learning_rate": 4.902857142857143e-05, "loss": 4.1138, "step": 478 }, { "epoch": 1.9551020408163264, "grad_norm": 2.390927791595459, "learning_rate": 4.9026530612244905e-05, "loss": 4.0427, "step": 479 }, { "epoch": 1.9591836734693877, "grad_norm": 2.1192502975463867, "learning_rate": 4.902448979591837e-05, "loss": 4.2377, "step": 480 }, { "epoch": 1.963265306122449, "grad_norm": 1.710703730583191, "learning_rate": 4.902244897959184e-05, "loss": 4.1778, "step": 481 }, { "epoch": 1.9673469387755103, "grad_norm": 1.8420257568359375, "learning_rate": 4.902040816326531e-05, "loss": 4.1754, "step": 482 }, { "epoch": 1.9714285714285715, "grad_norm": 2.443131923675537, "learning_rate": 4.9018367346938774e-05, "loss": 3.9746, "step": 483 }, { "epoch": 1.9755102040816328, "grad_norm": 4.209895133972168, "learning_rate": 4.901632653061225e-05, "loss": 3.3137, "step": 484 }, { "epoch": 1.9795918367346939, "grad_norm": 1.8588672876358032, "learning_rate": 4.9014285714285715e-05, "loss": 3.9745, "step": 485 }, { "epoch": 1.9836734693877551, "grad_norm": 1.847386360168457, "learning_rate": 4.901224489795919e-05, "loss": 4.0885, "step": 486 }, { "epoch": 1.9877551020408162, "grad_norm": 1.7741986513137817, "learning_rate": 4.9010204081632656e-05, "loss": 4.2216, "step": 487 }, { "epoch": 1.9918367346938775, "grad_norm": 2.3858723640441895, "learning_rate": 4.900816326530612e-05, "loss": 3.2655, "step": 488 }, { "epoch": 1.9959183673469387, "grad_norm": 1.46278977394104, "learning_rate": 4.90061224489796e-05, "loss": 4.233, "step": 489 }, { "epoch": 2.0, "grad_norm": 1.6908304691314697, "learning_rate": 4.9004081632653064e-05, "loss": 4.1082, "step": 490 }, { "epoch": 2.0040816326530613, "grad_norm": 2.0103816986083984, "learning_rate": 4.900204081632653e-05, "loss": 4.1111, "step": 491 }, { "epoch": 2.0081632653061225, "grad_norm": 2.3745298385620117, "learning_rate": 4.9e-05, "loss": 3.8728, "step": 492 }, { "epoch": 2.012244897959184, "grad_norm": 1.8273065090179443, "learning_rate": 4.899795918367347e-05, "loss": 3.9098, "step": 493 }, { "epoch": 2.016326530612245, "grad_norm": 2.8681535720825195, "learning_rate": 4.8995918367346946e-05, "loss": 3.6857, "step": 494 }, { "epoch": 2.020408163265306, "grad_norm": 1.706124186515808, "learning_rate": 4.899387755102041e-05, "loss": 4.1781, "step": 495 }, { "epoch": 2.024489795918367, "grad_norm": 3.328505754470825, "learning_rate": 4.899183673469388e-05, "loss": 3.796, "step": 496 }, { "epoch": 2.0285714285714285, "grad_norm": 2.030345916748047, "learning_rate": 4.898979591836735e-05, "loss": 3.9424, "step": 497 }, { "epoch": 2.0326530612244897, "grad_norm": 2.283843755722046, "learning_rate": 4.8987755102040815e-05, "loss": 4.0083, "step": 498 }, { "epoch": 2.036734693877551, "grad_norm": 1.944077968597412, "learning_rate": 4.898571428571429e-05, "loss": 4.1601, "step": 499 }, { "epoch": 2.0408163265306123, "grad_norm": 1.8134136199951172, "learning_rate": 4.8983673469387756e-05, "loss": 4.1393, "step": 500 }, { "epoch": 2.0448979591836736, "grad_norm": 2.8699429035186768, "learning_rate": 4.898163265306123e-05, "loss": 4.0041, "step": 501 }, { "epoch": 2.048979591836735, "grad_norm": 2.121054172515869, "learning_rate": 4.89795918367347e-05, "loss": 4.1028, "step": 502 }, { "epoch": 2.053061224489796, "grad_norm": 1.4880269765853882, "learning_rate": 4.8977551020408164e-05, "loss": 4.2349, "step": 503 }, { "epoch": 2.057142857142857, "grad_norm": 1.8972384929656982, "learning_rate": 4.897551020408164e-05, "loss": 4.2042, "step": 504 }, { "epoch": 2.061224489795918, "grad_norm": 1.8990635871887207, "learning_rate": 4.8973469387755105e-05, "loss": 3.7701, "step": 505 }, { "epoch": 2.0653061224489795, "grad_norm": 1.8344413042068481, "learning_rate": 4.897142857142857e-05, "loss": 4.2009, "step": 506 }, { "epoch": 2.0693877551020408, "grad_norm": 2.3220605850219727, "learning_rate": 4.896938775510204e-05, "loss": 4.0361, "step": 507 }, { "epoch": 2.073469387755102, "grad_norm": 3.070122003555298, "learning_rate": 4.896734693877551e-05, "loss": 3.587, "step": 508 }, { "epoch": 2.0775510204081633, "grad_norm": 2.0632951259613037, "learning_rate": 4.896530612244899e-05, "loss": 3.9261, "step": 509 }, { "epoch": 2.0816326530612246, "grad_norm": 1.9941816329956055, "learning_rate": 4.8963265306122454e-05, "loss": 3.9368, "step": 510 }, { "epoch": 2.085714285714286, "grad_norm": 1.9015990495681763, "learning_rate": 4.896122448979592e-05, "loss": 4.1347, "step": 511 }, { "epoch": 2.089795918367347, "grad_norm": 3.587578773498535, "learning_rate": 4.895918367346939e-05, "loss": 3.5771, "step": 512 }, { "epoch": 2.093877551020408, "grad_norm": 1.527463674545288, "learning_rate": 4.8957142857142855e-05, "loss": 4.5122, "step": 513 }, { "epoch": 2.0979591836734692, "grad_norm": 2.4684059619903564, "learning_rate": 4.895510204081633e-05, "loss": 3.3658, "step": 514 }, { "epoch": 2.1020408163265305, "grad_norm": 1.5342686176300049, "learning_rate": 4.8953061224489797e-05, "loss": 4.25, "step": 515 }, { "epoch": 2.1061224489795918, "grad_norm": 1.528016448020935, "learning_rate": 4.895102040816327e-05, "loss": 4.384, "step": 516 }, { "epoch": 2.110204081632653, "grad_norm": 1.858237624168396, "learning_rate": 4.894897959183674e-05, "loss": 4.0137, "step": 517 }, { "epoch": 2.1142857142857143, "grad_norm": 3.526130199432373, "learning_rate": 4.8946938775510205e-05, "loss": 3.869, "step": 518 }, { "epoch": 2.1183673469387756, "grad_norm": 3.050645351409912, "learning_rate": 4.894489795918368e-05, "loss": 3.6499, "step": 519 }, { "epoch": 2.122448979591837, "grad_norm": 2.9261951446533203, "learning_rate": 4.8942857142857146e-05, "loss": 4.217, "step": 520 }, { "epoch": 2.126530612244898, "grad_norm": 3.6235198974609375, "learning_rate": 4.894081632653061e-05, "loss": 3.8131, "step": 521 }, { "epoch": 2.130612244897959, "grad_norm": 2.2752339839935303, "learning_rate": 4.893877551020408e-05, "loss": 3.7377, "step": 522 }, { "epoch": 2.1346938775510202, "grad_norm": 1.5197199583053589, "learning_rate": 4.8936734693877554e-05, "loss": 4.0163, "step": 523 }, { "epoch": 2.1387755102040815, "grad_norm": 1.5355311632156372, "learning_rate": 4.893469387755103e-05, "loss": 3.9109, "step": 524 }, { "epoch": 2.142857142857143, "grad_norm": 3.180724859237671, "learning_rate": 4.8932653061224495e-05, "loss": 3.6658, "step": 525 }, { "epoch": 2.146938775510204, "grad_norm": 3.8514211177825928, "learning_rate": 4.893061224489796e-05, "loss": 3.4342, "step": 526 }, { "epoch": 2.1510204081632653, "grad_norm": 3.085165023803711, "learning_rate": 4.892857142857143e-05, "loss": 3.7884, "step": 527 }, { "epoch": 2.1551020408163266, "grad_norm": 2.5841381549835205, "learning_rate": 4.89265306122449e-05, "loss": 3.8301, "step": 528 }, { "epoch": 2.159183673469388, "grad_norm": 2.3397488594055176, "learning_rate": 4.892448979591837e-05, "loss": 3.9863, "step": 529 }, { "epoch": 2.163265306122449, "grad_norm": 2.4686577320098877, "learning_rate": 4.892244897959184e-05, "loss": 4.0335, "step": 530 }, { "epoch": 2.16734693877551, "grad_norm": 2.4214425086975098, "learning_rate": 4.8920408163265304e-05, "loss": 3.5155, "step": 531 }, { "epoch": 2.1714285714285713, "grad_norm": 3.9897987842559814, "learning_rate": 4.891836734693878e-05, "loss": 3.6193, "step": 532 }, { "epoch": 2.1755102040816325, "grad_norm": 1.5227090120315552, "learning_rate": 4.891632653061225e-05, "loss": 4.2001, "step": 533 }, { "epoch": 2.179591836734694, "grad_norm": 3.182744264602661, "learning_rate": 4.891428571428572e-05, "loss": 3.7334, "step": 534 }, { "epoch": 2.183673469387755, "grad_norm": 4.885098457336426, "learning_rate": 4.8912244897959186e-05, "loss": 3.2607, "step": 535 }, { "epoch": 2.1877551020408164, "grad_norm": 2.662230968475342, "learning_rate": 4.8910204081632654e-05, "loss": 3.9044, "step": 536 }, { "epoch": 2.1918367346938776, "grad_norm": 2.2320404052734375, "learning_rate": 4.890816326530612e-05, "loss": 4.2045, "step": 537 }, { "epoch": 2.195918367346939, "grad_norm": 1.8471580743789673, "learning_rate": 4.8906122448979595e-05, "loss": 4.2299, "step": 538 }, { "epoch": 2.2, "grad_norm": 2.365006446838379, "learning_rate": 4.890408163265306e-05, "loss": 3.9676, "step": 539 }, { "epoch": 2.204081632653061, "grad_norm": 1.8626211881637573, "learning_rate": 4.8902040816326536e-05, "loss": 4.0618, "step": 540 }, { "epoch": 2.2081632653061223, "grad_norm": 2.237215757369995, "learning_rate": 4.89e-05, "loss": 4.0886, "step": 541 }, { "epoch": 2.2122448979591836, "grad_norm": 1.9139888286590576, "learning_rate": 4.889795918367347e-05, "loss": 4.241, "step": 542 }, { "epoch": 2.216326530612245, "grad_norm": 1.976716160774231, "learning_rate": 4.8895918367346944e-05, "loss": 3.9651, "step": 543 }, { "epoch": 2.220408163265306, "grad_norm": 3.6011099815368652, "learning_rate": 4.889387755102041e-05, "loss": 3.3932, "step": 544 }, { "epoch": 2.2244897959183674, "grad_norm": 1.6348458528518677, "learning_rate": 4.889183673469388e-05, "loss": 4.3606, "step": 545 }, { "epoch": 2.2285714285714286, "grad_norm": 2.1386184692382812, "learning_rate": 4.8889795918367345e-05, "loss": 4.1039, "step": 546 }, { "epoch": 2.23265306122449, "grad_norm": 1.8916404247283936, "learning_rate": 4.888775510204082e-05, "loss": 4.0576, "step": 547 }, { "epoch": 2.236734693877551, "grad_norm": 2.4176509380340576, "learning_rate": 4.888571428571429e-05, "loss": 4.2006, "step": 548 }, { "epoch": 2.240816326530612, "grad_norm": 1.8222568035125732, "learning_rate": 4.888367346938776e-05, "loss": 4.2029, "step": 549 }, { "epoch": 2.2448979591836733, "grad_norm": 3.416325092315674, "learning_rate": 4.888163265306123e-05, "loss": 3.8029, "step": 550 }, { "epoch": 2.2489795918367346, "grad_norm": 2.8479390144348145, "learning_rate": 4.8879591836734694e-05, "loss": 3.8745, "step": 551 }, { "epoch": 2.253061224489796, "grad_norm": 1.7117726802825928, "learning_rate": 4.887755102040816e-05, "loss": 4.1107, "step": 552 }, { "epoch": 2.257142857142857, "grad_norm": 1.4566303491592407, "learning_rate": 4.8875510204081635e-05, "loss": 4.4211, "step": 553 }, { "epoch": 2.2612244897959184, "grad_norm": 2.559088945388794, "learning_rate": 4.88734693877551e-05, "loss": 3.9298, "step": 554 }, { "epoch": 2.2653061224489797, "grad_norm": 2.816575050354004, "learning_rate": 4.8871428571428576e-05, "loss": 4.1238, "step": 555 }, { "epoch": 2.269387755102041, "grad_norm": 2.5363759994506836, "learning_rate": 4.8869387755102044e-05, "loss": 3.8121, "step": 556 }, { "epoch": 2.273469387755102, "grad_norm": 1.5166923999786377, "learning_rate": 4.886734693877551e-05, "loss": 4.1893, "step": 557 }, { "epoch": 2.277551020408163, "grad_norm": 1.500503420829773, "learning_rate": 4.8865306122448985e-05, "loss": 4.1232, "step": 558 }, { "epoch": 2.2816326530612243, "grad_norm": 1.4784247875213623, "learning_rate": 4.886326530612245e-05, "loss": 4.139, "step": 559 }, { "epoch": 2.2857142857142856, "grad_norm": 3.073866605758667, "learning_rate": 4.886122448979592e-05, "loss": 3.8801, "step": 560 }, { "epoch": 2.289795918367347, "grad_norm": 2.0145421028137207, "learning_rate": 4.8859183673469386e-05, "loss": 3.9828, "step": 561 }, { "epoch": 2.293877551020408, "grad_norm": 2.352705717086792, "learning_rate": 4.885714285714286e-05, "loss": 4.0563, "step": 562 }, { "epoch": 2.2979591836734694, "grad_norm": 1.9173990488052368, "learning_rate": 4.8855102040816334e-05, "loss": 4.1156, "step": 563 }, { "epoch": 2.3020408163265307, "grad_norm": 1.384501576423645, "learning_rate": 4.88530612244898e-05, "loss": 4.2057, "step": 564 }, { "epoch": 2.306122448979592, "grad_norm": 3.0036327838897705, "learning_rate": 4.885102040816327e-05, "loss": 3.5186, "step": 565 }, { "epoch": 2.3102040816326532, "grad_norm": 2.0566349029541016, "learning_rate": 4.8848979591836735e-05, "loss": 4.2603, "step": 566 }, { "epoch": 2.314285714285714, "grad_norm": 2.231804370880127, "learning_rate": 4.88469387755102e-05, "loss": 3.982, "step": 567 }, { "epoch": 2.3183673469387753, "grad_norm": 2.9791765213012695, "learning_rate": 4.8844897959183676e-05, "loss": 3.6443, "step": 568 }, { "epoch": 2.3224489795918366, "grad_norm": 3.773937940597534, "learning_rate": 4.884285714285714e-05, "loss": 3.3197, "step": 569 }, { "epoch": 2.326530612244898, "grad_norm": 1.7395929098129272, "learning_rate": 4.884081632653062e-05, "loss": 4.3406, "step": 570 }, { "epoch": 2.330612244897959, "grad_norm": 1.7709885835647583, "learning_rate": 4.8838775510204084e-05, "loss": 4.2235, "step": 571 }, { "epoch": 2.3346938775510204, "grad_norm": 3.955115795135498, "learning_rate": 4.883673469387756e-05, "loss": 3.4628, "step": 572 }, { "epoch": 2.3387755102040817, "grad_norm": 2.647052526473999, "learning_rate": 4.8834693877551025e-05, "loss": 3.9102, "step": 573 }, { "epoch": 2.342857142857143, "grad_norm": 1.91730797290802, "learning_rate": 4.883265306122449e-05, "loss": 4.0026, "step": 574 }, { "epoch": 2.3469387755102042, "grad_norm": 2.742194652557373, "learning_rate": 4.883061224489796e-05, "loss": 3.6936, "step": 575 }, { "epoch": 2.351020408163265, "grad_norm": 2.2882559299468994, "learning_rate": 4.882857142857143e-05, "loss": 4.3543, "step": 576 }, { "epoch": 2.3551020408163263, "grad_norm": 2.0099220275878906, "learning_rate": 4.88265306122449e-05, "loss": 3.9442, "step": 577 }, { "epoch": 2.3591836734693876, "grad_norm": 1.8590749502182007, "learning_rate": 4.8824489795918374e-05, "loss": 4.0716, "step": 578 }, { "epoch": 2.363265306122449, "grad_norm": 4.642070770263672, "learning_rate": 4.882244897959184e-05, "loss": 3.6344, "step": 579 }, { "epoch": 2.36734693877551, "grad_norm": 2.315782070159912, "learning_rate": 4.882040816326531e-05, "loss": 4.0242, "step": 580 }, { "epoch": 2.3714285714285714, "grad_norm": 2.6621146202087402, "learning_rate": 4.8818367346938776e-05, "loss": 3.8276, "step": 581 }, { "epoch": 2.3755102040816327, "grad_norm": 2.551248073577881, "learning_rate": 4.881632653061225e-05, "loss": 3.9337, "step": 582 }, { "epoch": 2.379591836734694, "grad_norm": 1.8711929321289062, "learning_rate": 4.881428571428572e-05, "loss": 4.2112, "step": 583 }, { "epoch": 2.3836734693877553, "grad_norm": 2.603267192840576, "learning_rate": 4.8812244897959184e-05, "loss": 3.8935, "step": 584 }, { "epoch": 2.387755102040816, "grad_norm": 2.4264187812805176, "learning_rate": 4.881020408163265e-05, "loss": 3.8292, "step": 585 }, { "epoch": 2.3918367346938774, "grad_norm": 2.6148033142089844, "learning_rate": 4.8808163265306125e-05, "loss": 4.0954, "step": 586 }, { "epoch": 2.3959183673469386, "grad_norm": 2.0404491424560547, "learning_rate": 4.88061224489796e-05, "loss": 4.0516, "step": 587 }, { "epoch": 2.4, "grad_norm": 1.617725133895874, "learning_rate": 4.8804081632653066e-05, "loss": 4.0802, "step": 588 }, { "epoch": 2.404081632653061, "grad_norm": 3.2886765003204346, "learning_rate": 4.880204081632653e-05, "loss": 3.6348, "step": 589 }, { "epoch": 2.4081632653061225, "grad_norm": 3.0157721042633057, "learning_rate": 4.88e-05, "loss": 3.7453, "step": 590 }, { "epoch": 2.4122448979591837, "grad_norm": 3.185471296310425, "learning_rate": 4.879795918367347e-05, "loss": 3.9637, "step": 591 }, { "epoch": 2.416326530612245, "grad_norm": 3.488638401031494, "learning_rate": 4.879591836734694e-05, "loss": 3.9039, "step": 592 }, { "epoch": 2.4204081632653063, "grad_norm": 3.542009115219116, "learning_rate": 4.879387755102041e-05, "loss": 4.1311, "step": 593 }, { "epoch": 2.424489795918367, "grad_norm": 2.416139841079712, "learning_rate": 4.879183673469388e-05, "loss": 4.1888, "step": 594 }, { "epoch": 2.4285714285714284, "grad_norm": 2.209519147872925, "learning_rate": 4.878979591836735e-05, "loss": 4.2194, "step": 595 }, { "epoch": 2.4326530612244897, "grad_norm": 3.492587089538574, "learning_rate": 4.878775510204082e-05, "loss": 3.7084, "step": 596 }, { "epoch": 2.436734693877551, "grad_norm": 2.4882054328918457, "learning_rate": 4.878571428571429e-05, "loss": 3.8876, "step": 597 }, { "epoch": 2.440816326530612, "grad_norm": 1.4178024530410767, "learning_rate": 4.878367346938776e-05, "loss": 4.4193, "step": 598 }, { "epoch": 2.4448979591836735, "grad_norm": 2.04913592338562, "learning_rate": 4.8781632653061225e-05, "loss": 4.0927, "step": 599 }, { "epoch": 2.4489795918367347, "grad_norm": 2.4318490028381348, "learning_rate": 4.877959183673469e-05, "loss": 3.7256, "step": 600 }, { "epoch": 2.453061224489796, "grad_norm": 1.8998349905014038, "learning_rate": 4.8777551020408166e-05, "loss": 4.0372, "step": 601 }, { "epoch": 2.4571428571428573, "grad_norm": 2.009946346282959, "learning_rate": 4.877551020408164e-05, "loss": 4.0169, "step": 602 }, { "epoch": 2.461224489795918, "grad_norm": 2.492249011993408, "learning_rate": 4.877346938775511e-05, "loss": 3.8505, "step": 603 }, { "epoch": 2.4653061224489794, "grad_norm": 3.8303916454315186, "learning_rate": 4.8771428571428574e-05, "loss": 3.2493, "step": 604 }, { "epoch": 2.4693877551020407, "grad_norm": 2.2663419246673584, "learning_rate": 4.876938775510204e-05, "loss": 4.2524, "step": 605 }, { "epoch": 2.473469387755102, "grad_norm": 5.218384265899658, "learning_rate": 4.876734693877551e-05, "loss": 3.1796, "step": 606 }, { "epoch": 2.477551020408163, "grad_norm": 2.133134126663208, "learning_rate": 4.876530612244898e-05, "loss": 3.9304, "step": 607 }, { "epoch": 2.4816326530612245, "grad_norm": 1.9097503423690796, "learning_rate": 4.876326530612245e-05, "loss": 3.9569, "step": 608 }, { "epoch": 2.4857142857142858, "grad_norm": 3.3220176696777344, "learning_rate": 4.876122448979592e-05, "loss": 3.9045, "step": 609 }, { "epoch": 2.489795918367347, "grad_norm": 2.9468579292297363, "learning_rate": 4.875918367346939e-05, "loss": 4.2393, "step": 610 }, { "epoch": 2.4938775510204083, "grad_norm": 3.723430633544922, "learning_rate": 4.875714285714286e-05, "loss": 3.2772, "step": 611 }, { "epoch": 2.497959183673469, "grad_norm": 1.6669538021087646, "learning_rate": 4.875510204081633e-05, "loss": 4.2286, "step": 612 }, { "epoch": 2.502040816326531, "grad_norm": 2.4023492336273193, "learning_rate": 4.87530612244898e-05, "loss": 3.9593, "step": 613 }, { "epoch": 2.5061224489795917, "grad_norm": 2.2388734817504883, "learning_rate": 4.8751020408163266e-05, "loss": 3.8472, "step": 614 }, { "epoch": 2.510204081632653, "grad_norm": 2.1529605388641357, "learning_rate": 4.874897959183673e-05, "loss": 3.9905, "step": 615 }, { "epoch": 2.5142857142857142, "grad_norm": 2.1263697147369385, "learning_rate": 4.8746938775510207e-05, "loss": 4.1771, "step": 616 }, { "epoch": 2.5183673469387755, "grad_norm": 1.9057724475860596, "learning_rate": 4.874489795918368e-05, "loss": 4.2323, "step": 617 }, { "epoch": 2.522448979591837, "grad_norm": 3.042123317718506, "learning_rate": 4.874285714285715e-05, "loss": 3.8765, "step": 618 }, { "epoch": 2.526530612244898, "grad_norm": 1.4344475269317627, "learning_rate": 4.8740816326530615e-05, "loss": 4.3378, "step": 619 }, { "epoch": 2.5306122448979593, "grad_norm": 2.893986463546753, "learning_rate": 4.873877551020408e-05, "loss": 3.776, "step": 620 }, { "epoch": 2.53469387755102, "grad_norm": 2.03208589553833, "learning_rate": 4.8736734693877556e-05, "loss": 4.0974, "step": 621 }, { "epoch": 2.538775510204082, "grad_norm": 3.2824618816375732, "learning_rate": 4.873469387755102e-05, "loss": 3.7156, "step": 622 }, { "epoch": 2.5428571428571427, "grad_norm": 1.6466050148010254, "learning_rate": 4.873265306122449e-05, "loss": 4.1947, "step": 623 }, { "epoch": 2.546938775510204, "grad_norm": 2.4709701538085938, "learning_rate": 4.8730612244897964e-05, "loss": 3.86, "step": 624 }, { "epoch": 2.5510204081632653, "grad_norm": 2.0036075115203857, "learning_rate": 4.872857142857143e-05, "loss": 4.2919, "step": 625 }, { "epoch": 2.5551020408163265, "grad_norm": 2.60041880607605, "learning_rate": 4.8726530612244905e-05, "loss": 3.9317, "step": 626 }, { "epoch": 2.559183673469388, "grad_norm": 2.165524959564209, "learning_rate": 4.872448979591837e-05, "loss": 4.1001, "step": 627 }, { "epoch": 2.563265306122449, "grad_norm": 1.827864646911621, "learning_rate": 4.872244897959184e-05, "loss": 4.1746, "step": 628 }, { "epoch": 2.5673469387755103, "grad_norm": 3.4870455265045166, "learning_rate": 4.8720408163265306e-05, "loss": 3.7139, "step": 629 }, { "epoch": 2.571428571428571, "grad_norm": 2.2674179077148438, "learning_rate": 4.8718367346938773e-05, "loss": 3.9561, "step": 630 }, { "epoch": 2.575510204081633, "grad_norm": 1.6649507284164429, "learning_rate": 4.871632653061225e-05, "loss": 4.3363, "step": 631 }, { "epoch": 2.5795918367346937, "grad_norm": 1.6773204803466797, "learning_rate": 4.8714285714285714e-05, "loss": 4.0098, "step": 632 }, { "epoch": 2.583673469387755, "grad_norm": 3.5433084964752197, "learning_rate": 4.871224489795919e-05, "loss": 3.4638, "step": 633 }, { "epoch": 2.5877551020408163, "grad_norm": 1.6581430435180664, "learning_rate": 4.8710204081632655e-05, "loss": 3.9084, "step": 634 }, { "epoch": 2.5918367346938775, "grad_norm": 2.59083890914917, "learning_rate": 4.870816326530612e-05, "loss": 3.8333, "step": 635 }, { "epoch": 2.595918367346939, "grad_norm": 2.7265512943267822, "learning_rate": 4.8706122448979597e-05, "loss": 3.2046, "step": 636 }, { "epoch": 2.6, "grad_norm": 1.85256826877594, "learning_rate": 4.8704081632653064e-05, "loss": 4.2693, "step": 637 }, { "epoch": 2.6040816326530614, "grad_norm": 3.146052360534668, "learning_rate": 4.870204081632653e-05, "loss": 3.8899, "step": 638 }, { "epoch": 2.608163265306122, "grad_norm": 1.843074917793274, "learning_rate": 4.87e-05, "loss": 3.934, "step": 639 }, { "epoch": 2.612244897959184, "grad_norm": 4.085612773895264, "learning_rate": 4.869795918367347e-05, "loss": 3.3562, "step": 640 }, { "epoch": 2.6163265306122447, "grad_norm": 2.152792453765869, "learning_rate": 4.8695918367346946e-05, "loss": 4.1304, "step": 641 }, { "epoch": 2.620408163265306, "grad_norm": 1.2495403289794922, "learning_rate": 4.869387755102041e-05, "loss": 4.3927, "step": 642 }, { "epoch": 2.6244897959183673, "grad_norm": 2.9822945594787598, "learning_rate": 4.869183673469388e-05, "loss": 4.1372, "step": 643 }, { "epoch": 2.6285714285714286, "grad_norm": 2.9600460529327393, "learning_rate": 4.868979591836735e-05, "loss": 3.8142, "step": 644 }, { "epoch": 2.63265306122449, "grad_norm": 2.440274238586426, "learning_rate": 4.8687755102040814e-05, "loss": 4.1686, "step": 645 }, { "epoch": 2.636734693877551, "grad_norm": 4.440428256988525, "learning_rate": 4.868571428571429e-05, "loss": 3.3348, "step": 646 }, { "epoch": 2.6408163265306124, "grad_norm": 2.5758750438690186, "learning_rate": 4.8683673469387755e-05, "loss": 4.0382, "step": 647 }, { "epoch": 2.644897959183673, "grad_norm": 1.6841906309127808, "learning_rate": 4.868163265306123e-05, "loss": 4.198, "step": 648 }, { "epoch": 2.648979591836735, "grad_norm": 2.5649797916412354, "learning_rate": 4.8679591836734696e-05, "loss": 4.1467, "step": 649 }, { "epoch": 2.6530612244897958, "grad_norm": 2.047159194946289, "learning_rate": 4.867755102040816e-05, "loss": 3.8975, "step": 650 }, { "epoch": 2.657142857142857, "grad_norm": 2.8831984996795654, "learning_rate": 4.867551020408164e-05, "loss": 3.8389, "step": 651 }, { "epoch": 2.6612244897959183, "grad_norm": 2.6907336711883545, "learning_rate": 4.8673469387755104e-05, "loss": 4.0351, "step": 652 }, { "epoch": 2.6653061224489796, "grad_norm": 2.2075679302215576, "learning_rate": 4.867142857142857e-05, "loss": 4.0802, "step": 653 }, { "epoch": 2.669387755102041, "grad_norm": 2.5450279712677, "learning_rate": 4.866938775510204e-05, "loss": 3.9631, "step": 654 }, { "epoch": 2.673469387755102, "grad_norm": 2.634241819381714, "learning_rate": 4.866734693877551e-05, "loss": 4.0555, "step": 655 }, { "epoch": 2.6775510204081634, "grad_norm": 1.7970397472381592, "learning_rate": 4.8665306122448986e-05, "loss": 3.9154, "step": 656 }, { "epoch": 2.6816326530612242, "grad_norm": 2.234934091567993, "learning_rate": 4.8663265306122454e-05, "loss": 4.1144, "step": 657 }, { "epoch": 2.685714285714286, "grad_norm": 1.6991387605667114, "learning_rate": 4.866122448979592e-05, "loss": 3.9288, "step": 658 }, { "epoch": 2.6897959183673468, "grad_norm": 1.894539713859558, "learning_rate": 4.865918367346939e-05, "loss": 3.9872, "step": 659 }, { "epoch": 2.693877551020408, "grad_norm": 2.3744874000549316, "learning_rate": 4.865714285714286e-05, "loss": 3.9831, "step": 660 }, { "epoch": 2.6979591836734693, "grad_norm": 1.8723852634429932, "learning_rate": 4.865510204081633e-05, "loss": 4.2795, "step": 661 }, { "epoch": 2.7020408163265306, "grad_norm": 1.9270579814910889, "learning_rate": 4.8653061224489796e-05, "loss": 4.2459, "step": 662 }, { "epoch": 2.706122448979592, "grad_norm": 2.478269100189209, "learning_rate": 4.865102040816327e-05, "loss": 4.071, "step": 663 }, { "epoch": 2.710204081632653, "grad_norm": 3.2101786136627197, "learning_rate": 4.864897959183674e-05, "loss": 3.9535, "step": 664 }, { "epoch": 2.7142857142857144, "grad_norm": 4.470340251922607, "learning_rate": 4.864693877551021e-05, "loss": 3.4427, "step": 665 }, { "epoch": 2.7183673469387752, "grad_norm": 3.5648791790008545, "learning_rate": 4.864489795918368e-05, "loss": 3.5552, "step": 666 }, { "epoch": 2.722448979591837, "grad_norm": 1.872355341911316, "learning_rate": 4.8642857142857145e-05, "loss": 3.9443, "step": 667 }, { "epoch": 2.726530612244898, "grad_norm": 2.493741035461426, "learning_rate": 4.864081632653061e-05, "loss": 4.0491, "step": 668 }, { "epoch": 2.730612244897959, "grad_norm": 2.75911808013916, "learning_rate": 4.863877551020408e-05, "loss": 3.8984, "step": 669 }, { "epoch": 2.7346938775510203, "grad_norm": 2.1334848403930664, "learning_rate": 4.863673469387755e-05, "loss": 4.2488, "step": 670 }, { "epoch": 2.7387755102040816, "grad_norm": 3.277419328689575, "learning_rate": 4.863469387755103e-05, "loss": 3.7, "step": 671 }, { "epoch": 2.742857142857143, "grad_norm": 1.7616746425628662, "learning_rate": 4.8632653061224494e-05, "loss": 4.3929, "step": 672 }, { "epoch": 2.746938775510204, "grad_norm": 2.9490787982940674, "learning_rate": 4.863061224489796e-05, "loss": 3.9563, "step": 673 }, { "epoch": 2.7510204081632654, "grad_norm": 3.275411367416382, "learning_rate": 4.862857142857143e-05, "loss": 4.1232, "step": 674 }, { "epoch": 2.7551020408163263, "grad_norm": 3.4017624855041504, "learning_rate": 4.86265306122449e-05, "loss": 4.2191, "step": 675 }, { "epoch": 2.759183673469388, "grad_norm": 2.5841424465179443, "learning_rate": 4.862448979591837e-05, "loss": 4.168, "step": 676 }, { "epoch": 2.763265306122449, "grad_norm": 1.9094399213790894, "learning_rate": 4.862244897959184e-05, "loss": 4.0035, "step": 677 }, { "epoch": 2.76734693877551, "grad_norm": 2.692833662033081, "learning_rate": 4.862040816326531e-05, "loss": 3.7605, "step": 678 }, { "epoch": 2.7714285714285714, "grad_norm": 1.9127922058105469, "learning_rate": 4.861836734693878e-05, "loss": 4.005, "step": 679 }, { "epoch": 2.7755102040816326, "grad_norm": 2.1822094917297363, "learning_rate": 4.861632653061225e-05, "loss": 4.0523, "step": 680 }, { "epoch": 2.779591836734694, "grad_norm": 1.3888942003250122, "learning_rate": 4.861428571428572e-05, "loss": 4.0735, "step": 681 }, { "epoch": 2.783673469387755, "grad_norm": 1.5394748449325562, "learning_rate": 4.8612244897959186e-05, "loss": 4.3241, "step": 682 }, { "epoch": 2.7877551020408164, "grad_norm": 1.9393490552902222, "learning_rate": 4.861020408163265e-05, "loss": 3.9307, "step": 683 }, { "epoch": 2.7918367346938773, "grad_norm": 2.993971586227417, "learning_rate": 4.860816326530612e-05, "loss": 3.4995, "step": 684 }, { "epoch": 2.795918367346939, "grad_norm": 1.519458293914795, "learning_rate": 4.8606122448979594e-05, "loss": 4.2445, "step": 685 }, { "epoch": 2.8, "grad_norm": 2.3919758796691895, "learning_rate": 4.860408163265306e-05, "loss": 4.0602, "step": 686 }, { "epoch": 2.804081632653061, "grad_norm": 1.8097014427185059, "learning_rate": 4.8602040816326535e-05, "loss": 3.9652, "step": 687 }, { "epoch": 2.8081632653061224, "grad_norm": 5.37212610244751, "learning_rate": 4.86e-05, "loss": 2.1146, "step": 688 }, { "epoch": 2.8122448979591836, "grad_norm": 2.0943198204040527, "learning_rate": 4.859795918367347e-05, "loss": 4.1134, "step": 689 }, { "epoch": 2.816326530612245, "grad_norm": 2.8873162269592285, "learning_rate": 4.859591836734694e-05, "loss": 3.598, "step": 690 }, { "epoch": 2.820408163265306, "grad_norm": 2.4267079830169678, "learning_rate": 4.859387755102041e-05, "loss": 4.2497, "step": 691 }, { "epoch": 2.8244897959183675, "grad_norm": 2.6267542839050293, "learning_rate": 4.859183673469388e-05, "loss": 3.8887, "step": 692 }, { "epoch": 2.8285714285714287, "grad_norm": 1.82144033908844, "learning_rate": 4.8589795918367345e-05, "loss": 4.0876, "step": 693 }, { "epoch": 2.83265306122449, "grad_norm": 3.8138856887817383, "learning_rate": 4.858775510204082e-05, "loss": 3.7317, "step": 694 }, { "epoch": 2.836734693877551, "grad_norm": 2.4503273963928223, "learning_rate": 4.858571428571429e-05, "loss": 3.9151, "step": 695 }, { "epoch": 2.840816326530612, "grad_norm": 2.816164016723633, "learning_rate": 4.858367346938776e-05, "loss": 3.9413, "step": 696 }, { "epoch": 2.8448979591836734, "grad_norm": 2.4738824367523193, "learning_rate": 4.858163265306123e-05, "loss": 4.2665, "step": 697 }, { "epoch": 2.8489795918367347, "grad_norm": 1.7612837553024292, "learning_rate": 4.8579591836734694e-05, "loss": 4.0303, "step": 698 }, { "epoch": 2.853061224489796, "grad_norm": 2.123322010040283, "learning_rate": 4.857755102040816e-05, "loss": 3.8943, "step": 699 }, { "epoch": 2.857142857142857, "grad_norm": 1.786180853843689, "learning_rate": 4.8575510204081635e-05, "loss": 4.0746, "step": 700 }, { "epoch": 2.8612244897959185, "grad_norm": 2.5578505992889404, "learning_rate": 4.85734693877551e-05, "loss": 3.9908, "step": 701 }, { "epoch": 2.8653061224489798, "grad_norm": 3.0068886280059814, "learning_rate": 4.8571428571428576e-05, "loss": 3.7256, "step": 702 }, { "epoch": 2.869387755102041, "grad_norm": 3.8580493927001953, "learning_rate": 4.856938775510204e-05, "loss": 3.5362, "step": 703 }, { "epoch": 2.873469387755102, "grad_norm": 3.2555971145629883, "learning_rate": 4.856734693877551e-05, "loss": 3.6709, "step": 704 }, { "epoch": 2.877551020408163, "grad_norm": 4.64248514175415, "learning_rate": 4.8565306122448984e-05, "loss": 3.3096, "step": 705 }, { "epoch": 2.8816326530612244, "grad_norm": 2.5750620365142822, "learning_rate": 4.856326530612245e-05, "loss": 4.2864, "step": 706 }, { "epoch": 2.8857142857142857, "grad_norm": 1.860709547996521, "learning_rate": 4.856122448979592e-05, "loss": 4.1987, "step": 707 }, { "epoch": 2.889795918367347, "grad_norm": 2.254747152328491, "learning_rate": 4.8559183673469385e-05, "loss": 4.327, "step": 708 }, { "epoch": 2.8938775510204082, "grad_norm": 2.7386045455932617, "learning_rate": 4.855714285714286e-05, "loss": 4.0211, "step": 709 }, { "epoch": 2.8979591836734695, "grad_norm": 1.9790445566177368, "learning_rate": 4.855510204081633e-05, "loss": 4.1848, "step": 710 }, { "epoch": 2.9020408163265308, "grad_norm": 2.3202924728393555, "learning_rate": 4.85530612244898e-05, "loss": 3.9005, "step": 711 }, { "epoch": 2.906122448979592, "grad_norm": 2.762800455093384, "learning_rate": 4.855102040816327e-05, "loss": 3.7104, "step": 712 }, { "epoch": 2.910204081632653, "grad_norm": 1.739319086074829, "learning_rate": 4.8548979591836735e-05, "loss": 4.0463, "step": 713 }, { "epoch": 2.914285714285714, "grad_norm": 2.4734842777252197, "learning_rate": 4.854693877551021e-05, "loss": 4.1283, "step": 714 }, { "epoch": 2.9183673469387754, "grad_norm": 2.059896945953369, "learning_rate": 4.8544897959183676e-05, "loss": 4.0633, "step": 715 }, { "epoch": 2.9224489795918367, "grad_norm": 3.7381250858306885, "learning_rate": 4.854285714285714e-05, "loss": 3.5754, "step": 716 }, { "epoch": 2.926530612244898, "grad_norm": 2.666247844696045, "learning_rate": 4.854081632653062e-05, "loss": 3.9494, "step": 717 }, { "epoch": 2.9306122448979592, "grad_norm": 2.5132064819335938, "learning_rate": 4.8538775510204084e-05, "loss": 3.9642, "step": 718 }, { "epoch": 2.9346938775510205, "grad_norm": 2.251354694366455, "learning_rate": 4.853673469387756e-05, "loss": 3.7617, "step": 719 }, { "epoch": 2.938775510204082, "grad_norm": 2.0761706829071045, "learning_rate": 4.8534693877551025e-05, "loss": 4.0825, "step": 720 }, { "epoch": 2.942857142857143, "grad_norm": 3.4587700366973877, "learning_rate": 4.853265306122449e-05, "loss": 3.8877, "step": 721 }, { "epoch": 2.946938775510204, "grad_norm": 2.9675376415252686, "learning_rate": 4.853061224489796e-05, "loss": 3.7371, "step": 722 }, { "epoch": 2.951020408163265, "grad_norm": 3.513636827468872, "learning_rate": 4.8528571428571426e-05, "loss": 3.6095, "step": 723 }, { "epoch": 2.9551020408163264, "grad_norm": 2.9239494800567627, "learning_rate": 4.85265306122449e-05, "loss": 4.0293, "step": 724 }, { "epoch": 2.9591836734693877, "grad_norm": 2.679203510284424, "learning_rate": 4.8524489795918374e-05, "loss": 3.981, "step": 725 }, { "epoch": 2.963265306122449, "grad_norm": 4.5686869621276855, "learning_rate": 4.852244897959184e-05, "loss": 3.2178, "step": 726 }, { "epoch": 2.9673469387755103, "grad_norm": 2.6278209686279297, "learning_rate": 4.852040816326531e-05, "loss": 3.6997, "step": 727 }, { "epoch": 2.9714285714285715, "grad_norm": 2.9550044536590576, "learning_rate": 4.8518367346938775e-05, "loss": 3.9951, "step": 728 }, { "epoch": 2.975510204081633, "grad_norm": 1.8106061220169067, "learning_rate": 4.851632653061225e-05, "loss": 4.032, "step": 729 }, { "epoch": 2.979591836734694, "grad_norm": 2.0224337577819824, "learning_rate": 4.8514285714285716e-05, "loss": 4.3436, "step": 730 }, { "epoch": 2.983673469387755, "grad_norm": 1.9558039903640747, "learning_rate": 4.8512244897959184e-05, "loss": 4.2054, "step": 731 }, { "epoch": 2.987755102040816, "grad_norm": 1.7902204990386963, "learning_rate": 4.851020408163266e-05, "loss": 3.9585, "step": 732 }, { "epoch": 2.9918367346938775, "grad_norm": 2.2829864025115967, "learning_rate": 4.8508163265306125e-05, "loss": 4.0932, "step": 733 }, { "epoch": 2.9959183673469387, "grad_norm": 1.9579980373382568, "learning_rate": 4.85061224489796e-05, "loss": 4.037, "step": 734 }, { "epoch": 3.0, "grad_norm": 2.9275217056274414, "learning_rate": 4.8504081632653066e-05, "loss": 3.8989, "step": 735 }, { "epoch": 3.0040816326530613, "grad_norm": 2.4157466888427734, "learning_rate": 4.850204081632653e-05, "loss": 3.7835, "step": 736 }, { "epoch": 3.0081632653061225, "grad_norm": 3.12717604637146, "learning_rate": 4.85e-05, "loss": 3.4649, "step": 737 }, { "epoch": 3.012244897959184, "grad_norm": 3.9428348541259766, "learning_rate": 4.849795918367347e-05, "loss": 3.3644, "step": 738 }, { "epoch": 3.016326530612245, "grad_norm": 1.7658127546310425, "learning_rate": 4.849591836734694e-05, "loss": 4.3846, "step": 739 }, { "epoch": 3.020408163265306, "grad_norm": 2.3285818099975586, "learning_rate": 4.849387755102041e-05, "loss": 3.6861, "step": 740 }, { "epoch": 3.024489795918367, "grad_norm": 2.505471706390381, "learning_rate": 4.849183673469388e-05, "loss": 3.7933, "step": 741 }, { "epoch": 3.0285714285714285, "grad_norm": 2.591945171356201, "learning_rate": 4.848979591836735e-05, "loss": 3.8798, "step": 742 }, { "epoch": 3.0326530612244897, "grad_norm": 4.230240345001221, "learning_rate": 4.8487755102040816e-05, "loss": 3.5212, "step": 743 }, { "epoch": 3.036734693877551, "grad_norm": 2.146355390548706, "learning_rate": 4.848571428571429e-05, "loss": 3.8386, "step": 744 }, { "epoch": 3.0408163265306123, "grad_norm": 1.668731927871704, "learning_rate": 4.848367346938776e-05, "loss": 3.9682, "step": 745 }, { "epoch": 3.0448979591836736, "grad_norm": 2.627453327178955, "learning_rate": 4.8481632653061224e-05, "loss": 3.9281, "step": 746 }, { "epoch": 3.048979591836735, "grad_norm": 2.2124927043914795, "learning_rate": 4.847959183673469e-05, "loss": 3.9161, "step": 747 }, { "epoch": 3.053061224489796, "grad_norm": 2.2624504566192627, "learning_rate": 4.8477551020408165e-05, "loss": 3.8272, "step": 748 }, { "epoch": 3.057142857142857, "grad_norm": 4.236105442047119, "learning_rate": 4.847551020408164e-05, "loss": 3.5318, "step": 749 }, { "epoch": 3.061224489795918, "grad_norm": 2.9824178218841553, "learning_rate": 4.8473469387755106e-05, "loss": 4.1548, "step": 750 }, { "epoch": 3.0653061224489795, "grad_norm": 2.682950735092163, "learning_rate": 4.8471428571428573e-05, "loss": 3.7936, "step": 751 }, { "epoch": 3.0693877551020408, "grad_norm": 1.9654483795166016, "learning_rate": 4.846938775510204e-05, "loss": 3.9164, "step": 752 }, { "epoch": 3.073469387755102, "grad_norm": 3.4635086059570312, "learning_rate": 4.8467346938775514e-05, "loss": 3.8306, "step": 753 }, { "epoch": 3.0775510204081633, "grad_norm": 2.095092296600342, "learning_rate": 4.846530612244898e-05, "loss": 4.1557, "step": 754 }, { "epoch": 3.0816326530612246, "grad_norm": 3.539020538330078, "learning_rate": 4.846326530612245e-05, "loss": 3.5707, "step": 755 }, { "epoch": 3.085714285714286, "grad_norm": 4.370172023773193, "learning_rate": 4.846122448979592e-05, "loss": 3.6649, "step": 756 }, { "epoch": 3.089795918367347, "grad_norm": 1.8873621225357056, "learning_rate": 4.845918367346939e-05, "loss": 4.159, "step": 757 }, { "epoch": 3.093877551020408, "grad_norm": 4.365425109863281, "learning_rate": 4.8457142857142864e-05, "loss": 3.2002, "step": 758 }, { "epoch": 3.0979591836734692, "grad_norm": 1.987883448600769, "learning_rate": 4.845510204081633e-05, "loss": 3.9884, "step": 759 }, { "epoch": 3.1020408163265305, "grad_norm": 2.3586394786834717, "learning_rate": 4.84530612244898e-05, "loss": 4.2152, "step": 760 }, { "epoch": 3.1061224489795918, "grad_norm": 3.1681227684020996, "learning_rate": 4.8451020408163265e-05, "loss": 3.7521, "step": 761 }, { "epoch": 3.110204081632653, "grad_norm": 3.3571765422821045, "learning_rate": 4.844897959183673e-05, "loss": 3.7325, "step": 762 }, { "epoch": 3.1142857142857143, "grad_norm": 3.6549901962280273, "learning_rate": 4.8446938775510206e-05, "loss": 3.6918, "step": 763 }, { "epoch": 3.1183673469387756, "grad_norm": 2.370530366897583, "learning_rate": 4.844489795918368e-05, "loss": 4.0719, "step": 764 }, { "epoch": 3.122448979591837, "grad_norm": 1.8329648971557617, "learning_rate": 4.844285714285715e-05, "loss": 4.2026, "step": 765 }, { "epoch": 3.126530612244898, "grad_norm": 3.0611650943756104, "learning_rate": 4.8440816326530614e-05, "loss": 3.9035, "step": 766 }, { "epoch": 3.130612244897959, "grad_norm": 2.4426615238189697, "learning_rate": 4.843877551020408e-05, "loss": 3.958, "step": 767 }, { "epoch": 3.1346938775510202, "grad_norm": 2.219430446624756, "learning_rate": 4.8436734693877555e-05, "loss": 3.9941, "step": 768 }, { "epoch": 3.1387755102040815, "grad_norm": 2.1231610774993896, "learning_rate": 4.843469387755102e-05, "loss": 4.0171, "step": 769 }, { "epoch": 3.142857142857143, "grad_norm": 2.6964354515075684, "learning_rate": 4.843265306122449e-05, "loss": 3.9055, "step": 770 }, { "epoch": 3.146938775510204, "grad_norm": 1.9531049728393555, "learning_rate": 4.8430612244897963e-05, "loss": 4.2325, "step": 771 }, { "epoch": 3.1510204081632653, "grad_norm": 1.9727600812911987, "learning_rate": 4.842857142857143e-05, "loss": 4.0012, "step": 772 }, { "epoch": 3.1551020408163266, "grad_norm": 1.7199817895889282, "learning_rate": 4.8426530612244904e-05, "loss": 4.0746, "step": 773 }, { "epoch": 3.159183673469388, "grad_norm": 2.5235395431518555, "learning_rate": 4.842448979591837e-05, "loss": 3.988, "step": 774 }, { "epoch": 3.163265306122449, "grad_norm": 4.0095438957214355, "learning_rate": 4.842244897959184e-05, "loss": 3.7604, "step": 775 }, { "epoch": 3.16734693877551, "grad_norm": 2.103764295578003, "learning_rate": 4.8420408163265306e-05, "loss": 4.0506, "step": 776 }, { "epoch": 3.1714285714285713, "grad_norm": 2.094419479370117, "learning_rate": 4.841836734693877e-05, "loss": 4.1856, "step": 777 }, { "epoch": 3.1755102040816325, "grad_norm": 3.3594350814819336, "learning_rate": 4.841632653061225e-05, "loss": 3.9009, "step": 778 }, { "epoch": 3.179591836734694, "grad_norm": 2.417377471923828, "learning_rate": 4.841428571428572e-05, "loss": 4.2882, "step": 779 }, { "epoch": 3.183673469387755, "grad_norm": 2.769059181213379, "learning_rate": 4.841224489795919e-05, "loss": 3.8787, "step": 780 }, { "epoch": 3.1877551020408164, "grad_norm": 1.8266363143920898, "learning_rate": 4.8410204081632655e-05, "loss": 3.9787, "step": 781 }, { "epoch": 3.1918367346938776, "grad_norm": 2.282308340072632, "learning_rate": 4.840816326530612e-05, "loss": 4.2628, "step": 782 }, { "epoch": 3.195918367346939, "grad_norm": 1.9992351531982422, "learning_rate": 4.8406122448979596e-05, "loss": 3.9457, "step": 783 }, { "epoch": 3.2, "grad_norm": 3.262006998062134, "learning_rate": 4.840408163265306e-05, "loss": 3.8193, "step": 784 }, { "epoch": 3.204081632653061, "grad_norm": 2.7947616577148438, "learning_rate": 4.840204081632653e-05, "loss": 3.923, "step": 785 }, { "epoch": 3.2081632653061223, "grad_norm": 3.1738972663879395, "learning_rate": 4.8400000000000004e-05, "loss": 3.6559, "step": 786 }, { "epoch": 3.2122448979591836, "grad_norm": 1.9924403429031372, "learning_rate": 4.839795918367347e-05, "loss": 4.1635, "step": 787 }, { "epoch": 3.216326530612245, "grad_norm": 1.601217269897461, "learning_rate": 4.8395918367346945e-05, "loss": 4.204, "step": 788 }, { "epoch": 3.220408163265306, "grad_norm": 2.710226535797119, "learning_rate": 4.839387755102041e-05, "loss": 4.0894, "step": 789 }, { "epoch": 3.2244897959183674, "grad_norm": 2.4061129093170166, "learning_rate": 4.839183673469388e-05, "loss": 3.7219, "step": 790 }, { "epoch": 3.2285714285714286, "grad_norm": 1.9369512796401978, "learning_rate": 4.8389795918367347e-05, "loss": 4.2878, "step": 791 }, { "epoch": 3.23265306122449, "grad_norm": 2.2224934101104736, "learning_rate": 4.8387755102040814e-05, "loss": 3.8487, "step": 792 }, { "epoch": 3.236734693877551, "grad_norm": 2.1411075592041016, "learning_rate": 4.838571428571429e-05, "loss": 4.0305, "step": 793 }, { "epoch": 3.240816326530612, "grad_norm": 3.3243656158447266, "learning_rate": 4.8383673469387755e-05, "loss": 3.7948, "step": 794 }, { "epoch": 3.2448979591836733, "grad_norm": 3.5797572135925293, "learning_rate": 4.838163265306123e-05, "loss": 3.6172, "step": 795 }, { "epoch": 3.2489795918367346, "grad_norm": 2.9918296337127686, "learning_rate": 4.8379591836734696e-05, "loss": 3.7525, "step": 796 }, { "epoch": 3.253061224489796, "grad_norm": 3.888568162918091, "learning_rate": 4.837755102040817e-05, "loss": 3.8338, "step": 797 }, { "epoch": 3.257142857142857, "grad_norm": 2.3832311630249023, "learning_rate": 4.837551020408164e-05, "loss": 3.8825, "step": 798 }, { "epoch": 3.2612244897959184, "grad_norm": 1.8825222253799438, "learning_rate": 4.8373469387755104e-05, "loss": 3.8712, "step": 799 }, { "epoch": 3.2653061224489797, "grad_norm": 1.571219801902771, "learning_rate": 4.837142857142857e-05, "loss": 4.1404, "step": 800 }, { "epoch": 3.269387755102041, "grad_norm": 2.375882148742676, "learning_rate": 4.836938775510204e-05, "loss": 4.0338, "step": 801 }, { "epoch": 3.273469387755102, "grad_norm": 1.5480848550796509, "learning_rate": 4.836734693877551e-05, "loss": 4.0419, "step": 802 }, { "epoch": 3.277551020408163, "grad_norm": 2.1182632446289062, "learning_rate": 4.8365306122448986e-05, "loss": 4.0653, "step": 803 }, { "epoch": 3.2816326530612243, "grad_norm": 2.2096590995788574, "learning_rate": 4.836326530612245e-05, "loss": 4.1865, "step": 804 }, { "epoch": 3.2857142857142856, "grad_norm": 1.927366852760315, "learning_rate": 4.836122448979592e-05, "loss": 4.2014, "step": 805 }, { "epoch": 3.289795918367347, "grad_norm": 4.3754072189331055, "learning_rate": 4.835918367346939e-05, "loss": 3.3428, "step": 806 }, { "epoch": 3.293877551020408, "grad_norm": 1.8827592134475708, "learning_rate": 4.835714285714286e-05, "loss": 4.2924, "step": 807 }, { "epoch": 3.2979591836734694, "grad_norm": 1.938480019569397, "learning_rate": 4.835510204081633e-05, "loss": 4.1536, "step": 808 }, { "epoch": 3.3020408163265307, "grad_norm": 2.6143739223480225, "learning_rate": 4.8353061224489795e-05, "loss": 4.0517, "step": 809 }, { "epoch": 3.306122448979592, "grad_norm": 2.882808208465576, "learning_rate": 4.835102040816327e-05, "loss": 4.0516, "step": 810 }, { "epoch": 3.3102040816326532, "grad_norm": 2.597074031829834, "learning_rate": 4.8348979591836737e-05, "loss": 3.79, "step": 811 }, { "epoch": 3.314285714285714, "grad_norm": 2.635510206222534, "learning_rate": 4.834693877551021e-05, "loss": 3.7743, "step": 812 }, { "epoch": 3.3183673469387753, "grad_norm": 2.2580580711364746, "learning_rate": 4.834489795918368e-05, "loss": 3.9982, "step": 813 }, { "epoch": 3.3224489795918366, "grad_norm": 2.866758346557617, "learning_rate": 4.8342857142857145e-05, "loss": 3.922, "step": 814 }, { "epoch": 3.326530612244898, "grad_norm": 3.795773506164551, "learning_rate": 4.834081632653061e-05, "loss": 3.6871, "step": 815 }, { "epoch": 3.330612244897959, "grad_norm": 2.643890619277954, "learning_rate": 4.833877551020408e-05, "loss": 4.0952, "step": 816 }, { "epoch": 3.3346938775510204, "grad_norm": 3.0202481746673584, "learning_rate": 4.833673469387755e-05, "loss": 3.148, "step": 817 }, { "epoch": 3.3387755102040817, "grad_norm": 2.70841646194458, "learning_rate": 4.833469387755103e-05, "loss": 3.8877, "step": 818 }, { "epoch": 3.342857142857143, "grad_norm": 2.520998239517212, "learning_rate": 4.8332653061224494e-05, "loss": 4.1986, "step": 819 }, { "epoch": 3.3469387755102042, "grad_norm": 6.192816257476807, "learning_rate": 4.833061224489796e-05, "loss": 2.0138, "step": 820 }, { "epoch": 3.351020408163265, "grad_norm": 4.110122203826904, "learning_rate": 4.832857142857143e-05, "loss": 3.176, "step": 821 }, { "epoch": 3.3551020408163263, "grad_norm": 4.001208305358887, "learning_rate": 4.83265306122449e-05, "loss": 3.7232, "step": 822 }, { "epoch": 3.3591836734693876, "grad_norm": 2.1857175827026367, "learning_rate": 4.832448979591837e-05, "loss": 4.1929, "step": 823 }, { "epoch": 3.363265306122449, "grad_norm": 2.208874464035034, "learning_rate": 4.8322448979591836e-05, "loss": 4.203, "step": 824 }, { "epoch": 3.36734693877551, "grad_norm": 3.497007131576538, "learning_rate": 4.832040816326531e-05, "loss": 3.61, "step": 825 }, { "epoch": 3.3714285714285714, "grad_norm": 2.7457022666931152, "learning_rate": 4.831836734693878e-05, "loss": 4.0059, "step": 826 }, { "epoch": 3.3755102040816327, "grad_norm": 2.6396195888519287, "learning_rate": 4.831632653061225e-05, "loss": 3.9954, "step": 827 }, { "epoch": 3.379591836734694, "grad_norm": 1.7511093616485596, "learning_rate": 4.831428571428572e-05, "loss": 4.3354, "step": 828 }, { "epoch": 3.3836734693877553, "grad_norm": 3.1404130458831787, "learning_rate": 4.8312244897959185e-05, "loss": 3.659, "step": 829 }, { "epoch": 3.387755102040816, "grad_norm": 3.599909782409668, "learning_rate": 4.831020408163265e-05, "loss": 3.8333, "step": 830 }, { "epoch": 3.3918367346938774, "grad_norm": 1.7001049518585205, "learning_rate": 4.830816326530612e-05, "loss": 4.0837, "step": 831 }, { "epoch": 3.3959183673469386, "grad_norm": 3.0943477153778076, "learning_rate": 4.8306122448979594e-05, "loss": 4.0788, "step": 832 }, { "epoch": 3.4, "grad_norm": 3.68554949760437, "learning_rate": 4.830408163265307e-05, "loss": 3.6914, "step": 833 }, { "epoch": 3.404081632653061, "grad_norm": 3.6025116443634033, "learning_rate": 4.8302040816326535e-05, "loss": 3.8119, "step": 834 }, { "epoch": 3.4081632653061225, "grad_norm": 4.04935884475708, "learning_rate": 4.83e-05, "loss": 3.1232, "step": 835 }, { "epoch": 3.4122448979591837, "grad_norm": 4.480173110961914, "learning_rate": 4.829795918367347e-05, "loss": 3.3429, "step": 836 }, { "epoch": 3.416326530612245, "grad_norm": 1.12873375415802, "learning_rate": 4.829591836734694e-05, "loss": 4.3398, "step": 837 }, { "epoch": 3.4204081632653063, "grad_norm": 2.256218910217285, "learning_rate": 4.829387755102041e-05, "loss": 3.9689, "step": 838 }, { "epoch": 3.424489795918367, "grad_norm": 3.096395969390869, "learning_rate": 4.829183673469388e-05, "loss": 3.9244, "step": 839 }, { "epoch": 3.4285714285714284, "grad_norm": 2.952430009841919, "learning_rate": 4.828979591836735e-05, "loss": 4.0033, "step": 840 }, { "epoch": 3.4326530612244897, "grad_norm": 2.161222457885742, "learning_rate": 4.828775510204082e-05, "loss": 4.1661, "step": 841 }, { "epoch": 3.436734693877551, "grad_norm": 2.1046836376190186, "learning_rate": 4.828571428571429e-05, "loss": 4.0327, "step": 842 }, { "epoch": 3.440816326530612, "grad_norm": 2.04301381111145, "learning_rate": 4.828367346938776e-05, "loss": 4.1251, "step": 843 }, { "epoch": 3.4448979591836735, "grad_norm": 2.3768222332000732, "learning_rate": 4.8281632653061226e-05, "loss": 4.013, "step": 844 }, { "epoch": 3.4489795918367347, "grad_norm": 2.930633068084717, "learning_rate": 4.827959183673469e-05, "loss": 3.6751, "step": 845 }, { "epoch": 3.453061224489796, "grad_norm": 2.5039281845092773, "learning_rate": 4.827755102040817e-05, "loss": 4.2837, "step": 846 }, { "epoch": 3.4571428571428573, "grad_norm": 1.9727540016174316, "learning_rate": 4.8275510204081634e-05, "loss": 4.0544, "step": 847 }, { "epoch": 3.461224489795918, "grad_norm": 2.163698434829712, "learning_rate": 4.82734693877551e-05, "loss": 3.8916, "step": 848 }, { "epoch": 3.4653061224489794, "grad_norm": 3.2381927967071533, "learning_rate": 4.8271428571428575e-05, "loss": 4.0184, "step": 849 }, { "epoch": 3.4693877551020407, "grad_norm": 3.3665127754211426, "learning_rate": 4.826938775510204e-05, "loss": 4.0471, "step": 850 }, { "epoch": 3.473469387755102, "grad_norm": 2.0017282962799072, "learning_rate": 4.8267346938775516e-05, "loss": 3.871, "step": 851 }, { "epoch": 3.477551020408163, "grad_norm": 3.407783269882202, "learning_rate": 4.8265306122448984e-05, "loss": 3.8828, "step": 852 }, { "epoch": 3.4816326530612245, "grad_norm": 2.592487096786499, "learning_rate": 4.826326530612245e-05, "loss": 4.0385, "step": 853 }, { "epoch": 3.4857142857142858, "grad_norm": 3.8073630332946777, "learning_rate": 4.826122448979592e-05, "loss": 3.488, "step": 854 }, { "epoch": 3.489795918367347, "grad_norm": 4.598300933837891, "learning_rate": 4.8259183673469385e-05, "loss": 3.7605, "step": 855 }, { "epoch": 3.4938775510204083, "grad_norm": 2.972259998321533, "learning_rate": 4.825714285714286e-05, "loss": 4.025, "step": 856 }, { "epoch": 3.497959183673469, "grad_norm": 3.320223093032837, "learning_rate": 4.825510204081633e-05, "loss": 3.893, "step": 857 }, { "epoch": 3.502040816326531, "grad_norm": 3.159522533416748, "learning_rate": 4.82530612244898e-05, "loss": 3.8934, "step": 858 }, { "epoch": 3.5061224489795917, "grad_norm": 2.3902852535247803, "learning_rate": 4.825102040816327e-05, "loss": 3.8544, "step": 859 }, { "epoch": 3.510204081632653, "grad_norm": 3.631992816925049, "learning_rate": 4.8248979591836734e-05, "loss": 4.033, "step": 860 }, { "epoch": 3.5142857142857142, "grad_norm": 1.809043049812317, "learning_rate": 4.824693877551021e-05, "loss": 4.1439, "step": 861 }, { "epoch": 3.5183673469387755, "grad_norm": 4.002199649810791, "learning_rate": 4.8244897959183675e-05, "loss": 3.6151, "step": 862 }, { "epoch": 3.522448979591837, "grad_norm": 2.278243064880371, "learning_rate": 4.824285714285714e-05, "loss": 4.0973, "step": 863 }, { "epoch": 3.526530612244898, "grad_norm": 3.2373945713043213, "learning_rate": 4.8240816326530616e-05, "loss": 3.6859, "step": 864 }, { "epoch": 3.5306122448979593, "grad_norm": 2.449594020843506, "learning_rate": 4.823877551020408e-05, "loss": 4.1248, "step": 865 }, { "epoch": 3.53469387755102, "grad_norm": 3.9114160537719727, "learning_rate": 4.823673469387756e-05, "loss": 3.5048, "step": 866 }, { "epoch": 3.538775510204082, "grad_norm": 2.1923556327819824, "learning_rate": 4.8234693877551024e-05, "loss": 3.9133, "step": 867 }, { "epoch": 3.5428571428571427, "grad_norm": 2.0893092155456543, "learning_rate": 4.823265306122449e-05, "loss": 4.0194, "step": 868 }, { "epoch": 3.546938775510204, "grad_norm": 1.919203758239746, "learning_rate": 4.823061224489796e-05, "loss": 4.3262, "step": 869 }, { "epoch": 3.5510204081632653, "grad_norm": 2.3204376697540283, "learning_rate": 4.8228571428571426e-05, "loss": 3.954, "step": 870 }, { "epoch": 3.5551020408163265, "grad_norm": 2.4948885440826416, "learning_rate": 4.82265306122449e-05, "loss": 3.8746, "step": 871 }, { "epoch": 3.559183673469388, "grad_norm": 3.420686960220337, "learning_rate": 4.8224489795918373e-05, "loss": 3.4509, "step": 872 }, { "epoch": 3.563265306122449, "grad_norm": 1.6252578496932983, "learning_rate": 4.822244897959184e-05, "loss": 4.2982, "step": 873 }, { "epoch": 3.5673469387755103, "grad_norm": 2.296715497970581, "learning_rate": 4.822040816326531e-05, "loss": 4.0565, "step": 874 }, { "epoch": 3.571428571428571, "grad_norm": 2.062633752822876, "learning_rate": 4.8218367346938775e-05, "loss": 4.1341, "step": 875 }, { "epoch": 3.575510204081633, "grad_norm": 3.668715476989746, "learning_rate": 4.821632653061225e-05, "loss": 3.5975, "step": 876 }, { "epoch": 3.5795918367346937, "grad_norm": 2.1621785163879395, "learning_rate": 4.8214285714285716e-05, "loss": 3.9416, "step": 877 }, { "epoch": 3.583673469387755, "grad_norm": 2.4055256843566895, "learning_rate": 4.821224489795918e-05, "loss": 4.0451, "step": 878 }, { "epoch": 3.5877551020408163, "grad_norm": 2.611443519592285, "learning_rate": 4.821020408163266e-05, "loss": 4.0994, "step": 879 }, { "epoch": 3.5918367346938775, "grad_norm": 2.05330753326416, "learning_rate": 4.8208163265306124e-05, "loss": 4.3557, "step": 880 }, { "epoch": 3.595918367346939, "grad_norm": 2.2528862953186035, "learning_rate": 4.82061224489796e-05, "loss": 3.6986, "step": 881 }, { "epoch": 3.6, "grad_norm": 1.9765815734863281, "learning_rate": 4.8204081632653065e-05, "loss": 3.9308, "step": 882 }, { "epoch": 3.6040816326530614, "grad_norm": 3.558507204055786, "learning_rate": 4.820204081632653e-05, "loss": 3.4783, "step": 883 }, { "epoch": 3.608163265306122, "grad_norm": 2.0426411628723145, "learning_rate": 4.82e-05, "loss": 3.9029, "step": 884 }, { "epoch": 3.612244897959184, "grad_norm": 3.5221757888793945, "learning_rate": 4.8197959183673466e-05, "loss": 3.5777, "step": 885 }, { "epoch": 3.6163265306122447, "grad_norm": 2.870327949523926, "learning_rate": 4.819591836734694e-05, "loss": 3.9827, "step": 886 }, { "epoch": 3.620408163265306, "grad_norm": 3.1879351139068604, "learning_rate": 4.8193877551020414e-05, "loss": 3.9287, "step": 887 }, { "epoch": 3.6244897959183673, "grad_norm": 1.597001314163208, "learning_rate": 4.819183673469388e-05, "loss": 4.363, "step": 888 }, { "epoch": 3.6285714285714286, "grad_norm": 2.7021982669830322, "learning_rate": 4.818979591836735e-05, "loss": 4.1318, "step": 889 }, { "epoch": 3.63265306122449, "grad_norm": 1.9900134801864624, "learning_rate": 4.818775510204082e-05, "loss": 3.9887, "step": 890 }, { "epoch": 3.636734693877551, "grad_norm": 2.8085978031158447, "learning_rate": 4.818571428571429e-05, "loss": 3.9527, "step": 891 }, { "epoch": 3.6408163265306124, "grad_norm": 2.286219596862793, "learning_rate": 4.818367346938776e-05, "loss": 4.0823, "step": 892 }, { "epoch": 3.644897959183673, "grad_norm": 3.217055320739746, "learning_rate": 4.8181632653061224e-05, "loss": 3.86, "step": 893 }, { "epoch": 3.648979591836735, "grad_norm": 2.3500547409057617, "learning_rate": 4.81795918367347e-05, "loss": 4.1584, "step": 894 }, { "epoch": 3.6530612244897958, "grad_norm": 3.914839267730713, "learning_rate": 4.8177551020408165e-05, "loss": 3.6142, "step": 895 }, { "epoch": 3.657142857142857, "grad_norm": 2.1657397747039795, "learning_rate": 4.817551020408164e-05, "loss": 4.1419, "step": 896 }, { "epoch": 3.6612244897959183, "grad_norm": 3.0736300945281982, "learning_rate": 4.8173469387755106e-05, "loss": 3.9675, "step": 897 }, { "epoch": 3.6653061224489796, "grad_norm": 2.011793375015259, "learning_rate": 4.817142857142857e-05, "loss": 3.9741, "step": 898 }, { "epoch": 3.669387755102041, "grad_norm": 2.905874013900757, "learning_rate": 4.816938775510204e-05, "loss": 3.9604, "step": 899 }, { "epoch": 3.673469387755102, "grad_norm": 3.969813585281372, "learning_rate": 4.8167346938775514e-05, "loss": 3.6267, "step": 900 }, { "epoch": 3.6775510204081634, "grad_norm": 2.267146110534668, "learning_rate": 4.816530612244898e-05, "loss": 4.1035, "step": 901 }, { "epoch": 3.6816326530612242, "grad_norm": 4.779007911682129, "learning_rate": 4.816326530612245e-05, "loss": 3.1992, "step": 902 }, { "epoch": 3.685714285714286, "grad_norm": 2.766939878463745, "learning_rate": 4.816122448979592e-05, "loss": 3.9446, "step": 903 }, { "epoch": 3.6897959183673468, "grad_norm": 2.9722135066986084, "learning_rate": 4.815918367346939e-05, "loss": 3.8391, "step": 904 }, { "epoch": 3.693877551020408, "grad_norm": 2.3063666820526123, "learning_rate": 4.815714285714286e-05, "loss": 4.3122, "step": 905 }, { "epoch": 3.6979591836734693, "grad_norm": 3.836848497390747, "learning_rate": 4.815510204081633e-05, "loss": 3.5489, "step": 906 }, { "epoch": 3.7020408163265306, "grad_norm": 2.7610952854156494, "learning_rate": 4.81530612244898e-05, "loss": 3.9617, "step": 907 }, { "epoch": 3.706122448979592, "grad_norm": 3.171037197113037, "learning_rate": 4.8151020408163265e-05, "loss": 3.8737, "step": 908 }, { "epoch": 3.710204081632653, "grad_norm": 2.90105938911438, "learning_rate": 4.814897959183673e-05, "loss": 3.904, "step": 909 }, { "epoch": 3.7142857142857144, "grad_norm": 4.425792694091797, "learning_rate": 4.8146938775510206e-05, "loss": 3.2432, "step": 910 }, { "epoch": 3.7183673469387752, "grad_norm": 3.7296929359436035, "learning_rate": 4.814489795918368e-05, "loss": 3.8065, "step": 911 }, { "epoch": 3.722448979591837, "grad_norm": 3.4185447692871094, "learning_rate": 4.8142857142857147e-05, "loss": 3.7131, "step": 912 }, { "epoch": 3.726530612244898, "grad_norm": 2.602130889892578, "learning_rate": 4.8140816326530614e-05, "loss": 4.2071, "step": 913 }, { "epoch": 3.730612244897959, "grad_norm": 1.7248843908309937, "learning_rate": 4.813877551020408e-05, "loss": 4.1466, "step": 914 }, { "epoch": 3.7346938775510203, "grad_norm": 2.6862752437591553, "learning_rate": 4.8136734693877555e-05, "loss": 3.9118, "step": 915 }, { "epoch": 3.7387755102040816, "grad_norm": 2.032500982284546, "learning_rate": 4.813469387755102e-05, "loss": 4.0346, "step": 916 }, { "epoch": 3.742857142857143, "grad_norm": 2.930602788925171, "learning_rate": 4.813265306122449e-05, "loss": 3.9951, "step": 917 }, { "epoch": 3.746938775510204, "grad_norm": 2.385185956954956, "learning_rate": 4.813061224489796e-05, "loss": 3.9301, "step": 918 }, { "epoch": 3.7510204081632654, "grad_norm": 2.1603736877441406, "learning_rate": 4.812857142857143e-05, "loss": 4.1221, "step": 919 }, { "epoch": 3.7551020408163263, "grad_norm": 3.0866756439208984, "learning_rate": 4.8126530612244904e-05, "loss": 3.9543, "step": 920 }, { "epoch": 3.759183673469388, "grad_norm": 2.038254976272583, "learning_rate": 4.812448979591837e-05, "loss": 3.8709, "step": 921 }, { "epoch": 3.763265306122449, "grad_norm": 3.3009469509124756, "learning_rate": 4.812244897959184e-05, "loss": 3.7943, "step": 922 }, { "epoch": 3.76734693877551, "grad_norm": 2.220026731491089, "learning_rate": 4.8120408163265305e-05, "loss": 4.2329, "step": 923 }, { "epoch": 3.7714285714285714, "grad_norm": 3.74888014793396, "learning_rate": 4.811836734693877e-05, "loss": 3.5018, "step": 924 }, { "epoch": 3.7755102040816326, "grad_norm": 3.959214687347412, "learning_rate": 4.8116326530612246e-05, "loss": 4.1499, "step": 925 }, { "epoch": 3.779591836734694, "grad_norm": 3.329833745956421, "learning_rate": 4.811428571428572e-05, "loss": 4.1545, "step": 926 }, { "epoch": 3.783673469387755, "grad_norm": 1.8648470640182495, "learning_rate": 4.811224489795919e-05, "loss": 3.8464, "step": 927 }, { "epoch": 3.7877551020408164, "grad_norm": 2.6845688819885254, "learning_rate": 4.8110204081632654e-05, "loss": 3.9292, "step": 928 }, { "epoch": 3.7918367346938773, "grad_norm": 2.5107173919677734, "learning_rate": 4.810816326530612e-05, "loss": 4.0215, "step": 929 }, { "epoch": 3.795918367346939, "grad_norm": 2.328237533569336, "learning_rate": 4.8106122448979595e-05, "loss": 4.1381, "step": 930 }, { "epoch": 3.8, "grad_norm": 2.6199822425842285, "learning_rate": 4.810408163265306e-05, "loss": 4.0547, "step": 931 }, { "epoch": 3.804081632653061, "grad_norm": 2.333458423614502, "learning_rate": 4.810204081632653e-05, "loss": 3.8966, "step": 932 }, { "epoch": 3.8081632653061224, "grad_norm": 2.5612542629241943, "learning_rate": 4.8100000000000004e-05, "loss": 4.1092, "step": 933 }, { "epoch": 3.8122448979591836, "grad_norm": 2.8241055011749268, "learning_rate": 4.809795918367348e-05, "loss": 4.1126, "step": 934 }, { "epoch": 3.816326530612245, "grad_norm": 1.65156090259552, "learning_rate": 4.8095918367346945e-05, "loss": 4.1396, "step": 935 }, { "epoch": 3.820408163265306, "grad_norm": 2.939361333847046, "learning_rate": 4.809387755102041e-05, "loss": 3.7678, "step": 936 }, { "epoch": 3.8244897959183675, "grad_norm": 4.164088726043701, "learning_rate": 4.809183673469388e-05, "loss": 3.4326, "step": 937 }, { "epoch": 3.8285714285714287, "grad_norm": 3.1582157611846924, "learning_rate": 4.8089795918367346e-05, "loss": 3.7434, "step": 938 }, { "epoch": 3.83265306122449, "grad_norm": 2.26613712310791, "learning_rate": 4.808775510204082e-05, "loss": 4.0243, "step": 939 }, { "epoch": 3.836734693877551, "grad_norm": 4.937869548797607, "learning_rate": 4.808571428571429e-05, "loss": 3.24, "step": 940 }, { "epoch": 3.840816326530612, "grad_norm": 2.043041706085205, "learning_rate": 4.808367346938776e-05, "loss": 3.9576, "step": 941 }, { "epoch": 3.8448979591836734, "grad_norm": 2.2964305877685547, "learning_rate": 4.808163265306123e-05, "loss": 4.0724, "step": 942 }, { "epoch": 3.8489795918367347, "grad_norm": 2.4655299186706543, "learning_rate": 4.8079591836734695e-05, "loss": 4.0064, "step": 943 }, { "epoch": 3.853061224489796, "grad_norm": 2.9731926918029785, "learning_rate": 4.807755102040817e-05, "loss": 3.7286, "step": 944 }, { "epoch": 3.857142857142857, "grad_norm": 4.887332439422607, "learning_rate": 4.8075510204081636e-05, "loss": 3.0191, "step": 945 }, { "epoch": 3.8612244897959185, "grad_norm": 4.488724708557129, "learning_rate": 4.80734693877551e-05, "loss": 3.2265, "step": 946 }, { "epoch": 3.8653061224489798, "grad_norm": 3.1133153438568115, "learning_rate": 4.807142857142857e-05, "loss": 3.5129, "step": 947 }, { "epoch": 3.869387755102041, "grad_norm": 4.092627048492432, "learning_rate": 4.8069387755102044e-05, "loss": 3.3265, "step": 948 }, { "epoch": 3.873469387755102, "grad_norm": 2.609614849090576, "learning_rate": 4.806734693877551e-05, "loss": 3.88, "step": 949 }, { "epoch": 3.877551020408163, "grad_norm": 2.610332489013672, "learning_rate": 4.8065306122448985e-05, "loss": 4.1855, "step": 950 }, { "epoch": 3.8816326530612244, "grad_norm": 1.859093427658081, "learning_rate": 4.806326530612245e-05, "loss": 4.2752, "step": 951 }, { "epoch": 3.8857142857142857, "grad_norm": 3.0327320098876953, "learning_rate": 4.806122448979592e-05, "loss": 3.8249, "step": 952 }, { "epoch": 3.889795918367347, "grad_norm": 1.8704159259796143, "learning_rate": 4.805918367346939e-05, "loss": 4.0285, "step": 953 }, { "epoch": 3.8938775510204082, "grad_norm": 2.7514491081237793, "learning_rate": 4.805714285714286e-05, "loss": 3.4129, "step": 954 }, { "epoch": 3.8979591836734695, "grad_norm": 2.2343544960021973, "learning_rate": 4.805510204081633e-05, "loss": 3.9668, "step": 955 }, { "epoch": 3.9020408163265308, "grad_norm": 2.8982460498809814, "learning_rate": 4.8053061224489795e-05, "loss": 3.6794, "step": 956 }, { "epoch": 3.906122448979592, "grad_norm": 5.510666847229004, "learning_rate": 4.805102040816327e-05, "loss": 3.1056, "step": 957 }, { "epoch": 3.910204081632653, "grad_norm": 3.717081069946289, "learning_rate": 4.8048979591836736e-05, "loss": 3.6827, "step": 958 }, { "epoch": 3.914285714285714, "grad_norm": 2.95226788520813, "learning_rate": 4.804693877551021e-05, "loss": 3.6547, "step": 959 }, { "epoch": 3.9183673469387754, "grad_norm": 3.9405441284179688, "learning_rate": 4.804489795918368e-05, "loss": 3.5729, "step": 960 }, { "epoch": 3.9224489795918367, "grad_norm": 2.8608033657073975, "learning_rate": 4.8042857142857144e-05, "loss": 4.2161, "step": 961 }, { "epoch": 3.926530612244898, "grad_norm": 3.030956745147705, "learning_rate": 4.804081632653061e-05, "loss": 3.2924, "step": 962 }, { "epoch": 3.9306122448979592, "grad_norm": 3.1510977745056152, "learning_rate": 4.803877551020408e-05, "loss": 3.8415, "step": 963 }, { "epoch": 3.9346938775510205, "grad_norm": 3.209500789642334, "learning_rate": 4.803673469387755e-05, "loss": 3.7859, "step": 964 }, { "epoch": 3.938775510204082, "grad_norm": 2.518446207046509, "learning_rate": 4.8034693877551026e-05, "loss": 4.0369, "step": 965 }, { "epoch": 3.942857142857143, "grad_norm": 2.933751344680786, "learning_rate": 4.803265306122449e-05, "loss": 3.8269, "step": 966 }, { "epoch": 3.946938775510204, "grad_norm": 2.496227979660034, "learning_rate": 4.803061224489796e-05, "loss": 3.8564, "step": 967 }, { "epoch": 3.951020408163265, "grad_norm": 2.728717803955078, "learning_rate": 4.802857142857143e-05, "loss": 3.9356, "step": 968 }, { "epoch": 3.9551020408163264, "grad_norm": 1.954904317855835, "learning_rate": 4.80265306122449e-05, "loss": 4.1823, "step": 969 }, { "epoch": 3.9591836734693877, "grad_norm": 2.875450372695923, "learning_rate": 4.802448979591837e-05, "loss": 3.8632, "step": 970 }, { "epoch": 3.963265306122449, "grad_norm": 1.865355372428894, "learning_rate": 4.8022448979591836e-05, "loss": 4.4653, "step": 971 }, { "epoch": 3.9673469387755103, "grad_norm": 2.119278907775879, "learning_rate": 4.802040816326531e-05, "loss": 4.0126, "step": 972 }, { "epoch": 3.9714285714285715, "grad_norm": 4.366546630859375, "learning_rate": 4.801836734693878e-05, "loss": 3.4084, "step": 973 }, { "epoch": 3.975510204081633, "grad_norm": 3.550924062728882, "learning_rate": 4.801632653061225e-05, "loss": 3.8286, "step": 974 }, { "epoch": 3.979591836734694, "grad_norm": 2.9824299812316895, "learning_rate": 4.801428571428572e-05, "loss": 4.0991, "step": 975 }, { "epoch": 3.983673469387755, "grad_norm": 2.5901002883911133, "learning_rate": 4.8012244897959185e-05, "loss": 3.861, "step": 976 }, { "epoch": 3.987755102040816, "grad_norm": 1.8107521533966064, "learning_rate": 4.801020408163265e-05, "loss": 4.1367, "step": 977 }, { "epoch": 3.9918367346938775, "grad_norm": 1.879692792892456, "learning_rate": 4.8008163265306126e-05, "loss": 3.8494, "step": 978 }, { "epoch": 3.9959183673469387, "grad_norm": 2.021596908569336, "learning_rate": 4.800612244897959e-05, "loss": 4.1421, "step": 979 }, { "epoch": 4.0, "grad_norm": 3.059981346130371, "learning_rate": 4.800408163265307e-05, "loss": 3.7263, "step": 980 }, { "epoch": 4.004081632653061, "grad_norm": 1.1488847732543945, "learning_rate": 4.8002040816326534e-05, "loss": 4.3085, "step": 981 }, { "epoch": 4.0081632653061225, "grad_norm": 2.2764456272125244, "learning_rate": 4.8e-05, "loss": 3.9791, "step": 982 }, { "epoch": 4.012244897959183, "grad_norm": 2.0791075229644775, "learning_rate": 4.7997959183673475e-05, "loss": 3.8875, "step": 983 }, { "epoch": 4.016326530612245, "grad_norm": 2.0372183322906494, "learning_rate": 4.799591836734694e-05, "loss": 4.0863, "step": 984 }, { "epoch": 4.020408163265306, "grad_norm": 2.955033302307129, "learning_rate": 4.799387755102041e-05, "loss": 4.0032, "step": 985 }, { "epoch": 4.024489795918368, "grad_norm": 2.986849308013916, "learning_rate": 4.7991836734693876e-05, "loss": 3.7508, "step": 986 }, { "epoch": 4.0285714285714285, "grad_norm": 3.344866991043091, "learning_rate": 4.798979591836735e-05, "loss": 3.8382, "step": 987 }, { "epoch": 4.03265306122449, "grad_norm": 1.7606037855148315, "learning_rate": 4.7987755102040824e-05, "loss": 4.105, "step": 988 }, { "epoch": 4.036734693877551, "grad_norm": 2.1844215393066406, "learning_rate": 4.798571428571429e-05, "loss": 4.1665, "step": 989 }, { "epoch": 4.040816326530612, "grad_norm": 2.7389323711395264, "learning_rate": 4.798367346938776e-05, "loss": 3.9408, "step": 990 }, { "epoch": 4.044897959183674, "grad_norm": 2.3995909690856934, "learning_rate": 4.7981632653061226e-05, "loss": 4.1526, "step": 991 }, { "epoch": 4.048979591836734, "grad_norm": 3.356044292449951, "learning_rate": 4.797959183673469e-05, "loss": 3.8465, "step": 992 }, { "epoch": 4.053061224489796, "grad_norm": 2.2474992275238037, "learning_rate": 4.797755102040817e-05, "loss": 3.9022, "step": 993 }, { "epoch": 4.057142857142857, "grad_norm": 2.2419934272766113, "learning_rate": 4.7975510204081634e-05, "loss": 3.9973, "step": 994 }, { "epoch": 4.061224489795919, "grad_norm": 2.1205875873565674, "learning_rate": 4.797346938775511e-05, "loss": 3.9717, "step": 995 }, { "epoch": 4.0653061224489795, "grad_norm": 1.7254136800765991, "learning_rate": 4.7971428571428575e-05, "loss": 4.269, "step": 996 }, { "epoch": 4.069387755102041, "grad_norm": 2.050009250640869, "learning_rate": 4.796938775510204e-05, "loss": 3.9521, "step": 997 }, { "epoch": 4.073469387755102, "grad_norm": 2.0053369998931885, "learning_rate": 4.7967346938775516e-05, "loss": 3.9157, "step": 998 }, { "epoch": 4.077551020408163, "grad_norm": 2.5217673778533936, "learning_rate": 4.796530612244898e-05, "loss": 3.9921, "step": 999 }, { "epoch": 4.081632653061225, "grad_norm": 2.151611328125, "learning_rate": 4.796326530612245e-05, "loss": 4.1868, "step": 1000 }, { "epoch": 4.085714285714285, "grad_norm": 2.851884603500366, "learning_rate": 4.796122448979592e-05, "loss": 4.0156, "step": 1001 }, { "epoch": 4.089795918367347, "grad_norm": 4.567970275878906, "learning_rate": 4.795918367346939e-05, "loss": 3.2577, "step": 1002 }, { "epoch": 4.093877551020408, "grad_norm": 6.1287007331848145, "learning_rate": 4.795714285714286e-05, "loss": 1.9459, "step": 1003 }, { "epoch": 4.09795918367347, "grad_norm": 2.881437063217163, "learning_rate": 4.795510204081633e-05, "loss": 3.6043, "step": 1004 }, { "epoch": 4.1020408163265305, "grad_norm": 2.2544922828674316, "learning_rate": 4.79530612244898e-05, "loss": 4.1488, "step": 1005 }, { "epoch": 4.106122448979592, "grad_norm": 2.758648157119751, "learning_rate": 4.7951020408163266e-05, "loss": 3.877, "step": 1006 }, { "epoch": 4.110204081632653, "grad_norm": 2.6097333431243896, "learning_rate": 4.7948979591836734e-05, "loss": 4.0046, "step": 1007 }, { "epoch": 4.114285714285714, "grad_norm": 3.1163887977600098, "learning_rate": 4.794693877551021e-05, "loss": 3.7572, "step": 1008 }, { "epoch": 4.118367346938776, "grad_norm": 2.5292015075683594, "learning_rate": 4.7944897959183675e-05, "loss": 3.9096, "step": 1009 }, { "epoch": 4.122448979591836, "grad_norm": 2.8312344551086426, "learning_rate": 4.794285714285714e-05, "loss": 3.8304, "step": 1010 }, { "epoch": 4.126530612244898, "grad_norm": 1.7633081674575806, "learning_rate": 4.7940816326530616e-05, "loss": 4.1787, "step": 1011 }, { "epoch": 4.130612244897959, "grad_norm": 3.17323637008667, "learning_rate": 4.793877551020408e-05, "loss": 3.9335, "step": 1012 }, { "epoch": 4.134693877551021, "grad_norm": 2.5765914916992188, "learning_rate": 4.793673469387756e-05, "loss": 3.9823, "step": 1013 }, { "epoch": 4.1387755102040815, "grad_norm": 2.652157783508301, "learning_rate": 4.7934693877551024e-05, "loss": 3.9855, "step": 1014 }, { "epoch": 4.142857142857143, "grad_norm": 3.9893991947174072, "learning_rate": 4.793265306122449e-05, "loss": 3.4262, "step": 1015 }, { "epoch": 4.146938775510204, "grad_norm": 3.028343439102173, "learning_rate": 4.793061224489796e-05, "loss": 4.0573, "step": 1016 }, { "epoch": 4.151020408163265, "grad_norm": 2.920008897781372, "learning_rate": 4.7928571428571425e-05, "loss": 4.0599, "step": 1017 }, { "epoch": 4.155102040816327, "grad_norm": 2.3653616905212402, "learning_rate": 4.79265306122449e-05, "loss": 4.1343, "step": 1018 }, { "epoch": 4.159183673469387, "grad_norm": 3.540327310562134, "learning_rate": 4.792448979591837e-05, "loss": 3.9359, "step": 1019 }, { "epoch": 4.163265306122449, "grad_norm": 2.7349436283111572, "learning_rate": 4.792244897959184e-05, "loss": 4.0342, "step": 1020 }, { "epoch": 4.16734693877551, "grad_norm": 4.179746627807617, "learning_rate": 4.792040816326531e-05, "loss": 3.6444, "step": 1021 }, { "epoch": 4.171428571428572, "grad_norm": 1.8282794952392578, "learning_rate": 4.7918367346938774e-05, "loss": 4.1148, "step": 1022 }, { "epoch": 4.1755102040816325, "grad_norm": 2.441394805908203, "learning_rate": 4.791632653061225e-05, "loss": 3.855, "step": 1023 }, { "epoch": 4.179591836734694, "grad_norm": 2.219372272491455, "learning_rate": 4.7914285714285715e-05, "loss": 3.8298, "step": 1024 }, { "epoch": 4.183673469387755, "grad_norm": 3.898660659790039, "learning_rate": 4.791224489795918e-05, "loss": 4.0475, "step": 1025 }, { "epoch": 4.187755102040816, "grad_norm": 2.46108078956604, "learning_rate": 4.7910204081632656e-05, "loss": 3.8519, "step": 1026 }, { "epoch": 4.191836734693878, "grad_norm": 3.4182820320129395, "learning_rate": 4.790816326530613e-05, "loss": 3.4593, "step": 1027 }, { "epoch": 4.1959183673469385, "grad_norm": 3.491408586502075, "learning_rate": 4.79061224489796e-05, "loss": 4.0528, "step": 1028 }, { "epoch": 4.2, "grad_norm": 4.136327743530273, "learning_rate": 4.7904081632653065e-05, "loss": 3.578, "step": 1029 }, { "epoch": 4.204081632653061, "grad_norm": 2.1824936866760254, "learning_rate": 4.790204081632653e-05, "loss": 3.8417, "step": 1030 }, { "epoch": 4.208163265306123, "grad_norm": 3.6152710914611816, "learning_rate": 4.79e-05, "loss": 3.9034, "step": 1031 }, { "epoch": 4.2122448979591836, "grad_norm": 5.586485385894775, "learning_rate": 4.789795918367347e-05, "loss": 3.4351, "step": 1032 }, { "epoch": 4.216326530612245, "grad_norm": 3.182133674621582, "learning_rate": 4.789591836734694e-05, "loss": 3.9019, "step": 1033 }, { "epoch": 4.220408163265306, "grad_norm": 2.887012481689453, "learning_rate": 4.7893877551020414e-05, "loss": 3.8156, "step": 1034 }, { "epoch": 4.224489795918367, "grad_norm": 2.425987958908081, "learning_rate": 4.789183673469388e-05, "loss": 4.0841, "step": 1035 }, { "epoch": 4.228571428571429, "grad_norm": 3.534796714782715, "learning_rate": 4.788979591836735e-05, "loss": 3.8013, "step": 1036 }, { "epoch": 4.2326530612244895, "grad_norm": 3.268441915512085, "learning_rate": 4.788775510204082e-05, "loss": 3.7069, "step": 1037 }, { "epoch": 4.236734693877551, "grad_norm": 2.3780477046966553, "learning_rate": 4.788571428571429e-05, "loss": 4.0782, "step": 1038 }, { "epoch": 4.240816326530612, "grad_norm": 3.4069502353668213, "learning_rate": 4.7883673469387756e-05, "loss": 3.8799, "step": 1039 }, { "epoch": 4.244897959183674, "grad_norm": 2.110553741455078, "learning_rate": 4.788163265306122e-05, "loss": 3.8024, "step": 1040 }, { "epoch": 4.248979591836735, "grad_norm": 3.878704309463501, "learning_rate": 4.78795918367347e-05, "loss": 3.4766, "step": 1041 }, { "epoch": 4.253061224489796, "grad_norm": 2.1264469623565674, "learning_rate": 4.787755102040817e-05, "loss": 3.9339, "step": 1042 }, { "epoch": 4.257142857142857, "grad_norm": 2.5596747398376465, "learning_rate": 4.787551020408164e-05, "loss": 3.817, "step": 1043 }, { "epoch": 4.261224489795918, "grad_norm": 3.38661527633667, "learning_rate": 4.7873469387755105e-05, "loss": 3.8242, "step": 1044 }, { "epoch": 4.26530612244898, "grad_norm": 4.874504566192627, "learning_rate": 4.787142857142857e-05, "loss": 3.1393, "step": 1045 }, { "epoch": 4.2693877551020405, "grad_norm": 3.165247678756714, "learning_rate": 4.786938775510204e-05, "loss": 4.0657, "step": 1046 }, { "epoch": 4.273469387755102, "grad_norm": 2.3010149002075195, "learning_rate": 4.7867346938775513e-05, "loss": 4.1103, "step": 1047 }, { "epoch": 4.277551020408163, "grad_norm": 4.566170692443848, "learning_rate": 4.786530612244898e-05, "loss": 3.3567, "step": 1048 }, { "epoch": 4.281632653061225, "grad_norm": 3.810955286026001, "learning_rate": 4.7863265306122454e-05, "loss": 4.0029, "step": 1049 }, { "epoch": 4.285714285714286, "grad_norm": 2.5010416507720947, "learning_rate": 4.786122448979592e-05, "loss": 4.1196, "step": 1050 }, { "epoch": 4.289795918367347, "grad_norm": 2.3333966732025146, "learning_rate": 4.785918367346939e-05, "loss": 3.8528, "step": 1051 }, { "epoch": 4.293877551020408, "grad_norm": 6.429322242736816, "learning_rate": 4.785714285714286e-05, "loss": 4.1093, "step": 1052 }, { "epoch": 4.29795918367347, "grad_norm": 2.4151058197021484, "learning_rate": 4.785510204081633e-05, "loss": 4.0927, "step": 1053 }, { "epoch": 4.302040816326531, "grad_norm": 2.505505084991455, "learning_rate": 4.78530612244898e-05, "loss": 4.0063, "step": 1054 }, { "epoch": 4.3061224489795915, "grad_norm": 2.5098583698272705, "learning_rate": 4.7851020408163264e-05, "loss": 4.152, "step": 1055 }, { "epoch": 4.310204081632653, "grad_norm": 2.8614888191223145, "learning_rate": 4.784897959183673e-05, "loss": 3.9206, "step": 1056 }, { "epoch": 4.314285714285714, "grad_norm": 3.4764528274536133, "learning_rate": 4.7846938775510205e-05, "loss": 3.7339, "step": 1057 }, { "epoch": 4.318367346938776, "grad_norm": 2.7497828006744385, "learning_rate": 4.784489795918368e-05, "loss": 3.8349, "step": 1058 }, { "epoch": 4.322448979591837, "grad_norm": 4.164638996124268, "learning_rate": 4.7842857142857146e-05, "loss": 3.4755, "step": 1059 }, { "epoch": 4.326530612244898, "grad_norm": 5.681775093078613, "learning_rate": 4.784081632653061e-05, "loss": 2.9771, "step": 1060 }, { "epoch": 4.330612244897959, "grad_norm": 4.251182556152344, "learning_rate": 4.783877551020408e-05, "loss": 3.556, "step": 1061 }, { "epoch": 4.33469387755102, "grad_norm": 2.3188817501068115, "learning_rate": 4.7836734693877554e-05, "loss": 4.184, "step": 1062 }, { "epoch": 4.338775510204082, "grad_norm": 2.2667829990386963, "learning_rate": 4.783469387755102e-05, "loss": 3.9733, "step": 1063 }, { "epoch": 4.3428571428571425, "grad_norm": 2.5881547927856445, "learning_rate": 4.783265306122449e-05, "loss": 3.9565, "step": 1064 }, { "epoch": 4.346938775510204, "grad_norm": 2.3491382598876953, "learning_rate": 4.783061224489796e-05, "loss": 3.9279, "step": 1065 }, { "epoch": 4.351020408163265, "grad_norm": 4.0639262199401855, "learning_rate": 4.782857142857143e-05, "loss": 3.6242, "step": 1066 }, { "epoch": 4.355102040816327, "grad_norm": 4.126925945281982, "learning_rate": 4.7826530612244903e-05, "loss": 3.5996, "step": 1067 }, { "epoch": 4.359183673469388, "grad_norm": 2.3391969203948975, "learning_rate": 4.782448979591837e-05, "loss": 4.0282, "step": 1068 }, { "epoch": 4.363265306122449, "grad_norm": 3.103057384490967, "learning_rate": 4.782244897959184e-05, "loss": 3.9207, "step": 1069 }, { "epoch": 4.36734693877551, "grad_norm": 2.5597565174102783, "learning_rate": 4.7820408163265305e-05, "loss": 4.1576, "step": 1070 }, { "epoch": 4.371428571428572, "grad_norm": 2.088205575942993, "learning_rate": 4.781836734693878e-05, "loss": 3.9818, "step": 1071 }, { "epoch": 4.375510204081633, "grad_norm": 2.9937214851379395, "learning_rate": 4.7816326530612246e-05, "loss": 4.1617, "step": 1072 }, { "epoch": 4.3795918367346935, "grad_norm": 4.051003932952881, "learning_rate": 4.781428571428572e-05, "loss": 3.5191, "step": 1073 }, { "epoch": 4.383673469387755, "grad_norm": 1.69303560256958, "learning_rate": 4.781224489795919e-05, "loss": 4.3402, "step": 1074 }, { "epoch": 4.387755102040816, "grad_norm": 3.25715708732605, "learning_rate": 4.7810204081632654e-05, "loss": 3.7899, "step": 1075 }, { "epoch": 4.391836734693878, "grad_norm": 2.810202121734619, "learning_rate": 4.780816326530613e-05, "loss": 4.1262, "step": 1076 }, { "epoch": 4.395918367346939, "grad_norm": 3.488553047180176, "learning_rate": 4.7806122448979595e-05, "loss": 3.1105, "step": 1077 }, { "epoch": 4.4, "grad_norm": 3.595381498336792, "learning_rate": 4.780408163265306e-05, "loss": 3.7431, "step": 1078 }, { "epoch": 4.404081632653061, "grad_norm": 2.712284803390503, "learning_rate": 4.780204081632653e-05, "loss": 3.9427, "step": 1079 }, { "epoch": 4.408163265306122, "grad_norm": 3.0894036293029785, "learning_rate": 4.78e-05, "loss": 3.7534, "step": 1080 }, { "epoch": 4.412244897959184, "grad_norm": 2.754199743270874, "learning_rate": 4.779795918367348e-05, "loss": 4.0788, "step": 1081 }, { "epoch": 4.416326530612245, "grad_norm": 3.1573922634124756, "learning_rate": 4.7795918367346944e-05, "loss": 3.8515, "step": 1082 }, { "epoch": 4.420408163265306, "grad_norm": 3.9437873363494873, "learning_rate": 4.779387755102041e-05, "loss": 3.7372, "step": 1083 }, { "epoch": 4.424489795918367, "grad_norm": 4.240090370178223, "learning_rate": 4.779183673469388e-05, "loss": 3.6423, "step": 1084 }, { "epoch": 4.428571428571429, "grad_norm": 3.2087948322296143, "learning_rate": 4.7789795918367346e-05, "loss": 3.6572, "step": 1085 }, { "epoch": 4.43265306122449, "grad_norm": 2.201953887939453, "learning_rate": 4.778775510204082e-05, "loss": 3.8288, "step": 1086 }, { "epoch": 4.436734693877551, "grad_norm": 4.64414644241333, "learning_rate": 4.7785714285714287e-05, "loss": 3.1418, "step": 1087 }, { "epoch": 4.440816326530612, "grad_norm": 5.571309566497803, "learning_rate": 4.778367346938776e-05, "loss": 2.8954, "step": 1088 }, { "epoch": 4.444897959183674, "grad_norm": 4.455643177032471, "learning_rate": 4.778163265306123e-05, "loss": 3.528, "step": 1089 }, { "epoch": 4.448979591836735, "grad_norm": 3.596524715423584, "learning_rate": 4.7779591836734695e-05, "loss": 3.819, "step": 1090 }, { "epoch": 4.453061224489796, "grad_norm": 4.968591690063477, "learning_rate": 4.777755102040817e-05, "loss": 3.0929, "step": 1091 }, { "epoch": 4.457142857142857, "grad_norm": 3.4099247455596924, "learning_rate": 4.7775510204081636e-05, "loss": 3.7408, "step": 1092 }, { "epoch": 4.461224489795918, "grad_norm": 4.900144577026367, "learning_rate": 4.77734693877551e-05, "loss": 3.3614, "step": 1093 }, { "epoch": 4.46530612244898, "grad_norm": 3.793720006942749, "learning_rate": 4.777142857142857e-05, "loss": 3.7493, "step": 1094 }, { "epoch": 4.469387755102041, "grad_norm": 2.758420705795288, "learning_rate": 4.7769387755102044e-05, "loss": 3.9649, "step": 1095 }, { "epoch": 4.473469387755102, "grad_norm": 5.407266139984131, "learning_rate": 4.776734693877552e-05, "loss": 3.3062, "step": 1096 }, { "epoch": 4.477551020408163, "grad_norm": 2.4409523010253906, "learning_rate": 4.7765306122448985e-05, "loss": 4.0256, "step": 1097 }, { "epoch": 4.481632653061224, "grad_norm": 3.176772117614746, "learning_rate": 4.776326530612245e-05, "loss": 4.0839, "step": 1098 }, { "epoch": 4.485714285714286, "grad_norm": 3.096646785736084, "learning_rate": 4.776122448979592e-05, "loss": 3.7556, "step": 1099 }, { "epoch": 4.489795918367347, "grad_norm": 5.863664627075195, "learning_rate": 4.7759183673469386e-05, "loss": 3.2796, "step": 1100 }, { "epoch": 4.493877551020408, "grad_norm": 2.7625081539154053, "learning_rate": 4.775714285714286e-05, "loss": 3.8177, "step": 1101 }, { "epoch": 4.497959183673469, "grad_norm": 2.283360481262207, "learning_rate": 4.775510204081633e-05, "loss": 4.0204, "step": 1102 }, { "epoch": 4.502040816326531, "grad_norm": 2.925088882446289, "learning_rate": 4.77530612244898e-05, "loss": 3.8122, "step": 1103 }, { "epoch": 4.506122448979592, "grad_norm": 4.155097484588623, "learning_rate": 4.775102040816327e-05, "loss": 3.5959, "step": 1104 }, { "epoch": 4.510204081632653, "grad_norm": 4.056398391723633, "learning_rate": 4.7748979591836735e-05, "loss": 3.9871, "step": 1105 }, { "epoch": 4.514285714285714, "grad_norm": 3.195453643798828, "learning_rate": 4.774693877551021e-05, "loss": 4.0979, "step": 1106 }, { "epoch": 4.518367346938776, "grad_norm": 3.004354238510132, "learning_rate": 4.7744897959183677e-05, "loss": 4.1729, "step": 1107 }, { "epoch": 4.522448979591837, "grad_norm": 2.1533658504486084, "learning_rate": 4.7742857142857144e-05, "loss": 4.3079, "step": 1108 }, { "epoch": 4.526530612244898, "grad_norm": 2.7869086265563965, "learning_rate": 4.774081632653061e-05, "loss": 3.7109, "step": 1109 }, { "epoch": 4.530612244897959, "grad_norm": 3.083414316177368, "learning_rate": 4.773877551020408e-05, "loss": 3.8455, "step": 1110 }, { "epoch": 4.53469387755102, "grad_norm": 2.530806541442871, "learning_rate": 4.773673469387755e-05, "loss": 3.6711, "step": 1111 }, { "epoch": 4.538775510204082, "grad_norm": 3.6886541843414307, "learning_rate": 4.7734693877551026e-05, "loss": 3.3936, "step": 1112 }, { "epoch": 4.542857142857143, "grad_norm": 3.356630325317383, "learning_rate": 4.773265306122449e-05, "loss": 3.689, "step": 1113 }, { "epoch": 4.546938775510204, "grad_norm": 2.0304086208343506, "learning_rate": 4.773061224489796e-05, "loss": 4.2475, "step": 1114 }, { "epoch": 4.551020408163265, "grad_norm": 2.846590757369995, "learning_rate": 4.7728571428571434e-05, "loss": 3.8365, "step": 1115 }, { "epoch": 4.555102040816326, "grad_norm": 2.1200313568115234, "learning_rate": 4.77265306122449e-05, "loss": 4.1204, "step": 1116 }, { "epoch": 4.559183673469388, "grad_norm": 3.6199791431427, "learning_rate": 4.772448979591837e-05, "loss": 3.6858, "step": 1117 }, { "epoch": 4.563265306122449, "grad_norm": 3.8170297145843506, "learning_rate": 4.7722448979591835e-05, "loss": 3.7667, "step": 1118 }, { "epoch": 4.56734693877551, "grad_norm": 3.2425732612609863, "learning_rate": 4.772040816326531e-05, "loss": 3.9244, "step": 1119 }, { "epoch": 4.571428571428571, "grad_norm": 2.3945388793945312, "learning_rate": 4.771836734693878e-05, "loss": 3.9522, "step": 1120 }, { "epoch": 4.575510204081633, "grad_norm": 3.1418228149414062, "learning_rate": 4.771632653061225e-05, "loss": 3.8725, "step": 1121 }, { "epoch": 4.579591836734694, "grad_norm": 2.2997078895568848, "learning_rate": 4.771428571428572e-05, "loss": 3.8524, "step": 1122 }, { "epoch": 4.583673469387755, "grad_norm": 2.4458348751068115, "learning_rate": 4.7712244897959184e-05, "loss": 3.8823, "step": 1123 }, { "epoch": 4.587755102040816, "grad_norm": 2.354665994644165, "learning_rate": 4.771020408163265e-05, "loss": 4.2484, "step": 1124 }, { "epoch": 4.591836734693878, "grad_norm": 1.8651002645492554, "learning_rate": 4.7708163265306125e-05, "loss": 4.092, "step": 1125 }, { "epoch": 4.595918367346939, "grad_norm": 1.8886109590530396, "learning_rate": 4.770612244897959e-05, "loss": 4.2366, "step": 1126 }, { "epoch": 4.6, "grad_norm": 2.6979382038116455, "learning_rate": 4.7704081632653066e-05, "loss": 3.8033, "step": 1127 }, { "epoch": 4.604081632653061, "grad_norm": 2.5744528770446777, "learning_rate": 4.7702040816326534e-05, "loss": 3.8716, "step": 1128 }, { "epoch": 4.608163265306122, "grad_norm": 2.359174966812134, "learning_rate": 4.77e-05, "loss": 4.1962, "step": 1129 }, { "epoch": 4.612244897959184, "grad_norm": 2.923964500427246, "learning_rate": 4.7697959183673475e-05, "loss": 3.8771, "step": 1130 }, { "epoch": 4.616326530612245, "grad_norm": 2.1836154460906982, "learning_rate": 4.769591836734694e-05, "loss": 4.0557, "step": 1131 }, { "epoch": 4.6204081632653065, "grad_norm": 2.0617315769195557, "learning_rate": 4.769387755102041e-05, "loss": 4.2881, "step": 1132 }, { "epoch": 4.624489795918367, "grad_norm": 2.8480865955352783, "learning_rate": 4.7691836734693876e-05, "loss": 4.0112, "step": 1133 }, { "epoch": 4.628571428571428, "grad_norm": 2.9142894744873047, "learning_rate": 4.768979591836735e-05, "loss": 3.947, "step": 1134 }, { "epoch": 4.63265306122449, "grad_norm": 3.2820980548858643, "learning_rate": 4.7687755102040824e-05, "loss": 3.9198, "step": 1135 }, { "epoch": 4.636734693877551, "grad_norm": 3.0535709857940674, "learning_rate": 4.768571428571429e-05, "loss": 3.8777, "step": 1136 }, { "epoch": 4.640816326530612, "grad_norm": 4.3979597091674805, "learning_rate": 4.768367346938776e-05, "loss": 3.2549, "step": 1137 }, { "epoch": 4.644897959183673, "grad_norm": 4.188146114349365, "learning_rate": 4.7681632653061225e-05, "loss": 3.5552, "step": 1138 }, { "epoch": 4.648979591836735, "grad_norm": 4.162293910980225, "learning_rate": 4.767959183673469e-05, "loss": 3.7054, "step": 1139 }, { "epoch": 4.653061224489796, "grad_norm": 2.9659106731414795, "learning_rate": 4.7677551020408166e-05, "loss": 4.1245, "step": 1140 }, { "epoch": 4.6571428571428575, "grad_norm": 3.1165997982025146, "learning_rate": 4.767551020408163e-05, "loss": 3.7306, "step": 1141 }, { "epoch": 4.661224489795918, "grad_norm": 3.126666307449341, "learning_rate": 4.767346938775511e-05, "loss": 3.9365, "step": 1142 }, { "epoch": 4.66530612244898, "grad_norm": 3.168727159500122, "learning_rate": 4.7671428571428574e-05, "loss": 4.0664, "step": 1143 }, { "epoch": 4.669387755102041, "grad_norm": 2.496616840362549, "learning_rate": 4.766938775510204e-05, "loss": 3.9061, "step": 1144 }, { "epoch": 4.673469387755102, "grad_norm": 2.865919351577759, "learning_rate": 4.7667346938775515e-05, "loss": 4.0097, "step": 1145 }, { "epoch": 4.677551020408163, "grad_norm": 2.4849088191986084, "learning_rate": 4.766530612244898e-05, "loss": 3.9985, "step": 1146 }, { "epoch": 4.681632653061224, "grad_norm": 3.564363718032837, "learning_rate": 4.766326530612245e-05, "loss": 3.6814, "step": 1147 }, { "epoch": 4.685714285714286, "grad_norm": 2.734201669692993, "learning_rate": 4.766122448979592e-05, "loss": 4.2268, "step": 1148 }, { "epoch": 4.689795918367347, "grad_norm": 2.5020763874053955, "learning_rate": 4.765918367346939e-05, "loss": 3.808, "step": 1149 }, { "epoch": 4.6938775510204085, "grad_norm": 1.9160670042037964, "learning_rate": 4.7657142857142865e-05, "loss": 4.1005, "step": 1150 }, { "epoch": 4.697959183673469, "grad_norm": 3.406057119369507, "learning_rate": 4.765510204081633e-05, "loss": 3.9098, "step": 1151 }, { "epoch": 4.70204081632653, "grad_norm": 2.9728498458862305, "learning_rate": 4.76530612244898e-05, "loss": 3.7738, "step": 1152 }, { "epoch": 4.706122448979592, "grad_norm": 3.418842315673828, "learning_rate": 4.7651020408163266e-05, "loss": 3.5803, "step": 1153 }, { "epoch": 4.710204081632653, "grad_norm": 2.2976057529449463, "learning_rate": 4.764897959183673e-05, "loss": 3.901, "step": 1154 }, { "epoch": 4.714285714285714, "grad_norm": 3.148548126220703, "learning_rate": 4.764693877551021e-05, "loss": 3.946, "step": 1155 }, { "epoch": 4.718367346938775, "grad_norm": 4.110194683074951, "learning_rate": 4.7644897959183674e-05, "loss": 3.6381, "step": 1156 }, { "epoch": 4.722448979591837, "grad_norm": 2.1815130710601807, "learning_rate": 4.764285714285715e-05, "loss": 3.9422, "step": 1157 }, { "epoch": 4.726530612244898, "grad_norm": 2.192923069000244, "learning_rate": 4.7640816326530615e-05, "loss": 4.1678, "step": 1158 }, { "epoch": 4.7306122448979595, "grad_norm": 3.129530906677246, "learning_rate": 4.763877551020409e-05, "loss": 3.6054, "step": 1159 }, { "epoch": 4.73469387755102, "grad_norm": 3.216701030731201, "learning_rate": 4.7636734693877556e-05, "loss": 3.9472, "step": 1160 }, { "epoch": 4.738775510204082, "grad_norm": 3.098956823348999, "learning_rate": 4.763469387755102e-05, "loss": 3.8638, "step": 1161 }, { "epoch": 4.742857142857143, "grad_norm": 3.6093668937683105, "learning_rate": 4.763265306122449e-05, "loss": 3.776, "step": 1162 }, { "epoch": 4.746938775510204, "grad_norm": 2.9016525745391846, "learning_rate": 4.763061224489796e-05, "loss": 4.0356, "step": 1163 }, { "epoch": 4.751020408163265, "grad_norm": 2.494969367980957, "learning_rate": 4.762857142857143e-05, "loss": 3.9625, "step": 1164 }, { "epoch": 4.755102040816326, "grad_norm": 2.267827033996582, "learning_rate": 4.76265306122449e-05, "loss": 4.0932, "step": 1165 }, { "epoch": 4.759183673469388, "grad_norm": 2.874357223510742, "learning_rate": 4.762448979591837e-05, "loss": 3.914, "step": 1166 }, { "epoch": 4.763265306122449, "grad_norm": 3.645012617111206, "learning_rate": 4.762244897959184e-05, "loss": 3.9473, "step": 1167 }, { "epoch": 4.7673469387755105, "grad_norm": 2.7622363567352295, "learning_rate": 4.762040816326531e-05, "loss": 4.0483, "step": 1168 }, { "epoch": 4.771428571428571, "grad_norm": 2.8887715339660645, "learning_rate": 4.761836734693878e-05, "loss": 3.9841, "step": 1169 }, { "epoch": 4.775510204081632, "grad_norm": 3.2529523372650146, "learning_rate": 4.761632653061225e-05, "loss": 3.7816, "step": 1170 }, { "epoch": 4.779591836734694, "grad_norm": 3.2264318466186523, "learning_rate": 4.7614285714285715e-05, "loss": 3.8256, "step": 1171 }, { "epoch": 4.783673469387755, "grad_norm": 3.7765495777130127, "learning_rate": 4.761224489795918e-05, "loss": 3.839, "step": 1172 }, { "epoch": 4.7877551020408164, "grad_norm": 2.10556697845459, "learning_rate": 4.7610204081632656e-05, "loss": 3.9856, "step": 1173 }, { "epoch": 4.791836734693877, "grad_norm": 3.734642505645752, "learning_rate": 4.760816326530613e-05, "loss": 3.425, "step": 1174 }, { "epoch": 4.795918367346939, "grad_norm": 5.38313627243042, "learning_rate": 4.76061224489796e-05, "loss": 3.1023, "step": 1175 }, { "epoch": 4.8, "grad_norm": 3.9918384552001953, "learning_rate": 4.7604081632653064e-05, "loss": 3.7734, "step": 1176 }, { "epoch": 4.8040816326530615, "grad_norm": 2.520986557006836, "learning_rate": 4.760204081632653e-05, "loss": 4.0102, "step": 1177 }, { "epoch": 4.808163265306122, "grad_norm": 2.276796817779541, "learning_rate": 4.76e-05, "loss": 4.096, "step": 1178 }, { "epoch": 4.812244897959184, "grad_norm": 3.597397804260254, "learning_rate": 4.759795918367347e-05, "loss": 3.6283, "step": 1179 }, { "epoch": 4.816326530612245, "grad_norm": 4.897284507751465, "learning_rate": 4.759591836734694e-05, "loss": 3.1732, "step": 1180 }, { "epoch": 4.820408163265306, "grad_norm": 4.149960041046143, "learning_rate": 4.759387755102041e-05, "loss": 3.4561, "step": 1181 }, { "epoch": 4.8244897959183675, "grad_norm": 4.061084270477295, "learning_rate": 4.759183673469388e-05, "loss": 3.419, "step": 1182 }, { "epoch": 4.828571428571428, "grad_norm": 4.566517353057861, "learning_rate": 4.758979591836735e-05, "loss": 3.1853, "step": 1183 }, { "epoch": 4.83265306122449, "grad_norm": 3.74914813041687, "learning_rate": 4.758775510204082e-05, "loss": 3.7692, "step": 1184 }, { "epoch": 4.836734693877551, "grad_norm": 2.594545841217041, "learning_rate": 4.758571428571429e-05, "loss": 4.0994, "step": 1185 }, { "epoch": 4.840816326530613, "grad_norm": 2.6723854541778564, "learning_rate": 4.7583673469387756e-05, "loss": 4.2541, "step": 1186 }, { "epoch": 4.844897959183673, "grad_norm": 2.974424123764038, "learning_rate": 4.758163265306122e-05, "loss": 3.7026, "step": 1187 }, { "epoch": 4.848979591836734, "grad_norm": 3.254304885864258, "learning_rate": 4.75795918367347e-05, "loss": 3.6858, "step": 1188 }, { "epoch": 4.853061224489796, "grad_norm": 2.346928358078003, "learning_rate": 4.757755102040817e-05, "loss": 3.8833, "step": 1189 }, { "epoch": 4.857142857142857, "grad_norm": 2.0816471576690674, "learning_rate": 4.757551020408164e-05, "loss": 4.0348, "step": 1190 }, { "epoch": 4.8612244897959185, "grad_norm": 3.1647725105285645, "learning_rate": 4.7573469387755105e-05, "loss": 4.0679, "step": 1191 }, { "epoch": 4.865306122448979, "grad_norm": 2.0972063541412354, "learning_rate": 4.757142857142857e-05, "loss": 4.1149, "step": 1192 }, { "epoch": 4.869387755102041, "grad_norm": 5.4071245193481445, "learning_rate": 4.756938775510204e-05, "loss": 3.1616, "step": 1193 }, { "epoch": 4.873469387755102, "grad_norm": 3.811211585998535, "learning_rate": 4.756734693877551e-05, "loss": 3.794, "step": 1194 }, { "epoch": 4.877551020408164, "grad_norm": 2.1627562046051025, "learning_rate": 4.756530612244898e-05, "loss": 4.2491, "step": 1195 }, { "epoch": 4.881632653061224, "grad_norm": 3.8031857013702393, "learning_rate": 4.7563265306122454e-05, "loss": 3.6956, "step": 1196 }, { "epoch": 4.885714285714286, "grad_norm": 3.455050230026245, "learning_rate": 4.756122448979592e-05, "loss": 3.6275, "step": 1197 }, { "epoch": 4.889795918367347, "grad_norm": 2.2446558475494385, "learning_rate": 4.755918367346939e-05, "loss": 4.3028, "step": 1198 }, { "epoch": 4.893877551020408, "grad_norm": 2.9382612705230713, "learning_rate": 4.755714285714286e-05, "loss": 3.8128, "step": 1199 }, { "epoch": 4.8979591836734695, "grad_norm": 4.382110118865967, "learning_rate": 4.755510204081633e-05, "loss": 3.7805, "step": 1200 }, { "epoch": 4.90204081632653, "grad_norm": 2.8034791946411133, "learning_rate": 4.7553061224489796e-05, "loss": 3.8123, "step": 1201 }, { "epoch": 4.906122448979592, "grad_norm": 5.062742710113525, "learning_rate": 4.7551020408163263e-05, "loss": 3.6116, "step": 1202 }, { "epoch": 4.910204081632653, "grad_norm": 3.8504979610443115, "learning_rate": 4.754897959183674e-05, "loss": 3.5191, "step": 1203 }, { "epoch": 4.914285714285715, "grad_norm": 2.491396903991699, "learning_rate": 4.754693877551021e-05, "loss": 4.0607, "step": 1204 }, { "epoch": 4.918367346938775, "grad_norm": 1.6760658025741577, "learning_rate": 4.754489795918368e-05, "loss": 4.0119, "step": 1205 }, { "epoch": 4.922448979591836, "grad_norm": 3.133110284805298, "learning_rate": 4.7542857142857146e-05, "loss": 3.3649, "step": 1206 }, { "epoch": 4.926530612244898, "grad_norm": 1.942281723022461, "learning_rate": 4.754081632653061e-05, "loss": 4.3244, "step": 1207 }, { "epoch": 4.930612244897959, "grad_norm": 5.000426292419434, "learning_rate": 4.7538775510204087e-05, "loss": 3.4379, "step": 1208 }, { "epoch": 4.9346938775510205, "grad_norm": 3.2622973918914795, "learning_rate": 4.7536734693877554e-05, "loss": 3.2421, "step": 1209 }, { "epoch": 4.938775510204081, "grad_norm": 2.334247350692749, "learning_rate": 4.753469387755102e-05, "loss": 4.0029, "step": 1210 }, { "epoch": 4.942857142857143, "grad_norm": 2.273616313934326, "learning_rate": 4.7532653061224495e-05, "loss": 4.09, "step": 1211 }, { "epoch": 4.946938775510204, "grad_norm": 2.617971658706665, "learning_rate": 4.753061224489796e-05, "loss": 3.7949, "step": 1212 }, { "epoch": 4.951020408163266, "grad_norm": 3.146622657775879, "learning_rate": 4.7528571428571436e-05, "loss": 3.8287, "step": 1213 }, { "epoch": 4.955102040816326, "grad_norm": 2.0632212162017822, "learning_rate": 4.75265306122449e-05, "loss": 4.149, "step": 1214 }, { "epoch": 4.959183673469388, "grad_norm": 3.68566632270813, "learning_rate": 4.752448979591837e-05, "loss": 3.6245, "step": 1215 }, { "epoch": 4.963265306122449, "grad_norm": 2.153022050857544, "learning_rate": 4.752244897959184e-05, "loss": 4.0004, "step": 1216 }, { "epoch": 4.96734693877551, "grad_norm": 4.071282386779785, "learning_rate": 4.7520408163265304e-05, "loss": 3.5169, "step": 1217 }, { "epoch": 4.9714285714285715, "grad_norm": 2.1909329891204834, "learning_rate": 4.751836734693878e-05, "loss": 3.8238, "step": 1218 }, { "epoch": 4.975510204081632, "grad_norm": 3.259181261062622, "learning_rate": 4.7516326530612245e-05, "loss": 3.5971, "step": 1219 }, { "epoch": 4.979591836734694, "grad_norm": 3.144493818283081, "learning_rate": 4.751428571428572e-05, "loss": 3.6574, "step": 1220 }, { "epoch": 4.983673469387755, "grad_norm": 2.319155693054199, "learning_rate": 4.7512244897959186e-05, "loss": 3.9378, "step": 1221 }, { "epoch": 4.987755102040817, "grad_norm": 2.2440695762634277, "learning_rate": 4.7510204081632653e-05, "loss": 4.4604, "step": 1222 }, { "epoch": 4.9918367346938775, "grad_norm": 3.701066732406616, "learning_rate": 4.750816326530613e-05, "loss": 3.4089, "step": 1223 }, { "epoch": 4.995918367346938, "grad_norm": 1.9803801774978638, "learning_rate": 4.7506122448979594e-05, "loss": 4.1114, "step": 1224 }, { "epoch": 5.0, "grad_norm": 4.903793811798096, "learning_rate": 4.750408163265306e-05, "loss": 3.6993, "step": 1225 }, { "epoch": 5.004081632653061, "grad_norm": 2.1329288482666016, "learning_rate": 4.750204081632653e-05, "loss": 3.9633, "step": 1226 }, { "epoch": 5.0081632653061225, "grad_norm": 2.3859915733337402, "learning_rate": 4.75e-05, "loss": 3.8713, "step": 1227 }, { "epoch": 5.012244897959183, "grad_norm": 2.8195199966430664, "learning_rate": 4.7497959183673477e-05, "loss": 4.121, "step": 1228 }, { "epoch": 5.016326530612245, "grad_norm": 2.4813268184661865, "learning_rate": 4.7495918367346944e-05, "loss": 3.8612, "step": 1229 }, { "epoch": 5.020408163265306, "grad_norm": 2.2456555366516113, "learning_rate": 4.749387755102041e-05, "loss": 4.034, "step": 1230 }, { "epoch": 5.024489795918368, "grad_norm": 3.36114239692688, "learning_rate": 4.749183673469388e-05, "loss": 3.7341, "step": 1231 }, { "epoch": 5.0285714285714285, "grad_norm": 3.3325767517089844, "learning_rate": 4.7489795918367345e-05, "loss": 4.0586, "step": 1232 }, { "epoch": 5.03265306122449, "grad_norm": 2.266441822052002, "learning_rate": 4.748775510204082e-05, "loss": 3.891, "step": 1233 }, { "epoch": 5.036734693877551, "grad_norm": 3.172562599182129, "learning_rate": 4.7485714285714286e-05, "loss": 3.7261, "step": 1234 }, { "epoch": 5.040816326530612, "grad_norm": 3.5944244861602783, "learning_rate": 4.748367346938776e-05, "loss": 3.7542, "step": 1235 }, { "epoch": 5.044897959183674, "grad_norm": 2.057438611984253, "learning_rate": 4.748163265306123e-05, "loss": 4.2378, "step": 1236 }, { "epoch": 5.048979591836734, "grad_norm": 2.348797082901001, "learning_rate": 4.7479591836734694e-05, "loss": 3.9869, "step": 1237 }, { "epoch": 5.053061224489796, "grad_norm": 3.4814326763153076, "learning_rate": 4.747755102040817e-05, "loss": 3.7755, "step": 1238 }, { "epoch": 5.057142857142857, "grad_norm": 2.2350542545318604, "learning_rate": 4.7475510204081635e-05, "loss": 4.0769, "step": 1239 }, { "epoch": 5.061224489795919, "grad_norm": 3.0686678886413574, "learning_rate": 4.74734693877551e-05, "loss": 4.0405, "step": 1240 }, { "epoch": 5.0653061224489795, "grad_norm": 2.179368734359741, "learning_rate": 4.747142857142857e-05, "loss": 4.1179, "step": 1241 }, { "epoch": 5.069387755102041, "grad_norm": 3.2472212314605713, "learning_rate": 4.746938775510204e-05, "loss": 3.5953, "step": 1242 }, { "epoch": 5.073469387755102, "grad_norm": 4.4742960929870605, "learning_rate": 4.746734693877552e-05, "loss": 3.5746, "step": 1243 }, { "epoch": 5.077551020408163, "grad_norm": 4.774820327758789, "learning_rate": 4.7465306122448984e-05, "loss": 3.2945, "step": 1244 }, { "epoch": 5.081632653061225, "grad_norm": 2.3640520572662354, "learning_rate": 4.746326530612245e-05, "loss": 3.7995, "step": 1245 }, { "epoch": 5.085714285714285, "grad_norm": 2.227599859237671, "learning_rate": 4.746122448979592e-05, "loss": 4.0084, "step": 1246 }, { "epoch": 5.089795918367347, "grad_norm": 5.00445556640625, "learning_rate": 4.7459183673469386e-05, "loss": 3.5086, "step": 1247 }, { "epoch": 5.093877551020408, "grad_norm": 3.653754711151123, "learning_rate": 4.745714285714286e-05, "loss": 3.6559, "step": 1248 }, { "epoch": 5.09795918367347, "grad_norm": 2.086796283721924, "learning_rate": 4.745510204081633e-05, "loss": 4.0637, "step": 1249 }, { "epoch": 5.1020408163265305, "grad_norm": 3.9845998287200928, "learning_rate": 4.74530612244898e-05, "loss": 3.6187, "step": 1250 }, { "epoch": 5.106122448979592, "grad_norm": 1.9729236364364624, "learning_rate": 4.745102040816327e-05, "loss": 4.2402, "step": 1251 }, { "epoch": 5.110204081632653, "grad_norm": 2.320157766342163, "learning_rate": 4.744897959183674e-05, "loss": 3.7931, "step": 1252 }, { "epoch": 5.114285714285714, "grad_norm": 3.7149436473846436, "learning_rate": 4.744693877551021e-05, "loss": 3.275, "step": 1253 }, { "epoch": 5.118367346938776, "grad_norm": 4.578770160675049, "learning_rate": 4.7444897959183676e-05, "loss": 3.4625, "step": 1254 }, { "epoch": 5.122448979591836, "grad_norm": 4.202553749084473, "learning_rate": 4.744285714285714e-05, "loss": 3.7209, "step": 1255 }, { "epoch": 5.126530612244898, "grad_norm": 4.5691375732421875, "learning_rate": 4.744081632653061e-05, "loss": 3.7102, "step": 1256 }, { "epoch": 5.130612244897959, "grad_norm": 3.970576286315918, "learning_rate": 4.7438775510204084e-05, "loss": 3.7682, "step": 1257 }, { "epoch": 5.134693877551021, "grad_norm": 2.69069766998291, "learning_rate": 4.743673469387756e-05, "loss": 4.0746, "step": 1258 }, { "epoch": 5.1387755102040815, "grad_norm": 3.380105495452881, "learning_rate": 4.7434693877551025e-05, "loss": 3.8273, "step": 1259 }, { "epoch": 5.142857142857143, "grad_norm": 2.2965199947357178, "learning_rate": 4.743265306122449e-05, "loss": 4.0818, "step": 1260 }, { "epoch": 5.146938775510204, "grad_norm": 3.7393956184387207, "learning_rate": 4.743061224489796e-05, "loss": 3.6769, "step": 1261 }, { "epoch": 5.151020408163265, "grad_norm": 3.376577854156494, "learning_rate": 4.742857142857143e-05, "loss": 3.797, "step": 1262 }, { "epoch": 5.155102040816327, "grad_norm": 3.99249267578125, "learning_rate": 4.74265306122449e-05, "loss": 3.3491, "step": 1263 }, { "epoch": 5.159183673469387, "grad_norm": 2.662099599838257, "learning_rate": 4.742448979591837e-05, "loss": 3.9364, "step": 1264 }, { "epoch": 5.163265306122449, "grad_norm": 2.216075897216797, "learning_rate": 4.7422448979591835e-05, "loss": 4.0616, "step": 1265 }, { "epoch": 5.16734693877551, "grad_norm": 5.034954071044922, "learning_rate": 4.742040816326531e-05, "loss": 3.1764, "step": 1266 }, { "epoch": 5.171428571428572, "grad_norm": 3.3090999126434326, "learning_rate": 4.741836734693878e-05, "loss": 3.6724, "step": 1267 }, { "epoch": 5.1755102040816325, "grad_norm": 4.357028007507324, "learning_rate": 4.741632653061225e-05, "loss": 3.4443, "step": 1268 }, { "epoch": 5.179591836734694, "grad_norm": 2.4580724239349365, "learning_rate": 4.741428571428572e-05, "loss": 3.9615, "step": 1269 }, { "epoch": 5.183673469387755, "grad_norm": 5.241481781005859, "learning_rate": 4.7412244897959184e-05, "loss": 3.0243, "step": 1270 }, { "epoch": 5.187755102040816, "grad_norm": 3.9602346420288086, "learning_rate": 4.741020408163265e-05, "loss": 3.6554, "step": 1271 }, { "epoch": 5.191836734693878, "grad_norm": 2.9043004512786865, "learning_rate": 4.7408163265306125e-05, "loss": 3.9613, "step": 1272 }, { "epoch": 5.1959183673469385, "grad_norm": 4.927180767059326, "learning_rate": 4.740612244897959e-05, "loss": 3.5766, "step": 1273 }, { "epoch": 5.2, "grad_norm": 2.3715708255767822, "learning_rate": 4.7404081632653066e-05, "loss": 4.2774, "step": 1274 }, { "epoch": 5.204081632653061, "grad_norm": 3.0796196460723877, "learning_rate": 4.740204081632653e-05, "loss": 3.7614, "step": 1275 }, { "epoch": 5.208163265306123, "grad_norm": 3.3364920616149902, "learning_rate": 4.74e-05, "loss": 3.8859, "step": 1276 }, { "epoch": 5.2122448979591836, "grad_norm": 3.959017038345337, "learning_rate": 4.7397959183673474e-05, "loss": 3.5848, "step": 1277 }, { "epoch": 5.216326530612245, "grad_norm": 5.659678936004639, "learning_rate": 4.739591836734694e-05, "loss": 3.0375, "step": 1278 }, { "epoch": 5.220408163265306, "grad_norm": 2.623404026031494, "learning_rate": 4.739387755102041e-05, "loss": 3.949, "step": 1279 }, { "epoch": 5.224489795918367, "grad_norm": 2.676919460296631, "learning_rate": 4.7391836734693875e-05, "loss": 4.0479, "step": 1280 }, { "epoch": 5.228571428571429, "grad_norm": 2.4125254154205322, "learning_rate": 4.738979591836735e-05, "loss": 4.041, "step": 1281 }, { "epoch": 5.2326530612244895, "grad_norm": 4.698483467102051, "learning_rate": 4.738775510204082e-05, "loss": 3.1747, "step": 1282 }, { "epoch": 5.236734693877551, "grad_norm": 3.606189012527466, "learning_rate": 4.738571428571429e-05, "loss": 3.7031, "step": 1283 }, { "epoch": 5.240816326530612, "grad_norm": 2.6119613647460938, "learning_rate": 4.738367346938776e-05, "loss": 4.1119, "step": 1284 }, { "epoch": 5.244897959183674, "grad_norm": 5.514690399169922, "learning_rate": 4.7381632653061225e-05, "loss": 2.8558, "step": 1285 }, { "epoch": 5.248979591836735, "grad_norm": 4.151477336883545, "learning_rate": 4.737959183673469e-05, "loss": 3.9542, "step": 1286 }, { "epoch": 5.253061224489796, "grad_norm": 3.241819381713867, "learning_rate": 4.7377551020408166e-05, "loss": 4.0065, "step": 1287 }, { "epoch": 5.257142857142857, "grad_norm": 2.6764488220214844, "learning_rate": 4.737551020408163e-05, "loss": 4.2267, "step": 1288 }, { "epoch": 5.261224489795918, "grad_norm": 3.322535276412964, "learning_rate": 4.737346938775511e-05, "loss": 3.6416, "step": 1289 }, { "epoch": 5.26530612244898, "grad_norm": 2.0631489753723145, "learning_rate": 4.7371428571428574e-05, "loss": 4.1921, "step": 1290 }, { "epoch": 5.2693877551020405, "grad_norm": 3.266005754470825, "learning_rate": 4.736938775510204e-05, "loss": 3.8989, "step": 1291 }, { "epoch": 5.273469387755102, "grad_norm": 3.3327600955963135, "learning_rate": 4.7367346938775515e-05, "loss": 3.8619, "step": 1292 }, { "epoch": 5.277551020408163, "grad_norm": 3.132192611694336, "learning_rate": 4.736530612244898e-05, "loss": 3.9296, "step": 1293 }, { "epoch": 5.281632653061225, "grad_norm": 5.432551860809326, "learning_rate": 4.736326530612245e-05, "loss": 3.2174, "step": 1294 }, { "epoch": 5.285714285714286, "grad_norm": 2.3932607173919678, "learning_rate": 4.7361224489795916e-05, "loss": 4.1182, "step": 1295 }, { "epoch": 5.289795918367347, "grad_norm": 3.6602354049682617, "learning_rate": 4.735918367346939e-05, "loss": 3.8062, "step": 1296 }, { "epoch": 5.293877551020408, "grad_norm": 2.524132490158081, "learning_rate": 4.7357142857142864e-05, "loss": 4.0923, "step": 1297 }, { "epoch": 5.29795918367347, "grad_norm": 2.174565553665161, "learning_rate": 4.735510204081633e-05, "loss": 4.2958, "step": 1298 }, { "epoch": 5.302040816326531, "grad_norm": 3.186697483062744, "learning_rate": 4.73530612244898e-05, "loss": 3.9713, "step": 1299 }, { "epoch": 5.3061224489795915, "grad_norm": 2.969675302505493, "learning_rate": 4.7351020408163265e-05, "loss": 4.0017, "step": 1300 }, { "epoch": 5.310204081632653, "grad_norm": 2.2564287185668945, "learning_rate": 4.734897959183674e-05, "loss": 3.9507, "step": 1301 }, { "epoch": 5.314285714285714, "grad_norm": 3.713836431503296, "learning_rate": 4.7346938775510206e-05, "loss": 3.8306, "step": 1302 }, { "epoch": 5.318367346938776, "grad_norm": 3.0796291828155518, "learning_rate": 4.7344897959183674e-05, "loss": 3.7891, "step": 1303 }, { "epoch": 5.322448979591837, "grad_norm": 2.5685017108917236, "learning_rate": 4.734285714285715e-05, "loss": 3.8186, "step": 1304 }, { "epoch": 5.326530612244898, "grad_norm": 2.344951868057251, "learning_rate": 4.7340816326530615e-05, "loss": 4.2209, "step": 1305 }, { "epoch": 5.330612244897959, "grad_norm": 2.3413755893707275, "learning_rate": 4.733877551020409e-05, "loss": 3.9447, "step": 1306 }, { "epoch": 5.33469387755102, "grad_norm": 2.5536293983459473, "learning_rate": 4.7336734693877556e-05, "loss": 3.9085, "step": 1307 }, { "epoch": 5.338775510204082, "grad_norm": 3.441080093383789, "learning_rate": 4.733469387755102e-05, "loss": 3.8713, "step": 1308 }, { "epoch": 5.3428571428571425, "grad_norm": 2.7866368293762207, "learning_rate": 4.733265306122449e-05, "loss": 3.8313, "step": 1309 }, { "epoch": 5.346938775510204, "grad_norm": 2.5590312480926514, "learning_rate": 4.733061224489796e-05, "loss": 3.9732, "step": 1310 }, { "epoch": 5.351020408163265, "grad_norm": 5.087259292602539, "learning_rate": 4.732857142857143e-05, "loss": 3.0793, "step": 1311 }, { "epoch": 5.355102040816327, "grad_norm": 2.4828789234161377, "learning_rate": 4.7326530612244905e-05, "loss": 3.8221, "step": 1312 }, { "epoch": 5.359183673469388, "grad_norm": 4.055386543273926, "learning_rate": 4.732448979591837e-05, "loss": 3.7346, "step": 1313 }, { "epoch": 5.363265306122449, "grad_norm": 2.9454572200775146, "learning_rate": 4.732244897959184e-05, "loss": 3.8914, "step": 1314 }, { "epoch": 5.36734693877551, "grad_norm": 3.1860690116882324, "learning_rate": 4.7320408163265306e-05, "loss": 3.6132, "step": 1315 }, { "epoch": 5.371428571428572, "grad_norm": 3.2466518878936768, "learning_rate": 4.731836734693878e-05, "loss": 3.8286, "step": 1316 }, { "epoch": 5.375510204081633, "grad_norm": 5.410083770751953, "learning_rate": 4.731632653061225e-05, "loss": 3.0753, "step": 1317 }, { "epoch": 5.3795918367346935, "grad_norm": 4.043656349182129, "learning_rate": 4.7314285714285714e-05, "loss": 3.709, "step": 1318 }, { "epoch": 5.383673469387755, "grad_norm": 3.481121778488159, "learning_rate": 4.731224489795918e-05, "loss": 3.8199, "step": 1319 }, { "epoch": 5.387755102040816, "grad_norm": 4.764258861541748, "learning_rate": 4.7310204081632655e-05, "loss": 3.408, "step": 1320 }, { "epoch": 5.391836734693878, "grad_norm": 4.025985240936279, "learning_rate": 4.730816326530613e-05, "loss": 3.4461, "step": 1321 }, { "epoch": 5.395918367346939, "grad_norm": 2.613243579864502, "learning_rate": 4.7306122448979596e-05, "loss": 3.818, "step": 1322 }, { "epoch": 5.4, "grad_norm": 6.271228790283203, "learning_rate": 4.7304081632653064e-05, "loss": 3.3331, "step": 1323 }, { "epoch": 5.404081632653061, "grad_norm": 3.092454433441162, "learning_rate": 4.730204081632653e-05, "loss": 3.8824, "step": 1324 }, { "epoch": 5.408163265306122, "grad_norm": 2.3777449131011963, "learning_rate": 4.73e-05, "loss": 4.2116, "step": 1325 }, { "epoch": 5.412244897959184, "grad_norm": 2.995842695236206, "learning_rate": 4.729795918367347e-05, "loss": 4.1744, "step": 1326 }, { "epoch": 5.416326530612245, "grad_norm": 3.5668249130249023, "learning_rate": 4.729591836734694e-05, "loss": 3.8162, "step": 1327 }, { "epoch": 5.420408163265306, "grad_norm": 3.105966806411743, "learning_rate": 4.729387755102041e-05, "loss": 3.7751, "step": 1328 }, { "epoch": 5.424489795918367, "grad_norm": 2.3701975345611572, "learning_rate": 4.729183673469388e-05, "loss": 4.0405, "step": 1329 }, { "epoch": 5.428571428571429, "grad_norm": 3.733086347579956, "learning_rate": 4.728979591836735e-05, "loss": 3.7346, "step": 1330 }, { "epoch": 5.43265306122449, "grad_norm": 3.3402040004730225, "learning_rate": 4.728775510204082e-05, "loss": 4.1053, "step": 1331 }, { "epoch": 5.436734693877551, "grad_norm": 5.1626715660095215, "learning_rate": 4.728571428571429e-05, "loss": 3.0319, "step": 1332 }, { "epoch": 5.440816326530612, "grad_norm": 3.5126748085021973, "learning_rate": 4.7283673469387755e-05, "loss": 3.9254, "step": 1333 }, { "epoch": 5.444897959183674, "grad_norm": 4.78966760635376, "learning_rate": 4.728163265306122e-05, "loss": 3.4118, "step": 1334 }, { "epoch": 5.448979591836735, "grad_norm": 2.073439359664917, "learning_rate": 4.7279591836734696e-05, "loss": 4.0832, "step": 1335 }, { "epoch": 5.453061224489796, "grad_norm": 2.0577199459075928, "learning_rate": 4.727755102040817e-05, "loss": 4.1545, "step": 1336 }, { "epoch": 5.457142857142857, "grad_norm": 5.997805595397949, "learning_rate": 4.727551020408164e-05, "loss": 3.216, "step": 1337 }, { "epoch": 5.461224489795918, "grad_norm": 2.95035457611084, "learning_rate": 4.7273469387755104e-05, "loss": 4.1253, "step": 1338 }, { "epoch": 5.46530612244898, "grad_norm": 6.220886707305908, "learning_rate": 4.727142857142857e-05, "loss": 3.7426, "step": 1339 }, { "epoch": 5.469387755102041, "grad_norm": 3.017545223236084, "learning_rate": 4.726938775510204e-05, "loss": 3.7952, "step": 1340 }, { "epoch": 5.473469387755102, "grad_norm": 4.463277816772461, "learning_rate": 4.726734693877551e-05, "loss": 3.3285, "step": 1341 }, { "epoch": 5.477551020408163, "grad_norm": 2.7347469329833984, "learning_rate": 4.726530612244898e-05, "loss": 4.1235, "step": 1342 }, { "epoch": 5.481632653061224, "grad_norm": 3.925990581512451, "learning_rate": 4.7263265306122453e-05, "loss": 4.0635, "step": 1343 }, { "epoch": 5.485714285714286, "grad_norm": 3.498380184173584, "learning_rate": 4.726122448979592e-05, "loss": 3.8963, "step": 1344 }, { "epoch": 5.489795918367347, "grad_norm": 3.748148202896118, "learning_rate": 4.7259183673469394e-05, "loss": 3.8787, "step": 1345 }, { "epoch": 5.493877551020408, "grad_norm": 2.291809558868408, "learning_rate": 4.725714285714286e-05, "loss": 4.1119, "step": 1346 }, { "epoch": 5.497959183673469, "grad_norm": 1.3415695428848267, "learning_rate": 4.725510204081633e-05, "loss": 4.2757, "step": 1347 }, { "epoch": 5.502040816326531, "grad_norm": 2.965339183807373, "learning_rate": 4.7253061224489796e-05, "loss": 3.7534, "step": 1348 }, { "epoch": 5.506122448979592, "grad_norm": 1.8257802724838257, "learning_rate": 4.725102040816326e-05, "loss": 4.3069, "step": 1349 }, { "epoch": 5.510204081632653, "grad_norm": 4.213277339935303, "learning_rate": 4.724897959183674e-05, "loss": 3.5051, "step": 1350 }, { "epoch": 5.514285714285714, "grad_norm": 1.8602097034454346, "learning_rate": 4.724693877551021e-05, "loss": 3.9815, "step": 1351 }, { "epoch": 5.518367346938776, "grad_norm": 2.61942720413208, "learning_rate": 4.724489795918368e-05, "loss": 4.057, "step": 1352 }, { "epoch": 5.522448979591837, "grad_norm": 2.837108850479126, "learning_rate": 4.7242857142857145e-05, "loss": 4.0075, "step": 1353 }, { "epoch": 5.526530612244898, "grad_norm": 2.4747486114501953, "learning_rate": 4.724081632653061e-05, "loss": 3.8105, "step": 1354 }, { "epoch": 5.530612244897959, "grad_norm": 2.9002044200897217, "learning_rate": 4.7238775510204086e-05, "loss": 3.6492, "step": 1355 }, { "epoch": 5.53469387755102, "grad_norm": 2.5395262241363525, "learning_rate": 4.723673469387755e-05, "loss": 3.9371, "step": 1356 }, { "epoch": 5.538775510204082, "grad_norm": 3.337535858154297, "learning_rate": 4.723469387755102e-05, "loss": 3.8856, "step": 1357 }, { "epoch": 5.542857142857143, "grad_norm": 6.554989337921143, "learning_rate": 4.7232653061224494e-05, "loss": 2.9728, "step": 1358 }, { "epoch": 5.546938775510204, "grad_norm": 3.349668502807617, "learning_rate": 4.723061224489796e-05, "loss": 4.029, "step": 1359 }, { "epoch": 5.551020408163265, "grad_norm": 3.0527236461639404, "learning_rate": 4.7228571428571435e-05, "loss": 4.1043, "step": 1360 }, { "epoch": 5.555102040816326, "grad_norm": 2.1902053356170654, "learning_rate": 4.72265306122449e-05, "loss": 3.8951, "step": 1361 }, { "epoch": 5.559183673469388, "grad_norm": 3.793531894683838, "learning_rate": 4.722448979591837e-05, "loss": 3.5906, "step": 1362 }, { "epoch": 5.563265306122449, "grad_norm": 2.6477773189544678, "learning_rate": 4.722244897959184e-05, "loss": 4.1866, "step": 1363 }, { "epoch": 5.56734693877551, "grad_norm": 2.7121312618255615, "learning_rate": 4.7220408163265304e-05, "loss": 4.1641, "step": 1364 }, { "epoch": 5.571428571428571, "grad_norm": 4.2801513671875, "learning_rate": 4.721836734693878e-05, "loss": 3.5507, "step": 1365 }, { "epoch": 5.575510204081633, "grad_norm": 2.436906099319458, "learning_rate": 4.721632653061225e-05, "loss": 3.8584, "step": 1366 }, { "epoch": 5.579591836734694, "grad_norm": 3.361693859100342, "learning_rate": 4.721428571428572e-05, "loss": 3.6357, "step": 1367 }, { "epoch": 5.583673469387755, "grad_norm": 2.1003551483154297, "learning_rate": 4.7212244897959186e-05, "loss": 4.0906, "step": 1368 }, { "epoch": 5.587755102040816, "grad_norm": 4.124433517456055, "learning_rate": 4.721020408163265e-05, "loss": 3.6851, "step": 1369 }, { "epoch": 5.591836734693878, "grad_norm": 4.189907073974609, "learning_rate": 4.720816326530613e-05, "loss": 3.8437, "step": 1370 }, { "epoch": 5.595918367346939, "grad_norm": 2.711799144744873, "learning_rate": 4.7206122448979594e-05, "loss": 3.777, "step": 1371 }, { "epoch": 5.6, "grad_norm": 3.800370216369629, "learning_rate": 4.720408163265306e-05, "loss": 3.9115, "step": 1372 }, { "epoch": 5.604081632653061, "grad_norm": 2.723203659057617, "learning_rate": 4.720204081632653e-05, "loss": 3.9269, "step": 1373 }, { "epoch": 5.608163265306122, "grad_norm": 2.647730827331543, "learning_rate": 4.72e-05, "loss": 3.9945, "step": 1374 }, { "epoch": 5.612244897959184, "grad_norm": 2.2540321350097656, "learning_rate": 4.7197959183673476e-05, "loss": 3.9155, "step": 1375 }, { "epoch": 5.616326530612245, "grad_norm": 4.50883150100708, "learning_rate": 4.719591836734694e-05, "loss": 3.7187, "step": 1376 }, { "epoch": 5.6204081632653065, "grad_norm": 3.5691866874694824, "learning_rate": 4.719387755102041e-05, "loss": 3.5756, "step": 1377 }, { "epoch": 5.624489795918367, "grad_norm": 2.5219929218292236, "learning_rate": 4.719183673469388e-05, "loss": 4.154, "step": 1378 }, { "epoch": 5.628571428571428, "grad_norm": 2.883610963821411, "learning_rate": 4.7189795918367345e-05, "loss": 4.0538, "step": 1379 }, { "epoch": 5.63265306122449, "grad_norm": 2.4423844814300537, "learning_rate": 4.718775510204082e-05, "loss": 3.9522, "step": 1380 }, { "epoch": 5.636734693877551, "grad_norm": 2.9619922637939453, "learning_rate": 4.7185714285714286e-05, "loss": 4.2241, "step": 1381 }, { "epoch": 5.640816326530612, "grad_norm": 4.339078903198242, "learning_rate": 4.718367346938776e-05, "loss": 3.997, "step": 1382 }, { "epoch": 5.644897959183673, "grad_norm": 4.103945732116699, "learning_rate": 4.7181632653061227e-05, "loss": 3.3427, "step": 1383 }, { "epoch": 5.648979591836735, "grad_norm": 3.6753549575805664, "learning_rate": 4.7179591836734694e-05, "loss": 3.6266, "step": 1384 }, { "epoch": 5.653061224489796, "grad_norm": 2.38569974899292, "learning_rate": 4.717755102040817e-05, "loss": 3.9541, "step": 1385 }, { "epoch": 5.6571428571428575, "grad_norm": 7.0252461433410645, "learning_rate": 4.7175510204081635e-05, "loss": 1.8613, "step": 1386 }, { "epoch": 5.661224489795918, "grad_norm": 2.684413194656372, "learning_rate": 4.71734693877551e-05, "loss": 3.8691, "step": 1387 }, { "epoch": 5.66530612244898, "grad_norm": 3.2925968170166016, "learning_rate": 4.717142857142857e-05, "loss": 4.0091, "step": 1388 }, { "epoch": 5.669387755102041, "grad_norm": 3.268996238708496, "learning_rate": 4.716938775510204e-05, "loss": 3.6782, "step": 1389 }, { "epoch": 5.673469387755102, "grad_norm": 2.2815136909484863, "learning_rate": 4.716734693877552e-05, "loss": 3.7768, "step": 1390 }, { "epoch": 5.677551020408163, "grad_norm": 3.3690316677093506, "learning_rate": 4.7165306122448984e-05, "loss": 3.8956, "step": 1391 }, { "epoch": 5.681632653061224, "grad_norm": 3.4791200160980225, "learning_rate": 4.716326530612245e-05, "loss": 4.1373, "step": 1392 }, { "epoch": 5.685714285714286, "grad_norm": 4.305490970611572, "learning_rate": 4.716122448979592e-05, "loss": 3.8829, "step": 1393 }, { "epoch": 5.689795918367347, "grad_norm": 3.793100595474243, "learning_rate": 4.715918367346939e-05, "loss": 3.2219, "step": 1394 }, { "epoch": 5.6938775510204085, "grad_norm": 4.668273448944092, "learning_rate": 4.715714285714286e-05, "loss": 3.0343, "step": 1395 }, { "epoch": 5.697959183673469, "grad_norm": 2.5727882385253906, "learning_rate": 4.7155102040816326e-05, "loss": 3.8272, "step": 1396 }, { "epoch": 5.70204081632653, "grad_norm": 2.1241612434387207, "learning_rate": 4.71530612244898e-05, "loss": 4.0662, "step": 1397 }, { "epoch": 5.706122448979592, "grad_norm": 4.340733051300049, "learning_rate": 4.715102040816327e-05, "loss": 3.5368, "step": 1398 }, { "epoch": 5.710204081632653, "grad_norm": 3.9521825313568115, "learning_rate": 4.714897959183674e-05, "loss": 3.626, "step": 1399 }, { "epoch": 5.714285714285714, "grad_norm": 4.431751251220703, "learning_rate": 4.714693877551021e-05, "loss": 3.6406, "step": 1400 }, { "epoch": 5.718367346938775, "grad_norm": 2.697492837905884, "learning_rate": 4.7144897959183675e-05, "loss": 4.0917, "step": 1401 }, { "epoch": 5.722448979591837, "grad_norm": 2.861018419265747, "learning_rate": 4.714285714285714e-05, "loss": 3.9671, "step": 1402 }, { "epoch": 5.726530612244898, "grad_norm": 2.5768823623657227, "learning_rate": 4.714081632653061e-05, "loss": 4.0793, "step": 1403 }, { "epoch": 5.7306122448979595, "grad_norm": 2.977675676345825, "learning_rate": 4.7138775510204084e-05, "loss": 3.2784, "step": 1404 }, { "epoch": 5.73469387755102, "grad_norm": 4.467680931091309, "learning_rate": 4.713673469387756e-05, "loss": 3.601, "step": 1405 }, { "epoch": 5.738775510204082, "grad_norm": 2.355409860610962, "learning_rate": 4.7134693877551025e-05, "loss": 4.0609, "step": 1406 }, { "epoch": 5.742857142857143, "grad_norm": 4.551338195800781, "learning_rate": 4.713265306122449e-05, "loss": 3.4977, "step": 1407 }, { "epoch": 5.746938775510204, "grad_norm": 2.547366142272949, "learning_rate": 4.713061224489796e-05, "loss": 3.8933, "step": 1408 }, { "epoch": 5.751020408163265, "grad_norm": 2.8850557804107666, "learning_rate": 4.712857142857143e-05, "loss": 3.7886, "step": 1409 }, { "epoch": 5.755102040816326, "grad_norm": 2.388864278793335, "learning_rate": 4.71265306122449e-05, "loss": 3.9102, "step": 1410 }, { "epoch": 5.759183673469388, "grad_norm": 3.2222344875335693, "learning_rate": 4.712448979591837e-05, "loss": 3.9043, "step": 1411 }, { "epoch": 5.763265306122449, "grad_norm": 3.5325376987457275, "learning_rate": 4.712244897959184e-05, "loss": 3.8335, "step": 1412 }, { "epoch": 5.7673469387755105, "grad_norm": 3.106281280517578, "learning_rate": 4.712040816326531e-05, "loss": 3.7059, "step": 1413 }, { "epoch": 5.771428571428571, "grad_norm": 4.255339622497559, "learning_rate": 4.711836734693878e-05, "loss": 3.5633, "step": 1414 }, { "epoch": 5.775510204081632, "grad_norm": 4.382383346557617, "learning_rate": 4.711632653061225e-05, "loss": 3.9206, "step": 1415 }, { "epoch": 5.779591836734694, "grad_norm": 2.76023530960083, "learning_rate": 4.7114285714285716e-05, "loss": 3.8702, "step": 1416 }, { "epoch": 5.783673469387755, "grad_norm": 2.7525575160980225, "learning_rate": 4.711224489795918e-05, "loss": 3.9784, "step": 1417 }, { "epoch": 5.7877551020408164, "grad_norm": 3.3183233737945557, "learning_rate": 4.711020408163265e-05, "loss": 3.6033, "step": 1418 }, { "epoch": 5.791836734693877, "grad_norm": 4.247617244720459, "learning_rate": 4.7108163265306124e-05, "loss": 3.4707, "step": 1419 }, { "epoch": 5.795918367346939, "grad_norm": 4.117465019226074, "learning_rate": 4.71061224489796e-05, "loss": 3.7949, "step": 1420 }, { "epoch": 5.8, "grad_norm": 3.2661569118499756, "learning_rate": 4.7104081632653065e-05, "loss": 3.8852, "step": 1421 }, { "epoch": 5.8040816326530615, "grad_norm": 5.102473258972168, "learning_rate": 4.710204081632653e-05, "loss": 3.0904, "step": 1422 }, { "epoch": 5.808163265306122, "grad_norm": 3.6204674243927, "learning_rate": 4.71e-05, "loss": 3.4129, "step": 1423 }, { "epoch": 5.812244897959184, "grad_norm": 4.20855188369751, "learning_rate": 4.7097959183673474e-05, "loss": 3.7345, "step": 1424 }, { "epoch": 5.816326530612245, "grad_norm": 3.551603078842163, "learning_rate": 4.709591836734694e-05, "loss": 3.7083, "step": 1425 }, { "epoch": 5.820408163265306, "grad_norm": 3.9002063274383545, "learning_rate": 4.709387755102041e-05, "loss": 3.6492, "step": 1426 }, { "epoch": 5.8244897959183675, "grad_norm": 3.4096176624298096, "learning_rate": 4.7091836734693875e-05, "loss": 4.0186, "step": 1427 }, { "epoch": 5.828571428571428, "grad_norm": 4.761558532714844, "learning_rate": 4.708979591836735e-05, "loss": 3.4702, "step": 1428 }, { "epoch": 5.83265306122449, "grad_norm": 3.900190591812134, "learning_rate": 4.708775510204082e-05, "loss": 3.5093, "step": 1429 }, { "epoch": 5.836734693877551, "grad_norm": 2.9348907470703125, "learning_rate": 4.708571428571429e-05, "loss": 4.0314, "step": 1430 }, { "epoch": 5.840816326530613, "grad_norm": 2.6771042346954346, "learning_rate": 4.708367346938776e-05, "loss": 4.0688, "step": 1431 }, { "epoch": 5.844897959183673, "grad_norm": 3.6043078899383545, "learning_rate": 4.7081632653061224e-05, "loss": 4.0571, "step": 1432 }, { "epoch": 5.848979591836734, "grad_norm": 3.0132899284362793, "learning_rate": 4.70795918367347e-05, "loss": 3.8254, "step": 1433 }, { "epoch": 5.853061224489796, "grad_norm": 3.091517448425293, "learning_rate": 4.7077551020408165e-05, "loss": 3.9643, "step": 1434 }, { "epoch": 5.857142857142857, "grad_norm": 4.0164265632629395, "learning_rate": 4.707551020408163e-05, "loss": 3.5683, "step": 1435 }, { "epoch": 5.8612244897959185, "grad_norm": 5.3882551193237305, "learning_rate": 4.7073469387755106e-05, "loss": 3.3151, "step": 1436 }, { "epoch": 5.865306122448979, "grad_norm": 2.585749387741089, "learning_rate": 4.707142857142857e-05, "loss": 3.9889, "step": 1437 }, { "epoch": 5.869387755102041, "grad_norm": 3.959552764892578, "learning_rate": 4.706938775510205e-05, "loss": 3.7099, "step": 1438 }, { "epoch": 5.873469387755102, "grad_norm": 3.2306952476501465, "learning_rate": 4.7067346938775514e-05, "loss": 3.998, "step": 1439 }, { "epoch": 5.877551020408164, "grad_norm": 3.608922004699707, "learning_rate": 4.706530612244898e-05, "loss": 3.9588, "step": 1440 }, { "epoch": 5.881632653061224, "grad_norm": 3.634094715118408, "learning_rate": 4.706326530612245e-05, "loss": 3.8313, "step": 1441 }, { "epoch": 5.885714285714286, "grad_norm": 5.360996246337891, "learning_rate": 4.7061224489795916e-05, "loss": 3.3551, "step": 1442 }, { "epoch": 5.889795918367347, "grad_norm": 2.579594135284424, "learning_rate": 4.705918367346939e-05, "loss": 3.8619, "step": 1443 }, { "epoch": 5.893877551020408, "grad_norm": 2.8573503494262695, "learning_rate": 4.7057142857142864e-05, "loss": 3.9134, "step": 1444 }, { "epoch": 5.8979591836734695, "grad_norm": 2.2150967121124268, "learning_rate": 4.705510204081633e-05, "loss": 4.2663, "step": 1445 }, { "epoch": 5.90204081632653, "grad_norm": 3.4378252029418945, "learning_rate": 4.70530612244898e-05, "loss": 3.7871, "step": 1446 }, { "epoch": 5.906122448979592, "grad_norm": 3.30329966545105, "learning_rate": 4.7051020408163265e-05, "loss": 3.8512, "step": 1447 }, { "epoch": 5.910204081632653, "grad_norm": 2.8953099250793457, "learning_rate": 4.704897959183674e-05, "loss": 4.0303, "step": 1448 }, { "epoch": 5.914285714285715, "grad_norm": 2.2146213054656982, "learning_rate": 4.7046938775510206e-05, "loss": 3.8937, "step": 1449 }, { "epoch": 5.918367346938775, "grad_norm": 3.254061698913574, "learning_rate": 4.704489795918367e-05, "loss": 3.7939, "step": 1450 }, { "epoch": 5.922448979591836, "grad_norm": 3.2802343368530273, "learning_rate": 4.704285714285715e-05, "loss": 3.5265, "step": 1451 }, { "epoch": 5.926530612244898, "grad_norm": 3.8720924854278564, "learning_rate": 4.7040816326530614e-05, "loss": 3.7895, "step": 1452 }, { "epoch": 5.930612244897959, "grad_norm": 2.1756019592285156, "learning_rate": 4.703877551020409e-05, "loss": 4.4182, "step": 1453 }, { "epoch": 5.9346938775510205, "grad_norm": 3.6495602130889893, "learning_rate": 4.7036734693877555e-05, "loss": 3.0364, "step": 1454 }, { "epoch": 5.938775510204081, "grad_norm": 4.214722633361816, "learning_rate": 4.703469387755102e-05, "loss": 3.5418, "step": 1455 }, { "epoch": 5.942857142857143, "grad_norm": 3.0410892963409424, "learning_rate": 4.703265306122449e-05, "loss": 3.9478, "step": 1456 }, { "epoch": 5.946938775510204, "grad_norm": 2.474628210067749, "learning_rate": 4.7030612244897956e-05, "loss": 3.9542, "step": 1457 }, { "epoch": 5.951020408163266, "grad_norm": 4.176997661590576, "learning_rate": 4.702857142857143e-05, "loss": 3.4443, "step": 1458 }, { "epoch": 5.955102040816326, "grad_norm": 4.11267614364624, "learning_rate": 4.7026530612244904e-05, "loss": 3.7278, "step": 1459 }, { "epoch": 5.959183673469388, "grad_norm": 3.062110424041748, "learning_rate": 4.702448979591837e-05, "loss": 3.7939, "step": 1460 }, { "epoch": 5.963265306122449, "grad_norm": 2.8541553020477295, "learning_rate": 4.702244897959184e-05, "loss": 3.7623, "step": 1461 }, { "epoch": 5.96734693877551, "grad_norm": 3.2310099601745605, "learning_rate": 4.7020408163265306e-05, "loss": 3.7341, "step": 1462 }, { "epoch": 5.9714285714285715, "grad_norm": 2.8551695346832275, "learning_rate": 4.701836734693878e-05, "loss": 3.9183, "step": 1463 }, { "epoch": 5.975510204081632, "grad_norm": 3.0027196407318115, "learning_rate": 4.701632653061225e-05, "loss": 4.0166, "step": 1464 }, { "epoch": 5.979591836734694, "grad_norm": 3.46342134475708, "learning_rate": 4.7014285714285714e-05, "loss": 3.8636, "step": 1465 }, { "epoch": 5.983673469387755, "grad_norm": 2.0645554065704346, "learning_rate": 4.701224489795919e-05, "loss": 3.9994, "step": 1466 }, { "epoch": 5.987755102040817, "grad_norm": 5.102116584777832, "learning_rate": 4.7010204081632655e-05, "loss": 3.5245, "step": 1467 }, { "epoch": 5.9918367346938775, "grad_norm": 3.8957550525665283, "learning_rate": 4.700816326530613e-05, "loss": 4.0364, "step": 1468 }, { "epoch": 5.995918367346938, "grad_norm": 2.9270780086517334, "learning_rate": 4.7006122448979596e-05, "loss": 3.7692, "step": 1469 }, { "epoch": 6.0, "grad_norm": 3.153651237487793, "learning_rate": 4.700408163265306e-05, "loss": 3.8713, "step": 1470 }, { "epoch": 6.004081632653061, "grad_norm": 3.4704549312591553, "learning_rate": 4.700204081632653e-05, "loss": 3.8841, "step": 1471 }, { "epoch": 6.0081632653061225, "grad_norm": 2.7232003211975098, "learning_rate": 4.7e-05, "loss": 3.9557, "step": 1472 }, { "epoch": 6.012244897959183, "grad_norm": 2.558441162109375, "learning_rate": 4.699795918367347e-05, "loss": 3.9818, "step": 1473 }, { "epoch": 6.016326530612245, "grad_norm": 3.357534885406494, "learning_rate": 4.699591836734694e-05, "loss": 3.5392, "step": 1474 }, { "epoch": 6.020408163265306, "grad_norm": 4.006593704223633, "learning_rate": 4.699387755102041e-05, "loss": 3.6011, "step": 1475 }, { "epoch": 6.024489795918368, "grad_norm": 4.0395050048828125, "learning_rate": 4.699183673469388e-05, "loss": 3.7876, "step": 1476 }, { "epoch": 6.0285714285714285, "grad_norm": 3.0085415840148926, "learning_rate": 4.698979591836735e-05, "loss": 4.0212, "step": 1477 }, { "epoch": 6.03265306122449, "grad_norm": 3.5859222412109375, "learning_rate": 4.698775510204082e-05, "loss": 3.7423, "step": 1478 }, { "epoch": 6.036734693877551, "grad_norm": 3.5201616287231445, "learning_rate": 4.698571428571429e-05, "loss": 3.6656, "step": 1479 }, { "epoch": 6.040816326530612, "grad_norm": 3.528316020965576, "learning_rate": 4.6983673469387755e-05, "loss": 3.8137, "step": 1480 }, { "epoch": 6.044897959183674, "grad_norm": 2.717564582824707, "learning_rate": 4.698163265306122e-05, "loss": 3.9393, "step": 1481 }, { "epoch": 6.048979591836734, "grad_norm": 2.859187364578247, "learning_rate": 4.6979591836734696e-05, "loss": 3.721, "step": 1482 }, { "epoch": 6.053061224489796, "grad_norm": 3.5077099800109863, "learning_rate": 4.697755102040817e-05, "loss": 3.5073, "step": 1483 }, { "epoch": 6.057142857142857, "grad_norm": 6.252333641052246, "learning_rate": 4.697551020408164e-05, "loss": 3.0105, "step": 1484 }, { "epoch": 6.061224489795919, "grad_norm": 3.1067090034484863, "learning_rate": 4.6973469387755104e-05, "loss": 3.9265, "step": 1485 }, { "epoch": 6.0653061224489795, "grad_norm": 3.802170515060425, "learning_rate": 4.697142857142857e-05, "loss": 3.8568, "step": 1486 }, { "epoch": 6.069387755102041, "grad_norm": 3.5347259044647217, "learning_rate": 4.6969387755102045e-05, "loss": 3.6106, "step": 1487 }, { "epoch": 6.073469387755102, "grad_norm": 4.3730878829956055, "learning_rate": 4.696734693877551e-05, "loss": 3.8892, "step": 1488 }, { "epoch": 6.077551020408163, "grad_norm": 3.3961222171783447, "learning_rate": 4.696530612244898e-05, "loss": 3.5854, "step": 1489 }, { "epoch": 6.081632653061225, "grad_norm": 2.692037582397461, "learning_rate": 4.696326530612245e-05, "loss": 3.9592, "step": 1490 }, { "epoch": 6.085714285714285, "grad_norm": 2.9159910678863525, "learning_rate": 4.696122448979592e-05, "loss": 4.0251, "step": 1491 }, { "epoch": 6.089795918367347, "grad_norm": 3.3660888671875, "learning_rate": 4.6959183673469394e-05, "loss": 3.7269, "step": 1492 }, { "epoch": 6.093877551020408, "grad_norm": 4.913279056549072, "learning_rate": 4.695714285714286e-05, "loss": 3.5424, "step": 1493 }, { "epoch": 6.09795918367347, "grad_norm": 3.8492636680603027, "learning_rate": 4.695510204081633e-05, "loss": 3.7084, "step": 1494 }, { "epoch": 6.1020408163265305, "grad_norm": 3.2924928665161133, "learning_rate": 4.6953061224489795e-05, "loss": 3.7363, "step": 1495 }, { "epoch": 6.106122448979592, "grad_norm": 3.3022267818450928, "learning_rate": 4.695102040816326e-05, "loss": 4.0439, "step": 1496 }, { "epoch": 6.110204081632653, "grad_norm": 3.3258743286132812, "learning_rate": 4.6948979591836736e-05, "loss": 3.8511, "step": 1497 }, { "epoch": 6.114285714285714, "grad_norm": 3.4730982780456543, "learning_rate": 4.694693877551021e-05, "loss": 3.6726, "step": 1498 }, { "epoch": 6.118367346938776, "grad_norm": 5.207296848297119, "learning_rate": 4.694489795918368e-05, "loss": 3.4001, "step": 1499 }, { "epoch": 6.122448979591836, "grad_norm": 3.0459280014038086, "learning_rate": 4.6942857142857145e-05, "loss": 3.7888, "step": 1500 }, { "epoch": 6.126530612244898, "grad_norm": 5.5036444664001465, "learning_rate": 4.694081632653061e-05, "loss": 3.3879, "step": 1501 }, { "epoch": 6.130612244897959, "grad_norm": 3.3715505599975586, "learning_rate": 4.6938775510204086e-05, "loss": 3.718, "step": 1502 }, { "epoch": 6.134693877551021, "grad_norm": 6.328632354736328, "learning_rate": 4.693673469387755e-05, "loss": 2.8528, "step": 1503 }, { "epoch": 6.1387755102040815, "grad_norm": 6.169166088104248, "learning_rate": 4.693469387755102e-05, "loss": 3.1225, "step": 1504 }, { "epoch": 6.142857142857143, "grad_norm": 3.851283073425293, "learning_rate": 4.6932653061224494e-05, "loss": 3.6764, "step": 1505 }, { "epoch": 6.146938775510204, "grad_norm": 4.877415180206299, "learning_rate": 4.693061224489796e-05, "loss": 3.3909, "step": 1506 }, { "epoch": 6.151020408163265, "grad_norm": 2.774212598800659, "learning_rate": 4.6928571428571435e-05, "loss": 3.8542, "step": 1507 }, { "epoch": 6.155102040816327, "grad_norm": 3.5535759925842285, "learning_rate": 4.69265306122449e-05, "loss": 3.8674, "step": 1508 }, { "epoch": 6.159183673469387, "grad_norm": 5.826491832733154, "learning_rate": 4.692448979591837e-05, "loss": 3.38, "step": 1509 }, { "epoch": 6.163265306122449, "grad_norm": 4.554327487945557, "learning_rate": 4.6922448979591836e-05, "loss": 3.5024, "step": 1510 }, { "epoch": 6.16734693877551, "grad_norm": 2.5490009784698486, "learning_rate": 4.69204081632653e-05, "loss": 4.2357, "step": 1511 }, { "epoch": 6.171428571428572, "grad_norm": 4.131012439727783, "learning_rate": 4.691836734693878e-05, "loss": 3.6463, "step": 1512 }, { "epoch": 6.1755102040816325, "grad_norm": 4.733335494995117, "learning_rate": 4.691632653061225e-05, "loss": 3.6437, "step": 1513 }, { "epoch": 6.179591836734694, "grad_norm": 3.35317325592041, "learning_rate": 4.691428571428572e-05, "loss": 4.1284, "step": 1514 }, { "epoch": 6.183673469387755, "grad_norm": 2.5149364471435547, "learning_rate": 4.6912244897959185e-05, "loss": 4.0401, "step": 1515 }, { "epoch": 6.187755102040816, "grad_norm": 2.2018895149230957, "learning_rate": 4.691020408163265e-05, "loss": 4.2207, "step": 1516 }, { "epoch": 6.191836734693878, "grad_norm": 3.7184808254241943, "learning_rate": 4.6908163265306126e-05, "loss": 3.6345, "step": 1517 }, { "epoch": 6.1959183673469385, "grad_norm": 2.849036931991577, "learning_rate": 4.6906122448979593e-05, "loss": 3.905, "step": 1518 }, { "epoch": 6.2, "grad_norm": 4.180723190307617, "learning_rate": 4.690408163265306e-05, "loss": 3.7469, "step": 1519 }, { "epoch": 6.204081632653061, "grad_norm": 6.903573513031006, "learning_rate": 4.6902040816326534e-05, "loss": 2.9728, "step": 1520 }, { "epoch": 6.208163265306123, "grad_norm": 3.370190143585205, "learning_rate": 4.69e-05, "loss": 4.0641, "step": 1521 }, { "epoch": 6.2122448979591836, "grad_norm": 3.75942325592041, "learning_rate": 4.6897959183673475e-05, "loss": 3.7799, "step": 1522 }, { "epoch": 6.216326530612245, "grad_norm": 2.133937120437622, "learning_rate": 4.689591836734694e-05, "loss": 4.3639, "step": 1523 }, { "epoch": 6.220408163265306, "grad_norm": 2.4319982528686523, "learning_rate": 4.689387755102041e-05, "loss": 4.2307, "step": 1524 }, { "epoch": 6.224489795918367, "grad_norm": 2.573714256286621, "learning_rate": 4.689183673469388e-05, "loss": 4.1853, "step": 1525 }, { "epoch": 6.228571428571429, "grad_norm": 2.6122326850891113, "learning_rate": 4.688979591836735e-05, "loss": 4.0824, "step": 1526 }, { "epoch": 6.2326530612244895, "grad_norm": 3.857078790664673, "learning_rate": 4.688775510204082e-05, "loss": 3.4794, "step": 1527 }, { "epoch": 6.236734693877551, "grad_norm": 3.377915143966675, "learning_rate": 4.6885714285714285e-05, "loss": 3.9915, "step": 1528 }, { "epoch": 6.240816326530612, "grad_norm": 3.413435220718384, "learning_rate": 4.688367346938776e-05, "loss": 3.7406, "step": 1529 }, { "epoch": 6.244897959183674, "grad_norm": 2.146453380584717, "learning_rate": 4.6881632653061226e-05, "loss": 4.0577, "step": 1530 }, { "epoch": 6.248979591836735, "grad_norm": 4.52384090423584, "learning_rate": 4.68795918367347e-05, "loss": 3.2747, "step": 1531 }, { "epoch": 6.253061224489796, "grad_norm": 2.9533071517944336, "learning_rate": 4.687755102040817e-05, "loss": 3.6038, "step": 1532 }, { "epoch": 6.257142857142857, "grad_norm": 5.4979352951049805, "learning_rate": 4.6875510204081634e-05, "loss": 2.9968, "step": 1533 }, { "epoch": 6.261224489795918, "grad_norm": 3.0298047065734863, "learning_rate": 4.68734693877551e-05, "loss": 4.0195, "step": 1534 }, { "epoch": 6.26530612244898, "grad_norm": 2.747450351715088, "learning_rate": 4.687142857142857e-05, "loss": 4.0621, "step": 1535 }, { "epoch": 6.2693877551020405, "grad_norm": 3.211946487426758, "learning_rate": 4.686938775510204e-05, "loss": 3.9554, "step": 1536 }, { "epoch": 6.273469387755102, "grad_norm": 5.481612205505371, "learning_rate": 4.6867346938775516e-05, "loss": 2.9637, "step": 1537 }, { "epoch": 6.277551020408163, "grad_norm": 3.2575340270996094, "learning_rate": 4.686530612244898e-05, "loss": 3.8208, "step": 1538 }, { "epoch": 6.281632653061225, "grad_norm": 2.573972702026367, "learning_rate": 4.686326530612245e-05, "loss": 3.7599, "step": 1539 }, { "epoch": 6.285714285714286, "grad_norm": 4.473576068878174, "learning_rate": 4.686122448979592e-05, "loss": 3.5102, "step": 1540 }, { "epoch": 6.289795918367347, "grad_norm": 3.56998872756958, "learning_rate": 4.685918367346939e-05, "loss": 4.0893, "step": 1541 }, { "epoch": 6.293877551020408, "grad_norm": 2.3387715816497803, "learning_rate": 4.685714285714286e-05, "loss": 4.0597, "step": 1542 }, { "epoch": 6.29795918367347, "grad_norm": 2.957627773284912, "learning_rate": 4.6855102040816326e-05, "loss": 4.1534, "step": 1543 }, { "epoch": 6.302040816326531, "grad_norm": 3.483400821685791, "learning_rate": 4.68530612244898e-05, "loss": 3.9688, "step": 1544 }, { "epoch": 6.3061224489795915, "grad_norm": 4.91212797164917, "learning_rate": 4.685102040816327e-05, "loss": 3.6546, "step": 1545 }, { "epoch": 6.310204081632653, "grad_norm": 3.5840585231781006, "learning_rate": 4.684897959183674e-05, "loss": 3.7477, "step": 1546 }, { "epoch": 6.314285714285714, "grad_norm": 4.153992176055908, "learning_rate": 4.684693877551021e-05, "loss": 3.5834, "step": 1547 }, { "epoch": 6.318367346938776, "grad_norm": 3.0763487815856934, "learning_rate": 4.6844897959183675e-05, "loss": 3.9261, "step": 1548 }, { "epoch": 6.322448979591837, "grad_norm": 3.581453800201416, "learning_rate": 4.684285714285714e-05, "loss": 3.8283, "step": 1549 }, { "epoch": 6.326530612244898, "grad_norm": 3.6825647354125977, "learning_rate": 4.684081632653061e-05, "loss": 3.7056, "step": 1550 }, { "epoch": 6.330612244897959, "grad_norm": 3.4555654525756836, "learning_rate": 4.683877551020408e-05, "loss": 3.9394, "step": 1551 }, { "epoch": 6.33469387755102, "grad_norm": 3.310176372528076, "learning_rate": 4.683673469387756e-05, "loss": 3.8589, "step": 1552 }, { "epoch": 6.338775510204082, "grad_norm": 2.662368059158325, "learning_rate": 4.6834693877551024e-05, "loss": 3.8762, "step": 1553 }, { "epoch": 6.3428571428571425, "grad_norm": 5.415704250335693, "learning_rate": 4.683265306122449e-05, "loss": 3.2567, "step": 1554 }, { "epoch": 6.346938775510204, "grad_norm": 4.747129440307617, "learning_rate": 4.683061224489796e-05, "loss": 3.5765, "step": 1555 }, { "epoch": 6.351020408163265, "grad_norm": 3.6128220558166504, "learning_rate": 4.682857142857143e-05, "loss": 3.9799, "step": 1556 }, { "epoch": 6.355102040816327, "grad_norm": 2.6211209297180176, "learning_rate": 4.68265306122449e-05, "loss": 4.0713, "step": 1557 }, { "epoch": 6.359183673469388, "grad_norm": 2.382000207901001, "learning_rate": 4.6824489795918367e-05, "loss": 4.1793, "step": 1558 }, { "epoch": 6.363265306122449, "grad_norm": 2.713893413543701, "learning_rate": 4.682244897959184e-05, "loss": 3.7872, "step": 1559 }, { "epoch": 6.36734693877551, "grad_norm": 4.848239421844482, "learning_rate": 4.682040816326531e-05, "loss": 3.4604, "step": 1560 }, { "epoch": 6.371428571428572, "grad_norm": 3.6877779960632324, "learning_rate": 4.681836734693878e-05, "loss": 3.6682, "step": 1561 }, { "epoch": 6.375510204081633, "grad_norm": 3.676565170288086, "learning_rate": 4.681632653061225e-05, "loss": 3.7935, "step": 1562 }, { "epoch": 6.3795918367346935, "grad_norm": 4.228854179382324, "learning_rate": 4.6814285714285716e-05, "loss": 3.5781, "step": 1563 }, { "epoch": 6.383673469387755, "grad_norm": 2.801658868789673, "learning_rate": 4.681224489795918e-05, "loss": 3.9249, "step": 1564 }, { "epoch": 6.387755102040816, "grad_norm": 2.2002298831939697, "learning_rate": 4.681020408163265e-05, "loss": 4.2726, "step": 1565 }, { "epoch": 6.391836734693878, "grad_norm": 4.065996170043945, "learning_rate": 4.6808163265306124e-05, "loss": 3.6783, "step": 1566 }, { "epoch": 6.395918367346939, "grad_norm": 3.8590595722198486, "learning_rate": 4.68061224489796e-05, "loss": 3.5171, "step": 1567 }, { "epoch": 6.4, "grad_norm": 4.2250494956970215, "learning_rate": 4.6804081632653065e-05, "loss": 3.7014, "step": 1568 }, { "epoch": 6.404081632653061, "grad_norm": 2.59545636177063, "learning_rate": 4.680204081632653e-05, "loss": 3.8846, "step": 1569 }, { "epoch": 6.408163265306122, "grad_norm": 2.209808349609375, "learning_rate": 4.6800000000000006e-05, "loss": 4.0538, "step": 1570 }, { "epoch": 6.412244897959184, "grad_norm": 4.364974021911621, "learning_rate": 4.679795918367347e-05, "loss": 3.7164, "step": 1571 }, { "epoch": 6.416326530612245, "grad_norm": 2.9823191165924072, "learning_rate": 4.679591836734694e-05, "loss": 3.7307, "step": 1572 }, { "epoch": 6.420408163265306, "grad_norm": 3.2456600666046143, "learning_rate": 4.679387755102041e-05, "loss": 3.8308, "step": 1573 }, { "epoch": 6.424489795918367, "grad_norm": 3.2786989212036133, "learning_rate": 4.679183673469388e-05, "loss": 3.7141, "step": 1574 }, { "epoch": 6.428571428571429, "grad_norm": 3.3618662357330322, "learning_rate": 4.6789795918367355e-05, "loss": 3.838, "step": 1575 }, { "epoch": 6.43265306122449, "grad_norm": 3.025897979736328, "learning_rate": 4.678775510204082e-05, "loss": 3.9387, "step": 1576 }, { "epoch": 6.436734693877551, "grad_norm": 4.344978332519531, "learning_rate": 4.678571428571429e-05, "loss": 3.7105, "step": 1577 }, { "epoch": 6.440816326530612, "grad_norm": 3.398200750350952, "learning_rate": 4.6783673469387756e-05, "loss": 3.8153, "step": 1578 }, { "epoch": 6.444897959183674, "grad_norm": 2.2687606811523438, "learning_rate": 4.6781632653061224e-05, "loss": 3.9738, "step": 1579 }, { "epoch": 6.448979591836735, "grad_norm": 4.877004146575928, "learning_rate": 4.67795918367347e-05, "loss": 3.3307, "step": 1580 }, { "epoch": 6.453061224489796, "grad_norm": 4.482147216796875, "learning_rate": 4.6777551020408165e-05, "loss": 3.3941, "step": 1581 }, { "epoch": 6.457142857142857, "grad_norm": 3.2700483798980713, "learning_rate": 4.677551020408163e-05, "loss": 3.8618, "step": 1582 }, { "epoch": 6.461224489795918, "grad_norm": 5.368599891662598, "learning_rate": 4.6773469387755106e-05, "loss": 3.398, "step": 1583 }, { "epoch": 6.46530612244898, "grad_norm": 6.083723068237305, "learning_rate": 4.677142857142857e-05, "loss": 3.1066, "step": 1584 }, { "epoch": 6.469387755102041, "grad_norm": 3.479326009750366, "learning_rate": 4.676938775510205e-05, "loss": 3.9797, "step": 1585 }, { "epoch": 6.473469387755102, "grad_norm": 3.0747427940368652, "learning_rate": 4.6767346938775514e-05, "loss": 3.9481, "step": 1586 }, { "epoch": 6.477551020408163, "grad_norm": 3.937739849090576, "learning_rate": 4.676530612244898e-05, "loss": 3.9482, "step": 1587 }, { "epoch": 6.481632653061224, "grad_norm": 3.107757568359375, "learning_rate": 4.676326530612245e-05, "loss": 3.9511, "step": 1588 }, { "epoch": 6.485714285714286, "grad_norm": 4.019410610198975, "learning_rate": 4.6761224489795915e-05, "loss": 3.3353, "step": 1589 }, { "epoch": 6.489795918367347, "grad_norm": 4.718302249908447, "learning_rate": 4.675918367346939e-05, "loss": 3.6617, "step": 1590 }, { "epoch": 6.493877551020408, "grad_norm": 4.926938056945801, "learning_rate": 4.675714285714286e-05, "loss": 3.8285, "step": 1591 }, { "epoch": 6.497959183673469, "grad_norm": 3.519352436065674, "learning_rate": 4.675510204081633e-05, "loss": 3.9339, "step": 1592 }, { "epoch": 6.502040816326531, "grad_norm": 3.591094732284546, "learning_rate": 4.67530612244898e-05, "loss": 3.8555, "step": 1593 }, { "epoch": 6.506122448979592, "grad_norm": 5.983556270599365, "learning_rate": 4.6751020408163264e-05, "loss": 3.1254, "step": 1594 }, { "epoch": 6.510204081632653, "grad_norm": 2.5619943141937256, "learning_rate": 4.674897959183674e-05, "loss": 3.8639, "step": 1595 }, { "epoch": 6.514285714285714, "grad_norm": 3.0176947116851807, "learning_rate": 4.6746938775510205e-05, "loss": 3.9498, "step": 1596 }, { "epoch": 6.518367346938776, "grad_norm": 2.752774715423584, "learning_rate": 4.674489795918367e-05, "loss": 4.0771, "step": 1597 }, { "epoch": 6.522448979591837, "grad_norm": 2.756340503692627, "learning_rate": 4.6742857142857146e-05, "loss": 4.0164, "step": 1598 }, { "epoch": 6.526530612244898, "grad_norm": 3.8569631576538086, "learning_rate": 4.6740816326530614e-05, "loss": 3.2593, "step": 1599 }, { "epoch": 6.530612244897959, "grad_norm": 3.2720775604248047, "learning_rate": 4.673877551020409e-05, "loss": 3.634, "step": 1600 }, { "epoch": 6.53469387755102, "grad_norm": 2.380638360977173, "learning_rate": 4.6736734693877555e-05, "loss": 3.7358, "step": 1601 }, { "epoch": 6.538775510204082, "grad_norm": 3.5848608016967773, "learning_rate": 4.673469387755102e-05, "loss": 4.0292, "step": 1602 }, { "epoch": 6.542857142857143, "grad_norm": 5.201129913330078, "learning_rate": 4.673265306122449e-05, "loss": 3.0009, "step": 1603 }, { "epoch": 6.546938775510204, "grad_norm": 2.747796058654785, "learning_rate": 4.6730612244897956e-05, "loss": 3.8334, "step": 1604 }, { "epoch": 6.551020408163265, "grad_norm": 2.6218156814575195, "learning_rate": 4.672857142857143e-05, "loss": 3.9328, "step": 1605 }, { "epoch": 6.555102040816326, "grad_norm": 2.769289016723633, "learning_rate": 4.6726530612244904e-05, "loss": 4.0484, "step": 1606 }, { "epoch": 6.559183673469388, "grad_norm": 7.393383502960205, "learning_rate": 4.672448979591837e-05, "loss": 3.2451, "step": 1607 }, { "epoch": 6.563265306122449, "grad_norm": 1.3555930852890015, "learning_rate": 4.672244897959184e-05, "loss": 4.2392, "step": 1608 }, { "epoch": 6.56734693877551, "grad_norm": 3.129751205444336, "learning_rate": 4.6720408163265305e-05, "loss": 3.9285, "step": 1609 }, { "epoch": 6.571428571428571, "grad_norm": 3.111750841140747, "learning_rate": 4.671836734693878e-05, "loss": 3.7154, "step": 1610 }, { "epoch": 6.575510204081633, "grad_norm": 3.4805335998535156, "learning_rate": 4.6716326530612246e-05, "loss": 3.4631, "step": 1611 }, { "epoch": 6.579591836734694, "grad_norm": 2.0373375415802, "learning_rate": 4.671428571428571e-05, "loss": 3.9649, "step": 1612 }, { "epoch": 6.583673469387755, "grad_norm": 2.8728833198547363, "learning_rate": 4.671224489795919e-05, "loss": 3.8669, "step": 1613 }, { "epoch": 6.587755102040816, "grad_norm": 2.880336046218872, "learning_rate": 4.671020408163266e-05, "loss": 4.1142, "step": 1614 }, { "epoch": 6.591836734693878, "grad_norm": 3.2361032962799072, "learning_rate": 4.670816326530613e-05, "loss": 3.9205, "step": 1615 }, { "epoch": 6.595918367346939, "grad_norm": 2.81103253364563, "learning_rate": 4.6706122448979595e-05, "loss": 4.03, "step": 1616 }, { "epoch": 6.6, "grad_norm": 3.4008970260620117, "learning_rate": 4.670408163265306e-05, "loss": 3.9534, "step": 1617 }, { "epoch": 6.604081632653061, "grad_norm": 3.5960426330566406, "learning_rate": 4.670204081632653e-05, "loss": 3.6735, "step": 1618 }, { "epoch": 6.608163265306122, "grad_norm": 4.765956401824951, "learning_rate": 4.6700000000000003e-05, "loss": 3.6703, "step": 1619 }, { "epoch": 6.612244897959184, "grad_norm": 2.619323492050171, "learning_rate": 4.669795918367347e-05, "loss": 4.241, "step": 1620 }, { "epoch": 6.616326530612245, "grad_norm": 3.107386589050293, "learning_rate": 4.6695918367346945e-05, "loss": 3.84, "step": 1621 }, { "epoch": 6.6204081632653065, "grad_norm": 2.3824565410614014, "learning_rate": 4.669387755102041e-05, "loss": 3.8431, "step": 1622 }, { "epoch": 6.624489795918367, "grad_norm": 2.615316390991211, "learning_rate": 4.669183673469388e-05, "loss": 3.7767, "step": 1623 }, { "epoch": 6.628571428571428, "grad_norm": 3.523437738418579, "learning_rate": 4.668979591836735e-05, "loss": 3.7883, "step": 1624 }, { "epoch": 6.63265306122449, "grad_norm": 2.8260202407836914, "learning_rate": 4.668775510204082e-05, "loss": 4.0404, "step": 1625 }, { "epoch": 6.636734693877551, "grad_norm": 2.537468910217285, "learning_rate": 4.668571428571429e-05, "loss": 3.9327, "step": 1626 }, { "epoch": 6.640816326530612, "grad_norm": 3.101623296737671, "learning_rate": 4.6683673469387754e-05, "loss": 4.0878, "step": 1627 }, { "epoch": 6.644897959183673, "grad_norm": 3.9979095458984375, "learning_rate": 4.668163265306123e-05, "loss": 3.9606, "step": 1628 }, { "epoch": 6.648979591836735, "grad_norm": 3.125481128692627, "learning_rate": 4.66795918367347e-05, "loss": 3.8305, "step": 1629 }, { "epoch": 6.653061224489796, "grad_norm": 3.699666976928711, "learning_rate": 4.667755102040817e-05, "loss": 3.7891, "step": 1630 }, { "epoch": 6.6571428571428575, "grad_norm": 5.687355995178223, "learning_rate": 4.6675510204081636e-05, "loss": 2.7792, "step": 1631 }, { "epoch": 6.661224489795918, "grad_norm": 3.917177438735962, "learning_rate": 4.66734693877551e-05, "loss": 3.5934, "step": 1632 }, { "epoch": 6.66530612244898, "grad_norm": 8.541518211364746, "learning_rate": 4.667142857142857e-05, "loss": 1.7356, "step": 1633 }, { "epoch": 6.669387755102041, "grad_norm": 3.619117259979248, "learning_rate": 4.6669387755102044e-05, "loss": 3.7557, "step": 1634 }, { "epoch": 6.673469387755102, "grad_norm": 3.197187662124634, "learning_rate": 4.666734693877551e-05, "loss": 3.7657, "step": 1635 }, { "epoch": 6.677551020408163, "grad_norm": 4.093136787414551, "learning_rate": 4.666530612244898e-05, "loss": 3.4569, "step": 1636 }, { "epoch": 6.681632653061224, "grad_norm": 5.447947978973389, "learning_rate": 4.666326530612245e-05, "loss": 3.2224, "step": 1637 }, { "epoch": 6.685714285714286, "grad_norm": 2.6144657135009766, "learning_rate": 4.666122448979592e-05, "loss": 4.0025, "step": 1638 }, { "epoch": 6.689795918367347, "grad_norm": 2.190629720687866, "learning_rate": 4.6659183673469393e-05, "loss": 4.1334, "step": 1639 }, { "epoch": 6.6938775510204085, "grad_norm": 2.451249599456787, "learning_rate": 4.665714285714286e-05, "loss": 3.9793, "step": 1640 }, { "epoch": 6.697959183673469, "grad_norm": 4.944197177886963, "learning_rate": 4.665510204081633e-05, "loss": 3.4605, "step": 1641 }, { "epoch": 6.70204081632653, "grad_norm": 4.239243030548096, "learning_rate": 4.6653061224489795e-05, "loss": 3.7833, "step": 1642 }, { "epoch": 6.706122448979592, "grad_norm": 6.291205406188965, "learning_rate": 4.665102040816326e-05, "loss": 3.0474, "step": 1643 }, { "epoch": 6.710204081632653, "grad_norm": 5.196091175079346, "learning_rate": 4.6648979591836736e-05, "loss": 3.4064, "step": 1644 }, { "epoch": 6.714285714285714, "grad_norm": 2.834550619125366, "learning_rate": 4.664693877551021e-05, "loss": 3.7622, "step": 1645 }, { "epoch": 6.718367346938775, "grad_norm": 4.238369941711426, "learning_rate": 4.664489795918368e-05, "loss": 3.7427, "step": 1646 }, { "epoch": 6.722448979591837, "grad_norm": 3.6790008544921875, "learning_rate": 4.6642857142857144e-05, "loss": 4.0662, "step": 1647 }, { "epoch": 6.726530612244898, "grad_norm": 2.4492387771606445, "learning_rate": 4.664081632653061e-05, "loss": 4.0502, "step": 1648 }, { "epoch": 6.7306122448979595, "grad_norm": 2.3098320960998535, "learning_rate": 4.6638775510204085e-05, "loss": 4.1699, "step": 1649 }, { "epoch": 6.73469387755102, "grad_norm": 3.138406991958618, "learning_rate": 4.663673469387755e-05, "loss": 3.8905, "step": 1650 }, { "epoch": 6.738775510204082, "grad_norm": 2.7032082080841064, "learning_rate": 4.663469387755102e-05, "loss": 3.7795, "step": 1651 }, { "epoch": 6.742857142857143, "grad_norm": 3.271637201309204, "learning_rate": 4.663265306122449e-05, "loss": 3.817, "step": 1652 }, { "epoch": 6.746938775510204, "grad_norm": 5.359696865081787, "learning_rate": 4.663061224489796e-05, "loss": 2.9569, "step": 1653 }, { "epoch": 6.751020408163265, "grad_norm": 4.283518314361572, "learning_rate": 4.6628571428571434e-05, "loss": 3.6907, "step": 1654 }, { "epoch": 6.755102040816326, "grad_norm": 2.937781810760498, "learning_rate": 4.66265306122449e-05, "loss": 3.8445, "step": 1655 }, { "epoch": 6.759183673469388, "grad_norm": 2.9863269329071045, "learning_rate": 4.662448979591837e-05, "loss": 4.1981, "step": 1656 }, { "epoch": 6.763265306122449, "grad_norm": 2.7597334384918213, "learning_rate": 4.6622448979591836e-05, "loss": 3.8304, "step": 1657 }, { "epoch": 6.7673469387755105, "grad_norm": 2.53123140335083, "learning_rate": 4.66204081632653e-05, "loss": 4.1013, "step": 1658 }, { "epoch": 6.771428571428571, "grad_norm": 2.384814500808716, "learning_rate": 4.6618367346938777e-05, "loss": 3.891, "step": 1659 }, { "epoch": 6.775510204081632, "grad_norm": 3.664268970489502, "learning_rate": 4.661632653061225e-05, "loss": 4.0019, "step": 1660 }, { "epoch": 6.779591836734694, "grad_norm": 5.111462593078613, "learning_rate": 4.661428571428572e-05, "loss": 3.8899, "step": 1661 }, { "epoch": 6.783673469387755, "grad_norm": 5.132206916809082, "learning_rate": 4.6612244897959185e-05, "loss": 3.1578, "step": 1662 }, { "epoch": 6.7877551020408164, "grad_norm": 3.9065723419189453, "learning_rate": 4.661020408163266e-05, "loss": 3.8451, "step": 1663 }, { "epoch": 6.791836734693877, "grad_norm": 2.77536678314209, "learning_rate": 4.6608163265306126e-05, "loss": 3.8874, "step": 1664 }, { "epoch": 6.795918367346939, "grad_norm": 4.864473342895508, "learning_rate": 4.660612244897959e-05, "loss": 3.5589, "step": 1665 }, { "epoch": 6.8, "grad_norm": 2.5170254707336426, "learning_rate": 4.660408163265306e-05, "loss": 3.9184, "step": 1666 }, { "epoch": 6.8040816326530615, "grad_norm": 3.140448570251465, "learning_rate": 4.6602040816326534e-05, "loss": 3.7273, "step": 1667 }, { "epoch": 6.808163265306122, "grad_norm": 2.3237078189849854, "learning_rate": 4.660000000000001e-05, "loss": 4.0884, "step": 1668 }, { "epoch": 6.812244897959184, "grad_norm": 2.9317336082458496, "learning_rate": 4.6597959183673475e-05, "loss": 4.0563, "step": 1669 }, { "epoch": 6.816326530612245, "grad_norm": 5.1238837242126465, "learning_rate": 4.659591836734694e-05, "loss": 3.5364, "step": 1670 }, { "epoch": 6.820408163265306, "grad_norm": 5.017144680023193, "learning_rate": 4.659387755102041e-05, "loss": 3.4573, "step": 1671 }, { "epoch": 6.8244897959183675, "grad_norm": 2.559743881225586, "learning_rate": 4.6591836734693876e-05, "loss": 3.7664, "step": 1672 }, { "epoch": 6.828571428571428, "grad_norm": 2.703197717666626, "learning_rate": 4.658979591836735e-05, "loss": 4.1143, "step": 1673 }, { "epoch": 6.83265306122449, "grad_norm": 3.6187145709991455, "learning_rate": 4.658775510204082e-05, "loss": 3.7461, "step": 1674 }, { "epoch": 6.836734693877551, "grad_norm": 5.446225166320801, "learning_rate": 4.658571428571429e-05, "loss": 3.2869, "step": 1675 }, { "epoch": 6.840816326530613, "grad_norm": 3.8010826110839844, "learning_rate": 4.658367346938776e-05, "loss": 3.1969, "step": 1676 }, { "epoch": 6.844897959183673, "grad_norm": 2.3872616291046143, "learning_rate": 4.6581632653061226e-05, "loss": 4.0489, "step": 1677 }, { "epoch": 6.848979591836734, "grad_norm": 3.2691304683685303, "learning_rate": 4.65795918367347e-05, "loss": 3.7829, "step": 1678 }, { "epoch": 6.853061224489796, "grad_norm": 3.0824859142303467, "learning_rate": 4.6577551020408167e-05, "loss": 3.9809, "step": 1679 }, { "epoch": 6.857142857142857, "grad_norm": 2.2076194286346436, "learning_rate": 4.6575510204081634e-05, "loss": 4.0306, "step": 1680 }, { "epoch": 6.8612244897959185, "grad_norm": 4.217861652374268, "learning_rate": 4.65734693877551e-05, "loss": 3.2678, "step": 1681 }, { "epoch": 6.865306122448979, "grad_norm": 3.9295849800109863, "learning_rate": 4.6571428571428575e-05, "loss": 3.7568, "step": 1682 }, { "epoch": 6.869387755102041, "grad_norm": 4.328980445861816, "learning_rate": 4.656938775510204e-05, "loss": 4.027, "step": 1683 }, { "epoch": 6.873469387755102, "grad_norm": 4.190057277679443, "learning_rate": 4.6567346938775516e-05, "loss": 3.5626, "step": 1684 }, { "epoch": 6.877551020408164, "grad_norm": 3.777836561203003, "learning_rate": 4.656530612244898e-05, "loss": 3.6731, "step": 1685 }, { "epoch": 6.881632653061224, "grad_norm": 2.5241596698760986, "learning_rate": 4.656326530612245e-05, "loss": 3.9413, "step": 1686 }, { "epoch": 6.885714285714286, "grad_norm": 3.6199347972869873, "learning_rate": 4.656122448979592e-05, "loss": 3.6483, "step": 1687 }, { "epoch": 6.889795918367347, "grad_norm": 1.9331564903259277, "learning_rate": 4.655918367346939e-05, "loss": 4.2683, "step": 1688 }, { "epoch": 6.893877551020408, "grad_norm": 4.875341415405273, "learning_rate": 4.655714285714286e-05, "loss": 3.9521, "step": 1689 }, { "epoch": 6.8979591836734695, "grad_norm": 3.193406820297241, "learning_rate": 4.6555102040816325e-05, "loss": 4.1948, "step": 1690 }, { "epoch": 6.90204081632653, "grad_norm": 2.5293850898742676, "learning_rate": 4.65530612244898e-05, "loss": 3.9302, "step": 1691 }, { "epoch": 6.906122448979592, "grad_norm": 4.248106002807617, "learning_rate": 4.6551020408163266e-05, "loss": 2.9923, "step": 1692 }, { "epoch": 6.910204081632653, "grad_norm": 2.5102102756500244, "learning_rate": 4.654897959183674e-05, "loss": 3.7558, "step": 1693 }, { "epoch": 6.914285714285715, "grad_norm": 4.958053112030029, "learning_rate": 4.654693877551021e-05, "loss": 3.4595, "step": 1694 }, { "epoch": 6.918367346938775, "grad_norm": 2.6554505825042725, "learning_rate": 4.6544897959183674e-05, "loss": 3.9215, "step": 1695 }, { "epoch": 6.922448979591836, "grad_norm": 3.838826894760132, "learning_rate": 4.654285714285714e-05, "loss": 3.5776, "step": 1696 }, { "epoch": 6.926530612244898, "grad_norm": 4.55290412902832, "learning_rate": 4.654081632653061e-05, "loss": 3.6682, "step": 1697 }, { "epoch": 6.930612244897959, "grad_norm": 3.610182046890259, "learning_rate": 4.653877551020408e-05, "loss": 3.8348, "step": 1698 }, { "epoch": 6.9346938775510205, "grad_norm": 3.7271549701690674, "learning_rate": 4.6536734693877556e-05, "loss": 3.5935, "step": 1699 }, { "epoch": 6.938775510204081, "grad_norm": 4.7940802574157715, "learning_rate": 4.6534693877551024e-05, "loss": 3.6448, "step": 1700 }, { "epoch": 6.942857142857143, "grad_norm": 4.347556114196777, "learning_rate": 4.653265306122449e-05, "loss": 3.5932, "step": 1701 }, { "epoch": 6.946938775510204, "grad_norm": 4.0162272453308105, "learning_rate": 4.653061224489796e-05, "loss": 3.7277, "step": 1702 }, { "epoch": 6.951020408163266, "grad_norm": 4.478635311126709, "learning_rate": 4.652857142857143e-05, "loss": 3.3257, "step": 1703 }, { "epoch": 6.955102040816326, "grad_norm": 2.938697338104248, "learning_rate": 4.65265306122449e-05, "loss": 3.8688, "step": 1704 }, { "epoch": 6.959183673469388, "grad_norm": 4.402482986450195, "learning_rate": 4.6524489795918366e-05, "loss": 3.3774, "step": 1705 }, { "epoch": 6.963265306122449, "grad_norm": 2.8209052085876465, "learning_rate": 4.652244897959184e-05, "loss": 3.8419, "step": 1706 }, { "epoch": 6.96734693877551, "grad_norm": 3.5361108779907227, "learning_rate": 4.6520408163265314e-05, "loss": 3.9542, "step": 1707 }, { "epoch": 6.9714285714285715, "grad_norm": 3.596496105194092, "learning_rate": 4.651836734693878e-05, "loss": 3.8095, "step": 1708 }, { "epoch": 6.975510204081632, "grad_norm": 2.591906785964966, "learning_rate": 4.651632653061225e-05, "loss": 4.0088, "step": 1709 }, { "epoch": 6.979591836734694, "grad_norm": 3.1859922409057617, "learning_rate": 4.6514285714285715e-05, "loss": 3.8859, "step": 1710 }, { "epoch": 6.983673469387755, "grad_norm": 2.8107962608337402, "learning_rate": 4.651224489795918e-05, "loss": 3.8254, "step": 1711 }, { "epoch": 6.987755102040817, "grad_norm": 4.163600444793701, "learning_rate": 4.6510204081632656e-05, "loss": 3.8528, "step": 1712 }, { "epoch": 6.9918367346938775, "grad_norm": 4.7575788497924805, "learning_rate": 4.650816326530612e-05, "loss": 3.5076, "step": 1713 }, { "epoch": 6.995918367346938, "grad_norm": 4.2431206703186035, "learning_rate": 4.65061224489796e-05, "loss": 3.3046, "step": 1714 }, { "epoch": 7.0, "grad_norm": 5.235470771789551, "learning_rate": 4.6504081632653064e-05, "loss": 3.3981, "step": 1715 }, { "epoch": 7.004081632653061, "grad_norm": 3.4686496257781982, "learning_rate": 4.650204081632653e-05, "loss": 3.9293, "step": 1716 }, { "epoch": 7.0081632653061225, "grad_norm": 2.341029167175293, "learning_rate": 4.6500000000000005e-05, "loss": 4.0345, "step": 1717 }, { "epoch": 7.012244897959183, "grad_norm": 2.3826749324798584, "learning_rate": 4.649795918367347e-05, "loss": 4.0387, "step": 1718 }, { "epoch": 7.016326530612245, "grad_norm": 4.437036991119385, "learning_rate": 4.649591836734694e-05, "loss": 3.4645, "step": 1719 }, { "epoch": 7.020408163265306, "grad_norm": 3.1922435760498047, "learning_rate": 4.649387755102041e-05, "loss": 3.759, "step": 1720 }, { "epoch": 7.024489795918368, "grad_norm": 3.0202417373657227, "learning_rate": 4.649183673469388e-05, "loss": 3.8303, "step": 1721 }, { "epoch": 7.0285714285714285, "grad_norm": 4.3093581199646, "learning_rate": 4.6489795918367355e-05, "loss": 3.3042, "step": 1722 }, { "epoch": 7.03265306122449, "grad_norm": 3.813889503479004, "learning_rate": 4.648775510204082e-05, "loss": 3.7951, "step": 1723 }, { "epoch": 7.036734693877551, "grad_norm": 3.790637969970703, "learning_rate": 4.648571428571429e-05, "loss": 3.5662, "step": 1724 }, { "epoch": 7.040816326530612, "grad_norm": 3.2049760818481445, "learning_rate": 4.6483673469387756e-05, "loss": 4.1225, "step": 1725 }, { "epoch": 7.044897959183674, "grad_norm": 3.4425716400146484, "learning_rate": 4.648163265306122e-05, "loss": 3.985, "step": 1726 }, { "epoch": 7.048979591836734, "grad_norm": 3.6680405139923096, "learning_rate": 4.64795918367347e-05, "loss": 3.6875, "step": 1727 }, { "epoch": 7.053061224489796, "grad_norm": 3.4859585762023926, "learning_rate": 4.6477551020408164e-05, "loss": 3.9337, "step": 1728 }, { "epoch": 7.057142857142857, "grad_norm": 5.2459821701049805, "learning_rate": 4.647551020408164e-05, "loss": 3.8407, "step": 1729 }, { "epoch": 7.061224489795919, "grad_norm": 5.827667236328125, "learning_rate": 4.6473469387755105e-05, "loss": 3.0685, "step": 1730 }, { "epoch": 7.0653061224489795, "grad_norm": 3.5377230644226074, "learning_rate": 4.647142857142857e-05, "loss": 3.6775, "step": 1731 }, { "epoch": 7.069387755102041, "grad_norm": 3.0701491832733154, "learning_rate": 4.6469387755102046e-05, "loss": 3.8859, "step": 1732 }, { "epoch": 7.073469387755102, "grad_norm": 3.9325640201568604, "learning_rate": 4.646734693877551e-05, "loss": 3.7952, "step": 1733 }, { "epoch": 7.077551020408163, "grad_norm": 3.9684853553771973, "learning_rate": 4.646530612244898e-05, "loss": 3.7303, "step": 1734 }, { "epoch": 7.081632653061225, "grad_norm": 3.3211326599121094, "learning_rate": 4.646326530612245e-05, "loss": 3.5523, "step": 1735 }, { "epoch": 7.085714285714285, "grad_norm": 2.0155029296875, "learning_rate": 4.646122448979592e-05, "loss": 3.9344, "step": 1736 }, { "epoch": 7.089795918367347, "grad_norm": 2.8321259021759033, "learning_rate": 4.645918367346939e-05, "loss": 3.7254, "step": 1737 }, { "epoch": 7.093877551020408, "grad_norm": 2.400239944458008, "learning_rate": 4.645714285714286e-05, "loss": 3.8611, "step": 1738 }, { "epoch": 7.09795918367347, "grad_norm": 4.521646976470947, "learning_rate": 4.645510204081633e-05, "loss": 3.7366, "step": 1739 }, { "epoch": 7.1020408163265305, "grad_norm": 4.0420308113098145, "learning_rate": 4.64530612244898e-05, "loss": 4.0054, "step": 1740 }, { "epoch": 7.106122448979592, "grad_norm": 3.314556360244751, "learning_rate": 4.6451020408163264e-05, "loss": 3.5743, "step": 1741 }, { "epoch": 7.110204081632653, "grad_norm": 2.7982025146484375, "learning_rate": 4.644897959183674e-05, "loss": 4.0144, "step": 1742 }, { "epoch": 7.114285714285714, "grad_norm": 3.986062526702881, "learning_rate": 4.6446938775510205e-05, "loss": 3.5322, "step": 1743 }, { "epoch": 7.118367346938776, "grad_norm": 3.2673070430755615, "learning_rate": 4.644489795918367e-05, "loss": 3.8677, "step": 1744 }, { "epoch": 7.122448979591836, "grad_norm": 3.238614559173584, "learning_rate": 4.6442857142857146e-05, "loss": 3.7309, "step": 1745 }, { "epoch": 7.126530612244898, "grad_norm": 2.642702102661133, "learning_rate": 4.644081632653061e-05, "loss": 4.1644, "step": 1746 }, { "epoch": 7.130612244897959, "grad_norm": 3.7421715259552, "learning_rate": 4.643877551020409e-05, "loss": 3.5494, "step": 1747 }, { "epoch": 7.134693877551021, "grad_norm": 3.622678279876709, "learning_rate": 4.6436734693877554e-05, "loss": 3.8246, "step": 1748 }, { "epoch": 7.1387755102040815, "grad_norm": 2.6411101818084717, "learning_rate": 4.643469387755102e-05, "loss": 4.025, "step": 1749 }, { "epoch": 7.142857142857143, "grad_norm": 3.0764482021331787, "learning_rate": 4.643265306122449e-05, "loss": 3.665, "step": 1750 }, { "epoch": 7.146938775510204, "grad_norm": 4.073435306549072, "learning_rate": 4.643061224489796e-05, "loss": 3.391, "step": 1751 }, { "epoch": 7.151020408163265, "grad_norm": 3.1105241775512695, "learning_rate": 4.642857142857143e-05, "loss": 3.6905, "step": 1752 }, { "epoch": 7.155102040816327, "grad_norm": 3.854149341583252, "learning_rate": 4.64265306122449e-05, "loss": 3.1255, "step": 1753 }, { "epoch": 7.159183673469387, "grad_norm": 3.147512912750244, "learning_rate": 4.642448979591837e-05, "loss": 3.7985, "step": 1754 }, { "epoch": 7.163265306122449, "grad_norm": 2.847069263458252, "learning_rate": 4.642244897959184e-05, "loss": 4.0518, "step": 1755 }, { "epoch": 7.16734693877551, "grad_norm": 5.0933942794799805, "learning_rate": 4.642040816326531e-05, "loss": 3.3718, "step": 1756 }, { "epoch": 7.171428571428572, "grad_norm": 3.5318219661712646, "learning_rate": 4.641836734693878e-05, "loss": 4.0258, "step": 1757 }, { "epoch": 7.1755102040816325, "grad_norm": 3.6468589305877686, "learning_rate": 4.6416326530612246e-05, "loss": 3.7389, "step": 1758 }, { "epoch": 7.179591836734694, "grad_norm": 2.384176731109619, "learning_rate": 4.641428571428571e-05, "loss": 4.0139, "step": 1759 }, { "epoch": 7.183673469387755, "grad_norm": 6.694237232208252, "learning_rate": 4.641224489795919e-05, "loss": 2.6589, "step": 1760 }, { "epoch": 7.187755102040816, "grad_norm": 5.249095439910889, "learning_rate": 4.641020408163266e-05, "loss": 3.4231, "step": 1761 }, { "epoch": 7.191836734693878, "grad_norm": 2.9954631328582764, "learning_rate": 4.640816326530613e-05, "loss": 3.7599, "step": 1762 }, { "epoch": 7.1959183673469385, "grad_norm": 4.066277027130127, "learning_rate": 4.6406122448979595e-05, "loss": 3.4857, "step": 1763 }, { "epoch": 7.2, "grad_norm": 5.218094348907471, "learning_rate": 4.640408163265306e-05, "loss": 3.3318, "step": 1764 }, { "epoch": 7.204081632653061, "grad_norm": 4.017576217651367, "learning_rate": 4.640204081632653e-05, "loss": 3.7391, "step": 1765 }, { "epoch": 7.208163265306123, "grad_norm": 7.10989236831665, "learning_rate": 4.64e-05, "loss": 3.0394, "step": 1766 }, { "epoch": 7.2122448979591836, "grad_norm": 2.890772581100464, "learning_rate": 4.639795918367347e-05, "loss": 3.8908, "step": 1767 }, { "epoch": 7.216326530612245, "grad_norm": 2.8729007244110107, "learning_rate": 4.6395918367346944e-05, "loss": 4.2063, "step": 1768 }, { "epoch": 7.220408163265306, "grad_norm": 4.06869649887085, "learning_rate": 4.639387755102041e-05, "loss": 4.0599, "step": 1769 }, { "epoch": 7.224489795918367, "grad_norm": 3.7515006065368652, "learning_rate": 4.639183673469388e-05, "loss": 3.7104, "step": 1770 }, { "epoch": 7.228571428571429, "grad_norm": 3.101898431777954, "learning_rate": 4.638979591836735e-05, "loss": 3.7303, "step": 1771 }, { "epoch": 7.2326530612244895, "grad_norm": 5.403934001922607, "learning_rate": 4.638775510204082e-05, "loss": 3.6405, "step": 1772 }, { "epoch": 7.236734693877551, "grad_norm": 3.695929765701294, "learning_rate": 4.6385714285714286e-05, "loss": 3.8015, "step": 1773 }, { "epoch": 7.240816326530612, "grad_norm": 6.777254581451416, "learning_rate": 4.6383673469387754e-05, "loss": 3.1133, "step": 1774 }, { "epoch": 7.244897959183674, "grad_norm": 2.492482900619507, "learning_rate": 4.638163265306123e-05, "loss": 3.8736, "step": 1775 }, { "epoch": 7.248979591836735, "grad_norm": 3.223872661590576, "learning_rate": 4.63795918367347e-05, "loss": 3.9962, "step": 1776 }, { "epoch": 7.253061224489796, "grad_norm": 4.030156135559082, "learning_rate": 4.637755102040817e-05, "loss": 3.8101, "step": 1777 }, { "epoch": 7.257142857142857, "grad_norm": 5.508490085601807, "learning_rate": 4.6375510204081636e-05, "loss": 2.8636, "step": 1778 }, { "epoch": 7.261224489795918, "grad_norm": 3.0734260082244873, "learning_rate": 4.63734693877551e-05, "loss": 3.8341, "step": 1779 }, { "epoch": 7.26530612244898, "grad_norm": 3.5150599479675293, "learning_rate": 4.637142857142857e-05, "loss": 3.8074, "step": 1780 }, { "epoch": 7.2693877551020405, "grad_norm": 3.7556285858154297, "learning_rate": 4.6369387755102044e-05, "loss": 3.5905, "step": 1781 }, { "epoch": 7.273469387755102, "grad_norm": 4.814029693603516, "learning_rate": 4.636734693877551e-05, "loss": 3.5528, "step": 1782 }, { "epoch": 7.277551020408163, "grad_norm": 3.9276344776153564, "learning_rate": 4.6365306122448985e-05, "loss": 3.9507, "step": 1783 }, { "epoch": 7.281632653061225, "grad_norm": 2.846560478210449, "learning_rate": 4.636326530612245e-05, "loss": 3.8527, "step": 1784 }, { "epoch": 7.285714285714286, "grad_norm": 3.099113941192627, "learning_rate": 4.636122448979592e-05, "loss": 3.9978, "step": 1785 }, { "epoch": 7.289795918367347, "grad_norm": 4.407896518707275, "learning_rate": 4.635918367346939e-05, "loss": 3.1943, "step": 1786 }, { "epoch": 7.293877551020408, "grad_norm": 2.6170589923858643, "learning_rate": 4.635714285714286e-05, "loss": 4.0261, "step": 1787 }, { "epoch": 7.29795918367347, "grad_norm": 3.3889119625091553, "learning_rate": 4.635510204081633e-05, "loss": 3.8338, "step": 1788 }, { "epoch": 7.302040816326531, "grad_norm": 6.045722961425781, "learning_rate": 4.6353061224489794e-05, "loss": 3.4836, "step": 1789 }, { "epoch": 7.3061224489795915, "grad_norm": 4.366628646850586, "learning_rate": 4.635102040816327e-05, "loss": 3.7115, "step": 1790 }, { "epoch": 7.310204081632653, "grad_norm": 3.011810302734375, "learning_rate": 4.6348979591836735e-05, "loss": 3.9872, "step": 1791 }, { "epoch": 7.314285714285714, "grad_norm": 3.068915605545044, "learning_rate": 4.634693877551021e-05, "loss": 4.0103, "step": 1792 }, { "epoch": 7.318367346938776, "grad_norm": 4.04351806640625, "learning_rate": 4.6344897959183676e-05, "loss": 3.7098, "step": 1793 }, { "epoch": 7.322448979591837, "grad_norm": 2.8581759929656982, "learning_rate": 4.6342857142857143e-05, "loss": 3.7717, "step": 1794 }, { "epoch": 7.326530612244898, "grad_norm": 5.303111553192139, "learning_rate": 4.634081632653062e-05, "loss": 3.318, "step": 1795 }, { "epoch": 7.330612244897959, "grad_norm": 2.7883763313293457, "learning_rate": 4.6338775510204085e-05, "loss": 4.1461, "step": 1796 }, { "epoch": 7.33469387755102, "grad_norm": 2.0483651161193848, "learning_rate": 4.633673469387755e-05, "loss": 4.2277, "step": 1797 }, { "epoch": 7.338775510204082, "grad_norm": 3.8228025436401367, "learning_rate": 4.633469387755102e-05, "loss": 3.7402, "step": 1798 }, { "epoch": 7.3428571428571425, "grad_norm": 2.6197404861450195, "learning_rate": 4.633265306122449e-05, "loss": 4.0494, "step": 1799 }, { "epoch": 7.346938775510204, "grad_norm": 2.8040003776550293, "learning_rate": 4.6330612244897967e-05, "loss": 3.9524, "step": 1800 }, { "epoch": 7.351020408163265, "grad_norm": 4.354310035705566, "learning_rate": 4.6328571428571434e-05, "loss": 3.6171, "step": 1801 }, { "epoch": 7.355102040816327, "grad_norm": 2.965986490249634, "learning_rate": 4.63265306122449e-05, "loss": 3.7736, "step": 1802 }, { "epoch": 7.359183673469388, "grad_norm": 3.4172563552856445, "learning_rate": 4.632448979591837e-05, "loss": 3.919, "step": 1803 }, { "epoch": 7.363265306122449, "grad_norm": 4.401014804840088, "learning_rate": 4.6322448979591835e-05, "loss": 3.5246, "step": 1804 }, { "epoch": 7.36734693877551, "grad_norm": 6.080940246582031, "learning_rate": 4.632040816326531e-05, "loss": 3.3967, "step": 1805 }, { "epoch": 7.371428571428572, "grad_norm": 6.633454322814941, "learning_rate": 4.6318367346938776e-05, "loss": 2.9507, "step": 1806 }, { "epoch": 7.375510204081633, "grad_norm": 2.376652479171753, "learning_rate": 4.631632653061225e-05, "loss": 4.0976, "step": 1807 }, { "epoch": 7.3795918367346935, "grad_norm": 2.936316967010498, "learning_rate": 4.631428571428572e-05, "loss": 3.7218, "step": 1808 }, { "epoch": 7.383673469387755, "grad_norm": 4.1932501792907715, "learning_rate": 4.6312244897959184e-05, "loss": 3.8547, "step": 1809 }, { "epoch": 7.387755102040816, "grad_norm": 5.633331775665283, "learning_rate": 4.631020408163266e-05, "loss": 3.7157, "step": 1810 }, { "epoch": 7.391836734693878, "grad_norm": 2.5914647579193115, "learning_rate": 4.6308163265306125e-05, "loss": 4.0097, "step": 1811 }, { "epoch": 7.395918367346939, "grad_norm": 2.657057046890259, "learning_rate": 4.630612244897959e-05, "loss": 3.7048, "step": 1812 }, { "epoch": 7.4, "grad_norm": 4.15771484375, "learning_rate": 4.630408163265306e-05, "loss": 3.5671, "step": 1813 }, { "epoch": 7.404081632653061, "grad_norm": 2.6797120571136475, "learning_rate": 4.6302040816326533e-05, "loss": 3.816, "step": 1814 }, { "epoch": 7.408163265306122, "grad_norm": 2.989525079727173, "learning_rate": 4.630000000000001e-05, "loss": 3.7477, "step": 1815 }, { "epoch": 7.412244897959184, "grad_norm": 3.801218271255493, "learning_rate": 4.6297959183673474e-05, "loss": 4.0255, "step": 1816 }, { "epoch": 7.416326530612245, "grad_norm": 3.6316514015197754, "learning_rate": 4.629591836734694e-05, "loss": 4.0965, "step": 1817 }, { "epoch": 7.420408163265306, "grad_norm": 4.183012962341309, "learning_rate": 4.629387755102041e-05, "loss": 3.9548, "step": 1818 }, { "epoch": 7.424489795918367, "grad_norm": 4.810744762420654, "learning_rate": 4.6291836734693876e-05, "loss": 3.7918, "step": 1819 }, { "epoch": 7.428571428571429, "grad_norm": 3.1472747325897217, "learning_rate": 4.628979591836735e-05, "loss": 3.9057, "step": 1820 }, { "epoch": 7.43265306122449, "grad_norm": 5.4517035484313965, "learning_rate": 4.628775510204082e-05, "loss": 3.5239, "step": 1821 }, { "epoch": 7.436734693877551, "grad_norm": 3.6596829891204834, "learning_rate": 4.628571428571429e-05, "loss": 3.1907, "step": 1822 }, { "epoch": 7.440816326530612, "grad_norm": 5.343937873840332, "learning_rate": 4.628367346938776e-05, "loss": 3.58, "step": 1823 }, { "epoch": 7.444897959183674, "grad_norm": 5.188425540924072, "learning_rate": 4.6281632653061225e-05, "loss": 3.4997, "step": 1824 }, { "epoch": 7.448979591836735, "grad_norm": 3.498345375061035, "learning_rate": 4.62795918367347e-05, "loss": 3.7916, "step": 1825 }, { "epoch": 7.453061224489796, "grad_norm": 8.573161125183105, "learning_rate": 4.6277551020408166e-05, "loss": 1.6735, "step": 1826 }, { "epoch": 7.457142857142857, "grad_norm": 6.342918872833252, "learning_rate": 4.627551020408163e-05, "loss": 3.0868, "step": 1827 }, { "epoch": 7.461224489795918, "grad_norm": 4.405806064605713, "learning_rate": 4.62734693877551e-05, "loss": 3.5955, "step": 1828 }, { "epoch": 7.46530612244898, "grad_norm": 3.905116558074951, "learning_rate": 4.6271428571428574e-05, "loss": 3.8087, "step": 1829 }, { "epoch": 7.469387755102041, "grad_norm": 6.490808486938477, "learning_rate": 4.626938775510205e-05, "loss": 3.0009, "step": 1830 }, { "epoch": 7.473469387755102, "grad_norm": 3.118901491165161, "learning_rate": 4.6267346938775515e-05, "loss": 3.9714, "step": 1831 }, { "epoch": 7.477551020408163, "grad_norm": 3.7729806900024414, "learning_rate": 4.626530612244898e-05, "loss": 3.7051, "step": 1832 }, { "epoch": 7.481632653061224, "grad_norm": 4.097546577453613, "learning_rate": 4.626326530612245e-05, "loss": 3.5314, "step": 1833 }, { "epoch": 7.485714285714286, "grad_norm": 3.131793737411499, "learning_rate": 4.6261224489795917e-05, "loss": 3.8864, "step": 1834 }, { "epoch": 7.489795918367347, "grad_norm": 5.550705909729004, "learning_rate": 4.625918367346939e-05, "loss": 3.3546, "step": 1835 }, { "epoch": 7.493877551020408, "grad_norm": 3.2875277996063232, "learning_rate": 4.625714285714286e-05, "loss": 3.7961, "step": 1836 }, { "epoch": 7.497959183673469, "grad_norm": 2.541102886199951, "learning_rate": 4.625510204081633e-05, "loss": 4.1612, "step": 1837 }, { "epoch": 7.502040816326531, "grad_norm": 2.582371473312378, "learning_rate": 4.62530612244898e-05, "loss": 3.9615, "step": 1838 }, { "epoch": 7.506122448979592, "grad_norm": 4.422463893890381, "learning_rate": 4.6251020408163266e-05, "loss": 3.932, "step": 1839 }, { "epoch": 7.510204081632653, "grad_norm": 5.605561256408691, "learning_rate": 4.624897959183674e-05, "loss": 3.1118, "step": 1840 }, { "epoch": 7.514285714285714, "grad_norm": 4.759947776794434, "learning_rate": 4.624693877551021e-05, "loss": 3.8024, "step": 1841 }, { "epoch": 7.518367346938776, "grad_norm": 4.663572311401367, "learning_rate": 4.6244897959183674e-05, "loss": 3.6508, "step": 1842 }, { "epoch": 7.522448979591837, "grad_norm": 2.9476613998413086, "learning_rate": 4.624285714285714e-05, "loss": 4.0666, "step": 1843 }, { "epoch": 7.526530612244898, "grad_norm": 4.714024543762207, "learning_rate": 4.6240816326530615e-05, "loss": 3.4411, "step": 1844 }, { "epoch": 7.530612244897959, "grad_norm": 5.537649631500244, "learning_rate": 4.623877551020408e-05, "loss": 3.5183, "step": 1845 }, { "epoch": 7.53469387755102, "grad_norm": 5.35687780380249, "learning_rate": 4.6236734693877556e-05, "loss": 3.3853, "step": 1846 }, { "epoch": 7.538775510204082, "grad_norm": 3.0049514770507812, "learning_rate": 4.623469387755102e-05, "loss": 4.0425, "step": 1847 }, { "epoch": 7.542857142857143, "grad_norm": 3.2080485820770264, "learning_rate": 4.623265306122449e-05, "loss": 3.69, "step": 1848 }, { "epoch": 7.546938775510204, "grad_norm": 3.215212345123291, "learning_rate": 4.6230612244897964e-05, "loss": 3.8059, "step": 1849 }, { "epoch": 7.551020408163265, "grad_norm": 4.276371955871582, "learning_rate": 4.622857142857143e-05, "loss": 3.6556, "step": 1850 }, { "epoch": 7.555102040816326, "grad_norm": 4.815982341766357, "learning_rate": 4.62265306122449e-05, "loss": 3.7793, "step": 1851 }, { "epoch": 7.559183673469388, "grad_norm": 3.854556083679199, "learning_rate": 4.6224489795918366e-05, "loss": 3.6812, "step": 1852 }, { "epoch": 7.563265306122449, "grad_norm": 2.7386362552642822, "learning_rate": 4.622244897959184e-05, "loss": 3.9688, "step": 1853 }, { "epoch": 7.56734693877551, "grad_norm": 2.4540951251983643, "learning_rate": 4.622040816326531e-05, "loss": 4.0382, "step": 1854 }, { "epoch": 7.571428571428571, "grad_norm": 3.839186191558838, "learning_rate": 4.621836734693878e-05, "loss": 3.966, "step": 1855 }, { "epoch": 7.575510204081633, "grad_norm": 3.814138412475586, "learning_rate": 4.621632653061225e-05, "loss": 3.9448, "step": 1856 }, { "epoch": 7.579591836734694, "grad_norm": 3.4527151584625244, "learning_rate": 4.6214285714285715e-05, "loss": 3.9031, "step": 1857 }, { "epoch": 7.583673469387755, "grad_norm": 3.813098192214966, "learning_rate": 4.621224489795918e-05, "loss": 3.6216, "step": 1858 }, { "epoch": 7.587755102040816, "grad_norm": 2.762826442718506, "learning_rate": 4.6210204081632656e-05, "loss": 3.8267, "step": 1859 }, { "epoch": 7.591836734693878, "grad_norm": 3.4264633655548096, "learning_rate": 4.620816326530612e-05, "loss": 3.9012, "step": 1860 }, { "epoch": 7.595918367346939, "grad_norm": 2.9337873458862305, "learning_rate": 4.62061224489796e-05, "loss": 3.8065, "step": 1861 }, { "epoch": 7.6, "grad_norm": 3.961327075958252, "learning_rate": 4.6204081632653064e-05, "loss": 3.684, "step": 1862 }, { "epoch": 7.604081632653061, "grad_norm": 2.969911575317383, "learning_rate": 4.620204081632653e-05, "loss": 4.0654, "step": 1863 }, { "epoch": 7.608163265306122, "grad_norm": 2.807749032974243, "learning_rate": 4.6200000000000005e-05, "loss": 4.0196, "step": 1864 }, { "epoch": 7.612244897959184, "grad_norm": 4.88359260559082, "learning_rate": 4.619795918367347e-05, "loss": 3.6411, "step": 1865 }, { "epoch": 7.616326530612245, "grad_norm": 4.368223667144775, "learning_rate": 4.619591836734694e-05, "loss": 3.7686, "step": 1866 }, { "epoch": 7.6204081632653065, "grad_norm": 2.808892250061035, "learning_rate": 4.6193877551020406e-05, "loss": 3.8643, "step": 1867 }, { "epoch": 7.624489795918367, "grad_norm": 4.406306266784668, "learning_rate": 4.619183673469388e-05, "loss": 3.6996, "step": 1868 }, { "epoch": 7.628571428571428, "grad_norm": 3.882239818572998, "learning_rate": 4.6189795918367354e-05, "loss": 4.0078, "step": 1869 }, { "epoch": 7.63265306122449, "grad_norm": 4.116113662719727, "learning_rate": 4.618775510204082e-05, "loss": 3.4198, "step": 1870 }, { "epoch": 7.636734693877551, "grad_norm": 4.169960021972656, "learning_rate": 4.618571428571429e-05, "loss": 3.5099, "step": 1871 }, { "epoch": 7.640816326530612, "grad_norm": 3.39062237739563, "learning_rate": 4.6183673469387755e-05, "loss": 3.9437, "step": 1872 }, { "epoch": 7.644897959183673, "grad_norm": 6.070479869842529, "learning_rate": 4.618163265306122e-05, "loss": 2.9546, "step": 1873 }, { "epoch": 7.648979591836735, "grad_norm": 3.258699417114258, "learning_rate": 4.6179591836734696e-05, "loss": 3.8724, "step": 1874 }, { "epoch": 7.653061224489796, "grad_norm": 5.096837043762207, "learning_rate": 4.6177551020408164e-05, "loss": 3.5895, "step": 1875 }, { "epoch": 7.6571428571428575, "grad_norm": 4.358465194702148, "learning_rate": 4.617551020408164e-05, "loss": 3.5251, "step": 1876 }, { "epoch": 7.661224489795918, "grad_norm": 2.4857046604156494, "learning_rate": 4.6173469387755105e-05, "loss": 4.1942, "step": 1877 }, { "epoch": 7.66530612244898, "grad_norm": 4.457594871520996, "learning_rate": 4.617142857142857e-05, "loss": 3.6137, "step": 1878 }, { "epoch": 7.669387755102041, "grad_norm": 4.389279365539551, "learning_rate": 4.6169387755102046e-05, "loss": 3.6331, "step": 1879 }, { "epoch": 7.673469387755102, "grad_norm": 1.5410290956497192, "learning_rate": 4.616734693877551e-05, "loss": 4.2122, "step": 1880 }, { "epoch": 7.677551020408163, "grad_norm": 5.898764610290527, "learning_rate": 4.616530612244898e-05, "loss": 2.9772, "step": 1881 }, { "epoch": 7.681632653061224, "grad_norm": 3.1833791732788086, "learning_rate": 4.616326530612245e-05, "loss": 3.8936, "step": 1882 }, { "epoch": 7.685714285714286, "grad_norm": 3.277834415435791, "learning_rate": 4.616122448979592e-05, "loss": 4.043, "step": 1883 }, { "epoch": 7.689795918367347, "grad_norm": 4.085977554321289, "learning_rate": 4.6159183673469395e-05, "loss": 3.7233, "step": 1884 }, { "epoch": 7.6938775510204085, "grad_norm": 5.152985095977783, "learning_rate": 4.615714285714286e-05, "loss": 3.6326, "step": 1885 }, { "epoch": 7.697959183673469, "grad_norm": 2.3967673778533936, "learning_rate": 4.615510204081633e-05, "loss": 4.2347, "step": 1886 }, { "epoch": 7.70204081632653, "grad_norm": 3.52793288230896, "learning_rate": 4.6153061224489796e-05, "loss": 3.8298, "step": 1887 }, { "epoch": 7.706122448979592, "grad_norm": 2.8519179821014404, "learning_rate": 4.615102040816327e-05, "loss": 3.9012, "step": 1888 }, { "epoch": 7.710204081632653, "grad_norm": 4.122255802154541, "learning_rate": 4.614897959183674e-05, "loss": 3.627, "step": 1889 }, { "epoch": 7.714285714285714, "grad_norm": 4.730788230895996, "learning_rate": 4.6146938775510204e-05, "loss": 3.5037, "step": 1890 }, { "epoch": 7.718367346938775, "grad_norm": 4.685481548309326, "learning_rate": 4.614489795918368e-05, "loss": 3.2161, "step": 1891 }, { "epoch": 7.722448979591837, "grad_norm": 4.365912437438965, "learning_rate": 4.6142857142857145e-05, "loss": 2.9021, "step": 1892 }, { "epoch": 7.726530612244898, "grad_norm": 3.3037002086639404, "learning_rate": 4.614081632653062e-05, "loss": 3.845, "step": 1893 }, { "epoch": 7.7306122448979595, "grad_norm": 2.974790096282959, "learning_rate": 4.6138775510204086e-05, "loss": 3.9212, "step": 1894 }, { "epoch": 7.73469387755102, "grad_norm": 5.18635368347168, "learning_rate": 4.6136734693877554e-05, "loss": 3.5586, "step": 1895 }, { "epoch": 7.738775510204082, "grad_norm": 2.832162857055664, "learning_rate": 4.613469387755102e-05, "loss": 3.8936, "step": 1896 }, { "epoch": 7.742857142857143, "grad_norm": 5.8007588386535645, "learning_rate": 4.613265306122449e-05, "loss": 3.4094, "step": 1897 }, { "epoch": 7.746938775510204, "grad_norm": 6.796947002410889, "learning_rate": 4.613061224489796e-05, "loss": 2.8459, "step": 1898 }, { "epoch": 7.751020408163265, "grad_norm": 3.9210731983184814, "learning_rate": 4.612857142857143e-05, "loss": 3.8402, "step": 1899 }, { "epoch": 7.755102040816326, "grad_norm": 5.029566287994385, "learning_rate": 4.61265306122449e-05, "loss": 3.6229, "step": 1900 }, { "epoch": 7.759183673469388, "grad_norm": 3.7152810096740723, "learning_rate": 4.612448979591837e-05, "loss": 3.6305, "step": 1901 }, { "epoch": 7.763265306122449, "grad_norm": 3.005697011947632, "learning_rate": 4.612244897959184e-05, "loss": 3.7444, "step": 1902 }, { "epoch": 7.7673469387755105, "grad_norm": 3.2550039291381836, "learning_rate": 4.612040816326531e-05, "loss": 4.1247, "step": 1903 }, { "epoch": 7.771428571428571, "grad_norm": 3.370626211166382, "learning_rate": 4.611836734693878e-05, "loss": 3.9868, "step": 1904 }, { "epoch": 7.775510204081632, "grad_norm": 4.819148063659668, "learning_rate": 4.6116326530612245e-05, "loss": 3.8313, "step": 1905 }, { "epoch": 7.779591836734694, "grad_norm": 5.433138847351074, "learning_rate": 4.611428571428571e-05, "loss": 3.3806, "step": 1906 }, { "epoch": 7.783673469387755, "grad_norm": 3.373509407043457, "learning_rate": 4.6112244897959186e-05, "loss": 3.9039, "step": 1907 }, { "epoch": 7.7877551020408164, "grad_norm": 4.395197868347168, "learning_rate": 4.611020408163266e-05, "loss": 3.5016, "step": 1908 }, { "epoch": 7.791836734693877, "grad_norm": 4.5784220695495605, "learning_rate": 4.610816326530613e-05, "loss": 3.6808, "step": 1909 }, { "epoch": 7.795918367346939, "grad_norm": 4.060685634613037, "learning_rate": 4.6106122448979594e-05, "loss": 3.8002, "step": 1910 }, { "epoch": 7.8, "grad_norm": 4.91602087020874, "learning_rate": 4.610408163265306e-05, "loss": 3.3174, "step": 1911 }, { "epoch": 7.8040816326530615, "grad_norm": 5.025340557098389, "learning_rate": 4.610204081632653e-05, "loss": 3.631, "step": 1912 }, { "epoch": 7.808163265306122, "grad_norm": 3.8778951168060303, "learning_rate": 4.61e-05, "loss": 3.936, "step": 1913 }, { "epoch": 7.812244897959184, "grad_norm": 5.49656343460083, "learning_rate": 4.609795918367347e-05, "loss": 3.222, "step": 1914 }, { "epoch": 7.816326530612245, "grad_norm": 3.9216346740722656, "learning_rate": 4.6095918367346943e-05, "loss": 3.9027, "step": 1915 }, { "epoch": 7.820408163265306, "grad_norm": 5.305903434753418, "learning_rate": 4.609387755102041e-05, "loss": 3.2622, "step": 1916 }, { "epoch": 7.8244897959183675, "grad_norm": 4.111706733703613, "learning_rate": 4.609183673469388e-05, "loss": 3.6667, "step": 1917 }, { "epoch": 7.828571428571428, "grad_norm": 6.123554229736328, "learning_rate": 4.608979591836735e-05, "loss": 3.191, "step": 1918 }, { "epoch": 7.83265306122449, "grad_norm": 3.1248159408569336, "learning_rate": 4.608775510204082e-05, "loss": 3.9012, "step": 1919 }, { "epoch": 7.836734693877551, "grad_norm": 2.3250203132629395, "learning_rate": 4.6085714285714286e-05, "loss": 4.1926, "step": 1920 }, { "epoch": 7.840816326530613, "grad_norm": 3.8940088748931885, "learning_rate": 4.608367346938775e-05, "loss": 3.8862, "step": 1921 }, { "epoch": 7.844897959183673, "grad_norm": 4.133308410644531, "learning_rate": 4.608163265306123e-05, "loss": 3.6433, "step": 1922 }, { "epoch": 7.848979591836734, "grad_norm": 3.9032275676727295, "learning_rate": 4.60795918367347e-05, "loss": 3.7887, "step": 1923 }, { "epoch": 7.853061224489796, "grad_norm": 4.085447788238525, "learning_rate": 4.607755102040817e-05, "loss": 3.7379, "step": 1924 }, { "epoch": 7.857142857142857, "grad_norm": 3.1945371627807617, "learning_rate": 4.6075510204081635e-05, "loss": 4.1617, "step": 1925 }, { "epoch": 7.8612244897959185, "grad_norm": 3.6296491622924805, "learning_rate": 4.60734693877551e-05, "loss": 3.6967, "step": 1926 }, { "epoch": 7.865306122448979, "grad_norm": 7.127316951751709, "learning_rate": 4.607142857142857e-05, "loss": 3.0849, "step": 1927 }, { "epoch": 7.869387755102041, "grad_norm": 3.756284236907959, "learning_rate": 4.606938775510204e-05, "loss": 3.7728, "step": 1928 }, { "epoch": 7.873469387755102, "grad_norm": 4.202821731567383, "learning_rate": 4.606734693877551e-05, "loss": 3.8209, "step": 1929 }, { "epoch": 7.877551020408164, "grad_norm": 2.5883607864379883, "learning_rate": 4.6065306122448984e-05, "loss": 4.0121, "step": 1930 }, { "epoch": 7.881632653061224, "grad_norm": 3.0707056522369385, "learning_rate": 4.606326530612245e-05, "loss": 3.9066, "step": 1931 }, { "epoch": 7.885714285714286, "grad_norm": 5.45291805267334, "learning_rate": 4.6061224489795925e-05, "loss": 3.3512, "step": 1932 }, { "epoch": 7.889795918367347, "grad_norm": 2.611347198486328, "learning_rate": 4.605918367346939e-05, "loss": 4.1997, "step": 1933 }, { "epoch": 7.893877551020408, "grad_norm": 5.712066173553467, "learning_rate": 4.605714285714286e-05, "loss": 3.3676, "step": 1934 }, { "epoch": 7.8979591836734695, "grad_norm": 5.250341415405273, "learning_rate": 4.605510204081633e-05, "loss": 3.3081, "step": 1935 }, { "epoch": 7.90204081632653, "grad_norm": 3.0143771171569824, "learning_rate": 4.6053061224489794e-05, "loss": 4.0823, "step": 1936 }, { "epoch": 7.906122448979592, "grad_norm": 3.318963050842285, "learning_rate": 4.605102040816327e-05, "loss": 3.9289, "step": 1937 }, { "epoch": 7.910204081632653, "grad_norm": 3.0127716064453125, "learning_rate": 4.604897959183674e-05, "loss": 3.7535, "step": 1938 }, { "epoch": 7.914285714285715, "grad_norm": 2.7074644565582275, "learning_rate": 4.604693877551021e-05, "loss": 3.8831, "step": 1939 }, { "epoch": 7.918367346938775, "grad_norm": 3.047713041305542, "learning_rate": 4.6044897959183676e-05, "loss": 3.9974, "step": 1940 }, { "epoch": 7.922448979591836, "grad_norm": 4.0427422523498535, "learning_rate": 4.604285714285714e-05, "loss": 3.2743, "step": 1941 }, { "epoch": 7.926530612244898, "grad_norm": 2.3506288528442383, "learning_rate": 4.604081632653062e-05, "loss": 3.9466, "step": 1942 }, { "epoch": 7.930612244897959, "grad_norm": 2.829826593399048, "learning_rate": 4.6038775510204084e-05, "loss": 3.9453, "step": 1943 }, { "epoch": 7.9346938775510205, "grad_norm": 4.164653778076172, "learning_rate": 4.603673469387755e-05, "loss": 3.8082, "step": 1944 }, { "epoch": 7.938775510204081, "grad_norm": 6.184420585632324, "learning_rate": 4.6034693877551025e-05, "loss": 2.9247, "step": 1945 }, { "epoch": 7.942857142857143, "grad_norm": 3.3635406494140625, "learning_rate": 4.603265306122449e-05, "loss": 3.8383, "step": 1946 }, { "epoch": 7.946938775510204, "grad_norm": 5.483679294586182, "learning_rate": 4.6030612244897966e-05, "loss": 3.6765, "step": 1947 }, { "epoch": 7.951020408163266, "grad_norm": 2.5158910751342773, "learning_rate": 4.602857142857143e-05, "loss": 4.3698, "step": 1948 }, { "epoch": 7.955102040816326, "grad_norm": 3.0381197929382324, "learning_rate": 4.60265306122449e-05, "loss": 3.7418, "step": 1949 }, { "epoch": 7.959183673469388, "grad_norm": 3.952542781829834, "learning_rate": 4.602448979591837e-05, "loss": 3.9354, "step": 1950 }, { "epoch": 7.963265306122449, "grad_norm": 4.239914894104004, "learning_rate": 4.6022448979591835e-05, "loss": 3.6537, "step": 1951 }, { "epoch": 7.96734693877551, "grad_norm": 5.390611171722412, "learning_rate": 4.602040816326531e-05, "loss": 3.4846, "step": 1952 }, { "epoch": 7.9714285714285715, "grad_norm": 5.637478828430176, "learning_rate": 4.6018367346938776e-05, "loss": 3.4576, "step": 1953 }, { "epoch": 7.975510204081632, "grad_norm": 4.28271484375, "learning_rate": 4.601632653061225e-05, "loss": 3.7578, "step": 1954 }, { "epoch": 7.979591836734694, "grad_norm": 5.705329418182373, "learning_rate": 4.6014285714285717e-05, "loss": 3.254, "step": 1955 }, { "epoch": 7.983673469387755, "grad_norm": 3.5070252418518066, "learning_rate": 4.6012244897959184e-05, "loss": 3.9009, "step": 1956 }, { "epoch": 7.987755102040817, "grad_norm": 7.259873390197754, "learning_rate": 4.601020408163266e-05, "loss": 2.8438, "step": 1957 }, { "epoch": 7.9918367346938775, "grad_norm": 4.106003284454346, "learning_rate": 4.6008163265306125e-05, "loss": 3.5352, "step": 1958 }, { "epoch": 7.995918367346938, "grad_norm": 2.8490099906921387, "learning_rate": 4.600612244897959e-05, "loss": 3.8511, "step": 1959 }, { "epoch": 8.0, "grad_norm": 3.042637825012207, "learning_rate": 4.600408163265306e-05, "loss": 3.8111, "step": 1960 }, { "epoch": 8.004081632653062, "grad_norm": 4.88106632232666, "learning_rate": 4.600204081632653e-05, "loss": 3.5442, "step": 1961 }, { "epoch": 8.008163265306122, "grad_norm": 2.954679012298584, "learning_rate": 4.600000000000001e-05, "loss": 3.7103, "step": 1962 }, { "epoch": 8.012244897959183, "grad_norm": 5.273850440979004, "learning_rate": 4.5997959183673474e-05, "loss": 3.4235, "step": 1963 }, { "epoch": 8.016326530612245, "grad_norm": 2.8382108211517334, "learning_rate": 4.599591836734694e-05, "loss": 3.7045, "step": 1964 }, { "epoch": 8.020408163265307, "grad_norm": 4.126638412475586, "learning_rate": 4.599387755102041e-05, "loss": 3.9437, "step": 1965 }, { "epoch": 8.024489795918367, "grad_norm": 2.498065948486328, "learning_rate": 4.5991836734693875e-05, "loss": 3.9177, "step": 1966 }, { "epoch": 8.028571428571428, "grad_norm": 2.6728880405426025, "learning_rate": 4.598979591836735e-05, "loss": 3.6686, "step": 1967 }, { "epoch": 8.03265306122449, "grad_norm": 2.711717128753662, "learning_rate": 4.5987755102040816e-05, "loss": 3.9229, "step": 1968 }, { "epoch": 8.036734693877552, "grad_norm": 5.400395393371582, "learning_rate": 4.598571428571429e-05, "loss": 3.6722, "step": 1969 }, { "epoch": 8.040816326530612, "grad_norm": 3.5668883323669434, "learning_rate": 4.598367346938776e-05, "loss": 3.7065, "step": 1970 }, { "epoch": 8.044897959183674, "grad_norm": 4.397403717041016, "learning_rate": 4.5981632653061224e-05, "loss": 3.7169, "step": 1971 }, { "epoch": 8.048979591836735, "grad_norm": 3.783431053161621, "learning_rate": 4.59795918367347e-05, "loss": 3.8757, "step": 1972 }, { "epoch": 8.053061224489795, "grad_norm": 3.831486701965332, "learning_rate": 4.5977551020408166e-05, "loss": 3.6525, "step": 1973 }, { "epoch": 8.057142857142857, "grad_norm": 4.088062286376953, "learning_rate": 4.597551020408163e-05, "loss": 3.6539, "step": 1974 }, { "epoch": 8.061224489795919, "grad_norm": 4.050139904022217, "learning_rate": 4.59734693877551e-05, "loss": 3.6866, "step": 1975 }, { "epoch": 8.06530612244898, "grad_norm": 5.297530651092529, "learning_rate": 4.5971428571428574e-05, "loss": 3.2162, "step": 1976 }, { "epoch": 8.06938775510204, "grad_norm": 3.286078929901123, "learning_rate": 4.596938775510205e-05, "loss": 3.78, "step": 1977 }, { "epoch": 8.073469387755102, "grad_norm": 3.1575968265533447, "learning_rate": 4.5967346938775515e-05, "loss": 3.8737, "step": 1978 }, { "epoch": 8.077551020408164, "grad_norm": 4.888733863830566, "learning_rate": 4.596530612244898e-05, "loss": 3.4085, "step": 1979 }, { "epoch": 8.081632653061224, "grad_norm": 4.321664333343506, "learning_rate": 4.596326530612245e-05, "loss": 3.1019, "step": 1980 }, { "epoch": 8.085714285714285, "grad_norm": 3.3008229732513428, "learning_rate": 4.596122448979592e-05, "loss": 3.8256, "step": 1981 }, { "epoch": 8.089795918367347, "grad_norm": 2.235851764678955, "learning_rate": 4.595918367346939e-05, "loss": 3.9168, "step": 1982 }, { "epoch": 8.093877551020409, "grad_norm": 6.277608871459961, "learning_rate": 4.595714285714286e-05, "loss": 2.9211, "step": 1983 }, { "epoch": 8.097959183673469, "grad_norm": 4.391018867492676, "learning_rate": 4.595510204081633e-05, "loss": 3.3654, "step": 1984 }, { "epoch": 8.10204081632653, "grad_norm": 5.177350044250488, "learning_rate": 4.59530612244898e-05, "loss": 3.2711, "step": 1985 }, { "epoch": 8.106122448979592, "grad_norm": 4.2404561042785645, "learning_rate": 4.595102040816327e-05, "loss": 3.4259, "step": 1986 }, { "epoch": 8.110204081632654, "grad_norm": 4.232795238494873, "learning_rate": 4.594897959183674e-05, "loss": 3.5706, "step": 1987 }, { "epoch": 8.114285714285714, "grad_norm": 3.0810368061065674, "learning_rate": 4.5946938775510206e-05, "loss": 3.7039, "step": 1988 }, { "epoch": 8.118367346938776, "grad_norm": 4.107366561889648, "learning_rate": 4.5944897959183673e-05, "loss": 3.7473, "step": 1989 }, { "epoch": 8.122448979591837, "grad_norm": 2.7241101264953613, "learning_rate": 4.594285714285714e-05, "loss": 4.1608, "step": 1990 }, { "epoch": 8.126530612244897, "grad_norm": 3.029231309890747, "learning_rate": 4.5940816326530614e-05, "loss": 3.8887, "step": 1991 }, { "epoch": 8.130612244897959, "grad_norm": 8.07568359375, "learning_rate": 4.593877551020409e-05, "loss": 2.9754, "step": 1992 }, { "epoch": 8.13469387755102, "grad_norm": 2.736288070678711, "learning_rate": 4.5936734693877555e-05, "loss": 3.9684, "step": 1993 }, { "epoch": 8.138775510204082, "grad_norm": 4.798498630523682, "learning_rate": 4.593469387755102e-05, "loss": 3.6356, "step": 1994 }, { "epoch": 8.142857142857142, "grad_norm": 2.2648041248321533, "learning_rate": 4.593265306122449e-05, "loss": 4.2092, "step": 1995 }, { "epoch": 8.146938775510204, "grad_norm": 5.077346324920654, "learning_rate": 4.5930612244897964e-05, "loss": 3.8499, "step": 1996 }, { "epoch": 8.151020408163266, "grad_norm": 5.536068439483643, "learning_rate": 4.592857142857143e-05, "loss": 3.5768, "step": 1997 }, { "epoch": 8.155102040816326, "grad_norm": 4.302311420440674, "learning_rate": 4.59265306122449e-05, "loss": 3.4669, "step": 1998 }, { "epoch": 8.159183673469387, "grad_norm": 3.923048257827759, "learning_rate": 4.592448979591837e-05, "loss": 4.0503, "step": 1999 }, { "epoch": 8.16326530612245, "grad_norm": 3.454805374145508, "learning_rate": 4.592244897959184e-05, "loss": 3.9618, "step": 2000 }, { "epoch": 8.16734693877551, "grad_norm": 5.137290954589844, "learning_rate": 4.592040816326531e-05, "loss": 3.5577, "step": 2001 }, { "epoch": 8.17142857142857, "grad_norm": 5.450822830200195, "learning_rate": 4.591836734693878e-05, "loss": 3.4254, "step": 2002 }, { "epoch": 8.175510204081633, "grad_norm": 5.836850166320801, "learning_rate": 4.591632653061225e-05, "loss": 3.3905, "step": 2003 }, { "epoch": 8.179591836734694, "grad_norm": 4.794488430023193, "learning_rate": 4.5914285714285714e-05, "loss": 3.7415, "step": 2004 }, { "epoch": 8.183673469387756, "grad_norm": 4.10203218460083, "learning_rate": 4.591224489795918e-05, "loss": 3.357, "step": 2005 }, { "epoch": 8.187755102040816, "grad_norm": 3.910001516342163, "learning_rate": 4.5910204081632655e-05, "loss": 3.7254, "step": 2006 }, { "epoch": 8.191836734693878, "grad_norm": 4.100981712341309, "learning_rate": 4.590816326530612e-05, "loss": 3.7061, "step": 2007 }, { "epoch": 8.19591836734694, "grad_norm": 4.032186508178711, "learning_rate": 4.5906122448979596e-05, "loss": 3.6909, "step": 2008 }, { "epoch": 8.2, "grad_norm": 3.884716033935547, "learning_rate": 4.590408163265306e-05, "loss": 3.6312, "step": 2009 }, { "epoch": 8.204081632653061, "grad_norm": 4.809353351593018, "learning_rate": 4.590204081632653e-05, "loss": 3.5398, "step": 2010 }, { "epoch": 8.208163265306123, "grad_norm": 3.3418097496032715, "learning_rate": 4.5900000000000004e-05, "loss": 3.8621, "step": 2011 }, { "epoch": 8.212244897959184, "grad_norm": 5.985930442810059, "learning_rate": 4.589795918367347e-05, "loss": 3.4014, "step": 2012 }, { "epoch": 8.216326530612244, "grad_norm": 4.407713890075684, "learning_rate": 4.589591836734694e-05, "loss": 3.47, "step": 2013 }, { "epoch": 8.220408163265306, "grad_norm": 2.8312883377075195, "learning_rate": 4.5893877551020406e-05, "loss": 3.834, "step": 2014 }, { "epoch": 8.224489795918368, "grad_norm": 3.434075355529785, "learning_rate": 4.589183673469388e-05, "loss": 4.0851, "step": 2015 }, { "epoch": 8.228571428571428, "grad_norm": 4.336065769195557, "learning_rate": 4.5889795918367354e-05, "loss": 3.9024, "step": 2016 }, { "epoch": 8.23265306122449, "grad_norm": 4.73456335067749, "learning_rate": 4.588775510204082e-05, "loss": 3.8637, "step": 2017 }, { "epoch": 8.236734693877551, "grad_norm": 3.3908185958862305, "learning_rate": 4.588571428571429e-05, "loss": 3.9729, "step": 2018 }, { "epoch": 8.240816326530613, "grad_norm": 3.5648086071014404, "learning_rate": 4.5883673469387755e-05, "loss": 3.9405, "step": 2019 }, { "epoch": 8.244897959183673, "grad_norm": 2.6023690700531006, "learning_rate": 4.588163265306122e-05, "loss": 4.2148, "step": 2020 }, { "epoch": 8.248979591836735, "grad_norm": 5.216060161590576, "learning_rate": 4.5879591836734696e-05, "loss": 3.484, "step": 2021 }, { "epoch": 8.253061224489796, "grad_norm": 4.758233070373535, "learning_rate": 4.587755102040816e-05, "loss": 3.8898, "step": 2022 }, { "epoch": 8.257142857142856, "grad_norm": 4.308887481689453, "learning_rate": 4.587551020408164e-05, "loss": 4.0094, "step": 2023 }, { "epoch": 8.261224489795918, "grad_norm": 3.9662418365478516, "learning_rate": 4.5873469387755104e-05, "loss": 3.6985, "step": 2024 }, { "epoch": 8.26530612244898, "grad_norm": 5.6631035804748535, "learning_rate": 4.587142857142858e-05, "loss": 3.724, "step": 2025 }, { "epoch": 8.269387755102041, "grad_norm": 2.7122411727905273, "learning_rate": 4.5869387755102045e-05, "loss": 3.8081, "step": 2026 }, { "epoch": 8.273469387755101, "grad_norm": 3.576700210571289, "learning_rate": 4.586734693877551e-05, "loss": 3.85, "step": 2027 }, { "epoch": 8.277551020408163, "grad_norm": 3.630927085876465, "learning_rate": 4.586530612244898e-05, "loss": 3.7378, "step": 2028 }, { "epoch": 8.281632653061225, "grad_norm": 3.531193494796753, "learning_rate": 4.5863265306122447e-05, "loss": 3.7154, "step": 2029 }, { "epoch": 8.285714285714286, "grad_norm": 2.414198637008667, "learning_rate": 4.586122448979592e-05, "loss": 4.1613, "step": 2030 }, { "epoch": 8.289795918367346, "grad_norm": 4.285306453704834, "learning_rate": 4.5859183673469394e-05, "loss": 3.5343, "step": 2031 }, { "epoch": 8.293877551020408, "grad_norm": 5.228806972503662, "learning_rate": 4.585714285714286e-05, "loss": 3.3849, "step": 2032 }, { "epoch": 8.29795918367347, "grad_norm": 4.990856647491455, "learning_rate": 4.585510204081633e-05, "loss": 3.1691, "step": 2033 }, { "epoch": 8.30204081632653, "grad_norm": 3.3135833740234375, "learning_rate": 4.5853061224489796e-05, "loss": 3.7019, "step": 2034 }, { "epoch": 8.306122448979592, "grad_norm": 3.701636791229248, "learning_rate": 4.585102040816327e-05, "loss": 3.8783, "step": 2035 }, { "epoch": 8.310204081632653, "grad_norm": 3.020620107650757, "learning_rate": 4.584897959183674e-05, "loss": 3.8343, "step": 2036 }, { "epoch": 8.314285714285715, "grad_norm": 4.251446723937988, "learning_rate": 4.5846938775510204e-05, "loss": 3.5605, "step": 2037 }, { "epoch": 8.318367346938775, "grad_norm": 3.713653087615967, "learning_rate": 4.584489795918368e-05, "loss": 3.6671, "step": 2038 }, { "epoch": 8.322448979591837, "grad_norm": 2.8080990314483643, "learning_rate": 4.5842857142857145e-05, "loss": 4.0292, "step": 2039 }, { "epoch": 8.326530612244898, "grad_norm": 7.890597820281982, "learning_rate": 4.584081632653062e-05, "loss": 2.761, "step": 2040 }, { "epoch": 8.33061224489796, "grad_norm": 5.785500526428223, "learning_rate": 4.5838775510204086e-05, "loss": 3.5462, "step": 2041 }, { "epoch": 8.33469387755102, "grad_norm": 4.1573805809021, "learning_rate": 4.583673469387755e-05, "loss": 3.9929, "step": 2042 }, { "epoch": 8.338775510204082, "grad_norm": 4.262792587280273, "learning_rate": 4.583469387755102e-05, "loss": 3.6335, "step": 2043 }, { "epoch": 8.342857142857143, "grad_norm": 3.3995416164398193, "learning_rate": 4.583265306122449e-05, "loss": 3.8575, "step": 2044 }, { "epoch": 8.346938775510203, "grad_norm": 3.0961875915527344, "learning_rate": 4.583061224489796e-05, "loss": 4.1312, "step": 2045 }, { "epoch": 8.351020408163265, "grad_norm": 2.7571663856506348, "learning_rate": 4.5828571428571435e-05, "loss": 4.0151, "step": 2046 }, { "epoch": 8.355102040816327, "grad_norm": 4.7400078773498535, "learning_rate": 4.58265306122449e-05, "loss": 3.6131, "step": 2047 }, { "epoch": 8.359183673469389, "grad_norm": 4.246169090270996, "learning_rate": 4.582448979591837e-05, "loss": 3.6728, "step": 2048 }, { "epoch": 8.363265306122448, "grad_norm": 4.668584823608398, "learning_rate": 4.5822448979591836e-05, "loss": 3.5935, "step": 2049 }, { "epoch": 8.36734693877551, "grad_norm": 5.697848320007324, "learning_rate": 4.582040816326531e-05, "loss": 3.3133, "step": 2050 }, { "epoch": 8.371428571428572, "grad_norm": 6.697824001312256, "learning_rate": 4.581836734693878e-05, "loss": 3.1098, "step": 2051 }, { "epoch": 8.375510204081632, "grad_norm": 3.6264333724975586, "learning_rate": 4.5816326530612245e-05, "loss": 3.52, "step": 2052 }, { "epoch": 8.379591836734694, "grad_norm": 4.252877712249756, "learning_rate": 4.581428571428572e-05, "loss": 3.767, "step": 2053 }, { "epoch": 8.383673469387755, "grad_norm": 5.109703540802002, "learning_rate": 4.5812244897959186e-05, "loss": 3.244, "step": 2054 }, { "epoch": 8.387755102040817, "grad_norm": 4.720422267913818, "learning_rate": 4.581020408163266e-05, "loss": 3.5857, "step": 2055 }, { "epoch": 8.391836734693877, "grad_norm": 3.7612462043762207, "learning_rate": 4.580816326530613e-05, "loss": 3.9129, "step": 2056 }, { "epoch": 8.395918367346939, "grad_norm": 8.095978736877441, "learning_rate": 4.5806122448979594e-05, "loss": 2.6502, "step": 2057 }, { "epoch": 8.4, "grad_norm": 6.625248908996582, "learning_rate": 4.580408163265306e-05, "loss": 2.8847, "step": 2058 }, { "epoch": 8.404081632653062, "grad_norm": 2.9485023021698, "learning_rate": 4.580204081632653e-05, "loss": 4.1017, "step": 2059 }, { "epoch": 8.408163265306122, "grad_norm": 5.838598728179932, "learning_rate": 4.58e-05, "loss": 3.2762, "step": 2060 }, { "epoch": 8.412244897959184, "grad_norm": 7.09675931930542, "learning_rate": 4.579795918367347e-05, "loss": 2.9379, "step": 2061 }, { "epoch": 8.416326530612245, "grad_norm": 6.244068145751953, "learning_rate": 4.579591836734694e-05, "loss": 3.172, "step": 2062 }, { "epoch": 8.420408163265305, "grad_norm": 3.6897499561309814, "learning_rate": 4.579387755102041e-05, "loss": 3.6749, "step": 2063 }, { "epoch": 8.424489795918367, "grad_norm": 2.7111752033233643, "learning_rate": 4.579183673469388e-05, "loss": 4.0895, "step": 2064 }, { "epoch": 8.428571428571429, "grad_norm": 2.9361612796783447, "learning_rate": 4.578979591836735e-05, "loss": 3.6821, "step": 2065 }, { "epoch": 8.43265306122449, "grad_norm": 3.993398666381836, "learning_rate": 4.578775510204082e-05, "loss": 3.7249, "step": 2066 }, { "epoch": 8.43673469387755, "grad_norm": 6.058777809143066, "learning_rate": 4.5785714285714285e-05, "loss": 3.525, "step": 2067 }, { "epoch": 8.440816326530612, "grad_norm": 3.8227157592773438, "learning_rate": 4.578367346938775e-05, "loss": 3.7657, "step": 2068 }, { "epoch": 8.444897959183674, "grad_norm": 3.0897603034973145, "learning_rate": 4.5781632653061226e-05, "loss": 3.7833, "step": 2069 }, { "epoch": 8.448979591836734, "grad_norm": 3.9774866104125977, "learning_rate": 4.57795918367347e-05, "loss": 3.7823, "step": 2070 }, { "epoch": 8.453061224489796, "grad_norm": 3.0376598834991455, "learning_rate": 4.577755102040817e-05, "loss": 4.1778, "step": 2071 }, { "epoch": 8.457142857142857, "grad_norm": 3.9598212242126465, "learning_rate": 4.5775510204081635e-05, "loss": 3.8832, "step": 2072 }, { "epoch": 8.461224489795919, "grad_norm": 2.539721727371216, "learning_rate": 4.57734693877551e-05, "loss": 3.9046, "step": 2073 }, { "epoch": 8.465306122448979, "grad_norm": 2.8462882041931152, "learning_rate": 4.5771428571428576e-05, "loss": 4.0234, "step": 2074 }, { "epoch": 8.46938775510204, "grad_norm": 5.847943305969238, "learning_rate": 4.576938775510204e-05, "loss": 3.3591, "step": 2075 }, { "epoch": 8.473469387755102, "grad_norm": 3.0352115631103516, "learning_rate": 4.576734693877551e-05, "loss": 4.191, "step": 2076 }, { "epoch": 8.477551020408164, "grad_norm": 3.7597908973693848, "learning_rate": 4.5765306122448984e-05, "loss": 3.573, "step": 2077 }, { "epoch": 8.481632653061224, "grad_norm": 4.579709529876709, "learning_rate": 4.576326530612245e-05, "loss": 2.845, "step": 2078 }, { "epoch": 8.485714285714286, "grad_norm": 4.950470447540283, "learning_rate": 4.5761224489795925e-05, "loss": 3.6137, "step": 2079 }, { "epoch": 8.489795918367347, "grad_norm": 3.0441324710845947, "learning_rate": 4.575918367346939e-05, "loss": 3.9556, "step": 2080 }, { "epoch": 8.493877551020407, "grad_norm": 3.7552993297576904, "learning_rate": 4.575714285714286e-05, "loss": 3.66, "step": 2081 }, { "epoch": 8.49795918367347, "grad_norm": 3.0498406887054443, "learning_rate": 4.5755102040816326e-05, "loss": 3.7427, "step": 2082 }, { "epoch": 8.50204081632653, "grad_norm": 4.40868616104126, "learning_rate": 4.575306122448979e-05, "loss": 3.4536, "step": 2083 }, { "epoch": 8.506122448979593, "grad_norm": 6.15797233581543, "learning_rate": 4.575102040816327e-05, "loss": 3.1509, "step": 2084 }, { "epoch": 8.510204081632653, "grad_norm": 3.136312484741211, "learning_rate": 4.574897959183674e-05, "loss": 4.0369, "step": 2085 }, { "epoch": 8.514285714285714, "grad_norm": 4.474608898162842, "learning_rate": 4.574693877551021e-05, "loss": 3.6103, "step": 2086 }, { "epoch": 8.518367346938776, "grad_norm": 3.272007703781128, "learning_rate": 4.5744897959183675e-05, "loss": 4.0062, "step": 2087 }, { "epoch": 8.522448979591836, "grad_norm": 3.717165470123291, "learning_rate": 4.574285714285714e-05, "loss": 3.859, "step": 2088 }, { "epoch": 8.526530612244898, "grad_norm": 6.310216903686523, "learning_rate": 4.5740816326530616e-05, "loss": 3.2921, "step": 2089 }, { "epoch": 8.53061224489796, "grad_norm": 3.6101772785186768, "learning_rate": 4.5738775510204083e-05, "loss": 3.6539, "step": 2090 }, { "epoch": 8.534693877551021, "grad_norm": 3.896721601486206, "learning_rate": 4.573673469387755e-05, "loss": 3.7748, "step": 2091 }, { "epoch": 8.538775510204081, "grad_norm": 5.375670433044434, "learning_rate": 4.5734693877551025e-05, "loss": 3.3882, "step": 2092 }, { "epoch": 8.542857142857143, "grad_norm": 3.8442254066467285, "learning_rate": 4.573265306122449e-05, "loss": 3.9367, "step": 2093 }, { "epoch": 8.546938775510204, "grad_norm": 3.5197484493255615, "learning_rate": 4.5730612244897966e-05, "loss": 3.8716, "step": 2094 }, { "epoch": 8.551020408163264, "grad_norm": 3.954022169113159, "learning_rate": 4.572857142857143e-05, "loss": 3.5325, "step": 2095 }, { "epoch": 8.555102040816326, "grad_norm": 2.809762716293335, "learning_rate": 4.57265306122449e-05, "loss": 4.3547, "step": 2096 }, { "epoch": 8.559183673469388, "grad_norm": 3.0707523822784424, "learning_rate": 4.572448979591837e-05, "loss": 3.7491, "step": 2097 }, { "epoch": 8.56326530612245, "grad_norm": 3.3519887924194336, "learning_rate": 4.5722448979591834e-05, "loss": 3.8808, "step": 2098 }, { "epoch": 8.56734693877551, "grad_norm": 5.124628067016602, "learning_rate": 4.572040816326531e-05, "loss": 3.45, "step": 2099 }, { "epoch": 8.571428571428571, "grad_norm": 6.148406028747559, "learning_rate": 4.571836734693878e-05, "loss": 2.7894, "step": 2100 }, { "epoch": 8.575510204081633, "grad_norm": 3.608147144317627, "learning_rate": 4.571632653061225e-05, "loss": 3.7719, "step": 2101 }, { "epoch": 8.579591836734695, "grad_norm": 2.6479604244232178, "learning_rate": 4.5714285714285716e-05, "loss": 3.9958, "step": 2102 }, { "epoch": 8.583673469387755, "grad_norm": 4.648083686828613, "learning_rate": 4.571224489795918e-05, "loss": 3.2228, "step": 2103 }, { "epoch": 8.587755102040816, "grad_norm": 4.832110404968262, "learning_rate": 4.571020408163266e-05, "loss": 3.5328, "step": 2104 }, { "epoch": 8.591836734693878, "grad_norm": 5.780827045440674, "learning_rate": 4.5708163265306124e-05, "loss": 3.3026, "step": 2105 }, { "epoch": 8.59591836734694, "grad_norm": 2.910975694656372, "learning_rate": 4.570612244897959e-05, "loss": 3.8843, "step": 2106 }, { "epoch": 8.6, "grad_norm": 4.331634998321533, "learning_rate": 4.570408163265306e-05, "loss": 3.5116, "step": 2107 }, { "epoch": 8.604081632653061, "grad_norm": 4.840641498565674, "learning_rate": 4.570204081632653e-05, "loss": 3.8508, "step": 2108 }, { "epoch": 8.608163265306123, "grad_norm": 7.579926013946533, "learning_rate": 4.5700000000000006e-05, "loss": 3.0386, "step": 2109 }, { "epoch": 8.612244897959183, "grad_norm": 5.2953877449035645, "learning_rate": 4.5697959183673473e-05, "loss": 3.1852, "step": 2110 }, { "epoch": 8.616326530612245, "grad_norm": 4.127625465393066, "learning_rate": 4.569591836734694e-05, "loss": 3.7064, "step": 2111 }, { "epoch": 8.620408163265306, "grad_norm": 3.2029519081115723, "learning_rate": 4.569387755102041e-05, "loss": 3.8216, "step": 2112 }, { "epoch": 8.624489795918368, "grad_norm": 4.194674968719482, "learning_rate": 4.569183673469388e-05, "loss": 3.5974, "step": 2113 }, { "epoch": 8.628571428571428, "grad_norm": 4.150041580200195, "learning_rate": 4.568979591836735e-05, "loss": 3.7725, "step": 2114 }, { "epoch": 8.63265306122449, "grad_norm": NaN, "learning_rate": 4.5687755102040816e-05, "loss": 1.5681, "step": 2115 }, { "epoch": 8.636734693877552, "grad_norm": 3.0608458518981934, "learning_rate": 4.5687755102040816e-05, "loss": 3.8643, "step": 2116 }, { "epoch": 8.640816326530611, "grad_norm": 8.52764892578125, "learning_rate": 4.568571428571429e-05, "loss": 2.8691, "step": 2117 }, { "epoch": 8.644897959183673, "grad_norm": 4.65006160736084, "learning_rate": 4.568367346938776e-05, "loss": 3.4923, "step": 2118 }, { "epoch": 8.648979591836735, "grad_norm": 3.5792245864868164, "learning_rate": 4.568163265306123e-05, "loss": 3.7528, "step": 2119 }, { "epoch": 8.653061224489797, "grad_norm": 4.7064008712768555, "learning_rate": 4.56795918367347e-05, "loss": 3.59, "step": 2120 }, { "epoch": 8.657142857142857, "grad_norm": 5.692558765411377, "learning_rate": 4.5677551020408165e-05, "loss": 3.3723, "step": 2121 }, { "epoch": 8.661224489795918, "grad_norm": 2.680117130279541, "learning_rate": 4.567551020408163e-05, "loss": 4.0014, "step": 2122 }, { "epoch": 8.66530612244898, "grad_norm": 5.633948802947998, "learning_rate": 4.56734693877551e-05, "loss": 3.2246, "step": 2123 }, { "epoch": 8.66938775510204, "grad_norm": 5.539498329162598, "learning_rate": 4.567142857142857e-05, "loss": 3.7998, "step": 2124 }, { "epoch": 8.673469387755102, "grad_norm": 4.699003219604492, "learning_rate": 4.566938775510205e-05, "loss": 3.4357, "step": 2125 }, { "epoch": 8.677551020408163, "grad_norm": 4.349917411804199, "learning_rate": 4.5667346938775514e-05, "loss": 3.7729, "step": 2126 }, { "epoch": 8.681632653061225, "grad_norm": 5.778204917907715, "learning_rate": 4.566530612244898e-05, "loss": 3.4694, "step": 2127 }, { "epoch": 8.685714285714285, "grad_norm": 4.019281387329102, "learning_rate": 4.566326530612245e-05, "loss": 3.8657, "step": 2128 }, { "epoch": 8.689795918367347, "grad_norm": 3.029322385787964, "learning_rate": 4.566122448979592e-05, "loss": 3.7429, "step": 2129 }, { "epoch": 8.693877551020408, "grad_norm": 4.056897163391113, "learning_rate": 4.565918367346939e-05, "loss": 3.5965, "step": 2130 }, { "epoch": 8.69795918367347, "grad_norm": 4.556769847869873, "learning_rate": 4.5657142857142857e-05, "loss": 3.6944, "step": 2131 }, { "epoch": 8.70204081632653, "grad_norm": 4.44797420501709, "learning_rate": 4.565510204081633e-05, "loss": 3.6689, "step": 2132 }, { "epoch": 8.706122448979592, "grad_norm": 3.357208251953125, "learning_rate": 4.56530612244898e-05, "loss": 3.8411, "step": 2133 }, { "epoch": 8.710204081632654, "grad_norm": 3.478302478790283, "learning_rate": 4.565102040816327e-05, "loss": 3.8713, "step": 2134 }, { "epoch": 8.714285714285714, "grad_norm": 3.2510151863098145, "learning_rate": 4.564897959183674e-05, "loss": 4.0457, "step": 2135 }, { "epoch": 8.718367346938775, "grad_norm": 2.904005527496338, "learning_rate": 4.5646938775510206e-05, "loss": 3.8682, "step": 2136 }, { "epoch": 8.722448979591837, "grad_norm": 4.496063709259033, "learning_rate": 4.564489795918367e-05, "loss": 3.7791, "step": 2137 }, { "epoch": 8.726530612244899, "grad_norm": 4.183704376220703, "learning_rate": 4.564285714285714e-05, "loss": 3.7767, "step": 2138 }, { "epoch": 8.730612244897959, "grad_norm": 4.333836078643799, "learning_rate": 4.5640816326530614e-05, "loss": 3.5402, "step": 2139 }, { "epoch": 8.73469387755102, "grad_norm": 3.141507625579834, "learning_rate": 4.563877551020409e-05, "loss": 3.6968, "step": 2140 }, { "epoch": 8.738775510204082, "grad_norm": 3.539743661880493, "learning_rate": 4.5636734693877555e-05, "loss": 3.7919, "step": 2141 }, { "epoch": 8.742857142857144, "grad_norm": 3.180464267730713, "learning_rate": 4.563469387755102e-05, "loss": 3.8164, "step": 2142 }, { "epoch": 8.746938775510204, "grad_norm": 4.428040027618408, "learning_rate": 4.563265306122449e-05, "loss": 3.6584, "step": 2143 }, { "epoch": 8.751020408163265, "grad_norm": 3.5918774604797363, "learning_rate": 4.563061224489796e-05, "loss": 3.9841, "step": 2144 }, { "epoch": 8.755102040816327, "grad_norm": 3.7016875743865967, "learning_rate": 4.562857142857143e-05, "loss": 3.9017, "step": 2145 }, { "epoch": 8.759183673469387, "grad_norm": 4.254742622375488, "learning_rate": 4.56265306122449e-05, "loss": 3.9721, "step": 2146 }, { "epoch": 8.763265306122449, "grad_norm": 4.038427352905273, "learning_rate": 4.562448979591837e-05, "loss": 3.8707, "step": 2147 }, { "epoch": 8.76734693877551, "grad_norm": 6.010951995849609, "learning_rate": 4.562244897959184e-05, "loss": 3.3305, "step": 2148 }, { "epoch": 8.771428571428572, "grad_norm": 7.208872318267822, "learning_rate": 4.562040816326531e-05, "loss": 2.853, "step": 2149 }, { "epoch": 8.775510204081632, "grad_norm": 3.5854947566986084, "learning_rate": 4.561836734693878e-05, "loss": 4.1131, "step": 2150 }, { "epoch": 8.779591836734694, "grad_norm": 3.3641064167022705, "learning_rate": 4.5616326530612247e-05, "loss": 3.9964, "step": 2151 }, { "epoch": 8.783673469387756, "grad_norm": 2.6809163093566895, "learning_rate": 4.5614285714285714e-05, "loss": 3.8629, "step": 2152 }, { "epoch": 8.787755102040816, "grad_norm": 3.730428457260132, "learning_rate": 4.561224489795918e-05, "loss": 3.5778, "step": 2153 }, { "epoch": 8.791836734693877, "grad_norm": 3.2047340869903564, "learning_rate": 4.5610204081632655e-05, "loss": 3.9225, "step": 2154 }, { "epoch": 8.795918367346939, "grad_norm": 3.9319052696228027, "learning_rate": 4.560816326530613e-05, "loss": 3.9197, "step": 2155 }, { "epoch": 8.8, "grad_norm": 3.008528709411621, "learning_rate": 4.5606122448979596e-05, "loss": 3.8097, "step": 2156 }, { "epoch": 8.80408163265306, "grad_norm": 4.982025146484375, "learning_rate": 4.560408163265306e-05, "loss": 3.7196, "step": 2157 }, { "epoch": 8.808163265306122, "grad_norm": 4.402260780334473, "learning_rate": 4.560204081632653e-05, "loss": 3.1439, "step": 2158 }, { "epoch": 8.812244897959184, "grad_norm": 5.336364269256592, "learning_rate": 4.5600000000000004e-05, "loss": 3.7888, "step": 2159 }, { "epoch": 8.816326530612244, "grad_norm": 3.252803087234497, "learning_rate": 4.559795918367347e-05, "loss": 3.7794, "step": 2160 }, { "epoch": 8.820408163265306, "grad_norm": 3.495887517929077, "learning_rate": 4.559591836734694e-05, "loss": 3.8522, "step": 2161 }, { "epoch": 8.824489795918367, "grad_norm": 4.947022438049316, "learning_rate": 4.5593877551020405e-05, "loss": 3.6354, "step": 2162 }, { "epoch": 8.82857142857143, "grad_norm": 3.5205180644989014, "learning_rate": 4.559183673469388e-05, "loss": 3.7743, "step": 2163 }, { "epoch": 8.83265306122449, "grad_norm": 2.925812244415283, "learning_rate": 4.558979591836735e-05, "loss": 4.0208, "step": 2164 }, { "epoch": 8.83673469387755, "grad_norm": 4.558804512023926, "learning_rate": 4.558775510204082e-05, "loss": 3.4739, "step": 2165 }, { "epoch": 8.840816326530613, "grad_norm": 2.7280123233795166, "learning_rate": 4.558571428571429e-05, "loss": 4.0114, "step": 2166 }, { "epoch": 8.844897959183674, "grad_norm": 5.261590480804443, "learning_rate": 4.5583673469387754e-05, "loss": 3.4877, "step": 2167 }, { "epoch": 8.848979591836734, "grad_norm": 4.544395923614502, "learning_rate": 4.558163265306123e-05, "loss": 3.7191, "step": 2168 }, { "epoch": 8.853061224489796, "grad_norm": 3.3667311668395996, "learning_rate": 4.5579591836734695e-05, "loss": 4.1245, "step": 2169 }, { "epoch": 8.857142857142858, "grad_norm": 7.6999616622924805, "learning_rate": 4.557755102040816e-05, "loss": 3.0647, "step": 2170 }, { "epoch": 8.861224489795918, "grad_norm": 4.395918369293213, "learning_rate": 4.5575510204081636e-05, "loss": 3.751, "step": 2171 }, { "epoch": 8.86530612244898, "grad_norm": 1.6282992362976074, "learning_rate": 4.5573469387755104e-05, "loss": 4.1699, "step": 2172 }, { "epoch": 8.869387755102041, "grad_norm": 3.227646827697754, "learning_rate": 4.557142857142858e-05, "loss": 4.0251, "step": 2173 }, { "epoch": 8.873469387755103, "grad_norm": 3.1621005535125732, "learning_rate": 4.5569387755102045e-05, "loss": 3.8719, "step": 2174 }, { "epoch": 8.877551020408163, "grad_norm": 2.7974352836608887, "learning_rate": 4.556734693877551e-05, "loss": 3.9779, "step": 2175 }, { "epoch": 8.881632653061224, "grad_norm": 3.432586908340454, "learning_rate": 4.556530612244898e-05, "loss": 3.9472, "step": 2176 }, { "epoch": 8.885714285714286, "grad_norm": 3.235727310180664, "learning_rate": 4.5563265306122446e-05, "loss": 3.9604, "step": 2177 }, { "epoch": 8.889795918367348, "grad_norm": 6.447215557098389, "learning_rate": 4.556122448979592e-05, "loss": 3.1008, "step": 2178 }, { "epoch": 8.893877551020408, "grad_norm": 3.7800378799438477, "learning_rate": 4.5559183673469394e-05, "loss": 3.9633, "step": 2179 }, { "epoch": 8.89795918367347, "grad_norm": 2.616582155227661, "learning_rate": 4.555714285714286e-05, "loss": 3.9936, "step": 2180 }, { "epoch": 8.902040816326531, "grad_norm": 5.606696605682373, "learning_rate": 4.555510204081633e-05, "loss": 3.3242, "step": 2181 }, { "epoch": 8.906122448979591, "grad_norm": 5.8379058837890625, "learning_rate": 4.5553061224489795e-05, "loss": 3.0684, "step": 2182 }, { "epoch": 8.910204081632653, "grad_norm": 3.941474199295044, "learning_rate": 4.555102040816327e-05, "loss": 3.6528, "step": 2183 }, { "epoch": 8.914285714285715, "grad_norm": 4.089192867279053, "learning_rate": 4.5548979591836736e-05, "loss": 3.8082, "step": 2184 }, { "epoch": 8.918367346938776, "grad_norm": 5.292041301727295, "learning_rate": 4.55469387755102e-05, "loss": 3.6018, "step": 2185 }, { "epoch": 8.922448979591836, "grad_norm": 4.494574546813965, "learning_rate": 4.554489795918368e-05, "loss": 3.583, "step": 2186 }, { "epoch": 8.926530612244898, "grad_norm": 5.627106189727783, "learning_rate": 4.5542857142857144e-05, "loss": 3.2062, "step": 2187 }, { "epoch": 8.93061224489796, "grad_norm": 4.146030426025391, "learning_rate": 4.554081632653062e-05, "loss": 3.886, "step": 2188 }, { "epoch": 8.93469387755102, "grad_norm": 4.680374622344971, "learning_rate": 4.5538775510204085e-05, "loss": 3.6475, "step": 2189 }, { "epoch": 8.938775510204081, "grad_norm": 2.636439800262451, "learning_rate": 4.553673469387755e-05, "loss": 4.121, "step": 2190 }, { "epoch": 8.942857142857143, "grad_norm": 3.3952548503875732, "learning_rate": 4.553469387755102e-05, "loss": 3.8412, "step": 2191 }, { "epoch": 8.946938775510205, "grad_norm": 3.2659642696380615, "learning_rate": 4.553265306122449e-05, "loss": 3.9956, "step": 2192 }, { "epoch": 8.951020408163265, "grad_norm": 6.667266368865967, "learning_rate": 4.553061224489796e-05, "loss": 3.4257, "step": 2193 }, { "epoch": 8.955102040816326, "grad_norm": 3.644054412841797, "learning_rate": 4.5528571428571435e-05, "loss": 3.8559, "step": 2194 }, { "epoch": 8.959183673469388, "grad_norm": 3.4454920291900635, "learning_rate": 4.55265306122449e-05, "loss": 3.7565, "step": 2195 }, { "epoch": 8.963265306122448, "grad_norm": 3.2357499599456787, "learning_rate": 4.552448979591837e-05, "loss": 4.0177, "step": 2196 }, { "epoch": 8.96734693877551, "grad_norm": 3.113154649734497, "learning_rate": 4.5522448979591836e-05, "loss": 3.7908, "step": 2197 }, { "epoch": 8.971428571428572, "grad_norm": 6.851480484008789, "learning_rate": 4.552040816326531e-05, "loss": 3.3288, "step": 2198 }, { "epoch": 8.975510204081633, "grad_norm": 5.15071964263916, "learning_rate": 4.551836734693878e-05, "loss": 3.7612, "step": 2199 }, { "epoch": 8.979591836734693, "grad_norm": 2.874068021774292, "learning_rate": 4.5516326530612244e-05, "loss": 3.8426, "step": 2200 }, { "epoch": 8.983673469387755, "grad_norm": 6.13784122467041, "learning_rate": 4.551428571428572e-05, "loss": 2.904, "step": 2201 }, { "epoch": 8.987755102040817, "grad_norm": 3.4882354736328125, "learning_rate": 4.5512244897959185e-05, "loss": 3.9855, "step": 2202 }, { "epoch": 8.991836734693878, "grad_norm": 5.277752876281738, "learning_rate": 4.551020408163266e-05, "loss": 3.602, "step": 2203 }, { "epoch": 8.995918367346938, "grad_norm": 8.03091812133789, "learning_rate": 4.5508163265306126e-05, "loss": 2.7565, "step": 2204 }, { "epoch": 9.0, "grad_norm": 3.2225258350372314, "learning_rate": 4.550612244897959e-05, "loss": 3.7824, "step": 2205 }, { "epoch": 9.004081632653062, "grad_norm": 3.3578124046325684, "learning_rate": 4.550408163265306e-05, "loss": 3.5788, "step": 2206 }, { "epoch": 9.008163265306122, "grad_norm": 3.6685948371887207, "learning_rate": 4.5502040816326534e-05, "loss": 3.7294, "step": 2207 }, { "epoch": 9.012244897959183, "grad_norm": 2.614370584487915, "learning_rate": 4.55e-05, "loss": 3.8034, "step": 2208 }, { "epoch": 9.016326530612245, "grad_norm": 2.604295015335083, "learning_rate": 4.5497959183673475e-05, "loss": 3.9645, "step": 2209 }, { "epoch": 9.020408163265307, "grad_norm": 3.4254345893859863, "learning_rate": 4.549591836734694e-05, "loss": 3.6961, "step": 2210 }, { "epoch": 9.024489795918367, "grad_norm": 7.1481781005859375, "learning_rate": 4.549387755102041e-05, "loss": 2.8386, "step": 2211 }, { "epoch": 9.028571428571428, "grad_norm": 2.4893858432769775, "learning_rate": 4.5491836734693883e-05, "loss": 3.9779, "step": 2212 }, { "epoch": 9.03265306122449, "grad_norm": 3.1968235969543457, "learning_rate": 4.548979591836735e-05, "loss": 4.0549, "step": 2213 }, { "epoch": 9.036734693877552, "grad_norm": 4.23896598815918, "learning_rate": 4.548775510204082e-05, "loss": 3.5634, "step": 2214 }, { "epoch": 9.040816326530612, "grad_norm": 9.699271202087402, "learning_rate": 4.5485714285714285e-05, "loss": 3.2597, "step": 2215 }, { "epoch": 9.044897959183674, "grad_norm": 3.763866662979126, "learning_rate": 4.548367346938775e-05, "loss": 3.9174, "step": 2216 }, { "epoch": 9.048979591836735, "grad_norm": 3.26397967338562, "learning_rate": 4.5481632653061226e-05, "loss": 3.7732, "step": 2217 }, { "epoch": 9.053061224489795, "grad_norm": 3.6593239307403564, "learning_rate": 4.54795918367347e-05, "loss": 3.7984, "step": 2218 }, { "epoch": 9.057142857142857, "grad_norm": 3.7393832206726074, "learning_rate": 4.547755102040817e-05, "loss": 3.8331, "step": 2219 }, { "epoch": 9.061224489795919, "grad_norm": 4.274553298950195, "learning_rate": 4.5475510204081634e-05, "loss": 3.5402, "step": 2220 }, { "epoch": 9.06530612244898, "grad_norm": 3.232009172439575, "learning_rate": 4.54734693877551e-05, "loss": 3.901, "step": 2221 }, { "epoch": 9.06938775510204, "grad_norm": 3.202669143676758, "learning_rate": 4.5471428571428575e-05, "loss": 3.9524, "step": 2222 }, { "epoch": 9.073469387755102, "grad_norm": 4.008829593658447, "learning_rate": 4.546938775510204e-05, "loss": 3.6295, "step": 2223 }, { "epoch": 9.077551020408164, "grad_norm": 4.073777675628662, "learning_rate": 4.546734693877551e-05, "loss": 3.8853, "step": 2224 }, { "epoch": 9.081632653061224, "grad_norm": 5.788895606994629, "learning_rate": 4.546530612244898e-05, "loss": 3.7003, "step": 2225 }, { "epoch": 9.085714285714285, "grad_norm": 3.303926467895508, "learning_rate": 4.546326530612245e-05, "loss": 3.7026, "step": 2226 }, { "epoch": 9.089795918367347, "grad_norm": 3.2811646461486816, "learning_rate": 4.5461224489795924e-05, "loss": 3.6766, "step": 2227 }, { "epoch": 9.093877551020409, "grad_norm": 2.6797728538513184, "learning_rate": 4.545918367346939e-05, "loss": 4.1341, "step": 2228 }, { "epoch": 9.097959183673469, "grad_norm": 2.852165699005127, "learning_rate": 4.545714285714286e-05, "loss": 3.9883, "step": 2229 }, { "epoch": 9.10204081632653, "grad_norm": 3.5684428215026855, "learning_rate": 4.5455102040816326e-05, "loss": 3.8262, "step": 2230 }, { "epoch": 9.106122448979592, "grad_norm": 5.770827770233154, "learning_rate": 4.545306122448979e-05, "loss": 3.5022, "step": 2231 }, { "epoch": 9.110204081632654, "grad_norm": 4.675970554351807, "learning_rate": 4.545102040816327e-05, "loss": 3.5453, "step": 2232 }, { "epoch": 9.114285714285714, "grad_norm": 3.9797675609588623, "learning_rate": 4.544897959183674e-05, "loss": 3.8914, "step": 2233 }, { "epoch": 9.118367346938776, "grad_norm": 2.9387879371643066, "learning_rate": 4.544693877551021e-05, "loss": 4.0976, "step": 2234 }, { "epoch": 9.122448979591837, "grad_norm": 9.69559097290039, "learning_rate": 4.5444897959183675e-05, "loss": 1.6013, "step": 2235 }, { "epoch": 9.126530612244897, "grad_norm": 3.4122085571289062, "learning_rate": 4.544285714285714e-05, "loss": 3.6722, "step": 2236 }, { "epoch": 9.130612244897959, "grad_norm": 4.078353404998779, "learning_rate": 4.5440816326530616e-05, "loss": 3.8727, "step": 2237 }, { "epoch": 9.13469387755102, "grad_norm": 3.3800041675567627, "learning_rate": 4.543877551020408e-05, "loss": 3.717, "step": 2238 }, { "epoch": 9.138775510204082, "grad_norm": 5.159090042114258, "learning_rate": 4.543673469387755e-05, "loss": 3.3956, "step": 2239 }, { "epoch": 9.142857142857142, "grad_norm": 6.087352752685547, "learning_rate": 4.5434693877551024e-05, "loss": 3.2414, "step": 2240 }, { "epoch": 9.146938775510204, "grad_norm": 3.3181188106536865, "learning_rate": 4.543265306122449e-05, "loss": 3.8888, "step": 2241 }, { "epoch": 9.151020408163266, "grad_norm": 3.6297051906585693, "learning_rate": 4.5430612244897965e-05, "loss": 3.8408, "step": 2242 }, { "epoch": 9.155102040816326, "grad_norm": 6.310901641845703, "learning_rate": 4.542857142857143e-05, "loss": 3.1478, "step": 2243 }, { "epoch": 9.159183673469387, "grad_norm": 5.643520355224609, "learning_rate": 4.54265306122449e-05, "loss": 3.6349, "step": 2244 }, { "epoch": 9.16326530612245, "grad_norm": 3.4389145374298096, "learning_rate": 4.5424489795918366e-05, "loss": 3.9893, "step": 2245 }, { "epoch": 9.16734693877551, "grad_norm": 4.380707263946533, "learning_rate": 4.5422448979591834e-05, "loss": 3.6611, "step": 2246 }, { "epoch": 9.17142857142857, "grad_norm": 4.372661590576172, "learning_rate": 4.542040816326531e-05, "loss": 3.8711, "step": 2247 }, { "epoch": 9.175510204081633, "grad_norm": 3.0044310092926025, "learning_rate": 4.541836734693878e-05, "loss": 3.7844, "step": 2248 }, { "epoch": 9.179591836734694, "grad_norm": 6.099770545959473, "learning_rate": 4.541632653061225e-05, "loss": 2.776, "step": 2249 }, { "epoch": 9.183673469387756, "grad_norm": 2.9282217025756836, "learning_rate": 4.5414285714285716e-05, "loss": 4.1531, "step": 2250 }, { "epoch": 9.187755102040816, "grad_norm": 4.276403427124023, "learning_rate": 4.541224489795919e-05, "loss": 3.5386, "step": 2251 }, { "epoch": 9.191836734693878, "grad_norm": 4.147186756134033, "learning_rate": 4.5410204081632657e-05, "loss": 3.7135, "step": 2252 }, { "epoch": 9.19591836734694, "grad_norm": 4.111138343811035, "learning_rate": 4.5408163265306124e-05, "loss": 3.6567, "step": 2253 }, { "epoch": 9.2, "grad_norm": 4.9662861824035645, "learning_rate": 4.540612244897959e-05, "loss": 3.4582, "step": 2254 }, { "epoch": 9.204081632653061, "grad_norm": 5.966487884521484, "learning_rate": 4.5404081632653065e-05, "loss": 3.2851, "step": 2255 }, { "epoch": 9.208163265306123, "grad_norm": 4.766385555267334, "learning_rate": 4.540204081632654e-05, "loss": 3.4398, "step": 2256 }, { "epoch": 9.212244897959184, "grad_norm": 3.4530699253082275, "learning_rate": 4.5400000000000006e-05, "loss": 3.8377, "step": 2257 }, { "epoch": 9.216326530612244, "grad_norm": 4.3592376708984375, "learning_rate": 4.539795918367347e-05, "loss": 3.6478, "step": 2258 }, { "epoch": 9.220408163265306, "grad_norm": 6.9906535148620605, "learning_rate": 4.539591836734694e-05, "loss": 2.7872, "step": 2259 }, { "epoch": 9.224489795918368, "grad_norm": 7.638073921203613, "learning_rate": 4.539387755102041e-05, "loss": 3.0349, "step": 2260 }, { "epoch": 9.228571428571428, "grad_norm": 1.710308313369751, "learning_rate": 4.539183673469388e-05, "loss": 4.1285, "step": 2261 }, { "epoch": 9.23265306122449, "grad_norm": 3.419379234313965, "learning_rate": 4.538979591836735e-05, "loss": 3.6807, "step": 2262 }, { "epoch": 9.236734693877551, "grad_norm": 6.182472229003906, "learning_rate": 4.538775510204082e-05, "loss": 3.4431, "step": 2263 }, { "epoch": 9.240816326530613, "grad_norm": 5.238536357879639, "learning_rate": 4.538571428571429e-05, "loss": 3.6319, "step": 2264 }, { "epoch": 9.244897959183673, "grad_norm": 5.911540985107422, "learning_rate": 4.5383673469387756e-05, "loss": 3.237, "step": 2265 }, { "epoch": 9.248979591836735, "grad_norm": 5.035656452178955, "learning_rate": 4.538163265306123e-05, "loss": 3.5465, "step": 2266 }, { "epoch": 9.253061224489796, "grad_norm": 5.071677207946777, "learning_rate": 4.53795918367347e-05, "loss": 3.4667, "step": 2267 }, { "epoch": 9.257142857142856, "grad_norm": 3.67299747467041, "learning_rate": 4.5377551020408164e-05, "loss": 3.6955, "step": 2268 }, { "epoch": 9.261224489795918, "grad_norm": 6.3514227867126465, "learning_rate": 4.537551020408163e-05, "loss": 3.4102, "step": 2269 }, { "epoch": 9.26530612244898, "grad_norm": 5.411976337432861, "learning_rate": 4.53734693877551e-05, "loss": 3.5524, "step": 2270 }, { "epoch": 9.269387755102041, "grad_norm": 4.748917579650879, "learning_rate": 4.537142857142857e-05, "loss": 3.7345, "step": 2271 }, { "epoch": 9.273469387755101, "grad_norm": 6.133298397064209, "learning_rate": 4.5369387755102047e-05, "loss": 3.7046, "step": 2272 }, { "epoch": 9.277551020408163, "grad_norm": 5.974246501922607, "learning_rate": 4.5367346938775514e-05, "loss": 3.5937, "step": 2273 }, { "epoch": 9.281632653061225, "grad_norm": 4.160175800323486, "learning_rate": 4.536530612244898e-05, "loss": 3.7095, "step": 2274 }, { "epoch": 9.285714285714286, "grad_norm": 3.548449993133545, "learning_rate": 4.536326530612245e-05, "loss": 4.0273, "step": 2275 }, { "epoch": 9.289795918367346, "grad_norm": 3.6706151962280273, "learning_rate": 4.536122448979592e-05, "loss": 3.7044, "step": 2276 }, { "epoch": 9.293877551020408, "grad_norm": 3.8313422203063965, "learning_rate": 4.535918367346939e-05, "loss": 3.7951, "step": 2277 }, { "epoch": 9.29795918367347, "grad_norm": 3.227854013442993, "learning_rate": 4.5357142857142856e-05, "loss": 3.9466, "step": 2278 }, { "epoch": 9.30204081632653, "grad_norm": 5.979804515838623, "learning_rate": 4.535510204081633e-05, "loss": 3.3443, "step": 2279 }, { "epoch": 9.306122448979592, "grad_norm": 4.37161922454834, "learning_rate": 4.53530612244898e-05, "loss": 3.4644, "step": 2280 }, { "epoch": 9.310204081632653, "grad_norm": 4.816438674926758, "learning_rate": 4.535102040816327e-05, "loss": 3.6574, "step": 2281 }, { "epoch": 9.314285714285715, "grad_norm": 4.340306758880615, "learning_rate": 4.534897959183674e-05, "loss": 3.4694, "step": 2282 }, { "epoch": 9.318367346938775, "grad_norm": 6.250181674957275, "learning_rate": 4.5346938775510205e-05, "loss": 3.2577, "step": 2283 }, { "epoch": 9.322448979591837, "grad_norm": 6.210066318511963, "learning_rate": 4.534489795918367e-05, "loss": 3.1876, "step": 2284 }, { "epoch": 9.326530612244898, "grad_norm": 4.05753231048584, "learning_rate": 4.534285714285714e-05, "loss": 3.7493, "step": 2285 }, { "epoch": 9.33061224489796, "grad_norm": 4.557634353637695, "learning_rate": 4.5340816326530613e-05, "loss": 3.8249, "step": 2286 }, { "epoch": 9.33469387755102, "grad_norm": 4.465378761291504, "learning_rate": 4.533877551020409e-05, "loss": 3.5891, "step": 2287 }, { "epoch": 9.338775510204082, "grad_norm": 5.549867153167725, "learning_rate": 4.5336734693877554e-05, "loss": 3.6667, "step": 2288 }, { "epoch": 9.342857142857143, "grad_norm": 5.632387638092041, "learning_rate": 4.533469387755102e-05, "loss": 3.4151, "step": 2289 }, { "epoch": 9.346938775510203, "grad_norm": 4.167417526245117, "learning_rate": 4.533265306122449e-05, "loss": 3.8701, "step": 2290 }, { "epoch": 9.351020408163265, "grad_norm": 4.839095592498779, "learning_rate": 4.533061224489796e-05, "loss": 3.1862, "step": 2291 }, { "epoch": 9.355102040816327, "grad_norm": 4.981740474700928, "learning_rate": 4.532857142857143e-05, "loss": 3.7593, "step": 2292 }, { "epoch": 9.359183673469389, "grad_norm": 3.233217477798462, "learning_rate": 4.53265306122449e-05, "loss": 3.7614, "step": 2293 }, { "epoch": 9.363265306122448, "grad_norm": 2.8567748069763184, "learning_rate": 4.532448979591837e-05, "loss": 3.6277, "step": 2294 }, { "epoch": 9.36734693877551, "grad_norm": 3.8175032138824463, "learning_rate": 4.5322448979591845e-05, "loss": 3.8811, "step": 2295 }, { "epoch": 9.371428571428572, "grad_norm": 9.239814758300781, "learning_rate": 4.532040816326531e-05, "loss": 2.9493, "step": 2296 }, { "epoch": 9.375510204081632, "grad_norm": 6.469841003417969, "learning_rate": 4.531836734693878e-05, "loss": 3.3944, "step": 2297 }, { "epoch": 9.379591836734694, "grad_norm": 4.790938854217529, "learning_rate": 4.5316326530612246e-05, "loss": 3.7153, "step": 2298 }, { "epoch": 9.383673469387755, "grad_norm": 3.0096728801727295, "learning_rate": 4.531428571428571e-05, "loss": 3.8206, "step": 2299 }, { "epoch": 9.387755102040817, "grad_norm": 4.841740131378174, "learning_rate": 4.531224489795919e-05, "loss": 3.6345, "step": 2300 }, { "epoch": 9.391836734693877, "grad_norm": 3.6361124515533447, "learning_rate": 4.5310204081632654e-05, "loss": 3.862, "step": 2301 }, { "epoch": 9.395918367346939, "grad_norm": 4.910335540771484, "learning_rate": 4.530816326530613e-05, "loss": 3.5587, "step": 2302 }, { "epoch": 9.4, "grad_norm": 4.532353401184082, "learning_rate": 4.5306122448979595e-05, "loss": 3.6464, "step": 2303 }, { "epoch": 9.404081632653062, "grad_norm": 4.233348846435547, "learning_rate": 4.530408163265306e-05, "loss": 3.7106, "step": 2304 }, { "epoch": 9.408163265306122, "grad_norm": 4.316052436828613, "learning_rate": 4.5302040816326536e-05, "loss": 3.7335, "step": 2305 }, { "epoch": 9.412244897959184, "grad_norm": 3.4178903102874756, "learning_rate": 4.53e-05, "loss": 3.9805, "step": 2306 }, { "epoch": 9.416326530612245, "grad_norm": 3.689901113510132, "learning_rate": 4.529795918367347e-05, "loss": 3.7835, "step": 2307 }, { "epoch": 9.420408163265305, "grad_norm": 3.2521519660949707, "learning_rate": 4.529591836734694e-05, "loss": 3.8339, "step": 2308 }, { "epoch": 9.424489795918367, "grad_norm": 3.7606751918792725, "learning_rate": 4.529387755102041e-05, "loss": 3.4878, "step": 2309 }, { "epoch": 9.428571428571429, "grad_norm": 6.655898094177246, "learning_rate": 4.5291836734693885e-05, "loss": 2.8237, "step": 2310 }, { "epoch": 9.43265306122449, "grad_norm": 4.23257303237915, "learning_rate": 4.528979591836735e-05, "loss": 3.5476, "step": 2311 }, { "epoch": 9.43673469387755, "grad_norm": 3.344896078109741, "learning_rate": 4.528775510204082e-05, "loss": 3.8327, "step": 2312 }, { "epoch": 9.440816326530612, "grad_norm": 3.436870813369751, "learning_rate": 4.528571428571429e-05, "loss": 3.8093, "step": 2313 }, { "epoch": 9.444897959183674, "grad_norm": 5.398637294769287, "learning_rate": 4.5283673469387754e-05, "loss": 3.1253, "step": 2314 }, { "epoch": 9.448979591836734, "grad_norm": 4.9079909324646, "learning_rate": 4.528163265306123e-05, "loss": 3.3861, "step": 2315 }, { "epoch": 9.453061224489796, "grad_norm": 7.550953388214111, "learning_rate": 4.5279591836734695e-05, "loss": 2.9733, "step": 2316 }, { "epoch": 9.457142857142857, "grad_norm": 3.026761770248413, "learning_rate": 4.527755102040816e-05, "loss": 4.1461, "step": 2317 }, { "epoch": 9.461224489795919, "grad_norm": 3.345522880554199, "learning_rate": 4.5275510204081636e-05, "loss": 3.9561, "step": 2318 }, { "epoch": 9.465306122448979, "grad_norm": 3.2176053524017334, "learning_rate": 4.52734693877551e-05, "loss": 4.148, "step": 2319 }, { "epoch": 9.46938775510204, "grad_norm": 4.97713565826416, "learning_rate": 4.527142857142858e-05, "loss": 3.5536, "step": 2320 }, { "epoch": 9.473469387755102, "grad_norm": 4.701786518096924, "learning_rate": 4.5269387755102044e-05, "loss": 3.5409, "step": 2321 }, { "epoch": 9.477551020408164, "grad_norm": 4.22877311706543, "learning_rate": 4.526734693877551e-05, "loss": 3.7571, "step": 2322 }, { "epoch": 9.481632653061224, "grad_norm": 2.711972713470459, "learning_rate": 4.526530612244898e-05, "loss": 3.8841, "step": 2323 }, { "epoch": 9.485714285714286, "grad_norm": 3.087956666946411, "learning_rate": 4.5263265306122445e-05, "loss": 3.8381, "step": 2324 }, { "epoch": 9.489795918367347, "grad_norm": 6.019250392913818, "learning_rate": 4.526122448979592e-05, "loss": 3.4926, "step": 2325 }, { "epoch": 9.493877551020407, "grad_norm": 5.035147190093994, "learning_rate": 4.525918367346939e-05, "loss": 3.7051, "step": 2326 }, { "epoch": 9.49795918367347, "grad_norm": 5.969873428344727, "learning_rate": 4.525714285714286e-05, "loss": 3.0615, "step": 2327 }, { "epoch": 9.50204081632653, "grad_norm": 3.9178924560546875, "learning_rate": 4.525510204081633e-05, "loss": 3.8639, "step": 2328 }, { "epoch": 9.506122448979593, "grad_norm": 6.020920753479004, "learning_rate": 4.5253061224489795e-05, "loss": 3.5659, "step": 2329 }, { "epoch": 9.510204081632653, "grad_norm": 5.99649715423584, "learning_rate": 4.525102040816327e-05, "loss": 3.8704, "step": 2330 }, { "epoch": 9.514285714285714, "grad_norm": 7.137591361999512, "learning_rate": 4.5248979591836736e-05, "loss": 3.3619, "step": 2331 }, { "epoch": 9.518367346938776, "grad_norm": 4.059383392333984, "learning_rate": 4.52469387755102e-05, "loss": 3.7397, "step": 2332 }, { "epoch": 9.522448979591836, "grad_norm": 4.540074825286865, "learning_rate": 4.524489795918368e-05, "loss": 3.0984, "step": 2333 }, { "epoch": 9.526530612244898, "grad_norm": 2.8334081172943115, "learning_rate": 4.5242857142857144e-05, "loss": 4.0923, "step": 2334 }, { "epoch": 9.53061224489796, "grad_norm": 7.936448097229004, "learning_rate": 4.524081632653062e-05, "loss": 2.6228, "step": 2335 }, { "epoch": 9.534693877551021, "grad_norm": 3.1399309635162354, "learning_rate": 4.5238775510204085e-05, "loss": 3.6742, "step": 2336 }, { "epoch": 9.538775510204081, "grad_norm": 3.218625783920288, "learning_rate": 4.523673469387755e-05, "loss": 3.6762, "step": 2337 }, { "epoch": 9.542857142857143, "grad_norm": 6.158787727355957, "learning_rate": 4.523469387755102e-05, "loss": 3.5889, "step": 2338 }, { "epoch": 9.546938775510204, "grad_norm": 3.546679735183716, "learning_rate": 4.5232653061224486e-05, "loss": 3.9348, "step": 2339 }, { "epoch": 9.551020408163264, "grad_norm": 4.163567543029785, "learning_rate": 4.523061224489796e-05, "loss": 3.8443, "step": 2340 }, { "epoch": 9.555102040816326, "grad_norm": 2.680288076400757, "learning_rate": 4.5228571428571434e-05, "loss": 3.9371, "step": 2341 }, { "epoch": 9.559183673469388, "grad_norm": 5.493936061859131, "learning_rate": 4.52265306122449e-05, "loss": 3.3109, "step": 2342 }, { "epoch": 9.56326530612245, "grad_norm": 4.992196559906006, "learning_rate": 4.522448979591837e-05, "loss": 3.6364, "step": 2343 }, { "epoch": 9.56734693877551, "grad_norm": 5.08183479309082, "learning_rate": 4.522244897959184e-05, "loss": 3.8562, "step": 2344 }, { "epoch": 9.571428571428571, "grad_norm": 4.168805122375488, "learning_rate": 4.522040816326531e-05, "loss": 3.8378, "step": 2345 }, { "epoch": 9.575510204081633, "grad_norm": 3.8664867877960205, "learning_rate": 4.5218367346938776e-05, "loss": 3.7817, "step": 2346 }, { "epoch": 9.579591836734695, "grad_norm": 6.444253921508789, "learning_rate": 4.5216326530612244e-05, "loss": 3.7323, "step": 2347 }, { "epoch": 9.583673469387755, "grad_norm": 2.955501079559326, "learning_rate": 4.521428571428572e-05, "loss": 4.0019, "step": 2348 }, { "epoch": 9.587755102040816, "grad_norm": 4.455896854400635, "learning_rate": 4.521224489795919e-05, "loss": 3.6293, "step": 2349 }, { "epoch": 9.591836734693878, "grad_norm": 6.405302047729492, "learning_rate": 4.521020408163266e-05, "loss": 3.2503, "step": 2350 }, { "epoch": 9.59591836734694, "grad_norm": 6.70060920715332, "learning_rate": 4.5208163265306126e-05, "loss": 3.2066, "step": 2351 }, { "epoch": 9.6, "grad_norm": 5.934531211853027, "learning_rate": 4.520612244897959e-05, "loss": 3.2173, "step": 2352 }, { "epoch": 9.604081632653061, "grad_norm": 4.348560810089111, "learning_rate": 4.520408163265306e-05, "loss": 3.6424, "step": 2353 }, { "epoch": 9.608163265306123, "grad_norm": 7.712652683258057, "learning_rate": 4.5202040816326534e-05, "loss": 2.5673, "step": 2354 }, { "epoch": 9.612244897959183, "grad_norm": 6.941705226898193, "learning_rate": 4.52e-05, "loss": 3.3019, "step": 2355 }, { "epoch": 9.616326530612245, "grad_norm": 3.7333366870880127, "learning_rate": 4.5197959183673475e-05, "loss": 3.7412, "step": 2356 }, { "epoch": 9.620408163265306, "grad_norm": 6.000734806060791, "learning_rate": 4.519591836734694e-05, "loss": 3.7084, "step": 2357 }, { "epoch": 9.624489795918368, "grad_norm": 3.61344575881958, "learning_rate": 4.519387755102041e-05, "loss": 4.0722, "step": 2358 }, { "epoch": 9.628571428571428, "grad_norm": 3.411996841430664, "learning_rate": 4.519183673469388e-05, "loss": 3.715, "step": 2359 }, { "epoch": 9.63265306122449, "grad_norm": 2.7851572036743164, "learning_rate": 4.518979591836735e-05, "loss": 4.057, "step": 2360 }, { "epoch": 9.636734693877552, "grad_norm": 5.297043323516846, "learning_rate": 4.518775510204082e-05, "loss": 3.4034, "step": 2361 }, { "epoch": 9.640816326530611, "grad_norm": 4.68658447265625, "learning_rate": 4.5185714285714284e-05, "loss": 3.4894, "step": 2362 }, { "epoch": 9.644897959183673, "grad_norm": 5.911129951477051, "learning_rate": 4.518367346938776e-05, "loss": 3.4051, "step": 2363 }, { "epoch": 9.648979591836735, "grad_norm": 3.2890512943267822, "learning_rate": 4.518163265306123e-05, "loss": 3.8306, "step": 2364 }, { "epoch": 9.653061224489797, "grad_norm": 6.014523029327393, "learning_rate": 4.51795918367347e-05, "loss": 3.5115, "step": 2365 }, { "epoch": 9.657142857142857, "grad_norm": 4.398684501647949, "learning_rate": 4.5177551020408166e-05, "loss": 3.7384, "step": 2366 }, { "epoch": 9.661224489795918, "grad_norm": 4.497523784637451, "learning_rate": 4.5175510204081634e-05, "loss": 3.7132, "step": 2367 }, { "epoch": 9.66530612244898, "grad_norm": 8.618524551391602, "learning_rate": 4.51734693877551e-05, "loss": 2.8749, "step": 2368 }, { "epoch": 9.66938775510204, "grad_norm": NaN, "learning_rate": 4.5171428571428575e-05, "loss": 3.9598, "step": 2369 }, { "epoch": 9.673469387755102, "grad_norm": 4.579023361206055, "learning_rate": 4.5171428571428575e-05, "loss": 3.3065, "step": 2370 }, { "epoch": 9.677551020408163, "grad_norm": 3.055218458175659, "learning_rate": 4.516938775510204e-05, "loss": 3.9707, "step": 2371 }, { "epoch": 9.681632653061225, "grad_norm": 4.083169460296631, "learning_rate": 4.516734693877551e-05, "loss": 3.6172, "step": 2372 }, { "epoch": 9.685714285714285, "grad_norm": 3.9402542114257812, "learning_rate": 4.516530612244898e-05, "loss": 3.6157, "step": 2373 }, { "epoch": 9.689795918367347, "grad_norm": 4.998456001281738, "learning_rate": 4.516326530612245e-05, "loss": 3.6368, "step": 2374 }, { "epoch": 9.693877551020408, "grad_norm": 3.816556215286255, "learning_rate": 4.5161224489795924e-05, "loss": 3.9486, "step": 2375 }, { "epoch": 9.69795918367347, "grad_norm": 3.3164122104644775, "learning_rate": 4.515918367346939e-05, "loss": 3.7422, "step": 2376 }, { "epoch": 9.70204081632653, "grad_norm": 3.199104070663452, "learning_rate": 4.515714285714286e-05, "loss": 3.8912, "step": 2377 }, { "epoch": 9.706122448979592, "grad_norm": 3.535993814468384, "learning_rate": 4.5155102040816325e-05, "loss": 3.8722, "step": 2378 }, { "epoch": 9.710204081632654, "grad_norm": 5.766060829162598, "learning_rate": 4.515306122448979e-05, "loss": 3.5383, "step": 2379 }, { "epoch": 9.714285714285714, "grad_norm": 4.093536376953125, "learning_rate": 4.5151020408163266e-05, "loss": 3.8439, "step": 2380 }, { "epoch": 9.718367346938775, "grad_norm": 7.039300441741943, "learning_rate": 4.514897959183674e-05, "loss": 3.0248, "step": 2381 }, { "epoch": 9.722448979591837, "grad_norm": 2.792757749557495, "learning_rate": 4.514693877551021e-05, "loss": 3.9595, "step": 2382 }, { "epoch": 9.726530612244899, "grad_norm": 3.5100386142730713, "learning_rate": 4.5144897959183674e-05, "loss": 4.0064, "step": 2383 }, { "epoch": 9.730612244897959, "grad_norm": 5.549649715423584, "learning_rate": 4.514285714285714e-05, "loss": 3.5145, "step": 2384 }, { "epoch": 9.73469387755102, "grad_norm": 3.4187583923339844, "learning_rate": 4.5140816326530615e-05, "loss": 3.6403, "step": 2385 }, { "epoch": 9.738775510204082, "grad_norm": 3.592864990234375, "learning_rate": 4.513877551020408e-05, "loss": 3.7963, "step": 2386 }, { "epoch": 9.742857142857144, "grad_norm": 5.386263847351074, "learning_rate": 4.513673469387755e-05, "loss": 3.2173, "step": 2387 }, { "epoch": 9.746938775510204, "grad_norm": 5.288492202758789, "learning_rate": 4.5134693877551023e-05, "loss": 2.8142, "step": 2388 }, { "epoch": 9.751020408163265, "grad_norm": 4.769392013549805, "learning_rate": 4.51326530612245e-05, "loss": 3.4518, "step": 2389 }, { "epoch": 9.755102040816327, "grad_norm": 4.428879261016846, "learning_rate": 4.5130612244897965e-05, "loss": 3.8285, "step": 2390 }, { "epoch": 9.759183673469387, "grad_norm": 3.9080026149749756, "learning_rate": 4.512857142857143e-05, "loss": 3.6948, "step": 2391 }, { "epoch": 9.763265306122449, "grad_norm": 3.290970802307129, "learning_rate": 4.51265306122449e-05, "loss": 3.8297, "step": 2392 }, { "epoch": 9.76734693877551, "grad_norm": 2.812297821044922, "learning_rate": 4.5124489795918366e-05, "loss": 4.195, "step": 2393 }, { "epoch": 9.771428571428572, "grad_norm": 7.409860134124756, "learning_rate": 4.512244897959184e-05, "loss": 2.7517, "step": 2394 }, { "epoch": 9.775510204081632, "grad_norm": 5.009073257446289, "learning_rate": 4.512040816326531e-05, "loss": 3.5601, "step": 2395 }, { "epoch": 9.779591836734694, "grad_norm": 3.370257616043091, "learning_rate": 4.511836734693878e-05, "loss": 3.7249, "step": 2396 }, { "epoch": 9.783673469387756, "grad_norm": 2.412320613861084, "learning_rate": 4.511632653061225e-05, "loss": 3.9029, "step": 2397 }, { "epoch": 9.787755102040816, "grad_norm": 2.8196523189544678, "learning_rate": 4.5114285714285715e-05, "loss": 4.3229, "step": 2398 }, { "epoch": 9.791836734693877, "grad_norm": 4.385014533996582, "learning_rate": 4.511224489795919e-05, "loss": 3.9641, "step": 2399 }, { "epoch": 9.795918367346939, "grad_norm": 4.580752372741699, "learning_rate": 4.5110204081632656e-05, "loss": 3.4001, "step": 2400 }, { "epoch": 9.8, "grad_norm": 3.9265425205230713, "learning_rate": 4.510816326530612e-05, "loss": 3.8343, "step": 2401 }, { "epoch": 9.80408163265306, "grad_norm": 6.535552978515625, "learning_rate": 4.510612244897959e-05, "loss": 3.3365, "step": 2402 }, { "epoch": 9.808163265306122, "grad_norm": 3.0849521160125732, "learning_rate": 4.5104081632653064e-05, "loss": 4.0745, "step": 2403 }, { "epoch": 9.812244897959184, "grad_norm": 4.4491801261901855, "learning_rate": 4.510204081632654e-05, "loss": 3.7135, "step": 2404 }, { "epoch": 9.816326530612244, "grad_norm": 3.290451765060425, "learning_rate": 4.5100000000000005e-05, "loss": 3.8509, "step": 2405 }, { "epoch": 9.820408163265306, "grad_norm": 4.811617851257324, "learning_rate": 4.509795918367347e-05, "loss": 3.0639, "step": 2406 }, { "epoch": 9.824489795918367, "grad_norm": 4.771734714508057, "learning_rate": 4.509591836734694e-05, "loss": 3.7725, "step": 2407 }, { "epoch": 9.82857142857143, "grad_norm": 6.0913238525390625, "learning_rate": 4.509387755102041e-05, "loss": 3.1612, "step": 2408 }, { "epoch": 9.83265306122449, "grad_norm": 6.9171648025512695, "learning_rate": 4.509183673469388e-05, "loss": 2.8859, "step": 2409 }, { "epoch": 9.83673469387755, "grad_norm": 3.3286478519439697, "learning_rate": 4.508979591836735e-05, "loss": 3.9874, "step": 2410 }, { "epoch": 9.840816326530613, "grad_norm": 8.285677909851074, "learning_rate": 4.508775510204082e-05, "loss": 2.7111, "step": 2411 }, { "epoch": 9.844897959183674, "grad_norm": 4.138667106628418, "learning_rate": 4.508571428571429e-05, "loss": 3.5278, "step": 2412 }, { "epoch": 9.848979591836734, "grad_norm": 3.9611716270446777, "learning_rate": 4.5083673469387756e-05, "loss": 4.0196, "step": 2413 }, { "epoch": 9.853061224489796, "grad_norm": 6.017033100128174, "learning_rate": 4.508163265306123e-05, "loss": 3.3183, "step": 2414 }, { "epoch": 9.857142857142858, "grad_norm": 3.843336582183838, "learning_rate": 4.50795918367347e-05, "loss": 3.9849, "step": 2415 }, { "epoch": 9.861224489795918, "grad_norm": 2.8121771812438965, "learning_rate": 4.5077551020408164e-05, "loss": 3.8948, "step": 2416 }, { "epoch": 9.86530612244898, "grad_norm": 4.302929878234863, "learning_rate": 4.507551020408163e-05, "loss": 3.8688, "step": 2417 }, { "epoch": 9.869387755102041, "grad_norm": 5.066306114196777, "learning_rate": 4.5073469387755105e-05, "loss": 3.605, "step": 2418 }, { "epoch": 9.873469387755103, "grad_norm": 3.9229447841644287, "learning_rate": 4.507142857142858e-05, "loss": 3.8768, "step": 2419 }, { "epoch": 9.877551020408163, "grad_norm": 4.104409694671631, "learning_rate": 4.5069387755102046e-05, "loss": 3.6473, "step": 2420 }, { "epoch": 9.881632653061224, "grad_norm": 3.0893170833587646, "learning_rate": 4.506734693877551e-05, "loss": 3.7796, "step": 2421 }, { "epoch": 9.885714285714286, "grad_norm": 2.4359822273254395, "learning_rate": 4.506530612244898e-05, "loss": 4.196, "step": 2422 }, { "epoch": 9.889795918367348, "grad_norm": 4.0155415534973145, "learning_rate": 4.506326530612245e-05, "loss": 3.6204, "step": 2423 }, { "epoch": 9.893877551020408, "grad_norm": 5.081489086151123, "learning_rate": 4.506122448979592e-05, "loss": 3.734, "step": 2424 }, { "epoch": 9.89795918367347, "grad_norm": 3.2137067317962646, "learning_rate": 4.505918367346939e-05, "loss": 3.7937, "step": 2425 }, { "epoch": 9.902040816326531, "grad_norm": 5.506387710571289, "learning_rate": 4.5057142857142856e-05, "loss": 3.3342, "step": 2426 }, { "epoch": 9.906122448979591, "grad_norm": 3.445920944213867, "learning_rate": 4.505510204081633e-05, "loss": 3.9163, "step": 2427 }, { "epoch": 9.910204081632653, "grad_norm": 7.6115288734436035, "learning_rate": 4.5053061224489797e-05, "loss": 3.3774, "step": 2428 }, { "epoch": 9.914285714285715, "grad_norm": 4.712503910064697, "learning_rate": 4.505102040816327e-05, "loss": 3.5264, "step": 2429 }, { "epoch": 9.918367346938776, "grad_norm": 3.4994935989379883, "learning_rate": 4.504897959183674e-05, "loss": 3.7196, "step": 2430 }, { "epoch": 9.922448979591836, "grad_norm": 3.4456069469451904, "learning_rate": 4.5046938775510205e-05, "loss": 3.9263, "step": 2431 }, { "epoch": 9.926530612244898, "grad_norm": 5.893161773681641, "learning_rate": 4.504489795918367e-05, "loss": 3.3931, "step": 2432 }, { "epoch": 9.93061224489796, "grad_norm": 6.6179423332214355, "learning_rate": 4.5042857142857146e-05, "loss": 3.1873, "step": 2433 }, { "epoch": 9.93469387755102, "grad_norm": 5.049369812011719, "learning_rate": 4.504081632653061e-05, "loss": 3.6828, "step": 2434 }, { "epoch": 9.938775510204081, "grad_norm": 4.144637107849121, "learning_rate": 4.503877551020409e-05, "loss": 3.9211, "step": 2435 }, { "epoch": 9.942857142857143, "grad_norm": 4.981191635131836, "learning_rate": 4.5036734693877554e-05, "loss": 3.4103, "step": 2436 }, { "epoch": 9.946938775510205, "grad_norm": 5.088559150695801, "learning_rate": 4.503469387755102e-05, "loss": 3.8365, "step": 2437 }, { "epoch": 9.951020408163265, "grad_norm": 5.204117774963379, "learning_rate": 4.5032653061224495e-05, "loss": 3.5304, "step": 2438 }, { "epoch": 9.955102040816326, "grad_norm": 5.569422721862793, "learning_rate": 4.503061224489796e-05, "loss": 3.3739, "step": 2439 }, { "epoch": 9.959183673469388, "grad_norm": 5.700250148773193, "learning_rate": 4.502857142857143e-05, "loss": 3.5465, "step": 2440 }, { "epoch": 9.963265306122448, "grad_norm": 5.325876712799072, "learning_rate": 4.5026530612244896e-05, "loss": 3.4252, "step": 2441 }, { "epoch": 9.96734693877551, "grad_norm": 6.990859508514404, "learning_rate": 4.502448979591837e-05, "loss": 2.976, "step": 2442 }, { "epoch": 9.971428571428572, "grad_norm": 3.701077461242676, "learning_rate": 4.5022448979591844e-05, "loss": 3.8195, "step": 2443 }, { "epoch": 9.975510204081633, "grad_norm": 4.858779430389404, "learning_rate": 4.502040816326531e-05, "loss": 3.889, "step": 2444 }, { "epoch": 9.979591836734693, "grad_norm": 3.3610622882843018, "learning_rate": 4.501836734693878e-05, "loss": 4.0037, "step": 2445 }, { "epoch": 9.983673469387755, "grad_norm": 5.053096294403076, "learning_rate": 4.5016326530612246e-05, "loss": 3.6314, "step": 2446 }, { "epoch": 9.987755102040817, "grad_norm": 5.6731672286987305, "learning_rate": 4.501428571428571e-05, "loss": 3.145, "step": 2447 }, { "epoch": 9.991836734693878, "grad_norm": 2.905320644378662, "learning_rate": 4.5012244897959187e-05, "loss": 3.9806, "step": 2448 }, { "epoch": 9.995918367346938, "grad_norm": 8.310406684875488, "learning_rate": 4.5010204081632654e-05, "loss": 2.7875, "step": 2449 }, { "epoch": 10.0, "grad_norm": 5.274994850158691, "learning_rate": 4.500816326530613e-05, "loss": 3.9107, "step": 2450 }, { "epoch": 10.004081632653062, "grad_norm": 3.9360995292663574, "learning_rate": 4.5006122448979595e-05, "loss": 3.4572, "step": 2451 }, { "epoch": 10.008163265306122, "grad_norm": 3.775392532348633, "learning_rate": 4.500408163265306e-05, "loss": 3.7416, "step": 2452 }, { "epoch": 10.012244897959183, "grad_norm": 3.547661304473877, "learning_rate": 4.5002040816326536e-05, "loss": 3.8848, "step": 2453 }, { "epoch": 10.016326530612245, "grad_norm": 4.358460426330566, "learning_rate": 4.5e-05, "loss": 3.5404, "step": 2454 }, { "epoch": 10.020408163265307, "grad_norm": 4.678701877593994, "learning_rate": 4.499795918367347e-05, "loss": 3.6285, "step": 2455 }, { "epoch": 10.024489795918367, "grad_norm": 3.0900959968566895, "learning_rate": 4.499591836734694e-05, "loss": 3.9723, "step": 2456 }, { "epoch": 10.028571428571428, "grad_norm": 3.274247646331787, "learning_rate": 4.499387755102041e-05, "loss": 4.0151, "step": 2457 }, { "epoch": 10.03265306122449, "grad_norm": 2.8128740787506104, "learning_rate": 4.4991836734693885e-05, "loss": 4.0214, "step": 2458 }, { "epoch": 10.036734693877552, "grad_norm": 2.986966371536255, "learning_rate": 4.498979591836735e-05, "loss": 3.5923, "step": 2459 }, { "epoch": 10.040816326530612, "grad_norm": 2.8398241996765137, "learning_rate": 4.498775510204082e-05, "loss": 4.1115, "step": 2460 }, { "epoch": 10.044897959183674, "grad_norm": 4.6874895095825195, "learning_rate": 4.4985714285714286e-05, "loss": 3.5167, "step": 2461 }, { "epoch": 10.048979591836735, "grad_norm": 4.942580699920654, "learning_rate": 4.4983673469387753e-05, "loss": 3.2218, "step": 2462 }, { "epoch": 10.053061224489795, "grad_norm": 5.487728595733643, "learning_rate": 4.498163265306123e-05, "loss": 3.397, "step": 2463 }, { "epoch": 10.057142857142857, "grad_norm": 4.404087066650391, "learning_rate": 4.4979591836734694e-05, "loss": 3.6773, "step": 2464 }, { "epoch": 10.061224489795919, "grad_norm": 4.956358432769775, "learning_rate": 4.497755102040817e-05, "loss": 3.3651, "step": 2465 }, { "epoch": 10.06530612244898, "grad_norm": 4.91450834274292, "learning_rate": 4.4975510204081635e-05, "loss": 3.3646, "step": 2466 }, { "epoch": 10.06938775510204, "grad_norm": 3.4222989082336426, "learning_rate": 4.49734693877551e-05, "loss": 3.6427, "step": 2467 }, { "epoch": 10.073469387755102, "grad_norm": 3.214107036590576, "learning_rate": 4.4971428571428576e-05, "loss": 3.8013, "step": 2468 }, { "epoch": 10.077551020408164, "grad_norm": 5.862481117248535, "learning_rate": 4.4969387755102044e-05, "loss": 3.0528, "step": 2469 }, { "epoch": 10.081632653061224, "grad_norm": 3.872159004211426, "learning_rate": 4.496734693877551e-05, "loss": 3.6509, "step": 2470 }, { "epoch": 10.085714285714285, "grad_norm": 5.524241924285889, "learning_rate": 4.496530612244898e-05, "loss": 3.6036, "step": 2471 }, { "epoch": 10.089795918367347, "grad_norm": 2.937464714050293, "learning_rate": 4.496326530612245e-05, "loss": 3.915, "step": 2472 }, { "epoch": 10.093877551020409, "grad_norm": 4.659767150878906, "learning_rate": 4.4961224489795926e-05, "loss": 3.7101, "step": 2473 }, { "epoch": 10.097959183673469, "grad_norm": 5.131330490112305, "learning_rate": 4.495918367346939e-05, "loss": 3.5616, "step": 2474 }, { "epoch": 10.10204081632653, "grad_norm": 3.714833974838257, "learning_rate": 4.495714285714286e-05, "loss": 3.9472, "step": 2475 }, { "epoch": 10.106122448979592, "grad_norm": 3.30318021774292, "learning_rate": 4.495510204081633e-05, "loss": 3.8272, "step": 2476 }, { "epoch": 10.110204081632654, "grad_norm": 3.0426032543182373, "learning_rate": 4.49530612244898e-05, "loss": 3.9, "step": 2477 }, { "epoch": 10.114285714285714, "grad_norm": 7.14857816696167, "learning_rate": 4.495102040816327e-05, "loss": 3.1765, "step": 2478 }, { "epoch": 10.118367346938776, "grad_norm": 5.2941155433654785, "learning_rate": 4.4948979591836735e-05, "loss": 3.5594, "step": 2479 }, { "epoch": 10.122448979591837, "grad_norm": 6.021059036254883, "learning_rate": 4.49469387755102e-05, "loss": 3.7287, "step": 2480 }, { "epoch": 10.126530612244897, "grad_norm": 4.492987632751465, "learning_rate": 4.4944897959183676e-05, "loss": 3.7619, "step": 2481 }, { "epoch": 10.130612244897959, "grad_norm": 4.579298973083496, "learning_rate": 4.494285714285715e-05, "loss": 3.8407, "step": 2482 }, { "epoch": 10.13469387755102, "grad_norm": 4.028650283813477, "learning_rate": 4.494081632653062e-05, "loss": 3.8176, "step": 2483 }, { "epoch": 10.138775510204082, "grad_norm": 4.155653953552246, "learning_rate": 4.4938775510204084e-05, "loss": 3.7572, "step": 2484 }, { "epoch": 10.142857142857142, "grad_norm": 3.954434871673584, "learning_rate": 4.493673469387755e-05, "loss": 3.6556, "step": 2485 }, { "epoch": 10.146938775510204, "grad_norm": 3.2427818775177, "learning_rate": 4.493469387755102e-05, "loss": 3.9572, "step": 2486 }, { "epoch": 10.151020408163266, "grad_norm": 5.015174865722656, "learning_rate": 4.493265306122449e-05, "loss": 3.8185, "step": 2487 }, { "epoch": 10.155102040816326, "grad_norm": 5.558772563934326, "learning_rate": 4.493061224489796e-05, "loss": 3.5404, "step": 2488 }, { "epoch": 10.159183673469387, "grad_norm": 7.190823078155518, "learning_rate": 4.4928571428571434e-05, "loss": 3.3493, "step": 2489 }, { "epoch": 10.16326530612245, "grad_norm": 5.875247478485107, "learning_rate": 4.49265306122449e-05, "loss": 3.3968, "step": 2490 }, { "epoch": 10.16734693877551, "grad_norm": 5.458523273468018, "learning_rate": 4.492448979591837e-05, "loss": 3.4286, "step": 2491 }, { "epoch": 10.17142857142857, "grad_norm": 6.776173114776611, "learning_rate": 4.492244897959184e-05, "loss": 3.6206, "step": 2492 }, { "epoch": 10.175510204081633, "grad_norm": 5.388833999633789, "learning_rate": 4.492040816326531e-05, "loss": 3.5244, "step": 2493 }, { "epoch": 10.179591836734694, "grad_norm": 3.1629374027252197, "learning_rate": 4.4918367346938776e-05, "loss": 3.9574, "step": 2494 }, { "epoch": 10.183673469387756, "grad_norm": 4.7350640296936035, "learning_rate": 4.491632653061224e-05, "loss": 3.4865, "step": 2495 }, { "epoch": 10.187755102040816, "grad_norm": 2.851390838623047, "learning_rate": 4.491428571428572e-05, "loss": 4.1175, "step": 2496 }, { "epoch": 10.191836734693878, "grad_norm": 3.3916239738464355, "learning_rate": 4.491224489795919e-05, "loss": 3.6357, "step": 2497 }, { "epoch": 10.19591836734694, "grad_norm": 3.2743189334869385, "learning_rate": 4.491020408163266e-05, "loss": 3.7656, "step": 2498 }, { "epoch": 10.2, "grad_norm": 4.694741725921631, "learning_rate": 4.4908163265306125e-05, "loss": 3.6356, "step": 2499 }, { "epoch": 10.204081632653061, "grad_norm": 2.9548346996307373, "learning_rate": 4.490612244897959e-05, "loss": 3.9149, "step": 2500 }, { "epoch": 10.208163265306123, "grad_norm": 6.883295059204102, "learning_rate": 4.490408163265306e-05, "loss": 2.7573, "step": 2501 }, { "epoch": 10.212244897959184, "grad_norm": 3.9518425464630127, "learning_rate": 4.490204081632653e-05, "loss": 3.7983, "step": 2502 }, { "epoch": 10.216326530612244, "grad_norm": 6.294760227203369, "learning_rate": 4.49e-05, "loss": 3.3708, "step": 2503 }, { "epoch": 10.220408163265306, "grad_norm": 3.5118331909179688, "learning_rate": 4.4897959183673474e-05, "loss": 3.7633, "step": 2504 }, { "epoch": 10.224489795918368, "grad_norm": 4.588980197906494, "learning_rate": 4.489591836734694e-05, "loss": 3.8994, "step": 2505 }, { "epoch": 10.228571428571428, "grad_norm": 6.5868964195251465, "learning_rate": 4.489387755102041e-05, "loss": 3.1516, "step": 2506 }, { "epoch": 10.23265306122449, "grad_norm": 4.609208106994629, "learning_rate": 4.489183673469388e-05, "loss": 3.6073, "step": 2507 }, { "epoch": 10.236734693877551, "grad_norm": 3.287456750869751, "learning_rate": 4.488979591836735e-05, "loss": 4.1037, "step": 2508 }, { "epoch": 10.240816326530613, "grad_norm": 7.444812774658203, "learning_rate": 4.488775510204082e-05, "loss": 2.6619, "step": 2509 }, { "epoch": 10.244897959183673, "grad_norm": 4.872589111328125, "learning_rate": 4.4885714285714284e-05, "loss": 3.4584, "step": 2510 }, { "epoch": 10.248979591836735, "grad_norm": 5.024157524108887, "learning_rate": 4.488367346938776e-05, "loss": 3.7008, "step": 2511 }, { "epoch": 10.253061224489796, "grad_norm": 4.962684154510498, "learning_rate": 4.488163265306123e-05, "loss": 3.6825, "step": 2512 }, { "epoch": 10.257142857142856, "grad_norm": 3.56817626953125, "learning_rate": 4.48795918367347e-05, "loss": 3.8183, "step": 2513 }, { "epoch": 10.261224489795918, "grad_norm": 5.4297003746032715, "learning_rate": 4.4877551020408166e-05, "loss": 3.4292, "step": 2514 }, { "epoch": 10.26530612244898, "grad_norm": 5.1563496589660645, "learning_rate": 4.487551020408163e-05, "loss": 3.5405, "step": 2515 }, { "epoch": 10.269387755102041, "grad_norm": 5.089086532592773, "learning_rate": 4.48734693877551e-05, "loss": 3.5257, "step": 2516 }, { "epoch": 10.273469387755101, "grad_norm": 3.540297269821167, "learning_rate": 4.4871428571428574e-05, "loss": 3.7949, "step": 2517 }, { "epoch": 10.277551020408163, "grad_norm": 1.911681056022644, "learning_rate": 4.486938775510204e-05, "loss": 4.1158, "step": 2518 }, { "epoch": 10.281632653061225, "grad_norm": 6.742364406585693, "learning_rate": 4.4867346938775515e-05, "loss": 3.2589, "step": 2519 }, { "epoch": 10.285714285714286, "grad_norm": 5.428085803985596, "learning_rate": 4.486530612244898e-05, "loss": 3.5982, "step": 2520 }, { "epoch": 10.289795918367346, "grad_norm": 4.763000965118408, "learning_rate": 4.486326530612245e-05, "loss": 3.6474, "step": 2521 }, { "epoch": 10.293877551020408, "grad_norm": 4.240877628326416, "learning_rate": 4.486122448979592e-05, "loss": 3.8089, "step": 2522 }, { "epoch": 10.29795918367347, "grad_norm": 5.7439165115356445, "learning_rate": 4.485918367346939e-05, "loss": 3.3734, "step": 2523 }, { "epoch": 10.30204081632653, "grad_norm": 5.066707134246826, "learning_rate": 4.485714285714286e-05, "loss": 3.4079, "step": 2524 }, { "epoch": 10.306122448979592, "grad_norm": 3.4704971313476562, "learning_rate": 4.4855102040816325e-05, "loss": 3.8631, "step": 2525 }, { "epoch": 10.310204081632653, "grad_norm": 4.228267669677734, "learning_rate": 4.48530612244898e-05, "loss": 3.7839, "step": 2526 }, { "epoch": 10.314285714285715, "grad_norm": 4.808487415313721, "learning_rate": 4.4851020408163266e-05, "loss": 3.8929, "step": 2527 }, { "epoch": 10.318367346938775, "grad_norm": 3.3561818599700928, "learning_rate": 4.484897959183674e-05, "loss": 3.9301, "step": 2528 }, { "epoch": 10.322448979591837, "grad_norm": 8.420884132385254, "learning_rate": 4.484693877551021e-05, "loss": 2.7215, "step": 2529 }, { "epoch": 10.326530612244898, "grad_norm": 4.3726487159729, "learning_rate": 4.4844897959183674e-05, "loss": 3.4724, "step": 2530 }, { "epoch": 10.33061224489796, "grad_norm": 5.854076862335205, "learning_rate": 4.484285714285715e-05, "loss": 3.3407, "step": 2531 }, { "epoch": 10.33469387755102, "grad_norm": 5.674807548522949, "learning_rate": 4.4840816326530615e-05, "loss": 3.5063, "step": 2532 }, { "epoch": 10.338775510204082, "grad_norm": 5.228090286254883, "learning_rate": 4.483877551020408e-05, "loss": 2.7032, "step": 2533 }, { "epoch": 10.342857142857143, "grad_norm": 8.54759693145752, "learning_rate": 4.483673469387755e-05, "loss": 2.4516, "step": 2534 }, { "epoch": 10.346938775510203, "grad_norm": 4.222681045532227, "learning_rate": 4.483469387755102e-05, "loss": 3.7327, "step": 2535 }, { "epoch": 10.351020408163265, "grad_norm": 4.7078118324279785, "learning_rate": 4.48326530612245e-05, "loss": 3.7468, "step": 2536 }, { "epoch": 10.355102040816327, "grad_norm": 5.901865005493164, "learning_rate": 4.4830612244897964e-05, "loss": 3.6066, "step": 2537 }, { "epoch": 10.359183673469389, "grad_norm": 2.897416114807129, "learning_rate": 4.482857142857143e-05, "loss": 4.2879, "step": 2538 }, { "epoch": 10.363265306122448, "grad_norm": 7.375970840454102, "learning_rate": 4.48265306122449e-05, "loss": 3.308, "step": 2539 }, { "epoch": 10.36734693877551, "grad_norm": 4.533075332641602, "learning_rate": 4.4824489795918365e-05, "loss": 3.5757, "step": 2540 }, { "epoch": 10.371428571428572, "grad_norm": 4.246870040893555, "learning_rate": 4.482244897959184e-05, "loss": 3.9578, "step": 2541 }, { "epoch": 10.375510204081632, "grad_norm": 8.260600090026855, "learning_rate": 4.4820408163265306e-05, "loss": 2.8141, "step": 2542 }, { "epoch": 10.379591836734694, "grad_norm": 4.6197967529296875, "learning_rate": 4.481836734693878e-05, "loss": 3.5724, "step": 2543 }, { "epoch": 10.383673469387755, "grad_norm": 4.759551048278809, "learning_rate": 4.481632653061225e-05, "loss": 3.6468, "step": 2544 }, { "epoch": 10.387755102040817, "grad_norm": 5.226430892944336, "learning_rate": 4.4814285714285715e-05, "loss": 3.4834, "step": 2545 }, { "epoch": 10.391836734693877, "grad_norm": 5.253457069396973, "learning_rate": 4.481224489795919e-05, "loss": 3.3145, "step": 2546 }, { "epoch": 10.395918367346939, "grad_norm": 3.8309807777404785, "learning_rate": 4.4810204081632656e-05, "loss": 3.6343, "step": 2547 }, { "epoch": 10.4, "grad_norm": 8.861743927001953, "learning_rate": 4.480816326530612e-05, "loss": 3.2553, "step": 2548 }, { "epoch": 10.404081632653062, "grad_norm": 3.7351529598236084, "learning_rate": 4.480612244897959e-05, "loss": 3.7633, "step": 2549 }, { "epoch": 10.408163265306122, "grad_norm": 3.4787192344665527, "learning_rate": 4.4804081632653064e-05, "loss": 3.7424, "step": 2550 }, { "epoch": 10.412244897959184, "grad_norm": 4.360190391540527, "learning_rate": 4.480204081632654e-05, "loss": 3.8151, "step": 2551 }, { "epoch": 10.416326530612245, "grad_norm": 7.193674564361572, "learning_rate": 4.4800000000000005e-05, "loss": 3.1706, "step": 2552 }, { "epoch": 10.420408163265305, "grad_norm": 3.2952213287353516, "learning_rate": 4.479795918367347e-05, "loss": 4.139, "step": 2553 }, { "epoch": 10.424489795918367, "grad_norm": 2.6701254844665527, "learning_rate": 4.479591836734694e-05, "loss": 4.1688, "step": 2554 }, { "epoch": 10.428571428571429, "grad_norm": 4.753786563873291, "learning_rate": 4.4793877551020406e-05, "loss": 3.5153, "step": 2555 }, { "epoch": 10.43265306122449, "grad_norm": 6.021011829376221, "learning_rate": 4.479183673469388e-05, "loss": 3.6901, "step": 2556 }, { "epoch": 10.43673469387755, "grad_norm": 4.5074992179870605, "learning_rate": 4.478979591836735e-05, "loss": 3.6805, "step": 2557 }, { "epoch": 10.440816326530612, "grad_norm": 5.254488945007324, "learning_rate": 4.478775510204082e-05, "loss": 3.8473, "step": 2558 }, { "epoch": 10.444897959183674, "grad_norm": 5.320199966430664, "learning_rate": 4.478571428571429e-05, "loss": 3.441, "step": 2559 }, { "epoch": 10.448979591836734, "grad_norm": 4.443774223327637, "learning_rate": 4.4783673469387755e-05, "loss": 3.8045, "step": 2560 }, { "epoch": 10.453061224489796, "grad_norm": 5.983522891998291, "learning_rate": 4.478163265306123e-05, "loss": 3.4761, "step": 2561 }, { "epoch": 10.457142857142857, "grad_norm": 5.988646984100342, "learning_rate": 4.4779591836734696e-05, "loss": 3.1473, "step": 2562 }, { "epoch": 10.461224489795919, "grad_norm": 4.49648380279541, "learning_rate": 4.4777551020408163e-05, "loss": 3.5987, "step": 2563 }, { "epoch": 10.465306122448979, "grad_norm": 3.4017999172210693, "learning_rate": 4.477551020408163e-05, "loss": 3.6579, "step": 2564 }, { "epoch": 10.46938775510204, "grad_norm": 4.874301910400391, "learning_rate": 4.4773469387755104e-05, "loss": 3.7467, "step": 2565 }, { "epoch": 10.473469387755102, "grad_norm": 5.690149307250977, "learning_rate": 4.477142857142858e-05, "loss": 3.1636, "step": 2566 }, { "epoch": 10.477551020408164, "grad_norm": 4.459588527679443, "learning_rate": 4.4769387755102046e-05, "loss": 3.6552, "step": 2567 }, { "epoch": 10.481632653061224, "grad_norm": 3.643357992172241, "learning_rate": 4.476734693877551e-05, "loss": 3.9372, "step": 2568 }, { "epoch": 10.485714285714286, "grad_norm": 6.837028980255127, "learning_rate": 4.476530612244898e-05, "loss": 3.0678, "step": 2569 }, { "epoch": 10.489795918367347, "grad_norm": 4.840850830078125, "learning_rate": 4.4763265306122454e-05, "loss": 3.8921, "step": 2570 }, { "epoch": 10.493877551020407, "grad_norm": 7.147617340087891, "learning_rate": 4.476122448979592e-05, "loss": 3.1853, "step": 2571 }, { "epoch": 10.49795918367347, "grad_norm": 4.644049644470215, "learning_rate": 4.475918367346939e-05, "loss": 3.7751, "step": 2572 }, { "epoch": 10.50204081632653, "grad_norm": 4.67772912979126, "learning_rate": 4.475714285714286e-05, "loss": 3.5686, "step": 2573 }, { "epoch": 10.506122448979593, "grad_norm": 5.447492599487305, "learning_rate": 4.475510204081633e-05, "loss": 3.3448, "step": 2574 }, { "epoch": 10.510204081632653, "grad_norm": 4.763035774230957, "learning_rate": 4.47530612244898e-05, "loss": 3.6257, "step": 2575 }, { "epoch": 10.514285714285714, "grad_norm": 4.608060836791992, "learning_rate": 4.475102040816327e-05, "loss": 3.5588, "step": 2576 }, { "epoch": 10.518367346938776, "grad_norm": 7.3069658279418945, "learning_rate": 4.474897959183674e-05, "loss": 2.8982, "step": 2577 }, { "epoch": 10.522448979591836, "grad_norm": 3.1297616958618164, "learning_rate": 4.4746938775510204e-05, "loss": 4.062, "step": 2578 }, { "epoch": 10.526530612244898, "grad_norm": 4.331700801849365, "learning_rate": 4.474489795918367e-05, "loss": 3.8854, "step": 2579 }, { "epoch": 10.53061224489796, "grad_norm": 7.184332370758057, "learning_rate": 4.4742857142857145e-05, "loss": 3.5759, "step": 2580 }, { "epoch": 10.534693877551021, "grad_norm": 3.6331794261932373, "learning_rate": 4.474081632653061e-05, "loss": 3.8121, "step": 2581 }, { "epoch": 10.538775510204081, "grad_norm": 8.485443115234375, "learning_rate": 4.4738775510204086e-05, "loss": 3.2585, "step": 2582 }, { "epoch": 10.542857142857143, "grad_norm": 3.7030866146087646, "learning_rate": 4.4736734693877553e-05, "loss": 4.0189, "step": 2583 }, { "epoch": 10.546938775510204, "grad_norm": 3.6493847370147705, "learning_rate": 4.473469387755102e-05, "loss": 3.795, "step": 2584 }, { "epoch": 10.551020408163264, "grad_norm": 2.745880365371704, "learning_rate": 4.4732653061224494e-05, "loss": 3.8927, "step": 2585 }, { "epoch": 10.555102040816326, "grad_norm": 6.525904178619385, "learning_rate": 4.473061224489796e-05, "loss": 3.2818, "step": 2586 }, { "epoch": 10.559183673469388, "grad_norm": 7.457293510437012, "learning_rate": 4.472857142857143e-05, "loss": 2.802, "step": 2587 }, { "epoch": 10.56326530612245, "grad_norm": 4.035186290740967, "learning_rate": 4.4726530612244896e-05, "loss": 3.657, "step": 2588 }, { "epoch": 10.56734693877551, "grad_norm": 6.173070907592773, "learning_rate": 4.472448979591837e-05, "loss": 3.1209, "step": 2589 }, { "epoch": 10.571428571428571, "grad_norm": 3.757432222366333, "learning_rate": 4.4722448979591844e-05, "loss": 3.7165, "step": 2590 }, { "epoch": 10.575510204081633, "grad_norm": 4.798702716827393, "learning_rate": 4.472040816326531e-05, "loss": 3.4412, "step": 2591 }, { "epoch": 10.579591836734695, "grad_norm": 4.788888454437256, "learning_rate": 4.471836734693878e-05, "loss": 3.8497, "step": 2592 }, { "epoch": 10.583673469387755, "grad_norm": 4.621147632598877, "learning_rate": 4.4716326530612245e-05, "loss": 3.577, "step": 2593 }, { "epoch": 10.587755102040816, "grad_norm": 2.9583988189697266, "learning_rate": 4.471428571428571e-05, "loss": 3.8688, "step": 2594 }, { "epoch": 10.591836734693878, "grad_norm": 3.127356767654419, "learning_rate": 4.4712244897959186e-05, "loss": 3.7375, "step": 2595 }, { "epoch": 10.59591836734694, "grad_norm": 8.914581298828125, "learning_rate": 4.471020408163265e-05, "loss": 3.0087, "step": 2596 }, { "epoch": 10.6, "grad_norm": 5.675729274749756, "learning_rate": 4.470816326530613e-05, "loss": 3.5033, "step": 2597 }, { "epoch": 10.604081632653061, "grad_norm": 7.048518657684326, "learning_rate": 4.4706122448979594e-05, "loss": 3.6914, "step": 2598 }, { "epoch": 10.608163265306123, "grad_norm": 3.8465263843536377, "learning_rate": 4.470408163265306e-05, "loss": 4.0307, "step": 2599 }, { "epoch": 10.612244897959183, "grad_norm": 6.964426517486572, "learning_rate": 4.4702040816326535e-05, "loss": 3.2213, "step": 2600 }, { "epoch": 10.616326530612245, "grad_norm": 5.827482223510742, "learning_rate": 4.47e-05, "loss": 3.3406, "step": 2601 }, { "epoch": 10.620408163265306, "grad_norm": 3.6768596172332764, "learning_rate": 4.469795918367347e-05, "loss": 3.7272, "step": 2602 }, { "epoch": 10.624489795918368, "grad_norm": 4.248744487762451, "learning_rate": 4.4695918367346937e-05, "loss": 3.7288, "step": 2603 }, { "epoch": 10.628571428571428, "grad_norm": 4.954410552978516, "learning_rate": 4.469387755102041e-05, "loss": 3.8218, "step": 2604 }, { "epoch": 10.63265306122449, "grad_norm": 6.021113872528076, "learning_rate": 4.4691836734693884e-05, "loss": 3.0678, "step": 2605 }, { "epoch": 10.636734693877552, "grad_norm": 7.381223678588867, "learning_rate": 4.468979591836735e-05, "loss": 3.0447, "step": 2606 }, { "epoch": 10.640816326530611, "grad_norm": 4.775564670562744, "learning_rate": 4.468775510204082e-05, "loss": 3.8165, "step": 2607 }, { "epoch": 10.644897959183673, "grad_norm": 6.735879898071289, "learning_rate": 4.4685714285714286e-05, "loss": 3.4277, "step": 2608 }, { "epoch": 10.648979591836735, "grad_norm": 3.9142885208129883, "learning_rate": 4.468367346938775e-05, "loss": 3.9, "step": 2609 }, { "epoch": 10.653061224489797, "grad_norm": 4.994314193725586, "learning_rate": 4.468163265306123e-05, "loss": 3.7004, "step": 2610 }, { "epoch": 10.657142857142857, "grad_norm": 6.569033145904541, "learning_rate": 4.4679591836734694e-05, "loss": 3.4379, "step": 2611 }, { "epoch": 10.661224489795918, "grad_norm": 3.8296234607696533, "learning_rate": 4.467755102040817e-05, "loss": 3.8185, "step": 2612 }, { "epoch": 10.66530612244898, "grad_norm": 4.383983135223389, "learning_rate": 4.4675510204081635e-05, "loss": 3.9741, "step": 2613 }, { "epoch": 10.66938775510204, "grad_norm": 9.104000091552734, "learning_rate": 4.467346938775511e-05, "loss": 3.0968, "step": 2614 }, { "epoch": 10.673469387755102, "grad_norm": 3.6469249725341797, "learning_rate": 4.4671428571428576e-05, "loss": 3.9788, "step": 2615 }, { "epoch": 10.677551020408163, "grad_norm": 5.668689727783203, "learning_rate": 4.466938775510204e-05, "loss": 3.4012, "step": 2616 }, { "epoch": 10.681632653061225, "grad_norm": 7.7337646484375, "learning_rate": 4.466734693877551e-05, "loss": 2.5308, "step": 2617 }, { "epoch": 10.685714285714285, "grad_norm": 5.202347278594971, "learning_rate": 4.466530612244898e-05, "loss": 3.0043, "step": 2618 }, { "epoch": 10.689795918367347, "grad_norm": 5.060728073120117, "learning_rate": 4.466326530612245e-05, "loss": 3.6918, "step": 2619 }, { "epoch": 10.693877551020408, "grad_norm": 4.1015400886535645, "learning_rate": 4.4661224489795925e-05, "loss": 3.832, "step": 2620 }, { "epoch": 10.69795918367347, "grad_norm": 3.6545848846435547, "learning_rate": 4.465918367346939e-05, "loss": 3.9002, "step": 2621 }, { "epoch": 10.70204081632653, "grad_norm": 3.839038133621216, "learning_rate": 4.465714285714286e-05, "loss": 3.6713, "step": 2622 }, { "epoch": 10.706122448979592, "grad_norm": 3.5510687828063965, "learning_rate": 4.4655102040816327e-05, "loss": 3.7627, "step": 2623 }, { "epoch": 10.710204081632654, "grad_norm": 5.769655227661133, "learning_rate": 4.46530612244898e-05, "loss": 3.5565, "step": 2624 }, { "epoch": 10.714285714285714, "grad_norm": 4.241174697875977, "learning_rate": 4.465102040816327e-05, "loss": 3.7721, "step": 2625 }, { "epoch": 10.718367346938775, "grad_norm": 3.66225528717041, "learning_rate": 4.4648979591836735e-05, "loss": 3.9035, "step": 2626 }, { "epoch": 10.722448979591837, "grad_norm": 5.128507614135742, "learning_rate": 4.464693877551021e-05, "loss": 3.0502, "step": 2627 }, { "epoch": 10.726530612244899, "grad_norm": 3.8768203258514404, "learning_rate": 4.4644897959183676e-05, "loss": 3.9804, "step": 2628 }, { "epoch": 10.730612244897959, "grad_norm": 7.124089241027832, "learning_rate": 4.464285714285715e-05, "loss": 3.1816, "step": 2629 }, { "epoch": 10.73469387755102, "grad_norm": 6.6576642990112305, "learning_rate": 4.464081632653062e-05, "loss": 3.471, "step": 2630 }, { "epoch": 10.738775510204082, "grad_norm": 3.470689058303833, "learning_rate": 4.4638775510204084e-05, "loss": 3.5962, "step": 2631 }, { "epoch": 10.742857142857144, "grad_norm": 5.6723480224609375, "learning_rate": 4.463673469387755e-05, "loss": 3.6805, "step": 2632 }, { "epoch": 10.746938775510204, "grad_norm": 4.8378095626831055, "learning_rate": 4.463469387755102e-05, "loss": 3.6223, "step": 2633 }, { "epoch": 10.751020408163265, "grad_norm": 6.958372116088867, "learning_rate": 4.463265306122449e-05, "loss": 3.3269, "step": 2634 }, { "epoch": 10.755102040816327, "grad_norm": 5.255727767944336, "learning_rate": 4.463061224489796e-05, "loss": 3.1494, "step": 2635 }, { "epoch": 10.759183673469387, "grad_norm": 3.288548469543457, "learning_rate": 4.462857142857143e-05, "loss": 3.7755, "step": 2636 }, { "epoch": 10.763265306122449, "grad_norm": 5.112053394317627, "learning_rate": 4.46265306122449e-05, "loss": 3.7914, "step": 2637 }, { "epoch": 10.76734693877551, "grad_norm": 4.012010097503662, "learning_rate": 4.462448979591837e-05, "loss": 3.6522, "step": 2638 }, { "epoch": 10.771428571428572, "grad_norm": 3.593700408935547, "learning_rate": 4.462244897959184e-05, "loss": 3.729, "step": 2639 }, { "epoch": 10.775510204081632, "grad_norm": 3.755244016647339, "learning_rate": 4.462040816326531e-05, "loss": 3.8518, "step": 2640 }, { "epoch": 10.779591836734694, "grad_norm": 8.693889617919922, "learning_rate": 4.4618367346938775e-05, "loss": 2.7368, "step": 2641 }, { "epoch": 10.783673469387756, "grad_norm": 4.761216163635254, "learning_rate": 4.461632653061224e-05, "loss": 3.6073, "step": 2642 }, { "epoch": 10.787755102040816, "grad_norm": 3.114032030105591, "learning_rate": 4.4614285714285716e-05, "loss": 3.9745, "step": 2643 }, { "epoch": 10.791836734693877, "grad_norm": 3.1935763359069824, "learning_rate": 4.461224489795919e-05, "loss": 3.958, "step": 2644 }, { "epoch": 10.795918367346939, "grad_norm": 4.723662376403809, "learning_rate": 4.461020408163266e-05, "loss": 3.9154, "step": 2645 }, { "epoch": 10.8, "grad_norm": 4.0454816818237305, "learning_rate": 4.4608163265306125e-05, "loss": 3.8243, "step": 2646 }, { "epoch": 10.80408163265306, "grad_norm": 7.069613456726074, "learning_rate": 4.460612244897959e-05, "loss": 2.9043, "step": 2647 }, { "epoch": 10.808163265306122, "grad_norm": 5.065738201141357, "learning_rate": 4.460408163265306e-05, "loss": 3.7824, "step": 2648 }, { "epoch": 10.812244897959184, "grad_norm": 4.2450385093688965, "learning_rate": 4.460204081632653e-05, "loss": 3.6859, "step": 2649 }, { "epoch": 10.816326530612244, "grad_norm": 3.537283182144165, "learning_rate": 4.46e-05, "loss": 3.8061, "step": 2650 }, { "epoch": 10.820408163265306, "grad_norm": 3.4723942279815674, "learning_rate": 4.4597959183673474e-05, "loss": 3.77, "step": 2651 }, { "epoch": 10.824489795918367, "grad_norm": 8.276641845703125, "learning_rate": 4.459591836734694e-05, "loss": 2.866, "step": 2652 }, { "epoch": 10.82857142857143, "grad_norm": 6.788547992706299, "learning_rate": 4.459387755102041e-05, "loss": 3.2609, "step": 2653 }, { "epoch": 10.83265306122449, "grad_norm": 6.18472957611084, "learning_rate": 4.459183673469388e-05, "loss": 3.514, "step": 2654 }, { "epoch": 10.83673469387755, "grad_norm": 3.792053699493408, "learning_rate": 4.458979591836735e-05, "loss": 3.8521, "step": 2655 }, { "epoch": 10.840816326530613, "grad_norm": 4.023907661437988, "learning_rate": 4.4587755102040816e-05, "loss": 3.696, "step": 2656 }, { "epoch": 10.844897959183674, "grad_norm": 8.905384063720703, "learning_rate": 4.458571428571428e-05, "loss": 2.7246, "step": 2657 }, { "epoch": 10.848979591836734, "grad_norm": 4.777501583099365, "learning_rate": 4.458367346938776e-05, "loss": 3.2642, "step": 2658 }, { "epoch": 10.853061224489796, "grad_norm": 3.6717324256896973, "learning_rate": 4.458163265306123e-05, "loss": 3.939, "step": 2659 }, { "epoch": 10.857142857142858, "grad_norm": 7.9581522941589355, "learning_rate": 4.45795918367347e-05, "loss": 3.2294, "step": 2660 }, { "epoch": 10.861224489795918, "grad_norm": 8.755032539367676, "learning_rate": 4.4577551020408165e-05, "loss": 2.7799, "step": 2661 }, { "epoch": 10.86530612244898, "grad_norm": 3.7661702632904053, "learning_rate": 4.457551020408163e-05, "loss": 3.682, "step": 2662 }, { "epoch": 10.869387755102041, "grad_norm": 3.3805689811706543, "learning_rate": 4.4573469387755106e-05, "loss": 3.8019, "step": 2663 }, { "epoch": 10.873469387755103, "grad_norm": 3.495206117630005, "learning_rate": 4.4571428571428574e-05, "loss": 4.0804, "step": 2664 }, { "epoch": 10.877551020408163, "grad_norm": 4.635223865509033, "learning_rate": 4.456938775510204e-05, "loss": 3.8359, "step": 2665 }, { "epoch": 10.881632653061224, "grad_norm": 8.815584182739258, "learning_rate": 4.4567346938775515e-05, "loss": 2.5503, "step": 2666 }, { "epoch": 10.885714285714286, "grad_norm": 5.11695671081543, "learning_rate": 4.456530612244898e-05, "loss": 3.6284, "step": 2667 }, { "epoch": 10.889795918367348, "grad_norm": 3.8538589477539062, "learning_rate": 4.4563265306122456e-05, "loss": 3.6567, "step": 2668 }, { "epoch": 10.893877551020408, "grad_norm": 6.179760456085205, "learning_rate": 4.456122448979592e-05, "loss": 3.3529, "step": 2669 }, { "epoch": 10.89795918367347, "grad_norm": 4.259478569030762, "learning_rate": 4.455918367346939e-05, "loss": 3.7457, "step": 2670 }, { "epoch": 10.902040816326531, "grad_norm": 3.9312963485717773, "learning_rate": 4.455714285714286e-05, "loss": 3.7236, "step": 2671 }, { "epoch": 10.906122448979591, "grad_norm": 4.756248474121094, "learning_rate": 4.4555102040816324e-05, "loss": 3.8488, "step": 2672 }, { "epoch": 10.910204081632653, "grad_norm": 7.018425464630127, "learning_rate": 4.45530612244898e-05, "loss": 3.7127, "step": 2673 }, { "epoch": 10.914285714285715, "grad_norm": 4.3782124519348145, "learning_rate": 4.455102040816327e-05, "loss": 3.6857, "step": 2674 }, { "epoch": 10.918367346938776, "grad_norm": 4.999773979187012, "learning_rate": 4.454897959183674e-05, "loss": 3.5833, "step": 2675 }, { "epoch": 10.922448979591836, "grad_norm": 5.278445720672607, "learning_rate": 4.4546938775510206e-05, "loss": 3.25, "step": 2676 }, { "epoch": 10.926530612244898, "grad_norm": 5.135500907897949, "learning_rate": 4.454489795918367e-05, "loss": 3.9858, "step": 2677 }, { "epoch": 10.93061224489796, "grad_norm": 13.832098007202148, "learning_rate": 4.454285714285715e-05, "loss": 1.6082, "step": 2678 }, { "epoch": 10.93469387755102, "grad_norm": 5.469249248504639, "learning_rate": 4.4540816326530614e-05, "loss": 3.5006, "step": 2679 }, { "epoch": 10.938775510204081, "grad_norm": 6.03424072265625, "learning_rate": 4.453877551020408e-05, "loss": 3.514, "step": 2680 }, { "epoch": 10.942857142857143, "grad_norm": 6.8035783767700195, "learning_rate": 4.4536734693877555e-05, "loss": 3.527, "step": 2681 }, { "epoch": 10.946938775510205, "grad_norm": 6.335474014282227, "learning_rate": 4.453469387755102e-05, "loss": 3.217, "step": 2682 }, { "epoch": 10.951020408163265, "grad_norm": 4.17458963394165, "learning_rate": 4.4532653061224496e-05, "loss": 3.5867, "step": 2683 }, { "epoch": 10.955102040816326, "grad_norm": 2.949620246887207, "learning_rate": 4.4530612244897963e-05, "loss": 3.8613, "step": 2684 }, { "epoch": 10.959183673469388, "grad_norm": 5.669450759887695, "learning_rate": 4.452857142857143e-05, "loss": 3.6091, "step": 2685 }, { "epoch": 10.963265306122448, "grad_norm": 2.873452663421631, "learning_rate": 4.45265306122449e-05, "loss": 4.1591, "step": 2686 }, { "epoch": 10.96734693877551, "grad_norm": 4.458907604217529, "learning_rate": 4.4524489795918365e-05, "loss": 4.043, "step": 2687 }, { "epoch": 10.971428571428572, "grad_norm": 3.9764413833618164, "learning_rate": 4.452244897959184e-05, "loss": 3.997, "step": 2688 }, { "epoch": 10.975510204081633, "grad_norm": 5.810985565185547, "learning_rate": 4.4520408163265306e-05, "loss": 3.3503, "step": 2689 }, { "epoch": 10.979591836734693, "grad_norm": 4.9291276931762695, "learning_rate": 4.451836734693878e-05, "loss": 3.509, "step": 2690 }, { "epoch": 10.983673469387755, "grad_norm": 5.136114120483398, "learning_rate": 4.451632653061225e-05, "loss": 3.8017, "step": 2691 }, { "epoch": 10.987755102040817, "grad_norm": 6.107813835144043, "learning_rate": 4.4514285714285714e-05, "loss": 3.4883, "step": 2692 }, { "epoch": 10.991836734693878, "grad_norm": 6.871821880340576, "learning_rate": 4.451224489795919e-05, "loss": 3.2658, "step": 2693 }, { "epoch": 10.995918367346938, "grad_norm": 4.027142524719238, "learning_rate": 4.4510204081632655e-05, "loss": 3.9041, "step": 2694 }, { "epoch": 11.0, "grad_norm": 8.552045822143555, "learning_rate": 4.450816326530612e-05, "loss": 2.9026, "step": 2695 }, { "epoch": 11.004081632653062, "grad_norm": 4.73720645904541, "learning_rate": 4.450612244897959e-05, "loss": 3.6751, "step": 2696 }, { "epoch": 11.008163265306122, "grad_norm": 4.943747043609619, "learning_rate": 4.450408163265306e-05, "loss": 3.7664, "step": 2697 }, { "epoch": 11.012244897959183, "grad_norm": 5.243824481964111, "learning_rate": 4.450204081632654e-05, "loss": 3.3697, "step": 2698 }, { "epoch": 11.016326530612245, "grad_norm": 3.0103440284729004, "learning_rate": 4.4500000000000004e-05, "loss": 3.9921, "step": 2699 }, { "epoch": 11.020408163265307, "grad_norm": 4.254292964935303, "learning_rate": 4.449795918367347e-05, "loss": 3.6532, "step": 2700 }, { "epoch": 11.024489795918367, "grad_norm": 4.999822616577148, "learning_rate": 4.449591836734694e-05, "loss": 3.3612, "step": 2701 }, { "epoch": 11.028571428571428, "grad_norm": 6.736525535583496, "learning_rate": 4.4493877551020406e-05, "loss": 3.5696, "step": 2702 }, { "epoch": 11.03265306122449, "grad_norm": 7.108247756958008, "learning_rate": 4.449183673469388e-05, "loss": 3.5561, "step": 2703 }, { "epoch": 11.036734693877552, "grad_norm": 6.714467525482178, "learning_rate": 4.448979591836735e-05, "loss": 3.4442, "step": 2704 }, { "epoch": 11.040816326530612, "grad_norm": 5.016941070556641, "learning_rate": 4.448775510204082e-05, "loss": 3.4573, "step": 2705 }, { "epoch": 11.044897959183674, "grad_norm": 3.7427866458892822, "learning_rate": 4.448571428571429e-05, "loss": 3.6417, "step": 2706 }, { "epoch": 11.048979591836735, "grad_norm": 3.3641409873962402, "learning_rate": 4.448367346938776e-05, "loss": 3.7044, "step": 2707 }, { "epoch": 11.053061224489795, "grad_norm": 2.973499059677124, "learning_rate": 4.448163265306123e-05, "loss": 4.0854, "step": 2708 }, { "epoch": 11.057142857142857, "grad_norm": 4.57058048248291, "learning_rate": 4.4479591836734696e-05, "loss": 3.5042, "step": 2709 }, { "epoch": 11.061224489795919, "grad_norm": 3.920081853866577, "learning_rate": 4.447755102040816e-05, "loss": 3.5801, "step": 2710 }, { "epoch": 11.06530612244898, "grad_norm": 4.249393463134766, "learning_rate": 4.447551020408163e-05, "loss": 3.7415, "step": 2711 }, { "epoch": 11.06938775510204, "grad_norm": 4.114140033721924, "learning_rate": 4.4473469387755104e-05, "loss": 3.6053, "step": 2712 }, { "epoch": 11.073469387755102, "grad_norm": 4.331147193908691, "learning_rate": 4.447142857142858e-05, "loss": 3.5171, "step": 2713 }, { "epoch": 11.077551020408164, "grad_norm": 7.073971748352051, "learning_rate": 4.4469387755102045e-05, "loss": 3.31, "step": 2714 }, { "epoch": 11.081632653061224, "grad_norm": 3.489619731903076, "learning_rate": 4.446734693877551e-05, "loss": 3.8858, "step": 2715 }, { "epoch": 11.085714285714285, "grad_norm": 5.174905776977539, "learning_rate": 4.446530612244898e-05, "loss": 3.718, "step": 2716 }, { "epoch": 11.089795918367347, "grad_norm": 7.052664279937744, "learning_rate": 4.446326530612245e-05, "loss": 3.3046, "step": 2717 }, { "epoch": 11.093877551020409, "grad_norm": 6.153769016265869, "learning_rate": 4.446122448979592e-05, "loss": 3.4353, "step": 2718 }, { "epoch": 11.097959183673469, "grad_norm": 5.212570667266846, "learning_rate": 4.445918367346939e-05, "loss": 3.6098, "step": 2719 }, { "epoch": 11.10204081632653, "grad_norm": 3.960615396499634, "learning_rate": 4.445714285714286e-05, "loss": 3.7453, "step": 2720 }, { "epoch": 11.106122448979592, "grad_norm": 4.295365810394287, "learning_rate": 4.445510204081633e-05, "loss": 3.5388, "step": 2721 }, { "epoch": 11.110204081632654, "grad_norm": 6.039039134979248, "learning_rate": 4.44530612244898e-05, "loss": 3.2592, "step": 2722 }, { "epoch": 11.114285714285714, "grad_norm": 4.873957633972168, "learning_rate": 4.445102040816327e-05, "loss": 3.7889, "step": 2723 }, { "epoch": 11.118367346938776, "grad_norm": 6.68468713760376, "learning_rate": 4.4448979591836737e-05, "loss": 3.3533, "step": 2724 }, { "epoch": 11.122448979591837, "grad_norm": 4.337703227996826, "learning_rate": 4.4446938775510204e-05, "loss": 3.8587, "step": 2725 }, { "epoch": 11.126530612244897, "grad_norm": 3.9473328590393066, "learning_rate": 4.444489795918367e-05, "loss": 3.8882, "step": 2726 }, { "epoch": 11.130612244897959, "grad_norm": 7.114506721496582, "learning_rate": 4.4442857142857145e-05, "loss": 3.1092, "step": 2727 }, { "epoch": 11.13469387755102, "grad_norm": 5.166895389556885, "learning_rate": 4.444081632653062e-05, "loss": 3.7312, "step": 2728 }, { "epoch": 11.138775510204082, "grad_norm": 5.191842079162598, "learning_rate": 4.4438775510204086e-05, "loss": 3.5954, "step": 2729 }, { "epoch": 11.142857142857142, "grad_norm": 5.0809006690979, "learning_rate": 4.443673469387755e-05, "loss": 3.853, "step": 2730 }, { "epoch": 11.146938775510204, "grad_norm": 5.541624069213867, "learning_rate": 4.443469387755102e-05, "loss": 3.3759, "step": 2731 }, { "epoch": 11.151020408163266, "grad_norm": 4.517406940460205, "learning_rate": 4.4432653061224494e-05, "loss": 3.6311, "step": 2732 }, { "epoch": 11.155102040816326, "grad_norm": 4.115747451782227, "learning_rate": 4.443061224489796e-05, "loss": 3.5923, "step": 2733 }, { "epoch": 11.159183673469387, "grad_norm": 3.934666395187378, "learning_rate": 4.442857142857143e-05, "loss": 3.8453, "step": 2734 }, { "epoch": 11.16326530612245, "grad_norm": 3.981156349182129, "learning_rate": 4.44265306122449e-05, "loss": 3.8994, "step": 2735 }, { "epoch": 11.16734693877551, "grad_norm": 4.50515604019165, "learning_rate": 4.442448979591837e-05, "loss": 3.8345, "step": 2736 }, { "epoch": 11.17142857142857, "grad_norm": 7.086164474487305, "learning_rate": 4.442244897959184e-05, "loss": 3.0833, "step": 2737 }, { "epoch": 11.175510204081633, "grad_norm": 9.966285705566406, "learning_rate": 4.442040816326531e-05, "loss": 3.1193, "step": 2738 }, { "epoch": 11.179591836734694, "grad_norm": 6.324079990386963, "learning_rate": 4.441836734693878e-05, "loss": 3.3707, "step": 2739 }, { "epoch": 11.183673469387756, "grad_norm": 9.204004287719727, "learning_rate": 4.4416326530612244e-05, "loss": 2.8778, "step": 2740 }, { "epoch": 11.187755102040816, "grad_norm": 3.811520576477051, "learning_rate": 4.441428571428571e-05, "loss": 3.7645, "step": 2741 }, { "epoch": 11.191836734693878, "grad_norm": 2.8736467361450195, "learning_rate": 4.4412244897959185e-05, "loss": 3.8698, "step": 2742 }, { "epoch": 11.19591836734694, "grad_norm": 4.605072975158691, "learning_rate": 4.441020408163265e-05, "loss": 3.5381, "step": 2743 }, { "epoch": 11.2, "grad_norm": 4.504487037658691, "learning_rate": 4.4408163265306127e-05, "loss": 3.5742, "step": 2744 }, { "epoch": 11.204081632653061, "grad_norm": 5.09939432144165, "learning_rate": 4.4406122448979594e-05, "loss": 3.5875, "step": 2745 }, { "epoch": 11.208163265306123, "grad_norm": 4.529903411865234, "learning_rate": 4.440408163265306e-05, "loss": 3.7582, "step": 2746 }, { "epoch": 11.212244897959184, "grad_norm": 5.2058844566345215, "learning_rate": 4.4402040816326535e-05, "loss": 3.56, "step": 2747 }, { "epoch": 11.216326530612244, "grad_norm": 6.185530662536621, "learning_rate": 4.44e-05, "loss": 3.579, "step": 2748 }, { "epoch": 11.220408163265306, "grad_norm": 6.119230270385742, "learning_rate": 4.439795918367347e-05, "loss": 3.5647, "step": 2749 }, { "epoch": 11.224489795918368, "grad_norm": 3.1101839542388916, "learning_rate": 4.4395918367346936e-05, "loss": 4.2676, "step": 2750 }, { "epoch": 11.228571428571428, "grad_norm": 4.054935932159424, "learning_rate": 4.439387755102041e-05, "loss": 3.8045, "step": 2751 }, { "epoch": 11.23265306122449, "grad_norm": 4.062369346618652, "learning_rate": 4.4391836734693884e-05, "loss": 3.8949, "step": 2752 }, { "epoch": 11.236734693877551, "grad_norm": 6.217837810516357, "learning_rate": 4.438979591836735e-05, "loss": 3.3753, "step": 2753 }, { "epoch": 11.240816326530613, "grad_norm": 5.544182777404785, "learning_rate": 4.438775510204082e-05, "loss": 3.3498, "step": 2754 }, { "epoch": 11.244897959183673, "grad_norm": 5.256259441375732, "learning_rate": 4.4385714285714285e-05, "loss": 3.4789, "step": 2755 }, { "epoch": 11.248979591836735, "grad_norm": 3.2726473808288574, "learning_rate": 4.438367346938776e-05, "loss": 4.0977, "step": 2756 }, { "epoch": 11.253061224489796, "grad_norm": 8.237162590026855, "learning_rate": 4.4381632653061226e-05, "loss": 2.6976, "step": 2757 }, { "epoch": 11.257142857142856, "grad_norm": 5.159272193908691, "learning_rate": 4.437959183673469e-05, "loss": 3.6477, "step": 2758 }, { "epoch": 11.261224489795918, "grad_norm": 4.464564323425293, "learning_rate": 4.437755102040817e-05, "loss": 3.737, "step": 2759 }, { "epoch": 11.26530612244898, "grad_norm": 9.358745574951172, "learning_rate": 4.4375510204081634e-05, "loss": 2.7191, "step": 2760 }, { "epoch": 11.269387755102041, "grad_norm": 4.244101524353027, "learning_rate": 4.437346938775511e-05, "loss": 3.6229, "step": 2761 }, { "epoch": 11.273469387755101, "grad_norm": 6.211590766906738, "learning_rate": 4.4371428571428575e-05, "loss": 3.0697, "step": 2762 }, { "epoch": 11.277551020408163, "grad_norm": 3.3736164569854736, "learning_rate": 4.436938775510204e-05, "loss": 3.7792, "step": 2763 }, { "epoch": 11.281632653061225, "grad_norm": 5.178025245666504, "learning_rate": 4.436734693877551e-05, "loss": 3.0726, "step": 2764 }, { "epoch": 11.285714285714286, "grad_norm": 5.28727388381958, "learning_rate": 4.436530612244898e-05, "loss": 3.4083, "step": 2765 }, { "epoch": 11.289795918367346, "grad_norm": 7.140950679779053, "learning_rate": 4.436326530612245e-05, "loss": 3.9539, "step": 2766 }, { "epoch": 11.293877551020408, "grad_norm": 4.786002159118652, "learning_rate": 4.4361224489795925e-05, "loss": 3.2034, "step": 2767 }, { "epoch": 11.29795918367347, "grad_norm": 8.157914161682129, "learning_rate": 4.435918367346939e-05, "loss": 2.6317, "step": 2768 }, { "epoch": 11.30204081632653, "grad_norm": 4.1634907722473145, "learning_rate": 4.435714285714286e-05, "loss": 3.9189, "step": 2769 }, { "epoch": 11.306122448979592, "grad_norm": 6.6630682945251465, "learning_rate": 4.4355102040816326e-05, "loss": 3.1588, "step": 2770 }, { "epoch": 11.310204081632653, "grad_norm": 4.382691383361816, "learning_rate": 4.43530612244898e-05, "loss": 3.5223, "step": 2771 }, { "epoch": 11.314285714285715, "grad_norm": 7.846848487854004, "learning_rate": 4.435102040816327e-05, "loss": 3.0229, "step": 2772 }, { "epoch": 11.318367346938775, "grad_norm": 5.04244327545166, "learning_rate": 4.4348979591836734e-05, "loss": 3.5627, "step": 2773 }, { "epoch": 11.322448979591837, "grad_norm": 7.944087028503418, "learning_rate": 4.434693877551021e-05, "loss": 3.1624, "step": 2774 }, { "epoch": 11.326530612244898, "grad_norm": 4.363999366760254, "learning_rate": 4.4344897959183675e-05, "loss": 3.7951, "step": 2775 }, { "epoch": 11.33061224489796, "grad_norm": 7.787664890289307, "learning_rate": 4.434285714285715e-05, "loss": 2.9205, "step": 2776 }, { "epoch": 11.33469387755102, "grad_norm": 10.09264087677002, "learning_rate": 4.4340816326530616e-05, "loss": 2.587, "step": 2777 }, { "epoch": 11.338775510204082, "grad_norm": 3.337317943572998, "learning_rate": 4.433877551020408e-05, "loss": 4.0371, "step": 2778 }, { "epoch": 11.342857142857143, "grad_norm": 5.20474100112915, "learning_rate": 4.433673469387755e-05, "loss": 3.7319, "step": 2779 }, { "epoch": 11.346938775510203, "grad_norm": 4.802759170532227, "learning_rate": 4.433469387755102e-05, "loss": 3.7578, "step": 2780 }, { "epoch": 11.351020408163265, "grad_norm": 3.7515311241149902, "learning_rate": 4.433265306122449e-05, "loss": 3.7291, "step": 2781 }, { "epoch": 11.355102040816327, "grad_norm": 5.260759353637695, "learning_rate": 4.4330612244897965e-05, "loss": 3.6209, "step": 2782 }, { "epoch": 11.359183673469389, "grad_norm": 3.5179264545440674, "learning_rate": 4.432857142857143e-05, "loss": 4.0978, "step": 2783 }, { "epoch": 11.363265306122448, "grad_norm": 7.685668468475342, "learning_rate": 4.43265306122449e-05, "loss": 3.2828, "step": 2784 }, { "epoch": 11.36734693877551, "grad_norm": 5.7427592277526855, "learning_rate": 4.432448979591837e-05, "loss": 3.7086, "step": 2785 }, { "epoch": 11.371428571428572, "grad_norm": 4.4626145362854, "learning_rate": 4.432244897959184e-05, "loss": 3.9558, "step": 2786 }, { "epoch": 11.375510204081632, "grad_norm": 3.446357011795044, "learning_rate": 4.432040816326531e-05, "loss": 3.7624, "step": 2787 }, { "epoch": 11.379591836734694, "grad_norm": 5.159426689147949, "learning_rate": 4.4318367346938775e-05, "loss": 3.6234, "step": 2788 }, { "epoch": 11.383673469387755, "grad_norm": 3.699660062789917, "learning_rate": 4.431632653061225e-05, "loss": 4.0011, "step": 2789 }, { "epoch": 11.387755102040817, "grad_norm": 6.603206634521484, "learning_rate": 4.4314285714285716e-05, "loss": 3.6091, "step": 2790 }, { "epoch": 11.391836734693877, "grad_norm": 6.270772933959961, "learning_rate": 4.431224489795919e-05, "loss": 3.4565, "step": 2791 }, { "epoch": 11.395918367346939, "grad_norm": 5.392909049987793, "learning_rate": 4.431020408163266e-05, "loss": 3.3873, "step": 2792 }, { "epoch": 11.4, "grad_norm": 6.978325843811035, "learning_rate": 4.4308163265306124e-05, "loss": 3.8301, "step": 2793 }, { "epoch": 11.404081632653062, "grad_norm": 4.264011383056641, "learning_rate": 4.430612244897959e-05, "loss": 3.7628, "step": 2794 }, { "epoch": 11.408163265306122, "grad_norm": 5.799688816070557, "learning_rate": 4.4304081632653065e-05, "loss": 3.5451, "step": 2795 }, { "epoch": 11.412244897959184, "grad_norm": 6.380837917327881, "learning_rate": 4.430204081632653e-05, "loss": 3.4603, "step": 2796 }, { "epoch": 11.416326530612245, "grad_norm": 5.83388090133667, "learning_rate": 4.43e-05, "loss": 3.6848, "step": 2797 }, { "epoch": 11.420408163265305, "grad_norm": 4.906007766723633, "learning_rate": 4.429795918367347e-05, "loss": 3.5118, "step": 2798 }, { "epoch": 11.424489795918367, "grad_norm": 3.769740581512451, "learning_rate": 4.429591836734694e-05, "loss": 3.7254, "step": 2799 }, { "epoch": 11.428571428571429, "grad_norm": 5.779485702514648, "learning_rate": 4.4293877551020414e-05, "loss": 2.9909, "step": 2800 }, { "epoch": 11.43265306122449, "grad_norm": 4.074498176574707, "learning_rate": 4.429183673469388e-05, "loss": 3.6978, "step": 2801 }, { "epoch": 11.43673469387755, "grad_norm": 5.1243767738342285, "learning_rate": 4.428979591836735e-05, "loss": 3.863, "step": 2802 }, { "epoch": 11.440816326530612, "grad_norm": 4.744725227355957, "learning_rate": 4.4287755102040816e-05, "loss": 3.8411, "step": 2803 }, { "epoch": 11.444897959183674, "grad_norm": 4.023436546325684, "learning_rate": 4.428571428571428e-05, "loss": 3.9916, "step": 2804 }, { "epoch": 11.448979591836734, "grad_norm": 4.5228729248046875, "learning_rate": 4.428367346938776e-05, "loss": 3.9004, "step": 2805 }, { "epoch": 11.453061224489796, "grad_norm": 8.042742729187012, "learning_rate": 4.428163265306123e-05, "loss": 2.7395, "step": 2806 }, { "epoch": 11.457142857142857, "grad_norm": 8.514951705932617, "learning_rate": 4.42795918367347e-05, "loss": 2.7205, "step": 2807 }, { "epoch": 11.461224489795919, "grad_norm": 6.045901298522949, "learning_rate": 4.4277551020408165e-05, "loss": 3.2797, "step": 2808 }, { "epoch": 11.465306122448979, "grad_norm": 3.4560067653656006, "learning_rate": 4.427551020408163e-05, "loss": 3.7479, "step": 2809 }, { "epoch": 11.46938775510204, "grad_norm": 4.116746425628662, "learning_rate": 4.4273469387755106e-05, "loss": 3.6338, "step": 2810 }, { "epoch": 11.473469387755102, "grad_norm": 3.496962547302246, "learning_rate": 4.427142857142857e-05, "loss": 4.0277, "step": 2811 }, { "epoch": 11.477551020408164, "grad_norm": 3.2419309616088867, "learning_rate": 4.426938775510204e-05, "loss": 3.926, "step": 2812 }, { "epoch": 11.481632653061224, "grad_norm": 5.175417423248291, "learning_rate": 4.4267346938775514e-05, "loss": 3.5222, "step": 2813 }, { "epoch": 11.485714285714286, "grad_norm": 5.145540714263916, "learning_rate": 4.426530612244898e-05, "loss": 3.5084, "step": 2814 }, { "epoch": 11.489795918367347, "grad_norm": 6.991964340209961, "learning_rate": 4.4263265306122455e-05, "loss": 3.1029, "step": 2815 }, { "epoch": 11.493877551020407, "grad_norm": 9.20118236541748, "learning_rate": 4.426122448979592e-05, "loss": 2.4065, "step": 2816 }, { "epoch": 11.49795918367347, "grad_norm": 7.922998428344727, "learning_rate": 4.425918367346939e-05, "loss": 3.0714, "step": 2817 }, { "epoch": 11.50204081632653, "grad_norm": 9.0860595703125, "learning_rate": 4.4257142857142856e-05, "loss": 2.6396, "step": 2818 }, { "epoch": 11.506122448979593, "grad_norm": 3.1216166019439697, "learning_rate": 4.4255102040816324e-05, "loss": 3.917, "step": 2819 }, { "epoch": 11.510204081632653, "grad_norm": 5.498362064361572, "learning_rate": 4.42530612244898e-05, "loss": 3.7094, "step": 2820 }, { "epoch": 11.514285714285714, "grad_norm": 4.07676362991333, "learning_rate": 4.425102040816327e-05, "loss": 3.8791, "step": 2821 }, { "epoch": 11.518367346938776, "grad_norm": 6.770262718200684, "learning_rate": 4.424897959183674e-05, "loss": 3.4897, "step": 2822 }, { "epoch": 11.522448979591836, "grad_norm": 6.389657974243164, "learning_rate": 4.4246938775510206e-05, "loss": 3.4636, "step": 2823 }, { "epoch": 11.526530612244898, "grad_norm": 7.190275192260742, "learning_rate": 4.424489795918367e-05, "loss": 3.0467, "step": 2824 }, { "epoch": 11.53061224489796, "grad_norm": 9.383018493652344, "learning_rate": 4.424285714285715e-05, "loss": 2.4153, "step": 2825 }, { "epoch": 11.534693877551021, "grad_norm": 3.6686596870422363, "learning_rate": 4.4240816326530614e-05, "loss": 3.773, "step": 2826 }, { "epoch": 11.538775510204081, "grad_norm": 2.991745948791504, "learning_rate": 4.423877551020408e-05, "loss": 3.8727, "step": 2827 }, { "epoch": 11.542857142857143, "grad_norm": 4.322829723358154, "learning_rate": 4.4236734693877555e-05, "loss": 3.6989, "step": 2828 }, { "epoch": 11.546938775510204, "grad_norm": 5.8095784187316895, "learning_rate": 4.423469387755102e-05, "loss": 3.4609, "step": 2829 }, { "epoch": 11.551020408163264, "grad_norm": 5.753416538238525, "learning_rate": 4.4232653061224496e-05, "loss": 3.4781, "step": 2830 }, { "epoch": 11.555102040816326, "grad_norm": 7.806664943695068, "learning_rate": 4.423061224489796e-05, "loss": 2.8635, "step": 2831 }, { "epoch": 11.559183673469388, "grad_norm": 6.975472450256348, "learning_rate": 4.422857142857143e-05, "loss": 3.3557, "step": 2832 }, { "epoch": 11.56326530612245, "grad_norm": 6.389227867126465, "learning_rate": 4.42265306122449e-05, "loss": 3.2569, "step": 2833 }, { "epoch": 11.56734693877551, "grad_norm": 6.406622886657715, "learning_rate": 4.4224489795918364e-05, "loss": 3.535, "step": 2834 }, { "epoch": 11.571428571428571, "grad_norm": 4.851696014404297, "learning_rate": 4.422244897959184e-05, "loss": 3.5321, "step": 2835 }, { "epoch": 11.575510204081633, "grad_norm": 7.422347545623779, "learning_rate": 4.422040816326531e-05, "loss": 3.2809, "step": 2836 }, { "epoch": 11.579591836734695, "grad_norm": 4.7177348136901855, "learning_rate": 4.421836734693878e-05, "loss": 3.7573, "step": 2837 }, { "epoch": 11.583673469387755, "grad_norm": 5.777629852294922, "learning_rate": 4.4216326530612246e-05, "loss": 3.3031, "step": 2838 }, { "epoch": 11.587755102040816, "grad_norm": 6.418118000030518, "learning_rate": 4.4214285714285714e-05, "loss": 3.3621, "step": 2839 }, { "epoch": 11.591836734693878, "grad_norm": 6.484901428222656, "learning_rate": 4.421224489795919e-05, "loss": 3.1023, "step": 2840 }, { "epoch": 11.59591836734694, "grad_norm": 3.9921975135803223, "learning_rate": 4.4210204081632655e-05, "loss": 3.6809, "step": 2841 }, { "epoch": 11.6, "grad_norm": 3.9069442749023438, "learning_rate": 4.420816326530612e-05, "loss": 3.9464, "step": 2842 }, { "epoch": 11.604081632653061, "grad_norm": 4.660372257232666, "learning_rate": 4.4206122448979596e-05, "loss": 3.751, "step": 2843 }, { "epoch": 11.608163265306123, "grad_norm": 3.2814764976501465, "learning_rate": 4.420408163265306e-05, "loss": 3.92, "step": 2844 }, { "epoch": 11.612244897959183, "grad_norm": 4.116532802581787, "learning_rate": 4.4202040816326537e-05, "loss": 3.9878, "step": 2845 }, { "epoch": 11.616326530612245, "grad_norm": 4.260604381561279, "learning_rate": 4.4200000000000004e-05, "loss": 3.6241, "step": 2846 }, { "epoch": 11.620408163265306, "grad_norm": 9.046996116638184, "learning_rate": 4.419795918367347e-05, "loss": 2.7737, "step": 2847 }, { "epoch": 11.624489795918368, "grad_norm": 6.084208011627197, "learning_rate": 4.419591836734694e-05, "loss": 3.6105, "step": 2848 }, { "epoch": 11.628571428571428, "grad_norm": 5.124359607696533, "learning_rate": 4.419387755102041e-05, "loss": 3.6555, "step": 2849 }, { "epoch": 11.63265306122449, "grad_norm": 5.251857757568359, "learning_rate": 4.419183673469388e-05, "loss": 3.5858, "step": 2850 }, { "epoch": 11.636734693877552, "grad_norm": 5.831306457519531, "learning_rate": 4.4189795918367346e-05, "loss": 3.4948, "step": 2851 }, { "epoch": 11.640816326530611, "grad_norm": 5.296542644500732, "learning_rate": 4.418775510204082e-05, "loss": 3.555, "step": 2852 }, { "epoch": 11.644897959183673, "grad_norm": 4.344006061553955, "learning_rate": 4.418571428571429e-05, "loss": 3.6867, "step": 2853 }, { "epoch": 11.648979591836735, "grad_norm": 4.718690872192383, "learning_rate": 4.418367346938776e-05, "loss": 3.6105, "step": 2854 }, { "epoch": 11.653061224489797, "grad_norm": 4.652915000915527, "learning_rate": 4.418163265306123e-05, "loss": 3.7606, "step": 2855 }, { "epoch": 11.657142857142857, "grad_norm": 8.699178695678711, "learning_rate": 4.4179591836734695e-05, "loss": 2.8372, "step": 2856 }, { "epoch": 11.661224489795918, "grad_norm": 4.114621162414551, "learning_rate": 4.417755102040816e-05, "loss": 3.7435, "step": 2857 }, { "epoch": 11.66530612244898, "grad_norm": 4.178124904632568, "learning_rate": 4.417551020408163e-05, "loss": 3.6324, "step": 2858 }, { "epoch": 11.66938775510204, "grad_norm": 5.304688930511475, "learning_rate": 4.4173469387755103e-05, "loss": 3.8006, "step": 2859 }, { "epoch": 11.673469387755102, "grad_norm": 7.046751022338867, "learning_rate": 4.417142857142858e-05, "loss": 3.281, "step": 2860 }, { "epoch": 11.677551020408163, "grad_norm": 4.312539100646973, "learning_rate": 4.4169387755102044e-05, "loss": 3.8547, "step": 2861 }, { "epoch": 11.681632653061225, "grad_norm": 6.443550109863281, "learning_rate": 4.416734693877551e-05, "loss": 3.2574, "step": 2862 }, { "epoch": 11.685714285714285, "grad_norm": 4.38927698135376, "learning_rate": 4.416530612244898e-05, "loss": 3.7719, "step": 2863 }, { "epoch": 11.689795918367347, "grad_norm": 6.944699287414551, "learning_rate": 4.416326530612245e-05, "loss": 3.1521, "step": 2864 }, { "epoch": 11.693877551020408, "grad_norm": 5.320376873016357, "learning_rate": 4.416122448979592e-05, "loss": 3.6773, "step": 2865 }, { "epoch": 11.69795918367347, "grad_norm": 7.906416893005371, "learning_rate": 4.415918367346939e-05, "loss": 3.6218, "step": 2866 }, { "epoch": 11.70204081632653, "grad_norm": 5.380166530609131, "learning_rate": 4.415714285714286e-05, "loss": 3.387, "step": 2867 }, { "epoch": 11.706122448979592, "grad_norm": 4.73659086227417, "learning_rate": 4.415510204081633e-05, "loss": 3.8462, "step": 2868 }, { "epoch": 11.710204081632654, "grad_norm": 8.214574813842773, "learning_rate": 4.41530612244898e-05, "loss": 3.2357, "step": 2869 }, { "epoch": 11.714285714285714, "grad_norm": 4.572628021240234, "learning_rate": 4.415102040816327e-05, "loss": 3.43, "step": 2870 }, { "epoch": 11.718367346938775, "grad_norm": 15.683358192443848, "learning_rate": 4.4148979591836736e-05, "loss": 1.4303, "step": 2871 }, { "epoch": 11.722448979591837, "grad_norm": 6.012903213500977, "learning_rate": 4.41469387755102e-05, "loss": 3.4473, "step": 2872 }, { "epoch": 11.726530612244899, "grad_norm": 4.86742639541626, "learning_rate": 4.414489795918367e-05, "loss": 3.4829, "step": 2873 }, { "epoch": 11.730612244897959, "grad_norm": 3.997286796569824, "learning_rate": 4.4142857142857144e-05, "loss": 3.8089, "step": 2874 }, { "epoch": 11.73469387755102, "grad_norm": 5.818069934844971, "learning_rate": 4.414081632653062e-05, "loss": 3.7598, "step": 2875 }, { "epoch": 11.738775510204082, "grad_norm": 5.5277018547058105, "learning_rate": 4.4138775510204085e-05, "loss": 3.3166, "step": 2876 }, { "epoch": 11.742857142857144, "grad_norm": 3.434077501296997, "learning_rate": 4.413673469387755e-05, "loss": 3.8443, "step": 2877 }, { "epoch": 11.746938775510204, "grad_norm": 6.514848709106445, "learning_rate": 4.413469387755102e-05, "loss": 3.4314, "step": 2878 }, { "epoch": 11.751020408163265, "grad_norm": 3.9831883907318115, "learning_rate": 4.4132653061224493e-05, "loss": 3.8744, "step": 2879 }, { "epoch": 11.755102040816327, "grad_norm": 5.901851177215576, "learning_rate": 4.413061224489796e-05, "loss": 3.3617, "step": 2880 }, { "epoch": 11.759183673469387, "grad_norm": 5.407713413238525, "learning_rate": 4.412857142857143e-05, "loss": 3.6324, "step": 2881 }, { "epoch": 11.763265306122449, "grad_norm": 3.101745128631592, "learning_rate": 4.41265306122449e-05, "loss": 3.944, "step": 2882 }, { "epoch": 11.76734693877551, "grad_norm": 4.792869567871094, "learning_rate": 4.412448979591837e-05, "loss": 3.9386, "step": 2883 }, { "epoch": 11.771428571428572, "grad_norm": 4.253514289855957, "learning_rate": 4.412244897959184e-05, "loss": 3.7802, "step": 2884 }, { "epoch": 11.775510204081632, "grad_norm": 3.6941399574279785, "learning_rate": 4.412040816326531e-05, "loss": 3.6344, "step": 2885 }, { "epoch": 11.779591836734694, "grad_norm": 3.0899081230163574, "learning_rate": 4.411836734693878e-05, "loss": 3.8337, "step": 2886 }, { "epoch": 11.783673469387756, "grad_norm": 6.457389831542969, "learning_rate": 4.4116326530612244e-05, "loss": 3.3533, "step": 2887 }, { "epoch": 11.787755102040816, "grad_norm": 11.218193054199219, "learning_rate": 4.411428571428572e-05, "loss": 2.4814, "step": 2888 }, { "epoch": 11.791836734693877, "grad_norm": 4.094077110290527, "learning_rate": 4.4112244897959185e-05, "loss": 3.6927, "step": 2889 }, { "epoch": 11.795918367346939, "grad_norm": 2.618642568588257, "learning_rate": 4.411020408163266e-05, "loss": 4.1439, "step": 2890 }, { "epoch": 11.8, "grad_norm": 8.121742248535156, "learning_rate": 4.4108163265306126e-05, "loss": 3.2036, "step": 2891 }, { "epoch": 11.80408163265306, "grad_norm": 4.432143211364746, "learning_rate": 4.410612244897959e-05, "loss": 3.7554, "step": 2892 }, { "epoch": 11.808163265306122, "grad_norm": 3.7801342010498047, "learning_rate": 4.410408163265307e-05, "loss": 3.8962, "step": 2893 }, { "epoch": 11.812244897959184, "grad_norm": 3.7505264282226562, "learning_rate": 4.4102040816326534e-05, "loss": 3.6535, "step": 2894 }, { "epoch": 11.816326530612244, "grad_norm": 5.481281757354736, "learning_rate": 4.41e-05, "loss": 3.0041, "step": 2895 }, { "epoch": 11.820408163265306, "grad_norm": 3.7067089080810547, "learning_rate": 4.409795918367347e-05, "loss": 3.9339, "step": 2896 }, { "epoch": 11.824489795918367, "grad_norm": 3.7238900661468506, "learning_rate": 4.409591836734694e-05, "loss": 3.7674, "step": 2897 }, { "epoch": 11.82857142857143, "grad_norm": 8.60754108428955, "learning_rate": 4.409387755102041e-05, "loss": 2.6615, "step": 2898 }, { "epoch": 11.83265306122449, "grad_norm": 3.812774896621704, "learning_rate": 4.409183673469388e-05, "loss": 3.6221, "step": 2899 }, { "epoch": 11.83673469387755, "grad_norm": 2.1437549591064453, "learning_rate": 4.408979591836735e-05, "loss": 4.0888, "step": 2900 }, { "epoch": 11.840816326530613, "grad_norm": 3.602581739425659, "learning_rate": 4.408775510204082e-05, "loss": 3.7153, "step": 2901 }, { "epoch": 11.844897959183674, "grad_norm": 3.6320297718048096, "learning_rate": 4.4085714285714285e-05, "loss": 4.0859, "step": 2902 }, { "epoch": 11.848979591836734, "grad_norm": 4.824340343475342, "learning_rate": 4.408367346938776e-05, "loss": 3.5723, "step": 2903 }, { "epoch": 11.853061224489796, "grad_norm": 8.590344429016113, "learning_rate": 4.4081632653061226e-05, "loss": 3.2439, "step": 2904 }, { "epoch": 11.857142857142858, "grad_norm": 7.2580437660217285, "learning_rate": 4.407959183673469e-05, "loss": 3.6405, "step": 2905 }, { "epoch": 11.861224489795918, "grad_norm": 6.366470813751221, "learning_rate": 4.407755102040817e-05, "loss": 3.6184, "step": 2906 }, { "epoch": 11.86530612244898, "grad_norm": 5.271113872528076, "learning_rate": 4.4075510204081634e-05, "loss": 3.8925, "step": 2907 }, { "epoch": 11.869387755102041, "grad_norm": 5.826539039611816, "learning_rate": 4.407346938775511e-05, "loss": 3.3016, "step": 2908 }, { "epoch": 11.873469387755103, "grad_norm": 4.94535493850708, "learning_rate": 4.4071428571428575e-05, "loss": 3.7778, "step": 2909 }, { "epoch": 11.877551020408163, "grad_norm": 7.128791332244873, "learning_rate": 4.406938775510204e-05, "loss": 3.5239, "step": 2910 }, { "epoch": 11.881632653061224, "grad_norm": 5.585460186004639, "learning_rate": 4.406734693877551e-05, "loss": 3.4526, "step": 2911 }, { "epoch": 11.885714285714286, "grad_norm": 3.3366026878356934, "learning_rate": 4.4065306122448976e-05, "loss": 3.9296, "step": 2912 }, { "epoch": 11.889795918367348, "grad_norm": 3.8312385082244873, "learning_rate": 4.406326530612245e-05, "loss": 3.9235, "step": 2913 }, { "epoch": 11.893877551020408, "grad_norm": 4.990665912628174, "learning_rate": 4.4061224489795924e-05, "loss": 3.5889, "step": 2914 }, { "epoch": 11.89795918367347, "grad_norm": 6.128520488739014, "learning_rate": 4.405918367346939e-05, "loss": 2.7014, "step": 2915 }, { "epoch": 11.902040816326531, "grad_norm": 4.4985175132751465, "learning_rate": 4.405714285714286e-05, "loss": 3.6559, "step": 2916 }, { "epoch": 11.906122448979591, "grad_norm": 4.522100925445557, "learning_rate": 4.4055102040816325e-05, "loss": 3.7825, "step": 2917 }, { "epoch": 11.910204081632653, "grad_norm": 7.187729835510254, "learning_rate": 4.40530612244898e-05, "loss": 3.1387, "step": 2918 }, { "epoch": 11.914285714285715, "grad_norm": 3.1763734817504883, "learning_rate": 4.4051020408163267e-05, "loss": 3.9106, "step": 2919 }, { "epoch": 11.918367346938776, "grad_norm": 8.870987892150879, "learning_rate": 4.4048979591836734e-05, "loss": 3.2543, "step": 2920 }, { "epoch": 11.922448979591836, "grad_norm": 5.12082576751709, "learning_rate": 4.404693877551021e-05, "loss": 3.7617, "step": 2921 }, { "epoch": 11.926530612244898, "grad_norm": 3.443586587905884, "learning_rate": 4.4044897959183675e-05, "loss": 3.7036, "step": 2922 }, { "epoch": 11.93061224489796, "grad_norm": 4.3807291984558105, "learning_rate": 4.404285714285715e-05, "loss": 3.6456, "step": 2923 }, { "epoch": 11.93469387755102, "grad_norm": 4.3056817054748535, "learning_rate": 4.4040816326530616e-05, "loss": 3.6197, "step": 2924 }, { "epoch": 11.938775510204081, "grad_norm": 7.62592887878418, "learning_rate": 4.403877551020408e-05, "loss": 3.1208, "step": 2925 }, { "epoch": 11.942857142857143, "grad_norm": 4.049353122711182, "learning_rate": 4.403673469387755e-05, "loss": 3.7897, "step": 2926 }, { "epoch": 11.946938775510205, "grad_norm": 7.066872596740723, "learning_rate": 4.403469387755102e-05, "loss": 3.2972, "step": 2927 }, { "epoch": 11.951020408163265, "grad_norm": 4.400294780731201, "learning_rate": 4.403265306122449e-05, "loss": 3.7647, "step": 2928 }, { "epoch": 11.955102040816326, "grad_norm": 6.367137908935547, "learning_rate": 4.4030612244897965e-05, "loss": 3.203, "step": 2929 }, { "epoch": 11.959183673469388, "grad_norm": 3.383958339691162, "learning_rate": 4.402857142857143e-05, "loss": 3.595, "step": 2930 }, { "epoch": 11.963265306122448, "grad_norm": 3.616250514984131, "learning_rate": 4.40265306122449e-05, "loss": 3.8207, "step": 2931 }, { "epoch": 11.96734693877551, "grad_norm": 3.080714702606201, "learning_rate": 4.402448979591837e-05, "loss": 4.1354, "step": 2932 }, { "epoch": 11.971428571428572, "grad_norm": 5.786810874938965, "learning_rate": 4.402244897959184e-05, "loss": 3.2222, "step": 2933 }, { "epoch": 11.975510204081633, "grad_norm": 9.102005004882812, "learning_rate": 4.402040816326531e-05, "loss": 2.8005, "step": 2934 }, { "epoch": 11.979591836734693, "grad_norm": 5.908854007720947, "learning_rate": 4.4018367346938774e-05, "loss": 3.5014, "step": 2935 }, { "epoch": 11.983673469387755, "grad_norm": 6.059930324554443, "learning_rate": 4.401632653061225e-05, "loss": 3.0314, "step": 2936 }, { "epoch": 11.987755102040817, "grad_norm": 6.42285680770874, "learning_rate": 4.401428571428572e-05, "loss": 3.4243, "step": 2937 }, { "epoch": 11.991836734693878, "grad_norm": 7.695051193237305, "learning_rate": 4.401224489795919e-05, "loss": 3.162, "step": 2938 }, { "epoch": 11.995918367346938, "grad_norm": 4.241032600402832, "learning_rate": 4.4010204081632656e-05, "loss": 3.8863, "step": 2939 }, { "epoch": 12.0, "grad_norm": 5.453252792358398, "learning_rate": 4.4008163265306124e-05, "loss": 3.5718, "step": 2940 }, { "epoch": 12.004081632653062, "grad_norm": 6.301518440246582, "learning_rate": 4.400612244897959e-05, "loss": 3.4111, "step": 2941 }, { "epoch": 12.008163265306122, "grad_norm": 6.583859920501709, "learning_rate": 4.4004081632653065e-05, "loss": 2.9651, "step": 2942 }, { "epoch": 12.012244897959183, "grad_norm": 5.612706184387207, "learning_rate": 4.400204081632653e-05, "loss": 3.0367, "step": 2943 }, { "epoch": 12.016326530612245, "grad_norm": 3.595771312713623, "learning_rate": 4.4000000000000006e-05, "loss": 3.53, "step": 2944 }, { "epoch": 12.020408163265307, "grad_norm": 3.68459415435791, "learning_rate": 4.399795918367347e-05, "loss": 3.7247, "step": 2945 }, { "epoch": 12.024489795918367, "grad_norm": 8.344913482666016, "learning_rate": 4.399591836734694e-05, "loss": 2.7767, "step": 2946 }, { "epoch": 12.028571428571428, "grad_norm": 5.439990043640137, "learning_rate": 4.3993877551020414e-05, "loss": 3.3401, "step": 2947 }, { "epoch": 12.03265306122449, "grad_norm": 3.4209134578704834, "learning_rate": 4.399183673469388e-05, "loss": 3.8937, "step": 2948 }, { "epoch": 12.036734693877552, "grad_norm": 5.4253034591674805, "learning_rate": 4.398979591836735e-05, "loss": 3.4064, "step": 2949 }, { "epoch": 12.040816326530612, "grad_norm": 5.785054683685303, "learning_rate": 4.3987755102040815e-05, "loss": 3.7387, "step": 2950 }, { "epoch": 12.044897959183674, "grad_norm": 7.312629222869873, "learning_rate": 4.398571428571428e-05, "loss": 3.1615, "step": 2951 }, { "epoch": 12.048979591836735, "grad_norm": 6.603084564208984, "learning_rate": 4.3983673469387756e-05, "loss": 3.8543, "step": 2952 }, { "epoch": 12.053061224489795, "grad_norm": 5.044512748718262, "learning_rate": 4.398163265306123e-05, "loss": 3.7168, "step": 2953 }, { "epoch": 12.057142857142857, "grad_norm": 3.7372710704803467, "learning_rate": 4.39795918367347e-05, "loss": 3.6909, "step": 2954 }, { "epoch": 12.061224489795919, "grad_norm": 3.495410919189453, "learning_rate": 4.3977551020408164e-05, "loss": 3.8375, "step": 2955 }, { "epoch": 12.06530612244898, "grad_norm": 4.098114490509033, "learning_rate": 4.397551020408163e-05, "loss": 3.8676, "step": 2956 }, { "epoch": 12.06938775510204, "grad_norm": 3.5312647819519043, "learning_rate": 4.3973469387755105e-05, "loss": 3.7346, "step": 2957 }, { "epoch": 12.073469387755102, "grad_norm": 4.391788482666016, "learning_rate": 4.397142857142857e-05, "loss": 3.706, "step": 2958 }, { "epoch": 12.077551020408164, "grad_norm": 6.072402000427246, "learning_rate": 4.396938775510204e-05, "loss": 2.8556, "step": 2959 }, { "epoch": 12.081632653061224, "grad_norm": 4.657858848571777, "learning_rate": 4.3967346938775514e-05, "loss": 3.5581, "step": 2960 }, { "epoch": 12.085714285714285, "grad_norm": 4.149265766143799, "learning_rate": 4.396530612244898e-05, "loss": 3.8467, "step": 2961 }, { "epoch": 12.089795918367347, "grad_norm": 3.8834657669067383, "learning_rate": 4.3963265306122455e-05, "loss": 3.5803, "step": 2962 }, { "epoch": 12.093877551020409, "grad_norm": 4.0273518562316895, "learning_rate": 4.396122448979592e-05, "loss": 3.8235, "step": 2963 }, { "epoch": 12.097959183673469, "grad_norm": 3.2493317127227783, "learning_rate": 4.395918367346939e-05, "loss": 3.7962, "step": 2964 }, { "epoch": 12.10204081632653, "grad_norm": 4.156914710998535, "learning_rate": 4.3957142857142856e-05, "loss": 3.6618, "step": 2965 }, { "epoch": 12.106122448979592, "grad_norm": 7.077944755554199, "learning_rate": 4.395510204081632e-05, "loss": 3.0754, "step": 2966 }, { "epoch": 12.110204081632654, "grad_norm": 3.8355395793914795, "learning_rate": 4.39530612244898e-05, "loss": 3.6999, "step": 2967 }, { "epoch": 12.114285714285714, "grad_norm": 4.327426910400391, "learning_rate": 4.395102040816327e-05, "loss": 3.9444, "step": 2968 }, { "epoch": 12.118367346938776, "grad_norm": 7.141664981842041, "learning_rate": 4.394897959183674e-05, "loss": 3.1265, "step": 2969 }, { "epoch": 12.122448979591837, "grad_norm": 3.406824827194214, "learning_rate": 4.3946938775510205e-05, "loss": 3.7991, "step": 2970 }, { "epoch": 12.126530612244897, "grad_norm": 7.232169151306152, "learning_rate": 4.394489795918367e-05, "loss": 3.3672, "step": 2971 }, { "epoch": 12.130612244897959, "grad_norm": 5.118012428283691, "learning_rate": 4.3942857142857146e-05, "loss": 3.411, "step": 2972 }, { "epoch": 12.13469387755102, "grad_norm": 3.651357412338257, "learning_rate": 4.394081632653061e-05, "loss": 3.7084, "step": 2973 }, { "epoch": 12.138775510204082, "grad_norm": 7.614086627960205, "learning_rate": 4.393877551020408e-05, "loss": 3.3706, "step": 2974 }, { "epoch": 12.142857142857142, "grad_norm": 5.429875373840332, "learning_rate": 4.3936734693877554e-05, "loss": 3.4214, "step": 2975 }, { "epoch": 12.146938775510204, "grad_norm": 3.955023765563965, "learning_rate": 4.393469387755103e-05, "loss": 3.7262, "step": 2976 }, { "epoch": 12.151020408163266, "grad_norm": 6.229698657989502, "learning_rate": 4.3932653061224495e-05, "loss": 3.448, "step": 2977 }, { "epoch": 12.155102040816326, "grad_norm": 9.490949630737305, "learning_rate": 4.393061224489796e-05, "loss": 2.2521, "step": 2978 }, { "epoch": 12.159183673469387, "grad_norm": 4.109123229980469, "learning_rate": 4.392857142857143e-05, "loss": 3.8761, "step": 2979 }, { "epoch": 12.16326530612245, "grad_norm": 16.9218692779541, "learning_rate": 4.39265306122449e-05, "loss": 1.4104, "step": 2980 }, { "epoch": 12.16734693877551, "grad_norm": 6.030535697937012, "learning_rate": 4.392448979591837e-05, "loss": 3.3659, "step": 2981 }, { "epoch": 12.17142857142857, "grad_norm": 8.002903938293457, "learning_rate": 4.392244897959184e-05, "loss": 2.7376, "step": 2982 }, { "epoch": 12.175510204081633, "grad_norm": 4.8706865310668945, "learning_rate": 4.392040816326531e-05, "loss": 3.8187, "step": 2983 }, { "epoch": 12.179591836734694, "grad_norm": 4.376619338989258, "learning_rate": 4.391836734693878e-05, "loss": 3.6554, "step": 2984 }, { "epoch": 12.183673469387756, "grad_norm": 5.68206787109375, "learning_rate": 4.3916326530612246e-05, "loss": 3.7188, "step": 2985 }, { "epoch": 12.187755102040816, "grad_norm": 5.9243083000183105, "learning_rate": 4.391428571428572e-05, "loss": 3.7006, "step": 2986 }, { "epoch": 12.191836734693878, "grad_norm": 6.45805549621582, "learning_rate": 4.391224489795919e-05, "loss": 3.3816, "step": 2987 }, { "epoch": 12.19591836734694, "grad_norm": 5.397141456604004, "learning_rate": 4.3910204081632654e-05, "loss": 3.7381, "step": 2988 }, { "epoch": 12.2, "grad_norm": 7.467258930206299, "learning_rate": 4.390816326530612e-05, "loss": 3.5522, "step": 2989 }, { "epoch": 12.204081632653061, "grad_norm": 3.963339328765869, "learning_rate": 4.3906122448979595e-05, "loss": 3.7477, "step": 2990 }, { "epoch": 12.208163265306123, "grad_norm": 4.548480987548828, "learning_rate": 4.390408163265307e-05, "loss": 3.8432, "step": 2991 }, { "epoch": 12.212244897959184, "grad_norm": 6.285135746002197, "learning_rate": 4.3902040816326536e-05, "loss": 2.5771, "step": 2992 }, { "epoch": 12.216326530612244, "grad_norm": 7.733376502990723, "learning_rate": 4.39e-05, "loss": 3.2044, "step": 2993 }, { "epoch": 12.220408163265306, "grad_norm": 5.146238327026367, "learning_rate": 4.389795918367347e-05, "loss": 3.4213, "step": 2994 }, { "epoch": 12.224489795918368, "grad_norm": 4.152595520019531, "learning_rate": 4.389591836734694e-05, "loss": 3.6757, "step": 2995 }, { "epoch": 12.228571428571428, "grad_norm": 7.146929740905762, "learning_rate": 4.389387755102041e-05, "loss": 3.3571, "step": 2996 }, { "epoch": 12.23265306122449, "grad_norm": 4.937651634216309, "learning_rate": 4.389183673469388e-05, "loss": 3.7507, "step": 2997 }, { "epoch": 12.236734693877551, "grad_norm": 5.313930511474609, "learning_rate": 4.388979591836735e-05, "loss": 3.5327, "step": 2998 }, { "epoch": 12.240816326530613, "grad_norm": 4.358998775482178, "learning_rate": 4.388775510204082e-05, "loss": 3.7203, "step": 2999 }, { "epoch": 12.244897959183673, "grad_norm": 4.102266311645508, "learning_rate": 4.388571428571429e-05, "loss": 3.971, "step": 3000 }, { "epoch": 12.248979591836735, "grad_norm": 6.436641216278076, "learning_rate": 4.388367346938776e-05, "loss": 3.76, "step": 3001 }, { "epoch": 12.253061224489796, "grad_norm": 6.86653470993042, "learning_rate": 4.388163265306123e-05, "loss": 3.3289, "step": 3002 }, { "epoch": 12.257142857142856, "grad_norm": 5.530351638793945, "learning_rate": 4.3879591836734695e-05, "loss": 3.8062, "step": 3003 }, { "epoch": 12.261224489795918, "grad_norm": 7.03493595123291, "learning_rate": 4.387755102040816e-05, "loss": 3.0254, "step": 3004 }, { "epoch": 12.26530612244898, "grad_norm": 5.800630569458008, "learning_rate": 4.387551020408163e-05, "loss": 3.5488, "step": 3005 }, { "epoch": 12.269387755102041, "grad_norm": 2.364657402038574, "learning_rate": 4.38734693877551e-05, "loss": 4.0524, "step": 3006 }, { "epoch": 12.273469387755101, "grad_norm": 3.4235634803771973, "learning_rate": 4.387142857142858e-05, "loss": 3.89, "step": 3007 }, { "epoch": 12.277551020408163, "grad_norm": 8.247211456298828, "learning_rate": 4.3869387755102044e-05, "loss": 3.0366, "step": 3008 }, { "epoch": 12.281632653061225, "grad_norm": 7.137781620025635, "learning_rate": 4.386734693877551e-05, "loss": 3.3808, "step": 3009 }, { "epoch": 12.285714285714286, "grad_norm": 7.653425216674805, "learning_rate": 4.386530612244898e-05, "loss": 3.3118, "step": 3010 }, { "epoch": 12.289795918367346, "grad_norm": 10.619105339050293, "learning_rate": 4.386326530612245e-05, "loss": 2.6286, "step": 3011 }, { "epoch": 12.293877551020408, "grad_norm": 5.471890449523926, "learning_rate": 4.386122448979592e-05, "loss": 3.6562, "step": 3012 }, { "epoch": 12.29795918367347, "grad_norm": 6.931427955627441, "learning_rate": 4.3859183673469386e-05, "loss": 3.15, "step": 3013 }, { "epoch": 12.30204081632653, "grad_norm": 7.254970550537109, "learning_rate": 4.385714285714286e-05, "loss": 3.5524, "step": 3014 }, { "epoch": 12.306122448979592, "grad_norm": 5.440418720245361, "learning_rate": 4.385510204081633e-05, "loss": 3.7942, "step": 3015 }, { "epoch": 12.310204081632653, "grad_norm": 6.473994731903076, "learning_rate": 4.38530612244898e-05, "loss": 3.2029, "step": 3016 }, { "epoch": 12.314285714285715, "grad_norm": 3.8915209770202637, "learning_rate": 4.385102040816327e-05, "loss": 3.6291, "step": 3017 }, { "epoch": 12.318367346938775, "grad_norm": 6.521450519561768, "learning_rate": 4.3848979591836736e-05, "loss": 3.5393, "step": 3018 }, { "epoch": 12.322448979591837, "grad_norm": 4.038844585418701, "learning_rate": 4.38469387755102e-05, "loss": 3.6889, "step": 3019 }, { "epoch": 12.326530612244898, "grad_norm": 8.056764602661133, "learning_rate": 4.384489795918367e-05, "loss": 3.0274, "step": 3020 }, { "epoch": 12.33061224489796, "grad_norm": 5.369508266448975, "learning_rate": 4.3842857142857144e-05, "loss": 3.5079, "step": 3021 }, { "epoch": 12.33469387755102, "grad_norm": 5.5266947746276855, "learning_rate": 4.384081632653062e-05, "loss": 3.5098, "step": 3022 }, { "epoch": 12.338775510204082, "grad_norm": 8.617506980895996, "learning_rate": 4.3838775510204085e-05, "loss": 3.522, "step": 3023 }, { "epoch": 12.342857142857143, "grad_norm": 5.143661975860596, "learning_rate": 4.383673469387755e-05, "loss": 3.7418, "step": 3024 }, { "epoch": 12.346938775510203, "grad_norm": 5.916137218475342, "learning_rate": 4.3834693877551026e-05, "loss": 3.7299, "step": 3025 }, { "epoch": 12.351020408163265, "grad_norm": 5.030089855194092, "learning_rate": 4.383265306122449e-05, "loss": 3.7408, "step": 3026 }, { "epoch": 12.355102040816327, "grad_norm": 5.971615791320801, "learning_rate": 4.383061224489796e-05, "loss": 3.7518, "step": 3027 }, { "epoch": 12.359183673469389, "grad_norm": 3.7421793937683105, "learning_rate": 4.382857142857143e-05, "loss": 3.8821, "step": 3028 }, { "epoch": 12.363265306122448, "grad_norm": 6.511933326721191, "learning_rate": 4.38265306122449e-05, "loss": 3.1595, "step": 3029 }, { "epoch": 12.36734693877551, "grad_norm": 6.301327228546143, "learning_rate": 4.3824489795918375e-05, "loss": 3.2262, "step": 3030 }, { "epoch": 12.371428571428572, "grad_norm": 5.664402484893799, "learning_rate": 4.382244897959184e-05, "loss": 3.4876, "step": 3031 }, { "epoch": 12.375510204081632, "grad_norm": 6.1053032875061035, "learning_rate": 4.382040816326531e-05, "loss": 3.3362, "step": 3032 }, { "epoch": 12.379591836734694, "grad_norm": 9.725198745727539, "learning_rate": 4.3818367346938776e-05, "loss": 3.0864, "step": 3033 }, { "epoch": 12.383673469387755, "grad_norm": 7.1039719581604, "learning_rate": 4.3816326530612243e-05, "loss": 3.4199, "step": 3034 }, { "epoch": 12.387755102040817, "grad_norm": 6.49655294418335, "learning_rate": 4.381428571428572e-05, "loss": 3.6827, "step": 3035 }, { "epoch": 12.391836734693877, "grad_norm": 8.31298542022705, "learning_rate": 4.3812244897959184e-05, "loss": 3.0362, "step": 3036 }, { "epoch": 12.395918367346939, "grad_norm": 6.630833625793457, "learning_rate": 4.381020408163266e-05, "loss": 2.932, "step": 3037 }, { "epoch": 12.4, "grad_norm": 3.8395676612854004, "learning_rate": 4.3808163265306125e-05, "loss": 4.002, "step": 3038 }, { "epoch": 12.404081632653062, "grad_norm": 6.266568660736084, "learning_rate": 4.380612244897959e-05, "loss": 3.2984, "step": 3039 }, { "epoch": 12.408163265306122, "grad_norm": 3.387244939804077, "learning_rate": 4.3804081632653067e-05, "loss": 3.8815, "step": 3040 }, { "epoch": 12.412244897959184, "grad_norm": 6.391014099121094, "learning_rate": 4.3802040816326534e-05, "loss": 3.6588, "step": 3041 }, { "epoch": 12.416326530612245, "grad_norm": 4.559727191925049, "learning_rate": 4.38e-05, "loss": 3.7085, "step": 3042 }, { "epoch": 12.420408163265305, "grad_norm": 7.8837995529174805, "learning_rate": 4.379795918367347e-05, "loss": 3.3273, "step": 3043 }, { "epoch": 12.424489795918367, "grad_norm": 5.031558990478516, "learning_rate": 4.379591836734694e-05, "loss": 3.5092, "step": 3044 }, { "epoch": 12.428571428571429, "grad_norm": 7.900216579437256, "learning_rate": 4.3793877551020416e-05, "loss": 3.088, "step": 3045 }, { "epoch": 12.43265306122449, "grad_norm": 5.491334438323975, "learning_rate": 4.379183673469388e-05, "loss": 3.6917, "step": 3046 }, { "epoch": 12.43673469387755, "grad_norm": 9.082995414733887, "learning_rate": 4.378979591836735e-05, "loss": 2.9825, "step": 3047 }, { "epoch": 12.440816326530612, "grad_norm": 5.9409003257751465, "learning_rate": 4.378775510204082e-05, "loss": 3.4067, "step": 3048 }, { "epoch": 12.444897959183674, "grad_norm": 4.179593086242676, "learning_rate": 4.3785714285714284e-05, "loss": 3.801, "step": 3049 }, { "epoch": 12.448979591836734, "grad_norm": 4.759922504425049, "learning_rate": 4.378367346938776e-05, "loss": 3.6247, "step": 3050 }, { "epoch": 12.453061224489796, "grad_norm": 5.155834197998047, "learning_rate": 4.3781632653061225e-05, "loss": 3.5707, "step": 3051 }, { "epoch": 12.457142857142857, "grad_norm": 6.129530906677246, "learning_rate": 4.37795918367347e-05, "loss": 3.2687, "step": 3052 }, { "epoch": 12.461224489795919, "grad_norm": 5.467771053314209, "learning_rate": 4.3777551020408166e-05, "loss": 3.4828, "step": 3053 }, { "epoch": 12.465306122448979, "grad_norm": 8.194962501525879, "learning_rate": 4.377551020408163e-05, "loss": 2.7515, "step": 3054 }, { "epoch": 12.46938775510204, "grad_norm": 2.8477566242218018, "learning_rate": 4.377346938775511e-05, "loss": 4.1126, "step": 3055 }, { "epoch": 12.473469387755102, "grad_norm": 4.481545925140381, "learning_rate": 4.3771428571428574e-05, "loss": 3.9451, "step": 3056 }, { "epoch": 12.477551020408164, "grad_norm": 8.718925476074219, "learning_rate": 4.376938775510204e-05, "loss": 3.5259, "step": 3057 }, { "epoch": 12.481632653061224, "grad_norm": 8.301053047180176, "learning_rate": 4.376734693877551e-05, "loss": 3.1227, "step": 3058 }, { "epoch": 12.485714285714286, "grad_norm": 5.008130073547363, "learning_rate": 4.3765306122448976e-05, "loss": 3.7086, "step": 3059 }, { "epoch": 12.489795918367347, "grad_norm": 4.68603515625, "learning_rate": 4.376326530612245e-05, "loss": 3.6472, "step": 3060 }, { "epoch": 12.493877551020407, "grad_norm": 3.5273752212524414, "learning_rate": 4.3761224489795924e-05, "loss": 3.9823, "step": 3061 }, { "epoch": 12.49795918367347, "grad_norm": 4.591777801513672, "learning_rate": 4.375918367346939e-05, "loss": 3.5765, "step": 3062 }, { "epoch": 12.50204081632653, "grad_norm": 3.4357097148895264, "learning_rate": 4.375714285714286e-05, "loss": 3.8435, "step": 3063 }, { "epoch": 12.506122448979593, "grad_norm": 5.686397075653076, "learning_rate": 4.3755102040816325e-05, "loss": 3.594, "step": 3064 }, { "epoch": 12.510204081632653, "grad_norm": 6.494527816772461, "learning_rate": 4.37530612244898e-05, "loss": 3.2757, "step": 3065 }, { "epoch": 12.514285714285714, "grad_norm": 6.142599582672119, "learning_rate": 4.3751020408163266e-05, "loss": 3.628, "step": 3066 }, { "epoch": 12.518367346938776, "grad_norm": 6.767542839050293, "learning_rate": 4.374897959183673e-05, "loss": 3.4216, "step": 3067 }, { "epoch": 12.522448979591836, "grad_norm": 3.605299472808838, "learning_rate": 4.374693877551021e-05, "loss": 3.9455, "step": 3068 }, { "epoch": 12.526530612244898, "grad_norm": 6.242908000946045, "learning_rate": 4.374489795918368e-05, "loss": 3.329, "step": 3069 }, { "epoch": 12.53061224489796, "grad_norm": 8.341730117797852, "learning_rate": 4.374285714285715e-05, "loss": 2.5145, "step": 3070 }, { "epoch": 12.534693877551021, "grad_norm": 8.6072416305542, "learning_rate": 4.3740816326530615e-05, "loss": 2.6217, "step": 3071 }, { "epoch": 12.538775510204081, "grad_norm": 5.166258335113525, "learning_rate": 4.373877551020408e-05, "loss": 3.8902, "step": 3072 }, { "epoch": 12.542857142857143, "grad_norm": 5.528486728668213, "learning_rate": 4.373673469387755e-05, "loss": 3.1748, "step": 3073 }, { "epoch": 12.546938775510204, "grad_norm": 7.907247543334961, "learning_rate": 4.373469387755102e-05, "loss": 2.9652, "step": 3074 }, { "epoch": 12.551020408163264, "grad_norm": 10.758243560791016, "learning_rate": 4.373265306122449e-05, "loss": 2.6541, "step": 3075 }, { "epoch": 12.555102040816326, "grad_norm": 7.7700676918029785, "learning_rate": 4.3730612244897964e-05, "loss": 3.5459, "step": 3076 }, { "epoch": 12.559183673469388, "grad_norm": 5.935472011566162, "learning_rate": 4.372857142857143e-05, "loss": 3.5417, "step": 3077 }, { "epoch": 12.56326530612245, "grad_norm": 7.214427471160889, "learning_rate": 4.37265306122449e-05, "loss": 3.0774, "step": 3078 }, { "epoch": 12.56734693877551, "grad_norm": 3.96830415725708, "learning_rate": 4.372448979591837e-05, "loss": 3.5763, "step": 3079 }, { "epoch": 12.571428571428571, "grad_norm": 4.71677303314209, "learning_rate": 4.372244897959184e-05, "loss": 3.5517, "step": 3080 }, { "epoch": 12.575510204081633, "grad_norm": 8.693807601928711, "learning_rate": 4.372040816326531e-05, "loss": 2.6153, "step": 3081 }, { "epoch": 12.579591836734695, "grad_norm": 4.407995223999023, "learning_rate": 4.3718367346938774e-05, "loss": 3.874, "step": 3082 }, { "epoch": 12.583673469387755, "grad_norm": 6.243584156036377, "learning_rate": 4.371632653061225e-05, "loss": 3.3953, "step": 3083 }, { "epoch": 12.587755102040816, "grad_norm": 11.098335266113281, "learning_rate": 4.371428571428572e-05, "loss": 2.7312, "step": 3084 }, { "epoch": 12.591836734693878, "grad_norm": 7.927530288696289, "learning_rate": 4.371224489795919e-05, "loss": 3.3527, "step": 3085 }, { "epoch": 12.59591836734694, "grad_norm": 6.729948997497559, "learning_rate": 4.3710204081632656e-05, "loss": 3.6485, "step": 3086 }, { "epoch": 12.6, "grad_norm": 4.581427574157715, "learning_rate": 4.370816326530612e-05, "loss": 3.832, "step": 3087 }, { "epoch": 12.604081632653061, "grad_norm": 8.528937339782715, "learning_rate": 4.370612244897959e-05, "loss": 3.3383, "step": 3088 }, { "epoch": 12.608163265306123, "grad_norm": 3.79815936088562, "learning_rate": 4.3704081632653064e-05, "loss": 3.542, "step": 3089 }, { "epoch": 12.612244897959183, "grad_norm": 8.215950012207031, "learning_rate": 4.370204081632653e-05, "loss": 3.0417, "step": 3090 }, { "epoch": 12.616326530612245, "grad_norm": 5.0918145179748535, "learning_rate": 4.3700000000000005e-05, "loss": 3.5961, "step": 3091 }, { "epoch": 12.620408163265306, "grad_norm": 16.079994201660156, "learning_rate": 4.369795918367347e-05, "loss": 2.43, "step": 3092 }, { "epoch": 12.624489795918368, "grad_norm": 7.106673717498779, "learning_rate": 4.369591836734694e-05, "loss": 3.4021, "step": 3093 }, { "epoch": 12.628571428571428, "grad_norm": 4.3738789558410645, "learning_rate": 4.369387755102041e-05, "loss": 3.8985, "step": 3094 }, { "epoch": 12.63265306122449, "grad_norm": 3.3955845832824707, "learning_rate": 4.369183673469388e-05, "loss": 3.9046, "step": 3095 }, { "epoch": 12.636734693877552, "grad_norm": 11.483255386352539, "learning_rate": 4.368979591836735e-05, "loss": 2.626, "step": 3096 }, { "epoch": 12.640816326530611, "grad_norm": 4.995880126953125, "learning_rate": 4.3687755102040815e-05, "loss": 3.7412, "step": 3097 }, { "epoch": 12.644897959183673, "grad_norm": 6.000918388366699, "learning_rate": 4.368571428571429e-05, "loss": 3.4828, "step": 3098 }, { "epoch": 12.648979591836735, "grad_norm": 6.929385662078857, "learning_rate": 4.368367346938776e-05, "loss": 3.5521, "step": 3099 }, { "epoch": 12.653061224489797, "grad_norm": 3.579446792602539, "learning_rate": 4.368163265306123e-05, "loss": 4.2636, "step": 3100 }, { "epoch": 12.657142857142857, "grad_norm": 7.009768962860107, "learning_rate": 4.36795918367347e-05, "loss": 3.4248, "step": 3101 }, { "epoch": 12.661224489795918, "grad_norm": 6.689382076263428, "learning_rate": 4.3677551020408164e-05, "loss": 3.2766, "step": 3102 }, { "epoch": 12.66530612244898, "grad_norm": 4.072082042694092, "learning_rate": 4.367551020408163e-05, "loss": 3.5343, "step": 3103 }, { "epoch": 12.66938775510204, "grad_norm": 8.35804557800293, "learning_rate": 4.3673469387755105e-05, "loss": 2.7109, "step": 3104 }, { "epoch": 12.673469387755102, "grad_norm": 5.795302391052246, "learning_rate": 4.367142857142857e-05, "loss": 3.2228, "step": 3105 }, { "epoch": 12.677551020408163, "grad_norm": 5.368833541870117, "learning_rate": 4.3669387755102046e-05, "loss": 3.5396, "step": 3106 }, { "epoch": 12.681632653061225, "grad_norm": 5.422383785247803, "learning_rate": 4.366734693877551e-05, "loss": 3.5329, "step": 3107 }, { "epoch": 12.685714285714285, "grad_norm": 4.230820178985596, "learning_rate": 4.366530612244898e-05, "loss": 3.8357, "step": 3108 }, { "epoch": 12.689795918367347, "grad_norm": 4.547453880310059, "learning_rate": 4.3663265306122454e-05, "loss": 3.5993, "step": 3109 }, { "epoch": 12.693877551020408, "grad_norm": 5.851369857788086, "learning_rate": 4.366122448979592e-05, "loss": 3.6946, "step": 3110 }, { "epoch": 12.69795918367347, "grad_norm": 4.058239936828613, "learning_rate": 4.365918367346939e-05, "loss": 3.7636, "step": 3111 }, { "epoch": 12.70204081632653, "grad_norm": 3.275759696960449, "learning_rate": 4.3657142857142855e-05, "loss": 4.0243, "step": 3112 }, { "epoch": 12.706122448979592, "grad_norm": 9.27021312713623, "learning_rate": 4.365510204081633e-05, "loss": 2.6626, "step": 3113 }, { "epoch": 12.710204081632654, "grad_norm": 6.458253383636475, "learning_rate": 4.3653061224489796e-05, "loss": 3.6213, "step": 3114 }, { "epoch": 12.714285714285714, "grad_norm": 4.7445902824401855, "learning_rate": 4.365102040816327e-05, "loss": 3.5416, "step": 3115 }, { "epoch": 12.718367346938775, "grad_norm": 7.202678680419922, "learning_rate": 4.364897959183674e-05, "loss": 3.0767, "step": 3116 }, { "epoch": 12.722448979591837, "grad_norm": 4.770481586456299, "learning_rate": 4.3646938775510205e-05, "loss": 3.493, "step": 3117 }, { "epoch": 12.726530612244899, "grad_norm": 4.7252302169799805, "learning_rate": 4.364489795918368e-05, "loss": 3.8135, "step": 3118 }, { "epoch": 12.730612244897959, "grad_norm": 5.9829630851745605, "learning_rate": 4.3642857142857146e-05, "loss": 3.6965, "step": 3119 }, { "epoch": 12.73469387755102, "grad_norm": 3.984525680541992, "learning_rate": 4.364081632653061e-05, "loss": 3.6976, "step": 3120 }, { "epoch": 12.738775510204082, "grad_norm": 4.677011489868164, "learning_rate": 4.363877551020408e-05, "loss": 3.641, "step": 3121 }, { "epoch": 12.742857142857144, "grad_norm": 7.896065711975098, "learning_rate": 4.3636734693877554e-05, "loss": 3.3343, "step": 3122 }, { "epoch": 12.746938775510204, "grad_norm": 6.119565486907959, "learning_rate": 4.363469387755103e-05, "loss": 3.3573, "step": 3123 }, { "epoch": 12.751020408163265, "grad_norm": 4.336785793304443, "learning_rate": 4.3632653061224495e-05, "loss": 3.7651, "step": 3124 }, { "epoch": 12.755102040816327, "grad_norm": 5.6766676902771, "learning_rate": 4.363061224489796e-05, "loss": 3.5836, "step": 3125 }, { "epoch": 12.759183673469387, "grad_norm": 4.984744071960449, "learning_rate": 4.362857142857143e-05, "loss": 3.8361, "step": 3126 }, { "epoch": 12.763265306122449, "grad_norm": 5.78510856628418, "learning_rate": 4.3626530612244896e-05, "loss": 3.5679, "step": 3127 }, { "epoch": 12.76734693877551, "grad_norm": 6.6346330642700195, "learning_rate": 4.362448979591837e-05, "loss": 3.4841, "step": 3128 }, { "epoch": 12.771428571428572, "grad_norm": 4.398176670074463, "learning_rate": 4.362244897959184e-05, "loss": 3.6481, "step": 3129 }, { "epoch": 12.775510204081632, "grad_norm": 6.037783145904541, "learning_rate": 4.362040816326531e-05, "loss": 3.4935, "step": 3130 }, { "epoch": 12.779591836734694, "grad_norm": 4.186243534088135, "learning_rate": 4.361836734693878e-05, "loss": 3.711, "step": 3131 }, { "epoch": 12.783673469387756, "grad_norm": 4.045191287994385, "learning_rate": 4.3616326530612245e-05, "loss": 4.0738, "step": 3132 }, { "epoch": 12.787755102040816, "grad_norm": 4.412844181060791, "learning_rate": 4.361428571428572e-05, "loss": 3.5899, "step": 3133 }, { "epoch": 12.791836734693877, "grad_norm": 5.012313365936279, "learning_rate": 4.3612244897959186e-05, "loss": 3.5079, "step": 3134 }, { "epoch": 12.795918367346939, "grad_norm": 8.507298469543457, "learning_rate": 4.3610204081632654e-05, "loss": 3.1522, "step": 3135 }, { "epoch": 12.8, "grad_norm": 4.356191635131836, "learning_rate": 4.360816326530612e-05, "loss": 3.5751, "step": 3136 }, { "epoch": 12.80408163265306, "grad_norm": 4.511678218841553, "learning_rate": 4.3606122448979595e-05, "loss": 3.6122, "step": 3137 }, { "epoch": 12.808163265306122, "grad_norm": 6.4489264488220215, "learning_rate": 4.360408163265307e-05, "loss": 3.5257, "step": 3138 }, { "epoch": 12.812244897959184, "grad_norm": 4.33747673034668, "learning_rate": 4.3602040816326536e-05, "loss": 3.7565, "step": 3139 }, { "epoch": 12.816326530612244, "grad_norm": 4.51609992980957, "learning_rate": 4.36e-05, "loss": 3.8485, "step": 3140 }, { "epoch": 12.820408163265306, "grad_norm": 8.064851760864258, "learning_rate": 4.359795918367347e-05, "loss": 3.055, "step": 3141 }, { "epoch": 12.824489795918367, "grad_norm": 4.542491436004639, "learning_rate": 4.359591836734694e-05, "loss": 3.5834, "step": 3142 }, { "epoch": 12.82857142857143, "grad_norm": 5.160923480987549, "learning_rate": 4.359387755102041e-05, "loss": 3.75, "step": 3143 }, { "epoch": 12.83265306122449, "grad_norm": 6.308170318603516, "learning_rate": 4.359183673469388e-05, "loss": 3.4488, "step": 3144 }, { "epoch": 12.83673469387755, "grad_norm": 4.0813188552856445, "learning_rate": 4.358979591836735e-05, "loss": 3.8797, "step": 3145 }, { "epoch": 12.840816326530613, "grad_norm": 3.8872921466827393, "learning_rate": 4.358775510204082e-05, "loss": 3.7352, "step": 3146 }, { "epoch": 12.844897959183674, "grad_norm": 5.185924053192139, "learning_rate": 4.3585714285714286e-05, "loss": 3.8072, "step": 3147 }, { "epoch": 12.848979591836734, "grad_norm": 5.303520202636719, "learning_rate": 4.358367346938776e-05, "loss": 3.7304, "step": 3148 }, { "epoch": 12.853061224489796, "grad_norm": 4.20192289352417, "learning_rate": 4.358163265306123e-05, "loss": 3.7633, "step": 3149 }, { "epoch": 12.857142857142858, "grad_norm": 7.565040588378906, "learning_rate": 4.3579591836734694e-05, "loss": 3.4718, "step": 3150 }, { "epoch": 12.861224489795918, "grad_norm": 3.4882514476776123, "learning_rate": 4.357755102040816e-05, "loss": 4.0879, "step": 3151 }, { "epoch": 12.86530612244898, "grad_norm": 6.260823726654053, "learning_rate": 4.3575510204081635e-05, "loss": 3.4494, "step": 3152 }, { "epoch": 12.869387755102041, "grad_norm": 5.165750026702881, "learning_rate": 4.357346938775511e-05, "loss": 3.6969, "step": 3153 }, { "epoch": 12.873469387755103, "grad_norm": 9.390698432922363, "learning_rate": 4.3571428571428576e-05, "loss": 2.5745, "step": 3154 }, { "epoch": 12.877551020408163, "grad_norm": 6.8204345703125, "learning_rate": 4.3569387755102043e-05, "loss": 3.2552, "step": 3155 }, { "epoch": 12.881632653061224, "grad_norm": 7.068206310272217, "learning_rate": 4.356734693877551e-05, "loss": 3.064, "step": 3156 }, { "epoch": 12.885714285714286, "grad_norm": 8.993433952331543, "learning_rate": 4.356530612244898e-05, "loss": 3.5888, "step": 3157 }, { "epoch": 12.889795918367348, "grad_norm": 4.138868808746338, "learning_rate": 4.356326530612245e-05, "loss": 3.7896, "step": 3158 }, { "epoch": 12.893877551020408, "grad_norm": 5.083982467651367, "learning_rate": 4.356122448979592e-05, "loss": 3.8036, "step": 3159 }, { "epoch": 12.89795918367347, "grad_norm": 5.657013416290283, "learning_rate": 4.3559183673469386e-05, "loss": 3.2269, "step": 3160 }, { "epoch": 12.902040816326531, "grad_norm": 5.105096817016602, "learning_rate": 4.355714285714286e-05, "loss": 3.6238, "step": 3161 }, { "epoch": 12.906122448979591, "grad_norm": 7.650345802307129, "learning_rate": 4.3555102040816334e-05, "loss": 3.2035, "step": 3162 }, { "epoch": 12.910204081632653, "grad_norm": 3.8783445358276367, "learning_rate": 4.35530612244898e-05, "loss": 4.0621, "step": 3163 }, { "epoch": 12.914285714285715, "grad_norm": 5.3570027351379395, "learning_rate": 4.355102040816327e-05, "loss": 3.5069, "step": 3164 }, { "epoch": 12.918367346938776, "grad_norm": 4.768919944763184, "learning_rate": 4.3548979591836735e-05, "loss": 3.9242, "step": 3165 }, { "epoch": 12.922448979591836, "grad_norm": 4.04458475112915, "learning_rate": 4.35469387755102e-05, "loss": 3.9187, "step": 3166 }, { "epoch": 12.926530612244898, "grad_norm": 8.224005699157715, "learning_rate": 4.3544897959183676e-05, "loss": 3.2057, "step": 3167 }, { "epoch": 12.93061224489796, "grad_norm": 4.561927795410156, "learning_rate": 4.354285714285714e-05, "loss": 3.833, "step": 3168 }, { "epoch": 12.93469387755102, "grad_norm": 8.280842781066895, "learning_rate": 4.354081632653062e-05, "loss": 3.0936, "step": 3169 }, { "epoch": 12.938775510204081, "grad_norm": 6.855389595031738, "learning_rate": 4.3538775510204084e-05, "loss": 3.2683, "step": 3170 }, { "epoch": 12.942857142857143, "grad_norm": 11.393132209777832, "learning_rate": 4.353673469387755e-05, "loss": 3.1649, "step": 3171 }, { "epoch": 12.946938775510205, "grad_norm": 10.543207168579102, "learning_rate": 4.3534693877551025e-05, "loss": 2.4126, "step": 3172 }, { "epoch": 12.951020408163265, "grad_norm": 5.143747329711914, "learning_rate": 4.353265306122449e-05, "loss": 3.7351, "step": 3173 }, { "epoch": 12.955102040816326, "grad_norm": 3.410151243209839, "learning_rate": 4.353061224489796e-05, "loss": 4.0833, "step": 3174 }, { "epoch": 12.959183673469388, "grad_norm": 4.996172904968262, "learning_rate": 4.352857142857143e-05, "loss": 3.7012, "step": 3175 }, { "epoch": 12.963265306122448, "grad_norm": 3.046233892440796, "learning_rate": 4.35265306122449e-05, "loss": 3.8502, "step": 3176 }, { "epoch": 12.96734693877551, "grad_norm": 5.652232646942139, "learning_rate": 4.3524489795918374e-05, "loss": 3.5269, "step": 3177 }, { "epoch": 12.971428571428572, "grad_norm": 5.955028057098389, "learning_rate": 4.352244897959184e-05, "loss": 3.6575, "step": 3178 }, { "epoch": 12.975510204081633, "grad_norm": 4.866942405700684, "learning_rate": 4.352040816326531e-05, "loss": 3.3971, "step": 3179 }, { "epoch": 12.979591836734693, "grad_norm": 3.0733954906463623, "learning_rate": 4.3518367346938776e-05, "loss": 4.1131, "step": 3180 }, { "epoch": 12.983673469387755, "grad_norm": 7.018243312835693, "learning_rate": 4.351632653061224e-05, "loss": 2.9688, "step": 3181 }, { "epoch": 12.987755102040817, "grad_norm": 8.75721549987793, "learning_rate": 4.351428571428572e-05, "loss": 2.9072, "step": 3182 }, { "epoch": 12.991836734693878, "grad_norm": 5.324097633361816, "learning_rate": 4.3512244897959184e-05, "loss": 3.5269, "step": 3183 }, { "epoch": 12.995918367346938, "grad_norm": 5.190769672393799, "learning_rate": 4.351020408163266e-05, "loss": 3.6851, "step": 3184 }, { "epoch": 13.0, "grad_norm": 3.6259713172912598, "learning_rate": 4.3508163265306125e-05, "loss": 3.6787, "step": 3185 }, { "epoch": 13.004081632653062, "grad_norm": 3.3795154094696045, "learning_rate": 4.350612244897959e-05, "loss": 3.8357, "step": 3186 }, { "epoch": 13.008163265306122, "grad_norm": 2.981978416442871, "learning_rate": 4.3504081632653066e-05, "loss": 3.9966, "step": 3187 }, { "epoch": 13.012244897959183, "grad_norm": 4.146053314208984, "learning_rate": 4.350204081632653e-05, "loss": 3.4733, "step": 3188 }, { "epoch": 13.016326530612245, "grad_norm": 5.198028564453125, "learning_rate": 4.35e-05, "loss": 3.4564, "step": 3189 }, { "epoch": 13.020408163265307, "grad_norm": 6.060603618621826, "learning_rate": 4.349795918367347e-05, "loss": 3.3871, "step": 3190 }, { "epoch": 13.024489795918367, "grad_norm": 6.655406475067139, "learning_rate": 4.349591836734694e-05, "loss": 3.3459, "step": 3191 }, { "epoch": 13.028571428571428, "grad_norm": 7.975331783294678, "learning_rate": 4.3493877551020415e-05, "loss": 2.9443, "step": 3192 }, { "epoch": 13.03265306122449, "grad_norm": 9.679373741149902, "learning_rate": 4.349183673469388e-05, "loss": 2.5711, "step": 3193 }, { "epoch": 13.036734693877552, "grad_norm": 5.22297477722168, "learning_rate": 4.348979591836735e-05, "loss": 3.4067, "step": 3194 }, { "epoch": 13.040816326530612, "grad_norm": 6.88093376159668, "learning_rate": 4.3487755102040817e-05, "loss": 3.1331, "step": 3195 }, { "epoch": 13.044897959183674, "grad_norm": 6.313786029815674, "learning_rate": 4.3485714285714284e-05, "loss": 3.4092, "step": 3196 }, { "epoch": 13.048979591836735, "grad_norm": 7.770173072814941, "learning_rate": 4.348367346938776e-05, "loss": 2.8751, "step": 3197 }, { "epoch": 13.053061224489795, "grad_norm": 4.931217193603516, "learning_rate": 4.3481632653061225e-05, "loss": 3.4988, "step": 3198 }, { "epoch": 13.057142857142857, "grad_norm": 5.280327796936035, "learning_rate": 4.34795918367347e-05, "loss": 3.4582, "step": 3199 }, { "epoch": 13.061224489795919, "grad_norm": 4.857019901275635, "learning_rate": 4.3477551020408166e-05, "loss": 3.6592, "step": 3200 }, { "epoch": 13.06530612244898, "grad_norm": 5.996901988983154, "learning_rate": 4.347551020408163e-05, "loss": 3.7535, "step": 3201 }, { "epoch": 13.06938775510204, "grad_norm": 5.947505474090576, "learning_rate": 4.347346938775511e-05, "loss": 3.4967, "step": 3202 }, { "epoch": 13.073469387755102, "grad_norm": 7.934627532958984, "learning_rate": 4.3471428571428574e-05, "loss": 3.0169, "step": 3203 }, { "epoch": 13.077551020408164, "grad_norm": 6.011601448059082, "learning_rate": 4.346938775510204e-05, "loss": 3.3986, "step": 3204 }, { "epoch": 13.081632653061224, "grad_norm": 10.541963577270508, "learning_rate": 4.346734693877551e-05, "loss": 2.9592, "step": 3205 }, { "epoch": 13.085714285714285, "grad_norm": 5.134422302246094, "learning_rate": 4.346530612244898e-05, "loss": 3.5749, "step": 3206 }, { "epoch": 13.089795918367347, "grad_norm": 5.074029445648193, "learning_rate": 4.3463265306122456e-05, "loss": 3.8278, "step": 3207 }, { "epoch": 13.093877551020409, "grad_norm": 5.321577548980713, "learning_rate": 4.346122448979592e-05, "loss": 3.5699, "step": 3208 }, { "epoch": 13.097959183673469, "grad_norm": 15.950163841247559, "learning_rate": 4.345918367346939e-05, "loss": 1.3461, "step": 3209 }, { "epoch": 13.10204081632653, "grad_norm": 7.898106098175049, "learning_rate": 4.345714285714286e-05, "loss": 3.5247, "step": 3210 }, { "epoch": 13.106122448979592, "grad_norm": 5.616037368774414, "learning_rate": 4.345510204081633e-05, "loss": 3.7518, "step": 3211 }, { "epoch": 13.110204081632654, "grad_norm": 10.033259391784668, "learning_rate": 4.34530612244898e-05, "loss": 3.3909, "step": 3212 }, { "epoch": 13.114285714285714, "grad_norm": 6.877976417541504, "learning_rate": 4.3451020408163265e-05, "loss": 3.3246, "step": 3213 }, { "epoch": 13.118367346938776, "grad_norm": 4.630370140075684, "learning_rate": 4.344897959183673e-05, "loss": 3.7121, "step": 3214 }, { "epoch": 13.122448979591837, "grad_norm": 5.3493428230285645, "learning_rate": 4.3446938775510207e-05, "loss": 3.7796, "step": 3215 }, { "epoch": 13.126530612244897, "grad_norm": 6.240339279174805, "learning_rate": 4.344489795918368e-05, "loss": 3.5434, "step": 3216 }, { "epoch": 13.130612244897959, "grad_norm": 5.032938480377197, "learning_rate": 4.344285714285715e-05, "loss": 3.5422, "step": 3217 }, { "epoch": 13.13469387755102, "grad_norm": 5.182313919067383, "learning_rate": 4.3440816326530615e-05, "loss": 3.5345, "step": 3218 }, { "epoch": 13.138775510204082, "grad_norm": 8.950984001159668, "learning_rate": 4.343877551020408e-05, "loss": 3.0045, "step": 3219 }, { "epoch": 13.142857142857142, "grad_norm": 10.24833869934082, "learning_rate": 4.343673469387755e-05, "loss": 2.3675, "step": 3220 }, { "epoch": 13.146938775510204, "grad_norm": 5.9916768074035645, "learning_rate": 4.343469387755102e-05, "loss": 3.6532, "step": 3221 }, { "epoch": 13.151020408163266, "grad_norm": 8.929248809814453, "learning_rate": 4.343265306122449e-05, "loss": 2.361, "step": 3222 }, { "epoch": 13.155102040816326, "grad_norm": 3.301863431930542, "learning_rate": 4.3430612244897964e-05, "loss": 4.0516, "step": 3223 }, { "epoch": 13.159183673469387, "grad_norm": 5.109378814697266, "learning_rate": 4.342857142857143e-05, "loss": 3.5736, "step": 3224 }, { "epoch": 13.16326530612245, "grad_norm": 4.804717540740967, "learning_rate": 4.34265306122449e-05, "loss": 3.5336, "step": 3225 }, { "epoch": 13.16734693877551, "grad_norm": 3.51558780670166, "learning_rate": 4.342448979591837e-05, "loss": 3.873, "step": 3226 }, { "epoch": 13.17142857142857, "grad_norm": NaN, "learning_rate": 4.342244897959184e-05, "loss": 2.8477, "step": 3227 }, { "epoch": 13.175510204081633, "grad_norm": 3.574652671813965, "learning_rate": 4.342244897959184e-05, "loss": 3.9241, "step": 3228 }, { "epoch": 13.179591836734694, "grad_norm": 5.981013298034668, "learning_rate": 4.3420408163265306e-05, "loss": 3.6704, "step": 3229 }, { "epoch": 13.183673469387756, "grad_norm": 16.435651779174805, "learning_rate": 4.341836734693877e-05, "loss": 2.26, "step": 3230 }, { "epoch": 13.187755102040816, "grad_norm": 8.081843376159668, "learning_rate": 4.341632653061225e-05, "loss": 3.3643, "step": 3231 }, { "epoch": 13.191836734693878, "grad_norm": 4.461973667144775, "learning_rate": 4.341428571428572e-05, "loss": 3.9019, "step": 3232 }, { "epoch": 13.19591836734694, "grad_norm": 4.411591053009033, "learning_rate": 4.341224489795919e-05, "loss": 3.6614, "step": 3233 }, { "epoch": 13.2, "grad_norm": 8.8395414352417, "learning_rate": 4.3410204081632655e-05, "loss": 3.0921, "step": 3234 }, { "epoch": 13.204081632653061, "grad_norm": 6.467608451843262, "learning_rate": 4.340816326530612e-05, "loss": 3.6449, "step": 3235 }, { "epoch": 13.208163265306123, "grad_norm": 8.88366985321045, "learning_rate": 4.340612244897959e-05, "loss": 3.1233, "step": 3236 }, { "epoch": 13.212244897959184, "grad_norm": 5.135293960571289, "learning_rate": 4.3404081632653064e-05, "loss": 3.7023, "step": 3237 }, { "epoch": 13.216326530612244, "grad_norm": 5.24643087387085, "learning_rate": 4.340204081632653e-05, "loss": 3.5152, "step": 3238 }, { "epoch": 13.220408163265306, "grad_norm": 5.177186965942383, "learning_rate": 4.3400000000000005e-05, "loss": 3.4031, "step": 3239 }, { "epoch": 13.224489795918368, "grad_norm": 7.330206871032715, "learning_rate": 4.339795918367347e-05, "loss": 3.3738, "step": 3240 }, { "epoch": 13.228571428571428, "grad_norm": 6.2117133140563965, "learning_rate": 4.339591836734694e-05, "loss": 3.4229, "step": 3241 }, { "epoch": 13.23265306122449, "grad_norm": 4.860517978668213, "learning_rate": 4.339387755102041e-05, "loss": 3.8085, "step": 3242 }, { "epoch": 13.236734693877551, "grad_norm": 2.4388692378997803, "learning_rate": 4.339183673469388e-05, "loss": 4.0295, "step": 3243 }, { "epoch": 13.240816326530613, "grad_norm": 3.9107422828674316, "learning_rate": 4.338979591836735e-05, "loss": 3.8663, "step": 3244 }, { "epoch": 13.244897959183673, "grad_norm": 6.209766864776611, "learning_rate": 4.3387755102040814e-05, "loss": 3.6974, "step": 3245 }, { "epoch": 13.248979591836735, "grad_norm": 3.938389539718628, "learning_rate": 4.338571428571429e-05, "loss": 3.842, "step": 3246 }, { "epoch": 13.253061224489796, "grad_norm": 6.894655227661133, "learning_rate": 4.338367346938776e-05, "loss": 3.4523, "step": 3247 }, { "epoch": 13.257142857142856, "grad_norm": 9.495408058166504, "learning_rate": 4.338163265306123e-05, "loss": 2.5188, "step": 3248 }, { "epoch": 13.261224489795918, "grad_norm": 8.092182159423828, "learning_rate": 4.3379591836734696e-05, "loss": 3.0695, "step": 3249 }, { "epoch": 13.26530612244898, "grad_norm": 8.317208290100098, "learning_rate": 4.337755102040816e-05, "loss": 2.8714, "step": 3250 }, { "epoch": 13.269387755102041, "grad_norm": 9.692124366760254, "learning_rate": 4.337551020408164e-05, "loss": 3.3951, "step": 3251 }, { "epoch": 13.273469387755101, "grad_norm": 4.35976505279541, "learning_rate": 4.3373469387755104e-05, "loss": 3.9994, "step": 3252 }, { "epoch": 13.277551020408163, "grad_norm": 6.485843181610107, "learning_rate": 4.337142857142857e-05, "loss": 3.5897, "step": 3253 }, { "epoch": 13.281632653061225, "grad_norm": 6.870790958404541, "learning_rate": 4.3369387755102045e-05, "loss": 3.4413, "step": 3254 }, { "epoch": 13.285714285714286, "grad_norm": 5.867238998413086, "learning_rate": 4.336734693877551e-05, "loss": 3.7212, "step": 3255 }, { "epoch": 13.289795918367346, "grad_norm": 14.225090980529785, "learning_rate": 4.3365306122448986e-05, "loss": 3.5326, "step": 3256 }, { "epoch": 13.293877551020408, "grad_norm": 10.120930671691895, "learning_rate": 4.3363265306122454e-05, "loss": 3.0593, "step": 3257 }, { "epoch": 13.29795918367347, "grad_norm": 3.9818077087402344, "learning_rate": 4.336122448979592e-05, "loss": 3.4932, "step": 3258 }, { "epoch": 13.30204081632653, "grad_norm": 6.936977386474609, "learning_rate": 4.335918367346939e-05, "loss": 3.4964, "step": 3259 }, { "epoch": 13.306122448979592, "grad_norm": 6.250932693481445, "learning_rate": 4.3357142857142855e-05, "loss": 3.5883, "step": 3260 }, { "epoch": 13.310204081632653, "grad_norm": 10.407254219055176, "learning_rate": 4.335510204081633e-05, "loss": 2.5957, "step": 3261 }, { "epoch": 13.314285714285715, "grad_norm": 9.07860279083252, "learning_rate": 4.33530612244898e-05, "loss": 3.0505, "step": 3262 }, { "epoch": 13.318367346938775, "grad_norm": 4.91123628616333, "learning_rate": 4.335102040816327e-05, "loss": 3.6114, "step": 3263 }, { "epoch": 13.322448979591837, "grad_norm": 4.806981086730957, "learning_rate": 4.334897959183674e-05, "loss": 3.8861, "step": 3264 }, { "epoch": 13.326530612244898, "grad_norm": 10.141640663146973, "learning_rate": 4.3346938775510204e-05, "loss": 2.7204, "step": 3265 }, { "epoch": 13.33061224489796, "grad_norm": 3.740691661834717, "learning_rate": 4.334489795918368e-05, "loss": 3.7889, "step": 3266 }, { "epoch": 13.33469387755102, "grad_norm": 4.45136022567749, "learning_rate": 4.3342857142857145e-05, "loss": 3.8289, "step": 3267 }, { "epoch": 13.338775510204082, "grad_norm": 3.5506248474121094, "learning_rate": 4.334081632653061e-05, "loss": 3.7803, "step": 3268 }, { "epoch": 13.342857142857143, "grad_norm": 3.5174217224121094, "learning_rate": 4.333877551020408e-05, "loss": 3.8676, "step": 3269 }, { "epoch": 13.346938775510203, "grad_norm": 3.517390012741089, "learning_rate": 4.333673469387755e-05, "loss": 4.2247, "step": 3270 }, { "epoch": 13.351020408163265, "grad_norm": 6.34579610824585, "learning_rate": 4.333469387755103e-05, "loss": 3.2976, "step": 3271 }, { "epoch": 13.355102040816327, "grad_norm": 4.385959148406982, "learning_rate": 4.3332653061224494e-05, "loss": 3.7274, "step": 3272 }, { "epoch": 13.359183673469389, "grad_norm": 4.562912464141846, "learning_rate": 4.333061224489796e-05, "loss": 3.8744, "step": 3273 }, { "epoch": 13.363265306122448, "grad_norm": 9.304752349853516, "learning_rate": 4.332857142857143e-05, "loss": 2.9857, "step": 3274 }, { "epoch": 13.36734693877551, "grad_norm": 6.16826057434082, "learning_rate": 4.3326530612244896e-05, "loss": 3.5477, "step": 3275 }, { "epoch": 13.371428571428572, "grad_norm": 7.814360618591309, "learning_rate": 4.332448979591837e-05, "loss": 3.2255, "step": 3276 }, { "epoch": 13.375510204081632, "grad_norm": 4.875821590423584, "learning_rate": 4.332244897959184e-05, "loss": 3.7803, "step": 3277 }, { "epoch": 13.379591836734694, "grad_norm": 6.573967456817627, "learning_rate": 4.332040816326531e-05, "loss": 3.4707, "step": 3278 }, { "epoch": 13.383673469387755, "grad_norm": 6.713890075683594, "learning_rate": 4.331836734693878e-05, "loss": 3.2842, "step": 3279 }, { "epoch": 13.387755102040817, "grad_norm": 4.6396942138671875, "learning_rate": 4.3316326530612245e-05, "loss": 3.5839, "step": 3280 }, { "epoch": 13.391836734693877, "grad_norm": 9.49329948425293, "learning_rate": 4.331428571428572e-05, "loss": 3.0751, "step": 3281 }, { "epoch": 13.395918367346939, "grad_norm": 7.944986820220947, "learning_rate": 4.3312244897959186e-05, "loss": 3.3553, "step": 3282 }, { "epoch": 13.4, "grad_norm": 9.978302955627441, "learning_rate": 4.331020408163265e-05, "loss": 2.9242, "step": 3283 }, { "epoch": 13.404081632653062, "grad_norm": 5.97835636138916, "learning_rate": 4.330816326530612e-05, "loss": 3.3871, "step": 3284 }, { "epoch": 13.408163265306122, "grad_norm": 6.209303855895996, "learning_rate": 4.3306122448979594e-05, "loss": 3.3813, "step": 3285 }, { "epoch": 13.412244897959184, "grad_norm": 5.788926124572754, "learning_rate": 4.330408163265307e-05, "loss": 3.5201, "step": 3286 }, { "epoch": 13.416326530612245, "grad_norm": 7.487948894500732, "learning_rate": 4.3302040816326535e-05, "loss": 3.383, "step": 3287 }, { "epoch": 13.420408163265305, "grad_norm": 6.709731578826904, "learning_rate": 4.33e-05, "loss": 3.1766, "step": 3288 }, { "epoch": 13.424489795918367, "grad_norm": 5.034391403198242, "learning_rate": 4.329795918367347e-05, "loss": 3.6838, "step": 3289 }, { "epoch": 13.428571428571429, "grad_norm": 4.195943832397461, "learning_rate": 4.3295918367346936e-05, "loss": 3.8094, "step": 3290 }, { "epoch": 13.43265306122449, "grad_norm": 6.862665176391602, "learning_rate": 4.329387755102041e-05, "loss": 2.8712, "step": 3291 }, { "epoch": 13.43673469387755, "grad_norm": 6.922441005706787, "learning_rate": 4.329183673469388e-05, "loss": 3.2815, "step": 3292 }, { "epoch": 13.440816326530612, "grad_norm": 4.469754219055176, "learning_rate": 4.328979591836735e-05, "loss": 3.8002, "step": 3293 }, { "epoch": 13.444897959183674, "grad_norm": 11.70145320892334, "learning_rate": 4.328775510204082e-05, "loss": 2.3361, "step": 3294 }, { "epoch": 13.448979591836734, "grad_norm": 9.558420181274414, "learning_rate": 4.328571428571429e-05, "loss": 3.4677, "step": 3295 }, { "epoch": 13.453061224489796, "grad_norm": 4.7025980949401855, "learning_rate": 4.328367346938776e-05, "loss": 3.5614, "step": 3296 }, { "epoch": 13.457142857142857, "grad_norm": 6.4664835929870605, "learning_rate": 4.328163265306123e-05, "loss": 3.3379, "step": 3297 }, { "epoch": 13.461224489795919, "grad_norm": 4.754908561706543, "learning_rate": 4.3279591836734694e-05, "loss": 3.6488, "step": 3298 }, { "epoch": 13.465306122448979, "grad_norm": 6.602443695068359, "learning_rate": 4.327755102040816e-05, "loss": 3.7243, "step": 3299 }, { "epoch": 13.46938775510204, "grad_norm": 4.248968601226807, "learning_rate": 4.3275510204081635e-05, "loss": 3.7291, "step": 3300 }, { "epoch": 13.473469387755102, "grad_norm": 8.145721435546875, "learning_rate": 4.327346938775511e-05, "loss": 2.586, "step": 3301 }, { "epoch": 13.477551020408164, "grad_norm": 7.180164337158203, "learning_rate": 4.3271428571428576e-05, "loss": 3.1176, "step": 3302 }, { "epoch": 13.481632653061224, "grad_norm": 5.224956512451172, "learning_rate": 4.326938775510204e-05, "loss": 3.6174, "step": 3303 }, { "epoch": 13.485714285714286, "grad_norm": 8.028789520263672, "learning_rate": 4.326734693877551e-05, "loss": 3.0229, "step": 3304 }, { "epoch": 13.489795918367347, "grad_norm": 5.7030534744262695, "learning_rate": 4.3265306122448984e-05, "loss": 3.4364, "step": 3305 }, { "epoch": 13.493877551020407, "grad_norm": 4.992998123168945, "learning_rate": 4.326326530612245e-05, "loss": 3.7445, "step": 3306 }, { "epoch": 13.49795918367347, "grad_norm": 5.416432857513428, "learning_rate": 4.326122448979592e-05, "loss": 3.7307, "step": 3307 }, { "epoch": 13.50204081632653, "grad_norm": 8.725768089294434, "learning_rate": 4.325918367346939e-05, "loss": 2.6615, "step": 3308 }, { "epoch": 13.506122448979593, "grad_norm": 12.667040824890137, "learning_rate": 4.325714285714286e-05, "loss": 3.1225, "step": 3309 }, { "epoch": 13.510204081632653, "grad_norm": 5.095433712005615, "learning_rate": 4.325510204081633e-05, "loss": 3.4356, "step": 3310 }, { "epoch": 13.514285714285714, "grad_norm": 3.622152805328369, "learning_rate": 4.32530612244898e-05, "loss": 3.9995, "step": 3311 }, { "epoch": 13.518367346938776, "grad_norm": 4.3882246017456055, "learning_rate": 4.325102040816327e-05, "loss": 3.7355, "step": 3312 }, { "epoch": 13.522448979591836, "grad_norm": 8.223052978515625, "learning_rate": 4.3248979591836735e-05, "loss": 3.114, "step": 3313 }, { "epoch": 13.526530612244898, "grad_norm": 6.204627513885498, "learning_rate": 4.32469387755102e-05, "loss": 3.5812, "step": 3314 }, { "epoch": 13.53061224489796, "grad_norm": 5.815744400024414, "learning_rate": 4.3244897959183676e-05, "loss": 3.6836, "step": 3315 }, { "epoch": 13.534693877551021, "grad_norm": 3.055277109146118, "learning_rate": 4.324285714285715e-05, "loss": 3.828, "step": 3316 }, { "epoch": 13.538775510204081, "grad_norm": 5.105190277099609, "learning_rate": 4.3240816326530617e-05, "loss": 3.7161, "step": 3317 }, { "epoch": 13.542857142857143, "grad_norm": 8.637845993041992, "learning_rate": 4.3238775510204084e-05, "loss": 3.2564, "step": 3318 }, { "epoch": 13.546938775510204, "grad_norm": 5.538488864898682, "learning_rate": 4.323673469387755e-05, "loss": 3.759, "step": 3319 }, { "epoch": 13.551020408163264, "grad_norm": 6.728883266448975, "learning_rate": 4.3234693877551025e-05, "loss": 3.3735, "step": 3320 }, { "epoch": 13.555102040816326, "grad_norm": 4.013355731964111, "learning_rate": 4.323265306122449e-05, "loss": 3.9282, "step": 3321 }, { "epoch": 13.559183673469388, "grad_norm": 5.833527565002441, "learning_rate": 4.323061224489796e-05, "loss": 3.4022, "step": 3322 }, { "epoch": 13.56326530612245, "grad_norm": 11.90023422241211, "learning_rate": 4.3228571428571426e-05, "loss": 2.5344, "step": 3323 }, { "epoch": 13.56734693877551, "grad_norm": 3.8681037425994873, "learning_rate": 4.32265306122449e-05, "loss": 3.6792, "step": 3324 }, { "epoch": 13.571428571428571, "grad_norm": 8.829708099365234, "learning_rate": 4.3224489795918374e-05, "loss": 2.7037, "step": 3325 }, { "epoch": 13.575510204081633, "grad_norm": 5.792920112609863, "learning_rate": 4.322244897959184e-05, "loss": 3.6192, "step": 3326 }, { "epoch": 13.579591836734695, "grad_norm": 7.829460144042969, "learning_rate": 4.322040816326531e-05, "loss": 3.4715, "step": 3327 }, { "epoch": 13.583673469387755, "grad_norm": 5.574912071228027, "learning_rate": 4.3218367346938775e-05, "loss": 3.5178, "step": 3328 }, { "epoch": 13.587755102040816, "grad_norm": 7.756597995758057, "learning_rate": 4.321632653061224e-05, "loss": 3.2142, "step": 3329 }, { "epoch": 13.591836734693878, "grad_norm": 5.250380039215088, "learning_rate": 4.3214285714285716e-05, "loss": 3.7409, "step": 3330 }, { "epoch": 13.59591836734694, "grad_norm": 7.337037563323975, "learning_rate": 4.3212244897959183e-05, "loss": 3.528, "step": 3331 }, { "epoch": 13.6, "grad_norm": 5.418059349060059, "learning_rate": 4.321020408163266e-05, "loss": 3.4658, "step": 3332 }, { "epoch": 13.604081632653061, "grad_norm": 9.553783416748047, "learning_rate": 4.3208163265306124e-05, "loss": 2.2733, "step": 3333 }, { "epoch": 13.608163265306123, "grad_norm": 9.322003364562988, "learning_rate": 4.320612244897959e-05, "loss": 3.1503, "step": 3334 }, { "epoch": 13.612244897959183, "grad_norm": 9.993101119995117, "learning_rate": 4.3204081632653065e-05, "loss": 2.4995, "step": 3335 }, { "epoch": 13.616326530612245, "grad_norm": 4.122823715209961, "learning_rate": 4.320204081632653e-05, "loss": 3.8648, "step": 3336 }, { "epoch": 13.620408163265306, "grad_norm": 6.873713970184326, "learning_rate": 4.32e-05, "loss": 3.5329, "step": 3337 }, { "epoch": 13.624489795918368, "grad_norm": 9.12844467163086, "learning_rate": 4.319795918367347e-05, "loss": 2.5268, "step": 3338 }, { "epoch": 13.628571428571428, "grad_norm": 4.783270835876465, "learning_rate": 4.319591836734694e-05, "loss": 3.5541, "step": 3339 }, { "epoch": 13.63265306122449, "grad_norm": 4.5337629318237305, "learning_rate": 4.3193877551020415e-05, "loss": 3.9718, "step": 3340 }, { "epoch": 13.636734693877552, "grad_norm": 6.209836483001709, "learning_rate": 4.319183673469388e-05, "loss": 3.8916, "step": 3341 }, { "epoch": 13.640816326530611, "grad_norm": 5.88408899307251, "learning_rate": 4.318979591836735e-05, "loss": 3.9465, "step": 3342 }, { "epoch": 13.644897959183673, "grad_norm": 5.660397529602051, "learning_rate": 4.3187755102040816e-05, "loss": 3.8371, "step": 3343 }, { "epoch": 13.648979591836735, "grad_norm": 5.128872394561768, "learning_rate": 4.318571428571429e-05, "loss": 3.3532, "step": 3344 }, { "epoch": 13.653061224489797, "grad_norm": 4.683090686798096, "learning_rate": 4.318367346938776e-05, "loss": 3.7029, "step": 3345 }, { "epoch": 13.657142857142857, "grad_norm": 7.463231086730957, "learning_rate": 4.3181632653061224e-05, "loss": 3.2893, "step": 3346 }, { "epoch": 13.661224489795918, "grad_norm": 6.184676170349121, "learning_rate": 4.31795918367347e-05, "loss": 3.6935, "step": 3347 }, { "epoch": 13.66530612244898, "grad_norm": 6.283607006072998, "learning_rate": 4.3177551020408165e-05, "loss": 3.705, "step": 3348 }, { "epoch": 13.66938775510204, "grad_norm": 5.58510684967041, "learning_rate": 4.317551020408164e-05, "loss": 3.7912, "step": 3349 }, { "epoch": 13.673469387755102, "grad_norm": 4.072152137756348, "learning_rate": 4.3173469387755106e-05, "loss": 4.0357, "step": 3350 }, { "epoch": 13.677551020408163, "grad_norm": 6.485809326171875, "learning_rate": 4.317142857142857e-05, "loss": 3.3677, "step": 3351 }, { "epoch": 13.681632653061225, "grad_norm": 6.225104331970215, "learning_rate": 4.316938775510204e-05, "loss": 3.5031, "step": 3352 }, { "epoch": 13.685714285714285, "grad_norm": 6.935919761657715, "learning_rate": 4.316734693877551e-05, "loss": 3.4335, "step": 3353 }, { "epoch": 13.689795918367347, "grad_norm": 6.982316017150879, "learning_rate": 4.316530612244898e-05, "loss": 3.2062, "step": 3354 }, { "epoch": 13.693877551020408, "grad_norm": 7.888844966888428, "learning_rate": 4.3163265306122455e-05, "loss": 3.3486, "step": 3355 }, { "epoch": 13.69795918367347, "grad_norm": 4.13409948348999, "learning_rate": 4.316122448979592e-05, "loss": 3.6929, "step": 3356 }, { "epoch": 13.70204081632653, "grad_norm": 3.053619146347046, "learning_rate": 4.315918367346939e-05, "loss": 4.1032, "step": 3357 }, { "epoch": 13.706122448979592, "grad_norm": 3.8413429260253906, "learning_rate": 4.315714285714286e-05, "loss": 3.8387, "step": 3358 }, { "epoch": 13.710204081632654, "grad_norm": 9.416318893432617, "learning_rate": 4.315510204081633e-05, "loss": 3.4627, "step": 3359 }, { "epoch": 13.714285714285714, "grad_norm": 8.261016845703125, "learning_rate": 4.31530612244898e-05, "loss": 3.0241, "step": 3360 }, { "epoch": 13.718367346938775, "grad_norm": 7.391164779663086, "learning_rate": 4.3151020408163265e-05, "loss": 3.0094, "step": 3361 }, { "epoch": 13.722448979591837, "grad_norm": 5.062023162841797, "learning_rate": 4.314897959183674e-05, "loss": 3.691, "step": 3362 }, { "epoch": 13.726530612244899, "grad_norm": 4.69378137588501, "learning_rate": 4.3146938775510206e-05, "loss": 3.6636, "step": 3363 }, { "epoch": 13.730612244897959, "grad_norm": 7.482748031616211, "learning_rate": 4.314489795918368e-05, "loss": 3.3508, "step": 3364 }, { "epoch": 13.73469387755102, "grad_norm": 6.86587381362915, "learning_rate": 4.314285714285715e-05, "loss": 3.2332, "step": 3365 }, { "epoch": 13.738775510204082, "grad_norm": 6.170403003692627, "learning_rate": 4.3140816326530614e-05, "loss": 3.182, "step": 3366 }, { "epoch": 13.742857142857144, "grad_norm": 4.845961093902588, "learning_rate": 4.313877551020408e-05, "loss": 3.8137, "step": 3367 }, { "epoch": 13.746938775510204, "grad_norm": 6.352352142333984, "learning_rate": 4.313673469387755e-05, "loss": 3.2085, "step": 3368 }, { "epoch": 13.751020408163265, "grad_norm": 6.038875102996826, "learning_rate": 4.313469387755102e-05, "loss": 3.6722, "step": 3369 }, { "epoch": 13.755102040816327, "grad_norm": 5.963590621948242, "learning_rate": 4.313265306122449e-05, "loss": 3.6028, "step": 3370 }, { "epoch": 13.759183673469387, "grad_norm": 5.593310356140137, "learning_rate": 4.313061224489796e-05, "loss": 3.7556, "step": 3371 }, { "epoch": 13.763265306122449, "grad_norm": 5.112518787384033, "learning_rate": 4.312857142857143e-05, "loss": 3.5643, "step": 3372 }, { "epoch": 13.76734693877551, "grad_norm": 6.393068313598633, "learning_rate": 4.31265306122449e-05, "loss": 3.5074, "step": 3373 }, { "epoch": 13.771428571428572, "grad_norm": 10.220939636230469, "learning_rate": 4.312448979591837e-05, "loss": 2.6185, "step": 3374 }, { "epoch": 13.775510204081632, "grad_norm": 4.380191326141357, "learning_rate": 4.312244897959184e-05, "loss": 3.792, "step": 3375 }, { "epoch": 13.779591836734694, "grad_norm": 11.143896102905273, "learning_rate": 4.3120408163265306e-05, "loss": 2.778, "step": 3376 }, { "epoch": 13.783673469387756, "grad_norm": 5.929333686828613, "learning_rate": 4.311836734693877e-05, "loss": 3.706, "step": 3377 }, { "epoch": 13.787755102040816, "grad_norm": 7.111721992492676, "learning_rate": 4.311632653061225e-05, "loss": 3.5117, "step": 3378 }, { "epoch": 13.791836734693877, "grad_norm": 6.634905815124512, "learning_rate": 4.311428571428572e-05, "loss": 2.9876, "step": 3379 }, { "epoch": 13.795918367346939, "grad_norm": 11.303667068481445, "learning_rate": 4.311224489795919e-05, "loss": 3.038, "step": 3380 }, { "epoch": 13.8, "grad_norm": 9.179426193237305, "learning_rate": 4.3110204081632655e-05, "loss": 3.0113, "step": 3381 }, { "epoch": 13.80408163265306, "grad_norm": 4.254327297210693, "learning_rate": 4.310816326530612e-05, "loss": 3.6442, "step": 3382 }, { "epoch": 13.808163265306122, "grad_norm": 3.9181811809539795, "learning_rate": 4.310612244897959e-05, "loss": 3.7112, "step": 3383 }, { "epoch": 13.812244897959184, "grad_norm": 6.704534530639648, "learning_rate": 4.310408163265306e-05, "loss": 3.2505, "step": 3384 }, { "epoch": 13.816326530612244, "grad_norm": 7.91060209274292, "learning_rate": 4.310204081632653e-05, "loss": 3.7402, "step": 3385 }, { "epoch": 13.820408163265306, "grad_norm": 4.151638507843018, "learning_rate": 4.3100000000000004e-05, "loss": 3.7332, "step": 3386 }, { "epoch": 13.824489795918367, "grad_norm": 6.999144554138184, "learning_rate": 4.309795918367347e-05, "loss": 3.2818, "step": 3387 }, { "epoch": 13.82857142857143, "grad_norm": 5.427470684051514, "learning_rate": 4.3095918367346945e-05, "loss": 3.6516, "step": 3388 }, { "epoch": 13.83265306122449, "grad_norm": 4.509360313415527, "learning_rate": 4.309387755102041e-05, "loss": 3.6549, "step": 3389 }, { "epoch": 13.83673469387755, "grad_norm": 5.0788140296936035, "learning_rate": 4.309183673469388e-05, "loss": 3.6096, "step": 3390 }, { "epoch": 13.840816326530613, "grad_norm": 5.095775127410889, "learning_rate": 4.3089795918367346e-05, "loss": 3.5538, "step": 3391 }, { "epoch": 13.844897959183674, "grad_norm": 4.184415817260742, "learning_rate": 4.3087755102040814e-05, "loss": 3.7261, "step": 3392 }, { "epoch": 13.848979591836734, "grad_norm": 3.418336868286133, "learning_rate": 4.308571428571429e-05, "loss": 3.8819, "step": 3393 }, { "epoch": 13.853061224489796, "grad_norm": 4.445486545562744, "learning_rate": 4.308367346938776e-05, "loss": 3.6457, "step": 3394 }, { "epoch": 13.857142857142858, "grad_norm": 5.7127275466918945, "learning_rate": 4.308163265306123e-05, "loss": 3.1073, "step": 3395 }, { "epoch": 13.861224489795918, "grad_norm": 5.8346147537231445, "learning_rate": 4.3079591836734696e-05, "loss": 3.2821, "step": 3396 }, { "epoch": 13.86530612244898, "grad_norm": 5.326418399810791, "learning_rate": 4.307755102040816e-05, "loss": 3.8634, "step": 3397 }, { "epoch": 13.869387755102041, "grad_norm": 3.6526691913604736, "learning_rate": 4.307551020408164e-05, "loss": 4.044, "step": 3398 }, { "epoch": 13.873469387755103, "grad_norm": 7.727941989898682, "learning_rate": 4.3073469387755104e-05, "loss": 3.3627, "step": 3399 }, { "epoch": 13.877551020408163, "grad_norm": 6.151679515838623, "learning_rate": 4.307142857142857e-05, "loss": 3.4458, "step": 3400 }, { "epoch": 13.881632653061224, "grad_norm": 7.968480110168457, "learning_rate": 4.3069387755102045e-05, "loss": 3.1987, "step": 3401 }, { "epoch": 13.885714285714286, "grad_norm": 4.515568733215332, "learning_rate": 4.306734693877551e-05, "loss": 3.7334, "step": 3402 }, { "epoch": 13.889795918367348, "grad_norm": 4.400454044342041, "learning_rate": 4.3065306122448986e-05, "loss": 3.7945, "step": 3403 }, { "epoch": 13.893877551020408, "grad_norm": 6.995728015899658, "learning_rate": 4.306326530612245e-05, "loss": 2.9305, "step": 3404 }, { "epoch": 13.89795918367347, "grad_norm": 7.448699474334717, "learning_rate": 4.306122448979592e-05, "loss": 2.925, "step": 3405 }, { "epoch": 13.902040816326531, "grad_norm": 5.197447299957275, "learning_rate": 4.305918367346939e-05, "loss": 3.6185, "step": 3406 }, { "epoch": 13.906122448979591, "grad_norm": 6.835203170776367, "learning_rate": 4.3057142857142854e-05, "loss": 3.2492, "step": 3407 }, { "epoch": 13.910204081632653, "grad_norm": 9.751132011413574, "learning_rate": 4.305510204081633e-05, "loss": 2.5954, "step": 3408 }, { "epoch": 13.914285714285715, "grad_norm": 4.800563335418701, "learning_rate": 4.30530612244898e-05, "loss": 3.7221, "step": 3409 }, { "epoch": 13.918367346938776, "grad_norm": 6.6944146156311035, "learning_rate": 4.305102040816327e-05, "loss": 3.3362, "step": 3410 }, { "epoch": 13.922448979591836, "grad_norm": 8.272115707397461, "learning_rate": 4.3048979591836736e-05, "loss": 3.3093, "step": 3411 }, { "epoch": 13.926530612244898, "grad_norm": 5.204406261444092, "learning_rate": 4.3046938775510204e-05, "loss": 3.5965, "step": 3412 }, { "epoch": 13.93061224489796, "grad_norm": 5.499877452850342, "learning_rate": 4.304489795918368e-05, "loss": 3.7026, "step": 3413 }, { "epoch": 13.93469387755102, "grad_norm": 7.062320709228516, "learning_rate": 4.3042857142857145e-05, "loss": 3.2802, "step": 3414 }, { "epoch": 13.938775510204081, "grad_norm": 7.666751861572266, "learning_rate": 4.304081632653061e-05, "loss": 3.1539, "step": 3415 }, { "epoch": 13.942857142857143, "grad_norm": 4.6041765213012695, "learning_rate": 4.3038775510204086e-05, "loss": 3.5516, "step": 3416 }, { "epoch": 13.946938775510205, "grad_norm": 4.304025650024414, "learning_rate": 4.303673469387755e-05, "loss": 3.7069, "step": 3417 }, { "epoch": 13.951020408163265, "grad_norm": 4.836004734039307, "learning_rate": 4.303469387755103e-05, "loss": 3.7274, "step": 3418 }, { "epoch": 13.955102040816326, "grad_norm": 5.3481645584106445, "learning_rate": 4.3032653061224494e-05, "loss": 3.4666, "step": 3419 }, { "epoch": 13.959183673469388, "grad_norm": 3.9571330547332764, "learning_rate": 4.303061224489796e-05, "loss": 3.9253, "step": 3420 }, { "epoch": 13.963265306122448, "grad_norm": 4.596685409545898, "learning_rate": 4.302857142857143e-05, "loss": 3.8541, "step": 3421 }, { "epoch": 13.96734693877551, "grad_norm": 4.089326858520508, "learning_rate": 4.3026530612244895e-05, "loss": 3.6989, "step": 3422 }, { "epoch": 13.971428571428572, "grad_norm": 9.438234329223633, "learning_rate": 4.302448979591837e-05, "loss": 3.0104, "step": 3423 }, { "epoch": 13.975510204081633, "grad_norm": 7.552208423614502, "learning_rate": 4.3022448979591836e-05, "loss": 3.4418, "step": 3424 }, { "epoch": 13.979591836734693, "grad_norm": 5.322824478149414, "learning_rate": 4.302040816326531e-05, "loss": 3.6332, "step": 3425 }, { "epoch": 13.983673469387755, "grad_norm": 5.949536323547363, "learning_rate": 4.301836734693878e-05, "loss": 3.6678, "step": 3426 }, { "epoch": 13.987755102040817, "grad_norm": 4.368216037750244, "learning_rate": 4.3016326530612244e-05, "loss": 3.546, "step": 3427 }, { "epoch": 13.991836734693878, "grad_norm": 5.2757391929626465, "learning_rate": 4.301428571428572e-05, "loss": 3.505, "step": 3428 }, { "epoch": 13.995918367346938, "grad_norm": 3.502474784851074, "learning_rate": 4.3012244897959185e-05, "loss": 3.8308, "step": 3429 }, { "epoch": 14.0, "grad_norm": 6.823404788970947, "learning_rate": 4.301020408163265e-05, "loss": 3.617, "step": 3430 }, { "epoch": 14.004081632653062, "grad_norm": 5.048367023468018, "learning_rate": 4.300816326530612e-05, "loss": 3.6668, "step": 3431 }, { "epoch": 14.008163265306122, "grad_norm": 3.396080255508423, "learning_rate": 4.3006122448979594e-05, "loss": 3.8137, "step": 3432 }, { "epoch": 14.012244897959183, "grad_norm": 5.670709133148193, "learning_rate": 4.300408163265307e-05, "loss": 3.4852, "step": 3433 }, { "epoch": 14.016326530612245, "grad_norm": 5.253901958465576, "learning_rate": 4.3002040816326535e-05, "loss": 3.3979, "step": 3434 }, { "epoch": 14.020408163265307, "grad_norm": 8.71407413482666, "learning_rate": 4.3e-05, "loss": 2.9186, "step": 3435 }, { "epoch": 14.024489795918367, "grad_norm": 8.780470848083496, "learning_rate": 4.299795918367347e-05, "loss": 2.9347, "step": 3436 }, { "epoch": 14.028571428571428, "grad_norm": 4.660196781158447, "learning_rate": 4.299591836734694e-05, "loss": 3.6074, "step": 3437 }, { "epoch": 14.03265306122449, "grad_norm": 7.485269546508789, "learning_rate": 4.299387755102041e-05, "loss": 3.4445, "step": 3438 }, { "epoch": 14.036734693877552, "grad_norm": 3.3511111736297607, "learning_rate": 4.299183673469388e-05, "loss": 4.1773, "step": 3439 }, { "epoch": 14.040816326530612, "grad_norm": 9.105213165283203, "learning_rate": 4.298979591836735e-05, "loss": 2.4527, "step": 3440 }, { "epoch": 14.044897959183674, "grad_norm": 6.019593238830566, "learning_rate": 4.298775510204082e-05, "loss": 3.6315, "step": 3441 }, { "epoch": 14.048979591836735, "grad_norm": 4.581212520599365, "learning_rate": 4.298571428571429e-05, "loss": 3.7609, "step": 3442 }, { "epoch": 14.053061224489795, "grad_norm": 5.982417583465576, "learning_rate": 4.298367346938776e-05, "loss": 3.4466, "step": 3443 }, { "epoch": 14.057142857142857, "grad_norm": 5.427894115447998, "learning_rate": 4.2981632653061226e-05, "loss": 3.7748, "step": 3444 }, { "epoch": 14.061224489795919, "grad_norm": 6.003526210784912, "learning_rate": 4.297959183673469e-05, "loss": 3.4194, "step": 3445 }, { "epoch": 14.06530612244898, "grad_norm": 8.867347717285156, "learning_rate": 4.297755102040816e-05, "loss": 3.0395, "step": 3446 }, { "epoch": 14.06938775510204, "grad_norm": 9.083779335021973, "learning_rate": 4.2975510204081634e-05, "loss": 2.8511, "step": 3447 }, { "epoch": 14.073469387755102, "grad_norm": 6.609250545501709, "learning_rate": 4.297346938775511e-05, "loss": 3.4257, "step": 3448 }, { "epoch": 14.077551020408164, "grad_norm": 5.389763832092285, "learning_rate": 4.2971428571428575e-05, "loss": 3.7143, "step": 3449 }, { "epoch": 14.081632653061224, "grad_norm": 4.429380893707275, "learning_rate": 4.296938775510204e-05, "loss": 3.6999, "step": 3450 }, { "epoch": 14.085714285714285, "grad_norm": 6.7828192710876465, "learning_rate": 4.296734693877551e-05, "loss": 3.6105, "step": 3451 }, { "epoch": 14.089795918367347, "grad_norm": 6.827824592590332, "learning_rate": 4.2965306122448983e-05, "loss": 2.9005, "step": 3452 }, { "epoch": 14.093877551020409, "grad_norm": 4.010068893432617, "learning_rate": 4.296326530612245e-05, "loss": 3.9886, "step": 3453 }, { "epoch": 14.097959183673469, "grad_norm": 4.728545665740967, "learning_rate": 4.296122448979592e-05, "loss": 3.7215, "step": 3454 }, { "epoch": 14.10204081632653, "grad_norm": 5.4959330558776855, "learning_rate": 4.295918367346939e-05, "loss": 3.6944, "step": 3455 }, { "epoch": 14.106122448979592, "grad_norm": 5.763251781463623, "learning_rate": 4.295714285714286e-05, "loss": 3.6654, "step": 3456 }, { "epoch": 14.110204081632654, "grad_norm": 14.595307350158691, "learning_rate": 4.295510204081633e-05, "loss": 2.1729, "step": 3457 }, { "epoch": 14.114285714285714, "grad_norm": 9.952160835266113, "learning_rate": 4.29530612244898e-05, "loss": 3.0388, "step": 3458 }, { "epoch": 14.118367346938776, "grad_norm": 12.28657054901123, "learning_rate": 4.295102040816327e-05, "loss": 2.3979, "step": 3459 }, { "epoch": 14.122448979591837, "grad_norm": 5.532776355743408, "learning_rate": 4.2948979591836734e-05, "loss": 3.5999, "step": 3460 }, { "epoch": 14.126530612244897, "grad_norm": 5.012792110443115, "learning_rate": 4.29469387755102e-05, "loss": 3.5458, "step": 3461 }, { "epoch": 14.130612244897959, "grad_norm": 6.46878719329834, "learning_rate": 4.2944897959183675e-05, "loss": 2.9978, "step": 3462 }, { "epoch": 14.13469387755102, "grad_norm": 9.796594619750977, "learning_rate": 4.294285714285715e-05, "loss": 3.0318, "step": 3463 }, { "epoch": 14.138775510204082, "grad_norm": 6.418121337890625, "learning_rate": 4.2940816326530616e-05, "loss": 3.23, "step": 3464 }, { "epoch": 14.142857142857142, "grad_norm": 9.393918991088867, "learning_rate": 4.293877551020408e-05, "loss": 2.4136, "step": 3465 }, { "epoch": 14.146938775510204, "grad_norm": 6.280067443847656, "learning_rate": 4.293673469387755e-05, "loss": 3.6017, "step": 3466 }, { "epoch": 14.151020408163266, "grad_norm": 8.581700325012207, "learning_rate": 4.2934693877551024e-05, "loss": 2.5076, "step": 3467 }, { "epoch": 14.155102040816326, "grad_norm": 4.731351852416992, "learning_rate": 4.293265306122449e-05, "loss": 3.6314, "step": 3468 }, { "epoch": 14.159183673469387, "grad_norm": 9.588294982910156, "learning_rate": 4.293061224489796e-05, "loss": 3.1727, "step": 3469 }, { "epoch": 14.16326530612245, "grad_norm": 5.888367176055908, "learning_rate": 4.292857142857143e-05, "loss": 3.6212, "step": 3470 }, { "epoch": 14.16734693877551, "grad_norm": 10.552618980407715, "learning_rate": 4.29265306122449e-05, "loss": 3.0878, "step": 3471 }, { "epoch": 14.17142857142857, "grad_norm": 6.662060260772705, "learning_rate": 4.2924489795918373e-05, "loss": 3.4561, "step": 3472 }, { "epoch": 14.175510204081633, "grad_norm": 3.8315296173095703, "learning_rate": 4.292244897959184e-05, "loss": 3.9485, "step": 3473 }, { "epoch": 14.179591836734694, "grad_norm": 10.396849632263184, "learning_rate": 4.292040816326531e-05, "loss": 2.9117, "step": 3474 }, { "epoch": 14.183673469387756, "grad_norm": 4.84809684753418, "learning_rate": 4.2918367346938775e-05, "loss": 3.5845, "step": 3475 }, { "epoch": 14.187755102040816, "grad_norm": 5.986484050750732, "learning_rate": 4.291632653061225e-05, "loss": 3.3773, "step": 3476 }, { "epoch": 14.191836734693878, "grad_norm": 7.385470390319824, "learning_rate": 4.2914285714285716e-05, "loss": 3.0386, "step": 3477 }, { "epoch": 14.19591836734694, "grad_norm": 7.7546305656433105, "learning_rate": 4.291224489795918e-05, "loss": 2.7902, "step": 3478 }, { "epoch": 14.2, "grad_norm": 10.644436836242676, "learning_rate": 4.291020408163266e-05, "loss": 2.5993, "step": 3479 }, { "epoch": 14.204081632653061, "grad_norm": 10.967331886291504, "learning_rate": 4.2908163265306124e-05, "loss": 3.3957, "step": 3480 }, { "epoch": 14.208163265306123, "grad_norm": 6.6932172775268555, "learning_rate": 4.29061224489796e-05, "loss": 3.3805, "step": 3481 }, { "epoch": 14.212244897959184, "grad_norm": 5.952167510986328, "learning_rate": 4.2904081632653065e-05, "loss": 3.2912, "step": 3482 }, { "epoch": 14.216326530612244, "grad_norm": 6.060666084289551, "learning_rate": 4.290204081632653e-05, "loss": 3.4581, "step": 3483 }, { "epoch": 14.220408163265306, "grad_norm": 5.925596714019775, "learning_rate": 4.29e-05, "loss": 3.437, "step": 3484 }, { "epoch": 14.224489795918368, "grad_norm": 4.183223724365234, "learning_rate": 4.2897959183673466e-05, "loss": 3.6539, "step": 3485 }, { "epoch": 14.228571428571428, "grad_norm": 7.422243595123291, "learning_rate": 4.289591836734694e-05, "loss": 3.3096, "step": 3486 }, { "epoch": 14.23265306122449, "grad_norm": 7.143606185913086, "learning_rate": 4.2893877551020414e-05, "loss": 3.5644, "step": 3487 }, { "epoch": 14.236734693877551, "grad_norm": 6.169679164886475, "learning_rate": 4.289183673469388e-05, "loss": 3.3531, "step": 3488 }, { "epoch": 14.240816326530613, "grad_norm": 5.494051456451416, "learning_rate": 4.288979591836735e-05, "loss": 3.4539, "step": 3489 }, { "epoch": 14.244897959183673, "grad_norm": 4.962257385253906, "learning_rate": 4.2887755102040816e-05, "loss": 3.5146, "step": 3490 }, { "epoch": 14.248979591836735, "grad_norm": 6.371198654174805, "learning_rate": 4.288571428571429e-05, "loss": 3.4175, "step": 3491 }, { "epoch": 14.253061224489796, "grad_norm": 4.749500274658203, "learning_rate": 4.2883673469387757e-05, "loss": 3.4974, "step": 3492 }, { "epoch": 14.257142857142856, "grad_norm": 6.434980392456055, "learning_rate": 4.2881632653061224e-05, "loss": 3.6414, "step": 3493 }, { "epoch": 14.261224489795918, "grad_norm": 8.777533531188965, "learning_rate": 4.28795918367347e-05, "loss": 3.2271, "step": 3494 }, { "epoch": 14.26530612244898, "grad_norm": 13.152050971984863, "learning_rate": 4.2877551020408165e-05, "loss": 3.4429, "step": 3495 }, { "epoch": 14.269387755102041, "grad_norm": 8.180154800415039, "learning_rate": 4.287551020408164e-05, "loss": 2.8167, "step": 3496 }, { "epoch": 14.273469387755101, "grad_norm": 14.695710182189941, "learning_rate": 4.2873469387755106e-05, "loss": 2.3642, "step": 3497 }, { "epoch": 14.277551020408163, "grad_norm": 4.496495246887207, "learning_rate": 4.287142857142857e-05, "loss": 3.6727, "step": 3498 }, { "epoch": 14.281632653061225, "grad_norm": 5.01383638381958, "learning_rate": 4.286938775510204e-05, "loss": 3.8563, "step": 3499 }, { "epoch": 14.285714285714286, "grad_norm": 3.693333864212036, "learning_rate": 4.286734693877551e-05, "loss": 3.7607, "step": 3500 }, { "epoch": 14.289795918367346, "grad_norm": 10.588436126708984, "learning_rate": 4.286530612244898e-05, "loss": 2.593, "step": 3501 }, { "epoch": 14.293877551020408, "grad_norm": 4.606422424316406, "learning_rate": 4.2863265306122455e-05, "loss": 3.5923, "step": 3502 }, { "epoch": 14.29795918367347, "grad_norm": 6.857972145080566, "learning_rate": 4.286122448979592e-05, "loss": 3.0978, "step": 3503 }, { "epoch": 14.30204081632653, "grad_norm": 6.611581802368164, "learning_rate": 4.285918367346939e-05, "loss": 3.5231, "step": 3504 }, { "epoch": 14.306122448979592, "grad_norm": 8.71416187286377, "learning_rate": 4.2857142857142856e-05, "loss": 3.2566, "step": 3505 }, { "epoch": 14.310204081632653, "grad_norm": 5.2841949462890625, "learning_rate": 4.285510204081633e-05, "loss": 3.5703, "step": 3506 }, { "epoch": 14.314285714285715, "grad_norm": 7.120683193206787, "learning_rate": 4.28530612244898e-05, "loss": 3.6679, "step": 3507 }, { "epoch": 14.318367346938775, "grad_norm": 9.7600679397583, "learning_rate": 4.2851020408163264e-05, "loss": 2.9335, "step": 3508 }, { "epoch": 14.322448979591837, "grad_norm": 3.8936448097229004, "learning_rate": 4.284897959183674e-05, "loss": 3.9934, "step": 3509 }, { "epoch": 14.326530612244898, "grad_norm": 7.868233680725098, "learning_rate": 4.2846938775510205e-05, "loss": 3.4029, "step": 3510 }, { "epoch": 14.33061224489796, "grad_norm": 5.332897186279297, "learning_rate": 4.284489795918368e-05, "loss": 3.6728, "step": 3511 }, { "epoch": 14.33469387755102, "grad_norm": 4.985393524169922, "learning_rate": 4.2842857142857146e-05, "loss": 3.4642, "step": 3512 }, { "epoch": 14.338775510204082, "grad_norm": 12.200807571411133, "learning_rate": 4.2840816326530614e-05, "loss": 2.5684, "step": 3513 }, { "epoch": 14.342857142857143, "grad_norm": 7.150488376617432, "learning_rate": 4.283877551020408e-05, "loss": 3.4149, "step": 3514 }, { "epoch": 14.346938775510203, "grad_norm": 7.302241325378418, "learning_rate": 4.283673469387755e-05, "loss": 3.1389, "step": 3515 }, { "epoch": 14.351020408163265, "grad_norm": 5.433082103729248, "learning_rate": 4.283469387755102e-05, "loss": 3.5299, "step": 3516 }, { "epoch": 14.355102040816327, "grad_norm": 4.297316074371338, "learning_rate": 4.2832653061224496e-05, "loss": 3.985, "step": 3517 }, { "epoch": 14.359183673469389, "grad_norm": 6.378148078918457, "learning_rate": 4.283061224489796e-05, "loss": 3.1815, "step": 3518 }, { "epoch": 14.363265306122448, "grad_norm": 7.6270270347595215, "learning_rate": 4.282857142857143e-05, "loss": 3.2624, "step": 3519 }, { "epoch": 14.36734693877551, "grad_norm": 5.940568923950195, "learning_rate": 4.28265306122449e-05, "loss": 3.7398, "step": 3520 }, { "epoch": 14.371428571428572, "grad_norm": 7.176810264587402, "learning_rate": 4.282448979591837e-05, "loss": 3.3034, "step": 3521 }, { "epoch": 14.375510204081632, "grad_norm": 4.942806243896484, "learning_rate": 4.282244897959184e-05, "loss": 3.5053, "step": 3522 }, { "epoch": 14.379591836734694, "grad_norm": 3.732771873474121, "learning_rate": 4.2820408163265305e-05, "loss": 3.744, "step": 3523 }, { "epoch": 14.383673469387755, "grad_norm": 7.7979021072387695, "learning_rate": 4.281836734693878e-05, "loss": 3.2643, "step": 3524 }, { "epoch": 14.387755102040817, "grad_norm": 5.4088311195373535, "learning_rate": 4.281632653061225e-05, "loss": 3.7148, "step": 3525 }, { "epoch": 14.391836734693877, "grad_norm": 7.189306735992432, "learning_rate": 4.281428571428572e-05, "loss": 3.3989, "step": 3526 }, { "epoch": 14.395918367346939, "grad_norm": 5.672240257263184, "learning_rate": 4.281224489795919e-05, "loss": 3.3771, "step": 3527 }, { "epoch": 14.4, "grad_norm": 4.790797233581543, "learning_rate": 4.2810204081632654e-05, "loss": 3.8797, "step": 3528 }, { "epoch": 14.404081632653062, "grad_norm": 4.235658168792725, "learning_rate": 4.280816326530612e-05, "loss": 3.63, "step": 3529 }, { "epoch": 14.408163265306122, "grad_norm": 5.694297790527344, "learning_rate": 4.2806122448979595e-05, "loss": 3.4261, "step": 3530 }, { "epoch": 14.412244897959184, "grad_norm": 6.692010402679443, "learning_rate": 4.280408163265306e-05, "loss": 3.5029, "step": 3531 }, { "epoch": 14.416326530612245, "grad_norm": 6.6005425453186035, "learning_rate": 4.280204081632653e-05, "loss": 3.5373, "step": 3532 }, { "epoch": 14.420408163265305, "grad_norm": 5.009964942932129, "learning_rate": 4.2800000000000004e-05, "loss": 3.8249, "step": 3533 }, { "epoch": 14.424489795918367, "grad_norm": 5.108092784881592, "learning_rate": 4.279795918367347e-05, "loss": 3.5608, "step": 3534 }, { "epoch": 14.428571428571429, "grad_norm": 7.994190216064453, "learning_rate": 4.2795918367346945e-05, "loss": 3.4169, "step": 3535 }, { "epoch": 14.43265306122449, "grad_norm": 6.082253456115723, "learning_rate": 4.279387755102041e-05, "loss": 3.389, "step": 3536 }, { "epoch": 14.43673469387755, "grad_norm": 6.2967095375061035, "learning_rate": 4.279183673469388e-05, "loss": 3.3389, "step": 3537 }, { "epoch": 14.440816326530612, "grad_norm": 9.358882904052734, "learning_rate": 4.2789795918367346e-05, "loss": 2.4976, "step": 3538 }, { "epoch": 14.444897959183674, "grad_norm": 13.07321834564209, "learning_rate": 4.278775510204081e-05, "loss": 2.4622, "step": 3539 }, { "epoch": 14.448979591836734, "grad_norm": 9.046313285827637, "learning_rate": 4.278571428571429e-05, "loss": 3.1098, "step": 3540 }, { "epoch": 14.453061224489796, "grad_norm": 9.692120552062988, "learning_rate": 4.278367346938776e-05, "loss": 2.6178, "step": 3541 }, { "epoch": 14.457142857142857, "grad_norm": 5.661147117614746, "learning_rate": 4.278163265306123e-05, "loss": 3.5607, "step": 3542 }, { "epoch": 14.461224489795919, "grad_norm": 5.680997848510742, "learning_rate": 4.2779591836734695e-05, "loss": 3.5525, "step": 3543 }, { "epoch": 14.465306122448979, "grad_norm": 5.129210948944092, "learning_rate": 4.277755102040816e-05, "loss": 3.6029, "step": 3544 }, { "epoch": 14.46938775510204, "grad_norm": 5.285322189331055, "learning_rate": 4.2775510204081636e-05, "loss": 3.5729, "step": 3545 }, { "epoch": 14.473469387755102, "grad_norm": 6.693445682525635, "learning_rate": 4.27734693877551e-05, "loss": 3.6902, "step": 3546 }, { "epoch": 14.477551020408164, "grad_norm": 8.718636512756348, "learning_rate": 4.277142857142857e-05, "loss": 3.4618, "step": 3547 }, { "epoch": 14.481632653061224, "grad_norm": 10.784834861755371, "learning_rate": 4.2769387755102044e-05, "loss": 3.4834, "step": 3548 }, { "epoch": 14.485714285714286, "grad_norm": 6.570213317871094, "learning_rate": 4.276734693877551e-05, "loss": 3.3994, "step": 3549 }, { "epoch": 14.489795918367347, "grad_norm": 4.55258846282959, "learning_rate": 4.2765306122448985e-05, "loss": 3.8327, "step": 3550 }, { "epoch": 14.493877551020407, "grad_norm": 6.892808437347412, "learning_rate": 4.276326530612245e-05, "loss": 3.1523, "step": 3551 }, { "epoch": 14.49795918367347, "grad_norm": 6.020925045013428, "learning_rate": 4.276122448979592e-05, "loss": 3.7885, "step": 3552 }, { "epoch": 14.50204081632653, "grad_norm": 7.101747512817383, "learning_rate": 4.275918367346939e-05, "loss": 3.2859, "step": 3553 }, { "epoch": 14.506122448979593, "grad_norm": 7.5743184089660645, "learning_rate": 4.2757142857142854e-05, "loss": 3.4445, "step": 3554 }, { "epoch": 14.510204081632653, "grad_norm": 14.208588600158691, "learning_rate": 4.275510204081633e-05, "loss": 2.9082, "step": 3555 }, { "epoch": 14.514285714285714, "grad_norm": 4.566429138183594, "learning_rate": 4.27530612244898e-05, "loss": 3.6621, "step": 3556 }, { "epoch": 14.518367346938776, "grad_norm": 7.013317108154297, "learning_rate": 4.275102040816327e-05, "loss": 3.3054, "step": 3557 }, { "epoch": 14.522448979591836, "grad_norm": 9.520852088928223, "learning_rate": 4.2748979591836736e-05, "loss": 2.9228, "step": 3558 }, { "epoch": 14.526530612244898, "grad_norm": 4.612450122833252, "learning_rate": 4.27469387755102e-05, "loss": 3.6998, "step": 3559 }, { "epoch": 14.53061224489796, "grad_norm": 4.637116432189941, "learning_rate": 4.274489795918368e-05, "loss": 3.892, "step": 3560 }, { "epoch": 14.534693877551021, "grad_norm": 4.022984504699707, "learning_rate": 4.2742857142857144e-05, "loss": 3.9406, "step": 3561 }, { "epoch": 14.538775510204081, "grad_norm": 7.9795613288879395, "learning_rate": 4.274081632653061e-05, "loss": 2.961, "step": 3562 }, { "epoch": 14.542857142857143, "grad_norm": 5.27625846862793, "learning_rate": 4.2738775510204085e-05, "loss": 3.5126, "step": 3563 }, { "epoch": 14.546938775510204, "grad_norm": 5.094842910766602, "learning_rate": 4.273673469387755e-05, "loss": 3.6595, "step": 3564 }, { "epoch": 14.551020408163264, "grad_norm": 5.684930801391602, "learning_rate": 4.2734693877551026e-05, "loss": 3.6198, "step": 3565 }, { "epoch": 14.555102040816326, "grad_norm": 5.7768120765686035, "learning_rate": 4.273265306122449e-05, "loss": 3.6627, "step": 3566 }, { "epoch": 14.559183673469388, "grad_norm": 9.370490074157715, "learning_rate": 4.273061224489796e-05, "loss": 3.2064, "step": 3567 }, { "epoch": 14.56326530612245, "grad_norm": 3.887097120285034, "learning_rate": 4.272857142857143e-05, "loss": 3.4884, "step": 3568 }, { "epoch": 14.56734693877551, "grad_norm": 7.166926860809326, "learning_rate": 4.27265306122449e-05, "loss": 3.6997, "step": 3569 }, { "epoch": 14.571428571428571, "grad_norm": 4.970879077911377, "learning_rate": 4.272448979591837e-05, "loss": 3.7026, "step": 3570 }, { "epoch": 14.575510204081633, "grad_norm": 7.9543538093566895, "learning_rate": 4.272244897959184e-05, "loss": 3.4336, "step": 3571 }, { "epoch": 14.579591836734695, "grad_norm": 4.89068603515625, "learning_rate": 4.272040816326531e-05, "loss": 3.8936, "step": 3572 }, { "epoch": 14.583673469387755, "grad_norm": 6.830969333648682, "learning_rate": 4.271836734693878e-05, "loss": 3.398, "step": 3573 }, { "epoch": 14.587755102040816, "grad_norm": 5.457698822021484, "learning_rate": 4.271632653061225e-05, "loss": 3.7504, "step": 3574 }, { "epoch": 14.591836734693878, "grad_norm": 8.63539981842041, "learning_rate": 4.271428571428572e-05, "loss": 2.9153, "step": 3575 }, { "epoch": 14.59591836734694, "grad_norm": 4.837587356567383, "learning_rate": 4.2712244897959185e-05, "loss": 3.7962, "step": 3576 }, { "epoch": 14.6, "grad_norm": 7.962646484375, "learning_rate": 4.271020408163265e-05, "loss": 3.3587, "step": 3577 }, { "epoch": 14.604081632653061, "grad_norm": 4.065853118896484, "learning_rate": 4.2708163265306126e-05, "loss": 3.8111, "step": 3578 }, { "epoch": 14.608163265306123, "grad_norm": 16.69668960571289, "learning_rate": 4.270612244897959e-05, "loss": 1.2763, "step": 3579 }, { "epoch": 14.612244897959183, "grad_norm": 5.402054786682129, "learning_rate": 4.270408163265307e-05, "loss": 3.635, "step": 3580 }, { "epoch": 14.616326530612245, "grad_norm": 8.082993507385254, "learning_rate": 4.2702040816326534e-05, "loss": 2.9703, "step": 3581 }, { "epoch": 14.620408163265306, "grad_norm": 8.943682670593262, "learning_rate": 4.27e-05, "loss": 2.9455, "step": 3582 }, { "epoch": 14.624489795918368, "grad_norm": 5.605941295623779, "learning_rate": 4.269795918367347e-05, "loss": 3.4961, "step": 3583 }, { "epoch": 14.628571428571428, "grad_norm": 5.72525691986084, "learning_rate": 4.269591836734694e-05, "loss": 3.6934, "step": 3584 }, { "epoch": 14.63265306122449, "grad_norm": 7.27733039855957, "learning_rate": 4.269387755102041e-05, "loss": 3.1419, "step": 3585 }, { "epoch": 14.636734693877552, "grad_norm": 4.199255466461182, "learning_rate": 4.2691836734693876e-05, "loss": 3.8758, "step": 3586 }, { "epoch": 14.640816326530611, "grad_norm": 4.683276176452637, "learning_rate": 4.268979591836735e-05, "loss": 3.7669, "step": 3587 }, { "epoch": 14.644897959183673, "grad_norm": 6.065154075622559, "learning_rate": 4.268775510204082e-05, "loss": 3.5895, "step": 3588 }, { "epoch": 14.648979591836735, "grad_norm": 4.6968913078308105, "learning_rate": 4.268571428571429e-05, "loss": 3.554, "step": 3589 }, { "epoch": 14.653061224489797, "grad_norm": 12.13912582397461, "learning_rate": 4.268367346938776e-05, "loss": 2.1778, "step": 3590 }, { "epoch": 14.657142857142857, "grad_norm": 3.50097918510437, "learning_rate": 4.2681632653061226e-05, "loss": 4.0472, "step": 3591 }, { "epoch": 14.661224489795918, "grad_norm": 4.023043155670166, "learning_rate": 4.267959183673469e-05, "loss": 3.8686, "step": 3592 }, { "epoch": 14.66530612244898, "grad_norm": 7.015305519104004, "learning_rate": 4.267755102040816e-05, "loss": 3.4242, "step": 3593 }, { "epoch": 14.66938775510204, "grad_norm": 7.356764316558838, "learning_rate": 4.2675510204081634e-05, "loss": 3.3174, "step": 3594 }, { "epoch": 14.673469387755102, "grad_norm": 4.928897380828857, "learning_rate": 4.267346938775511e-05, "loss": 3.9442, "step": 3595 }, { "epoch": 14.677551020408163, "grad_norm": 10.210982322692871, "learning_rate": 4.2671428571428575e-05, "loss": 3.2112, "step": 3596 }, { "epoch": 14.681632653061225, "grad_norm": 7.631703853607178, "learning_rate": 4.266938775510204e-05, "loss": 2.9987, "step": 3597 }, { "epoch": 14.685714285714285, "grad_norm": 5.865663528442383, "learning_rate": 4.266734693877551e-05, "loss": 3.6119, "step": 3598 }, { "epoch": 14.689795918367347, "grad_norm": 9.275792121887207, "learning_rate": 4.266530612244898e-05, "loss": 3.1344, "step": 3599 }, { "epoch": 14.693877551020408, "grad_norm": 5.076303958892822, "learning_rate": 4.266326530612245e-05, "loss": 3.5435, "step": 3600 }, { "epoch": 14.69795918367347, "grad_norm": 10.303689002990723, "learning_rate": 4.266122448979592e-05, "loss": 2.9692, "step": 3601 }, { "epoch": 14.70204081632653, "grad_norm": 9.968488693237305, "learning_rate": 4.265918367346939e-05, "loss": 3.1993, "step": 3602 }, { "epoch": 14.706122448979592, "grad_norm": 4.372609615325928, "learning_rate": 4.265714285714286e-05, "loss": 3.6565, "step": 3603 }, { "epoch": 14.710204081632654, "grad_norm": 9.927159309387207, "learning_rate": 4.265510204081633e-05, "loss": 2.7642, "step": 3604 }, { "epoch": 14.714285714285714, "grad_norm": 6.531652450561523, "learning_rate": 4.26530612244898e-05, "loss": 3.2746, "step": 3605 }, { "epoch": 14.718367346938775, "grad_norm": 5.149615287780762, "learning_rate": 4.2651020408163266e-05, "loss": 3.7006, "step": 3606 }, { "epoch": 14.722448979591837, "grad_norm": 10.584977149963379, "learning_rate": 4.2648979591836733e-05, "loss": 2.9917, "step": 3607 }, { "epoch": 14.726530612244899, "grad_norm": 7.468328475952148, "learning_rate": 4.26469387755102e-05, "loss": 3.4591, "step": 3608 }, { "epoch": 14.730612244897959, "grad_norm": 3.8643031120300293, "learning_rate": 4.2644897959183675e-05, "loss": 3.8422, "step": 3609 }, { "epoch": 14.73469387755102, "grad_norm": 7.326937675476074, "learning_rate": 4.264285714285715e-05, "loss": 3.371, "step": 3610 }, { "epoch": 14.738775510204082, "grad_norm": 7.693155765533447, "learning_rate": 4.2640816326530616e-05, "loss": 3.3342, "step": 3611 }, { "epoch": 14.742857142857144, "grad_norm": 5.8523993492126465, "learning_rate": 4.263877551020408e-05, "loss": 3.4426, "step": 3612 }, { "epoch": 14.746938775510204, "grad_norm": 4.996550559997559, "learning_rate": 4.2636734693877557e-05, "loss": 3.8546, "step": 3613 }, { "epoch": 14.751020408163265, "grad_norm": 12.153691291809082, "learning_rate": 4.2634693877551024e-05, "loss": 2.4385, "step": 3614 }, { "epoch": 14.755102040816327, "grad_norm": 5.719627857208252, "learning_rate": 4.263265306122449e-05, "loss": 3.6676, "step": 3615 }, { "epoch": 14.759183673469387, "grad_norm": 8.177918434143066, "learning_rate": 4.263061224489796e-05, "loss": 3.3616, "step": 3616 }, { "epoch": 14.763265306122449, "grad_norm": 4.748505592346191, "learning_rate": 4.262857142857143e-05, "loss": 3.8511, "step": 3617 }, { "epoch": 14.76734693877551, "grad_norm": 4.389712333679199, "learning_rate": 4.2626530612244906e-05, "loss": 3.6159, "step": 3618 }, { "epoch": 14.771428571428572, "grad_norm": 3.2708990573883057, "learning_rate": 4.262448979591837e-05, "loss": 3.8063, "step": 3619 }, { "epoch": 14.775510204081632, "grad_norm": 10.447443008422852, "learning_rate": 4.262244897959184e-05, "loss": 2.9595, "step": 3620 }, { "epoch": 14.779591836734694, "grad_norm": 4.215814113616943, "learning_rate": 4.262040816326531e-05, "loss": 3.8281, "step": 3621 }, { "epoch": 14.783673469387756, "grad_norm": 5.00468111038208, "learning_rate": 4.2618367346938774e-05, "loss": 3.7082, "step": 3622 }, { "epoch": 14.787755102040816, "grad_norm": 7.89408540725708, "learning_rate": 4.261632653061225e-05, "loss": 2.8598, "step": 3623 }, { "epoch": 14.791836734693877, "grad_norm": 6.037527561187744, "learning_rate": 4.2614285714285715e-05, "loss": 3.74, "step": 3624 }, { "epoch": 14.795918367346939, "grad_norm": 7.6155242919921875, "learning_rate": 4.261224489795919e-05, "loss": 3.3465, "step": 3625 }, { "epoch": 14.8, "grad_norm": 3.202341318130493, "learning_rate": 4.2610204081632656e-05, "loss": 4.0834, "step": 3626 }, { "epoch": 14.80408163265306, "grad_norm": 4.91594934463501, "learning_rate": 4.2608163265306123e-05, "loss": 3.6904, "step": 3627 }, { "epoch": 14.808163265306122, "grad_norm": 4.502063274383545, "learning_rate": 4.26061224489796e-05, "loss": 3.64, "step": 3628 }, { "epoch": 14.812244897959184, "grad_norm": 7.023047924041748, "learning_rate": 4.2604081632653064e-05, "loss": 3.2118, "step": 3629 }, { "epoch": 14.816326530612244, "grad_norm": 6.723394393920898, "learning_rate": 4.260204081632653e-05, "loss": 3.5356, "step": 3630 }, { "epoch": 14.820408163265306, "grad_norm": 8.723793983459473, "learning_rate": 4.26e-05, "loss": 3.2439, "step": 3631 }, { "epoch": 14.824489795918367, "grad_norm": 7.154966354370117, "learning_rate": 4.259795918367347e-05, "loss": 3.537, "step": 3632 }, { "epoch": 14.82857142857143, "grad_norm": 5.565726280212402, "learning_rate": 4.259591836734694e-05, "loss": 3.6649, "step": 3633 }, { "epoch": 14.83265306122449, "grad_norm": 6.601034164428711, "learning_rate": 4.2593877551020414e-05, "loss": 3.3509, "step": 3634 }, { "epoch": 14.83673469387755, "grad_norm": 3.7632508277893066, "learning_rate": 4.259183673469388e-05, "loss": 3.7752, "step": 3635 }, { "epoch": 14.840816326530613, "grad_norm": 7.85811710357666, "learning_rate": 4.258979591836735e-05, "loss": 3.1088, "step": 3636 }, { "epoch": 14.844897959183674, "grad_norm": 5.046207427978516, "learning_rate": 4.2587755102040815e-05, "loss": 3.7786, "step": 3637 }, { "epoch": 14.848979591836734, "grad_norm": 8.467079162597656, "learning_rate": 4.258571428571429e-05, "loss": 3.0475, "step": 3638 }, { "epoch": 14.853061224489796, "grad_norm": 4.597343444824219, "learning_rate": 4.2583673469387756e-05, "loss": 3.8082, "step": 3639 }, { "epoch": 14.857142857142858, "grad_norm": 4.637474536895752, "learning_rate": 4.258163265306122e-05, "loss": 3.5045, "step": 3640 }, { "epoch": 14.861224489795918, "grad_norm": 6.691149711608887, "learning_rate": 4.25795918367347e-05, "loss": 3.4859, "step": 3641 }, { "epoch": 14.86530612244898, "grad_norm": 5.577962398529053, "learning_rate": 4.2577551020408164e-05, "loss": 3.6792, "step": 3642 }, { "epoch": 14.869387755102041, "grad_norm": 4.0021281242370605, "learning_rate": 4.257551020408164e-05, "loss": 3.6412, "step": 3643 }, { "epoch": 14.873469387755103, "grad_norm": 5.155494689941406, "learning_rate": 4.2573469387755105e-05, "loss": 3.7727, "step": 3644 }, { "epoch": 14.877551020408163, "grad_norm": 7.46030855178833, "learning_rate": 4.257142857142857e-05, "loss": 3.1719, "step": 3645 }, { "epoch": 14.881632653061224, "grad_norm": 3.499281167984009, "learning_rate": 4.256938775510204e-05, "loss": 4.0077, "step": 3646 }, { "epoch": 14.885714285714286, "grad_norm": 6.128637313842773, "learning_rate": 4.2567346938775507e-05, "loss": 3.4026, "step": 3647 }, { "epoch": 14.889795918367348, "grad_norm": 7.304086208343506, "learning_rate": 4.256530612244898e-05, "loss": 3.2202, "step": 3648 }, { "epoch": 14.893877551020408, "grad_norm": 5.7808756828308105, "learning_rate": 4.2563265306122454e-05, "loss": 3.7241, "step": 3649 }, { "epoch": 14.89795918367347, "grad_norm": 6.633958339691162, "learning_rate": 4.256122448979592e-05, "loss": 3.1306, "step": 3650 }, { "epoch": 14.902040816326531, "grad_norm": 7.51275110244751, "learning_rate": 4.255918367346939e-05, "loss": 3.3102, "step": 3651 }, { "epoch": 14.906122448979591, "grad_norm": 6.0516252517700195, "learning_rate": 4.2557142857142856e-05, "loss": 3.5761, "step": 3652 }, { "epoch": 14.910204081632653, "grad_norm": 7.488738536834717, "learning_rate": 4.255510204081633e-05, "loss": 3.1378, "step": 3653 }, { "epoch": 14.914285714285715, "grad_norm": 8.178561210632324, "learning_rate": 4.25530612244898e-05, "loss": 2.8935, "step": 3654 }, { "epoch": 14.918367346938776, "grad_norm": 6.4130096435546875, "learning_rate": 4.2551020408163264e-05, "loss": 3.2416, "step": 3655 }, { "epoch": 14.922448979591836, "grad_norm": 4.992390155792236, "learning_rate": 4.254897959183674e-05, "loss": 3.4987, "step": 3656 }, { "epoch": 14.926530612244898, "grad_norm": 3.7539403438568115, "learning_rate": 4.2546938775510205e-05, "loss": 3.8684, "step": 3657 }, { "epoch": 14.93061224489796, "grad_norm": 4.73036527633667, "learning_rate": 4.254489795918368e-05, "loss": 3.535, "step": 3658 }, { "epoch": 14.93469387755102, "grad_norm": 6.411326885223389, "learning_rate": 4.2542857142857146e-05, "loss": 3.8218, "step": 3659 }, { "epoch": 14.938775510204081, "grad_norm": 4.773394584655762, "learning_rate": 4.254081632653061e-05, "loss": 3.6647, "step": 3660 }, { "epoch": 14.942857142857143, "grad_norm": 10.579089164733887, "learning_rate": 4.253877551020408e-05, "loss": 2.3404, "step": 3661 }, { "epoch": 14.946938775510205, "grad_norm": 6.064553737640381, "learning_rate": 4.2536734693877554e-05, "loss": 3.5362, "step": 3662 }, { "epoch": 14.951020408163265, "grad_norm": 4.969227313995361, "learning_rate": 4.253469387755102e-05, "loss": 3.8305, "step": 3663 }, { "epoch": 14.955102040816326, "grad_norm": 7.586976528167725, "learning_rate": 4.2532653061224495e-05, "loss": 3.6204, "step": 3664 }, { "epoch": 14.959183673469388, "grad_norm": 7.140070915222168, "learning_rate": 4.253061224489796e-05, "loss": 3.5462, "step": 3665 }, { "epoch": 14.963265306122448, "grad_norm": 8.513764381408691, "learning_rate": 4.252857142857143e-05, "loss": 3.2056, "step": 3666 }, { "epoch": 14.96734693877551, "grad_norm": 9.039411544799805, "learning_rate": 4.25265306122449e-05, "loss": 3.0854, "step": 3667 }, { "epoch": 14.971428571428572, "grad_norm": 8.413331031799316, "learning_rate": 4.252448979591837e-05, "loss": 3.3654, "step": 3668 }, { "epoch": 14.975510204081633, "grad_norm": 2.968513250350952, "learning_rate": 4.252244897959184e-05, "loss": 4.0164, "step": 3669 }, { "epoch": 14.979591836734693, "grad_norm": 11.284895896911621, "learning_rate": 4.2520408163265305e-05, "loss": 3.0634, "step": 3670 }, { "epoch": 14.983673469387755, "grad_norm": 19.75514030456543, "learning_rate": 4.251836734693878e-05, "loss": 2.5667, "step": 3671 }, { "epoch": 14.987755102040817, "grad_norm": 6.2159552574157715, "learning_rate": 4.251632653061225e-05, "loss": 3.4139, "step": 3672 }, { "epoch": 14.991836734693878, "grad_norm": 10.376583099365234, "learning_rate": 4.251428571428572e-05, "loss": 2.9914, "step": 3673 }, { "epoch": 14.995918367346938, "grad_norm": 6.530878067016602, "learning_rate": 4.251224489795919e-05, "loss": 3.6329, "step": 3674 }, { "epoch": 15.0, "grad_norm": 12.758476257324219, "learning_rate": 4.2510204081632654e-05, "loss": 2.2978, "step": 3675 }, { "epoch": 15.004081632653062, "grad_norm": 9.507381439208984, "learning_rate": 4.250816326530612e-05, "loss": 2.4276, "step": 3676 }, { "epoch": 15.008163265306122, "grad_norm": 3.6619932651519775, "learning_rate": 4.2506122448979595e-05, "loss": 4.0036, "step": 3677 }, { "epoch": 15.012244897959183, "grad_norm": 6.466986656188965, "learning_rate": 4.250408163265306e-05, "loss": 3.285, "step": 3678 }, { "epoch": 15.016326530612245, "grad_norm": 4.191864013671875, "learning_rate": 4.2502040816326536e-05, "loss": 3.7757, "step": 3679 }, { "epoch": 15.020408163265307, "grad_norm": 6.690586566925049, "learning_rate": 4.25e-05, "loss": 3.4445, "step": 3680 }, { "epoch": 15.024489795918367, "grad_norm": 8.746185302734375, "learning_rate": 4.249795918367347e-05, "loss": 3.2057, "step": 3681 }, { "epoch": 15.028571428571428, "grad_norm": 7.310567855834961, "learning_rate": 4.2495918367346944e-05, "loss": 3.0361, "step": 3682 }, { "epoch": 15.03265306122449, "grad_norm": 7.269344806671143, "learning_rate": 4.249387755102041e-05, "loss": 3.0546, "step": 3683 }, { "epoch": 15.036734693877552, "grad_norm": 6.30587100982666, "learning_rate": 4.249183673469388e-05, "loss": 3.3122, "step": 3684 }, { "epoch": 15.040816326530612, "grad_norm": 6.831896781921387, "learning_rate": 4.2489795918367345e-05, "loss": 3.3113, "step": 3685 }, { "epoch": 15.044897959183674, "grad_norm": 11.133947372436523, "learning_rate": 4.248775510204082e-05, "loss": 2.4996, "step": 3686 }, { "epoch": 15.048979591836735, "grad_norm": 6.059529781341553, "learning_rate": 4.2485714285714286e-05, "loss": 3.4922, "step": 3687 }, { "epoch": 15.053061224489795, "grad_norm": 9.239399909973145, "learning_rate": 4.248367346938776e-05, "loss": 2.9206, "step": 3688 }, { "epoch": 15.057142857142857, "grad_norm": 5.045151710510254, "learning_rate": 4.248163265306123e-05, "loss": 3.5534, "step": 3689 }, { "epoch": 15.061224489795919, "grad_norm": 5.440079689025879, "learning_rate": 4.2479591836734695e-05, "loss": 3.7047, "step": 3690 }, { "epoch": 15.06530612244898, "grad_norm": 4.761218547821045, "learning_rate": 4.247755102040816e-05, "loss": 3.978, "step": 3691 }, { "epoch": 15.06938775510204, "grad_norm": 6.753596782684326, "learning_rate": 4.2475510204081636e-05, "loss": 3.3894, "step": 3692 }, { "epoch": 15.073469387755102, "grad_norm": 3.851574420928955, "learning_rate": 4.24734693877551e-05, "loss": 4.1517, "step": 3693 }, { "epoch": 15.077551020408164, "grad_norm": 7.2191314697265625, "learning_rate": 4.247142857142857e-05, "loss": 3.2112, "step": 3694 }, { "epoch": 15.081632653061224, "grad_norm": 6.1381072998046875, "learning_rate": 4.2469387755102044e-05, "loss": 3.3422, "step": 3695 }, { "epoch": 15.085714285714285, "grad_norm": 6.444193363189697, "learning_rate": 4.246734693877551e-05, "loss": 3.7956, "step": 3696 }, { "epoch": 15.089795918367347, "grad_norm": 7.435385704040527, "learning_rate": 4.2465306122448985e-05, "loss": 3.022, "step": 3697 }, { "epoch": 15.093877551020409, "grad_norm": 5.963365077972412, "learning_rate": 4.246326530612245e-05, "loss": 3.5692, "step": 3698 }, { "epoch": 15.097959183673469, "grad_norm": 3.9027202129364014, "learning_rate": 4.246122448979592e-05, "loss": 3.9373, "step": 3699 }, { "epoch": 15.10204081632653, "grad_norm": 7.683741092681885, "learning_rate": 4.2459183673469386e-05, "loss": 3.3501, "step": 3700 }, { "epoch": 15.106122448979592, "grad_norm": 8.074971199035645, "learning_rate": 4.245714285714285e-05, "loss": 3.3783, "step": 3701 }, { "epoch": 15.110204081632654, "grad_norm": 4.854177474975586, "learning_rate": 4.245510204081633e-05, "loss": 3.7955, "step": 3702 }, { "epoch": 15.114285714285714, "grad_norm": 9.345345497131348, "learning_rate": 4.24530612244898e-05, "loss": 2.9418, "step": 3703 }, { "epoch": 15.118367346938776, "grad_norm": 8.159955024719238, "learning_rate": 4.245102040816327e-05, "loss": 3.1755, "step": 3704 }, { "epoch": 15.122448979591837, "grad_norm": 7.189591884613037, "learning_rate": 4.2448979591836735e-05, "loss": 3.2191, "step": 3705 }, { "epoch": 15.126530612244897, "grad_norm": 6.72805643081665, "learning_rate": 4.244693877551021e-05, "loss": 3.4097, "step": 3706 }, { "epoch": 15.130612244897959, "grad_norm": 3.9202370643615723, "learning_rate": 4.2444897959183676e-05, "loss": 3.9218, "step": 3707 }, { "epoch": 15.13469387755102, "grad_norm": 8.21033000946045, "learning_rate": 4.2442857142857144e-05, "loss": 2.7317, "step": 3708 }, { "epoch": 15.138775510204082, "grad_norm": 4.122145175933838, "learning_rate": 4.244081632653061e-05, "loss": 3.7853, "step": 3709 }, { "epoch": 15.142857142857142, "grad_norm": 5.136005401611328, "learning_rate": 4.2438775510204085e-05, "loss": 3.4307, "step": 3710 }, { "epoch": 15.146938775510204, "grad_norm": 5.214320182800293, "learning_rate": 4.243673469387756e-05, "loss": 3.6163, "step": 3711 }, { "epoch": 15.151020408163266, "grad_norm": 7.035396575927734, "learning_rate": 4.2434693877551026e-05, "loss": 3.4842, "step": 3712 }, { "epoch": 15.155102040816326, "grad_norm": 5.05590295791626, "learning_rate": 4.243265306122449e-05, "loss": 3.6816, "step": 3713 }, { "epoch": 15.159183673469387, "grad_norm": 10.17858600616455, "learning_rate": 4.243061224489796e-05, "loss": 2.9093, "step": 3714 }, { "epoch": 15.16326530612245, "grad_norm": 7.313606262207031, "learning_rate": 4.242857142857143e-05, "loss": 3.3933, "step": 3715 }, { "epoch": 15.16734693877551, "grad_norm": 4.2380852699279785, "learning_rate": 4.24265306122449e-05, "loss": 3.8814, "step": 3716 }, { "epoch": 15.17142857142857, "grad_norm": 8.545478820800781, "learning_rate": 4.242448979591837e-05, "loss": 3.009, "step": 3717 }, { "epoch": 15.175510204081633, "grad_norm": NaN, "learning_rate": 4.242244897959184e-05, "loss": 2.867, "step": 3718 }, { "epoch": 15.179591836734694, "grad_norm": 6.255237102508545, "learning_rate": 4.242244897959184e-05, "loss": 3.4935, "step": 3719 }, { "epoch": 15.183673469387756, "grad_norm": 10.341347694396973, "learning_rate": 4.242040816326531e-05, "loss": 2.364, "step": 3720 }, { "epoch": 15.187755102040816, "grad_norm": 9.202388763427734, "learning_rate": 4.2418367346938776e-05, "loss": 2.926, "step": 3721 }, { "epoch": 15.191836734693878, "grad_norm": 5.789558410644531, "learning_rate": 4.241632653061225e-05, "loss": 3.3193, "step": 3722 }, { "epoch": 15.19591836734694, "grad_norm": 7.221806049346924, "learning_rate": 4.241428571428572e-05, "loss": 3.1627, "step": 3723 }, { "epoch": 15.2, "grad_norm": 10.200267791748047, "learning_rate": 4.2412244897959184e-05, "loss": 2.4993, "step": 3724 }, { "epoch": 15.204081632653061, "grad_norm": 6.672095775604248, "learning_rate": 4.241020408163265e-05, "loss": 3.7255, "step": 3725 }, { "epoch": 15.208163265306123, "grad_norm": 6.414969444274902, "learning_rate": 4.2408163265306125e-05, "loss": 3.3232, "step": 3726 }, { "epoch": 15.212244897959184, "grad_norm": 5.248787879943848, "learning_rate": 4.24061224489796e-05, "loss": 3.5899, "step": 3727 }, { "epoch": 15.216326530612244, "grad_norm": 13.25257682800293, "learning_rate": 4.2404081632653066e-05, "loss": 2.7996, "step": 3728 }, { "epoch": 15.220408163265306, "grad_norm": 4.254227638244629, "learning_rate": 4.2402040816326533e-05, "loss": 3.7401, "step": 3729 }, { "epoch": 15.224489795918368, "grad_norm": 9.87341022491455, "learning_rate": 4.24e-05, "loss": 2.9483, "step": 3730 }, { "epoch": 15.228571428571428, "grad_norm": 2.778066873550415, "learning_rate": 4.239795918367347e-05, "loss": 3.9608, "step": 3731 }, { "epoch": 15.23265306122449, "grad_norm": 9.312628746032715, "learning_rate": 4.239591836734694e-05, "loss": 3.1201, "step": 3732 }, { "epoch": 15.236734693877551, "grad_norm": 7.141024112701416, "learning_rate": 4.239387755102041e-05, "loss": 3.5363, "step": 3733 }, { "epoch": 15.240816326530613, "grad_norm": 6.911282062530518, "learning_rate": 4.239183673469388e-05, "loss": 3.4609, "step": 3734 }, { "epoch": 15.244897959183673, "grad_norm": 8.471010208129883, "learning_rate": 4.238979591836735e-05, "loss": 3.2525, "step": 3735 }, { "epoch": 15.248979591836735, "grad_norm": 8.491908073425293, "learning_rate": 4.238775510204082e-05, "loss": 3.2606, "step": 3736 }, { "epoch": 15.253061224489796, "grad_norm": 6.2342023849487305, "learning_rate": 4.238571428571429e-05, "loss": 3.7462, "step": 3737 }, { "epoch": 15.257142857142856, "grad_norm": 5.967325687408447, "learning_rate": 4.238367346938776e-05, "loss": 3.5351, "step": 3738 }, { "epoch": 15.261224489795918, "grad_norm": 6.30575704574585, "learning_rate": 4.2381632653061225e-05, "loss": 3.2818, "step": 3739 }, { "epoch": 15.26530612244898, "grad_norm": 5.583068370819092, "learning_rate": 4.237959183673469e-05, "loss": 3.7515, "step": 3740 }, { "epoch": 15.269387755102041, "grad_norm": 10.940451622009277, "learning_rate": 4.237755102040816e-05, "loss": 2.802, "step": 3741 }, { "epoch": 15.273469387755101, "grad_norm": 8.551135063171387, "learning_rate": 4.237551020408163e-05, "loss": 3.0866, "step": 3742 }, { "epoch": 15.277551020408163, "grad_norm": 9.488130569458008, "learning_rate": 4.237346938775511e-05, "loss": 3.2098, "step": 3743 }, { "epoch": 15.281632653061225, "grad_norm": 8.642871856689453, "learning_rate": 4.2371428571428574e-05, "loss": 3.0776, "step": 3744 }, { "epoch": 15.285714285714286, "grad_norm": 5.271658897399902, "learning_rate": 4.236938775510204e-05, "loss": 3.7409, "step": 3745 }, { "epoch": 15.289795918367346, "grad_norm": 6.324840068817139, "learning_rate": 4.236734693877551e-05, "loss": 3.3868, "step": 3746 }, { "epoch": 15.293877551020408, "grad_norm": 7.168661594390869, "learning_rate": 4.236530612244898e-05, "loss": 3.4657, "step": 3747 }, { "epoch": 15.29795918367347, "grad_norm": 7.046822547912598, "learning_rate": 4.236326530612245e-05, "loss": 2.9774, "step": 3748 }, { "epoch": 15.30204081632653, "grad_norm": 14.535309791564941, "learning_rate": 4.236122448979592e-05, "loss": 2.0837, "step": 3749 }, { "epoch": 15.306122448979592, "grad_norm": 10.556288719177246, "learning_rate": 4.235918367346939e-05, "loss": 2.8898, "step": 3750 }, { "epoch": 15.310204081632653, "grad_norm": 3.9316070079803467, "learning_rate": 4.2357142857142864e-05, "loss": 3.8131, "step": 3751 }, { "epoch": 15.314285714285715, "grad_norm": 9.813251495361328, "learning_rate": 4.235510204081633e-05, "loss": 3.2295, "step": 3752 }, { "epoch": 15.318367346938775, "grad_norm": 10.067663192749023, "learning_rate": 4.23530612244898e-05, "loss": 2.8278, "step": 3753 }, { "epoch": 15.322448979591837, "grad_norm": 5.929957389831543, "learning_rate": 4.2351020408163266e-05, "loss": 3.4993, "step": 3754 }, { "epoch": 15.326530612244898, "grad_norm": 10.715645790100098, "learning_rate": 4.234897959183673e-05, "loss": 2.912, "step": 3755 }, { "epoch": 15.33061224489796, "grad_norm": 13.977802276611328, "learning_rate": 4.234693877551021e-05, "loss": 3.4418, "step": 3756 }, { "epoch": 15.33469387755102, "grad_norm": 4.599195957183838, "learning_rate": 4.2344897959183674e-05, "loss": 3.6077, "step": 3757 }, { "epoch": 15.338775510204082, "grad_norm": 12.015470504760742, "learning_rate": 4.234285714285715e-05, "loss": 2.4715, "step": 3758 }, { "epoch": 15.342857142857143, "grad_norm": 3.4606871604919434, "learning_rate": 4.2340816326530615e-05, "loss": 4.0332, "step": 3759 }, { "epoch": 15.346938775510203, "grad_norm": 5.393328666687012, "learning_rate": 4.233877551020408e-05, "loss": 3.629, "step": 3760 }, { "epoch": 15.351020408163265, "grad_norm": 6.58065938949585, "learning_rate": 4.2336734693877556e-05, "loss": 3.5984, "step": 3761 }, { "epoch": 15.355102040816327, "grad_norm": 6.7549729347229, "learning_rate": 4.233469387755102e-05, "loss": 3.5121, "step": 3762 }, { "epoch": 15.359183673469389, "grad_norm": 12.821940422058105, "learning_rate": 4.233265306122449e-05, "loss": 2.2458, "step": 3763 }, { "epoch": 15.363265306122448, "grad_norm": 5.26340389251709, "learning_rate": 4.233061224489796e-05, "loss": 3.7583, "step": 3764 }, { "epoch": 15.36734693877551, "grad_norm": 4.683936595916748, "learning_rate": 4.232857142857143e-05, "loss": 3.6844, "step": 3765 }, { "epoch": 15.371428571428572, "grad_norm": 4.91886043548584, "learning_rate": 4.2326530612244905e-05, "loss": 3.5119, "step": 3766 }, { "epoch": 15.375510204081632, "grad_norm": 9.454550743103027, "learning_rate": 4.232448979591837e-05, "loss": 2.8777, "step": 3767 }, { "epoch": 15.379591836734694, "grad_norm": 7.803900241851807, "learning_rate": 4.232244897959184e-05, "loss": 3.5101, "step": 3768 }, { "epoch": 15.383673469387755, "grad_norm": 7.0866193771362305, "learning_rate": 4.2320408163265307e-05, "loss": 3.6252, "step": 3769 }, { "epoch": 15.387755102040817, "grad_norm": 7.397933006286621, "learning_rate": 4.2318367346938774e-05, "loss": 3.3348, "step": 3770 }, { "epoch": 15.391836734693877, "grad_norm": 7.885481357574463, "learning_rate": 4.231632653061225e-05, "loss": 3.1062, "step": 3771 }, { "epoch": 15.395918367346939, "grad_norm": 4.538924217224121, "learning_rate": 4.2314285714285715e-05, "loss": 3.7847, "step": 3772 }, { "epoch": 15.4, "grad_norm": 5.785786151885986, "learning_rate": 4.231224489795919e-05, "loss": 3.4317, "step": 3773 }, { "epoch": 15.404081632653062, "grad_norm": 8.2662353515625, "learning_rate": 4.2310204081632656e-05, "loss": 3.6268, "step": 3774 }, { "epoch": 15.408163265306122, "grad_norm": 6.976170063018799, "learning_rate": 4.230816326530612e-05, "loss": 3.2837, "step": 3775 }, { "epoch": 15.412244897959184, "grad_norm": 7.245400428771973, "learning_rate": 4.23061224489796e-05, "loss": 3.6137, "step": 3776 }, { "epoch": 15.416326530612245, "grad_norm": 9.381718635559082, "learning_rate": 4.2304081632653064e-05, "loss": 2.9207, "step": 3777 }, { "epoch": 15.420408163265305, "grad_norm": 7.257016658782959, "learning_rate": 4.230204081632653e-05, "loss": 3.2905, "step": 3778 }, { "epoch": 15.424489795918367, "grad_norm": 4.655364036560059, "learning_rate": 4.23e-05, "loss": 3.5963, "step": 3779 }, { "epoch": 15.428571428571429, "grad_norm": 5.189565658569336, "learning_rate": 4.229795918367347e-05, "loss": 3.6605, "step": 3780 }, { "epoch": 15.43265306122449, "grad_norm": 4.986557960510254, "learning_rate": 4.2295918367346946e-05, "loss": 3.7318, "step": 3781 }, { "epoch": 15.43673469387755, "grad_norm": 11.933988571166992, "learning_rate": 4.229387755102041e-05, "loss": 3.3824, "step": 3782 }, { "epoch": 15.440816326530612, "grad_norm": 4.84625244140625, "learning_rate": 4.229183673469388e-05, "loss": 3.5557, "step": 3783 }, { "epoch": 15.444897959183674, "grad_norm": 4.83833646774292, "learning_rate": 4.228979591836735e-05, "loss": 3.5829, "step": 3784 }, { "epoch": 15.448979591836734, "grad_norm": 4.515514850616455, "learning_rate": 4.2287755102040814e-05, "loss": 3.6482, "step": 3785 }, { "epoch": 15.453061224489796, "grad_norm": 10.89545726776123, "learning_rate": 4.228571428571429e-05, "loss": 3.2647, "step": 3786 }, { "epoch": 15.457142857142857, "grad_norm": 8.347283363342285, "learning_rate": 4.2283673469387756e-05, "loss": 3.371, "step": 3787 }, { "epoch": 15.461224489795919, "grad_norm": 6.668556213378906, "learning_rate": 4.228163265306123e-05, "loss": 3.3973, "step": 3788 }, { "epoch": 15.465306122448979, "grad_norm": 4.077728748321533, "learning_rate": 4.2279591836734697e-05, "loss": 3.964, "step": 3789 }, { "epoch": 15.46938775510204, "grad_norm": 6.0693793296813965, "learning_rate": 4.2277551020408164e-05, "loss": 3.606, "step": 3790 }, { "epoch": 15.473469387755102, "grad_norm": 7.9250712394714355, "learning_rate": 4.227551020408164e-05, "loss": 3.0924, "step": 3791 }, { "epoch": 15.477551020408164, "grad_norm": 7.436736583709717, "learning_rate": 4.2273469387755105e-05, "loss": 3.3008, "step": 3792 }, { "epoch": 15.481632653061224, "grad_norm": 9.73439884185791, "learning_rate": 4.227142857142857e-05, "loss": 2.8457, "step": 3793 }, { "epoch": 15.485714285714286, "grad_norm": 9.295479774475098, "learning_rate": 4.226938775510204e-05, "loss": 3.1945, "step": 3794 }, { "epoch": 15.489795918367347, "grad_norm": 6.311616897583008, "learning_rate": 4.226734693877551e-05, "loss": 3.6153, "step": 3795 }, { "epoch": 15.493877551020407, "grad_norm": 4.656355857849121, "learning_rate": 4.226530612244898e-05, "loss": 3.6036, "step": 3796 }, { "epoch": 15.49795918367347, "grad_norm": 8.843574523925781, "learning_rate": 4.2263265306122454e-05, "loss": 3.4322, "step": 3797 }, { "epoch": 15.50204081632653, "grad_norm": 8.866243362426758, "learning_rate": 4.226122448979592e-05, "loss": 2.7337, "step": 3798 }, { "epoch": 15.506122448979593, "grad_norm": 6.253833770751953, "learning_rate": 4.225918367346939e-05, "loss": 3.6119, "step": 3799 }, { "epoch": 15.510204081632653, "grad_norm": 4.508166313171387, "learning_rate": 4.225714285714286e-05, "loss": 3.5631, "step": 3800 }, { "epoch": 15.514285714285714, "grad_norm": 4.9211602210998535, "learning_rate": 4.225510204081633e-05, "loss": 3.8505, "step": 3801 }, { "epoch": 15.518367346938776, "grad_norm": 9.289901733398438, "learning_rate": 4.2253061224489796e-05, "loss": 3.1907, "step": 3802 }, { "epoch": 15.522448979591836, "grad_norm": 7.246789455413818, "learning_rate": 4.2251020408163263e-05, "loss": 3.651, "step": 3803 }, { "epoch": 15.526530612244898, "grad_norm": 11.702716827392578, "learning_rate": 4.224897959183674e-05, "loss": 2.8985, "step": 3804 }, { "epoch": 15.53061224489796, "grad_norm": 8.780695915222168, "learning_rate": 4.224693877551021e-05, "loss": 2.9279, "step": 3805 }, { "epoch": 15.534693877551021, "grad_norm": 10.212981224060059, "learning_rate": 4.224489795918368e-05, "loss": 3.4215, "step": 3806 }, { "epoch": 15.538775510204081, "grad_norm": 6.599767208099365, "learning_rate": 4.2242857142857145e-05, "loss": 3.6698, "step": 3807 }, { "epoch": 15.542857142857143, "grad_norm": 8.3089599609375, "learning_rate": 4.224081632653061e-05, "loss": 3.282, "step": 3808 }, { "epoch": 15.546938775510204, "grad_norm": 14.389657020568848, "learning_rate": 4.223877551020408e-05, "loss": 2.2895, "step": 3809 }, { "epoch": 15.551020408163264, "grad_norm": 13.884817123413086, "learning_rate": 4.2236734693877554e-05, "loss": 1.1037, "step": 3810 }, { "epoch": 15.555102040816326, "grad_norm": 6.154823303222656, "learning_rate": 4.223469387755102e-05, "loss": 3.6575, "step": 3811 }, { "epoch": 15.559183673469388, "grad_norm": 4.905170917510986, "learning_rate": 4.2232653061224495e-05, "loss": 3.9117, "step": 3812 }, { "epoch": 15.56326530612245, "grad_norm": 6.222763538360596, "learning_rate": 4.223061224489796e-05, "loss": 3.7662, "step": 3813 }, { "epoch": 15.56734693877551, "grad_norm": 7.4631171226501465, "learning_rate": 4.222857142857143e-05, "loss": 3.1866, "step": 3814 }, { "epoch": 15.571428571428571, "grad_norm": 5.8619208335876465, "learning_rate": 4.22265306122449e-05, "loss": 3.6222, "step": 3815 }, { "epoch": 15.575510204081633, "grad_norm": 5.764459609985352, "learning_rate": 4.222448979591837e-05, "loss": 3.8211, "step": 3816 }, { "epoch": 15.579591836734695, "grad_norm": 5.812229156494141, "learning_rate": 4.222244897959184e-05, "loss": 3.714, "step": 3817 }, { "epoch": 15.583673469387755, "grad_norm": 6.913232326507568, "learning_rate": 4.2220408163265304e-05, "loss": 3.5009, "step": 3818 }, { "epoch": 15.587755102040816, "grad_norm": 8.667071342468262, "learning_rate": 4.221836734693878e-05, "loss": 3.2866, "step": 3819 }, { "epoch": 15.591836734693878, "grad_norm": 7.117218971252441, "learning_rate": 4.221632653061225e-05, "loss": 3.1488, "step": 3820 }, { "epoch": 15.59591836734694, "grad_norm": 5.357242584228516, "learning_rate": 4.221428571428572e-05, "loss": 3.5046, "step": 3821 }, { "epoch": 15.6, "grad_norm": 8.703612327575684, "learning_rate": 4.2212244897959186e-05, "loss": 3.4359, "step": 3822 }, { "epoch": 15.604081632653061, "grad_norm": 5.523036479949951, "learning_rate": 4.221020408163265e-05, "loss": 3.52, "step": 3823 }, { "epoch": 15.608163265306123, "grad_norm": 4.2145161628723145, "learning_rate": 4.220816326530612e-05, "loss": 3.8414, "step": 3824 }, { "epoch": 15.612244897959183, "grad_norm": 5.803880214691162, "learning_rate": 4.2206122448979594e-05, "loss": 3.5831, "step": 3825 }, { "epoch": 15.616326530612245, "grad_norm": 7.026004791259766, "learning_rate": 4.220408163265306e-05, "loss": 3.4788, "step": 3826 }, { "epoch": 15.620408163265306, "grad_norm": 5.699084281921387, "learning_rate": 4.2202040816326535e-05, "loss": 3.4429, "step": 3827 }, { "epoch": 15.624489795918368, "grad_norm": 7.5040507316589355, "learning_rate": 4.22e-05, "loss": 3.0356, "step": 3828 }, { "epoch": 15.628571428571428, "grad_norm": 6.145822048187256, "learning_rate": 4.219795918367347e-05, "loss": 3.6608, "step": 3829 }, { "epoch": 15.63265306122449, "grad_norm": 6.106715679168701, "learning_rate": 4.2195918367346944e-05, "loss": 3.3756, "step": 3830 }, { "epoch": 15.636734693877552, "grad_norm": 9.817769050598145, "learning_rate": 4.219387755102041e-05, "loss": 3.1125, "step": 3831 }, { "epoch": 15.640816326530611, "grad_norm": 8.936254501342773, "learning_rate": 4.219183673469388e-05, "loss": 3.1221, "step": 3832 }, { "epoch": 15.644897959183673, "grad_norm": 8.286615371704102, "learning_rate": 4.2189795918367345e-05, "loss": 3.3699, "step": 3833 }, { "epoch": 15.648979591836735, "grad_norm": 5.010092258453369, "learning_rate": 4.218775510204082e-05, "loss": 3.8482, "step": 3834 }, { "epoch": 15.653061224489797, "grad_norm": 9.18437385559082, "learning_rate": 4.218571428571429e-05, "loss": 3.0652, "step": 3835 }, { "epoch": 15.657142857142857, "grad_norm": 7.933992862701416, "learning_rate": 4.218367346938776e-05, "loss": 3.0607, "step": 3836 }, { "epoch": 15.661224489795918, "grad_norm": 14.05422592163086, "learning_rate": 4.218163265306123e-05, "loss": 2.4687, "step": 3837 }, { "epoch": 15.66530612244898, "grad_norm": 3.9965150356292725, "learning_rate": 4.2179591836734694e-05, "loss": 3.7305, "step": 3838 }, { "epoch": 15.66938775510204, "grad_norm": 6.3803229331970215, "learning_rate": 4.217755102040816e-05, "loss": 3.6121, "step": 3839 }, { "epoch": 15.673469387755102, "grad_norm": 9.5001802444458, "learning_rate": 4.2175510204081635e-05, "loss": 2.4612, "step": 3840 }, { "epoch": 15.677551020408163, "grad_norm": 5.758542537689209, "learning_rate": 4.21734693877551e-05, "loss": 3.8651, "step": 3841 }, { "epoch": 15.681632653061225, "grad_norm": 4.350054740905762, "learning_rate": 4.2171428571428576e-05, "loss": 3.4646, "step": 3842 }, { "epoch": 15.685714285714285, "grad_norm": 6.705610752105713, "learning_rate": 4.216938775510204e-05, "loss": 3.576, "step": 3843 }, { "epoch": 15.689795918367347, "grad_norm": 9.940580368041992, "learning_rate": 4.216734693877552e-05, "loss": 2.9924, "step": 3844 }, { "epoch": 15.693877551020408, "grad_norm": 4.9497480392456055, "learning_rate": 4.2165306122448984e-05, "loss": 3.652, "step": 3845 }, { "epoch": 15.69795918367347, "grad_norm": 8.6453218460083, "learning_rate": 4.216326530612245e-05, "loss": 2.99, "step": 3846 }, { "epoch": 15.70204081632653, "grad_norm": 6.330470561981201, "learning_rate": 4.216122448979592e-05, "loss": 3.3906, "step": 3847 }, { "epoch": 15.706122448979592, "grad_norm": 6.30268669128418, "learning_rate": 4.2159183673469386e-05, "loss": 3.3224, "step": 3848 }, { "epoch": 15.710204081632654, "grad_norm": 7.592936038970947, "learning_rate": 4.215714285714286e-05, "loss": 3.3693, "step": 3849 }, { "epoch": 15.714285714285714, "grad_norm": 5.769023895263672, "learning_rate": 4.215510204081633e-05, "loss": 3.4964, "step": 3850 }, { "epoch": 15.718367346938775, "grad_norm": 4.698797225952148, "learning_rate": 4.21530612244898e-05, "loss": 3.6923, "step": 3851 }, { "epoch": 15.722448979591837, "grad_norm": 5.116866588592529, "learning_rate": 4.215102040816327e-05, "loss": 3.6278, "step": 3852 }, { "epoch": 15.726530612244899, "grad_norm": 6.268083572387695, "learning_rate": 4.2148979591836735e-05, "loss": 3.511, "step": 3853 }, { "epoch": 15.730612244897959, "grad_norm": 4.814438819885254, "learning_rate": 4.214693877551021e-05, "loss": 3.9815, "step": 3854 }, { "epoch": 15.73469387755102, "grad_norm": 14.375738143920898, "learning_rate": 4.2144897959183676e-05, "loss": 2.35, "step": 3855 }, { "epoch": 15.738775510204082, "grad_norm": 4.999359130859375, "learning_rate": 4.214285714285714e-05, "loss": 3.7249, "step": 3856 }, { "epoch": 15.742857142857144, "grad_norm": 9.815508842468262, "learning_rate": 4.214081632653061e-05, "loss": 3.0624, "step": 3857 }, { "epoch": 15.746938775510204, "grad_norm": 5.725892543792725, "learning_rate": 4.2138775510204084e-05, "loss": 3.6699, "step": 3858 }, { "epoch": 15.751020408163265, "grad_norm": 10.56163215637207, "learning_rate": 4.213673469387756e-05, "loss": 2.6296, "step": 3859 }, { "epoch": 15.755102040816327, "grad_norm": 5.05830717086792, "learning_rate": 4.2134693877551025e-05, "loss": 3.6158, "step": 3860 }, { "epoch": 15.759183673469387, "grad_norm": 5.537606239318848, "learning_rate": 4.213265306122449e-05, "loss": 3.4686, "step": 3861 }, { "epoch": 15.763265306122449, "grad_norm": 5.624019145965576, "learning_rate": 4.213061224489796e-05, "loss": 3.7226, "step": 3862 }, { "epoch": 15.76734693877551, "grad_norm": 3.924546003341675, "learning_rate": 4.2128571428571426e-05, "loss": 3.8345, "step": 3863 }, { "epoch": 15.771428571428572, "grad_norm": 5.784869194030762, "learning_rate": 4.21265306122449e-05, "loss": 3.6344, "step": 3864 }, { "epoch": 15.775510204081632, "grad_norm": 6.204427242279053, "learning_rate": 4.212448979591837e-05, "loss": 3.8184, "step": 3865 }, { "epoch": 15.779591836734694, "grad_norm": 16.260440826416016, "learning_rate": 4.212244897959184e-05, "loss": 2.5693, "step": 3866 }, { "epoch": 15.783673469387756, "grad_norm": 9.434711456298828, "learning_rate": 4.212040816326531e-05, "loss": 3.2659, "step": 3867 }, { "epoch": 15.787755102040816, "grad_norm": 10.7889404296875, "learning_rate": 4.2118367346938776e-05, "loss": 2.9269, "step": 3868 }, { "epoch": 15.791836734693877, "grad_norm": 7.259758949279785, "learning_rate": 4.211632653061225e-05, "loss": 3.4127, "step": 3869 }, { "epoch": 15.795918367346939, "grad_norm": 13.035616874694824, "learning_rate": 4.211428571428572e-05, "loss": 2.1111, "step": 3870 }, { "epoch": 15.8, "grad_norm": 12.82659912109375, "learning_rate": 4.2112244897959184e-05, "loss": 2.3491, "step": 3871 }, { "epoch": 15.80408163265306, "grad_norm": 5.9881415367126465, "learning_rate": 4.211020408163265e-05, "loss": 3.4292, "step": 3872 }, { "epoch": 15.808163265306122, "grad_norm": 3.9861884117126465, "learning_rate": 4.2108163265306125e-05, "loss": 3.7375, "step": 3873 }, { "epoch": 15.812244897959184, "grad_norm": 4.737736225128174, "learning_rate": 4.21061224489796e-05, "loss": 3.4877, "step": 3874 }, { "epoch": 15.816326530612244, "grad_norm": 9.533113479614258, "learning_rate": 4.2104081632653066e-05, "loss": 3.1962, "step": 3875 }, { "epoch": 15.820408163265306, "grad_norm": 6.937789440155029, "learning_rate": 4.210204081632653e-05, "loss": 3.1916, "step": 3876 }, { "epoch": 15.824489795918367, "grad_norm": 5.134127616882324, "learning_rate": 4.21e-05, "loss": 3.6791, "step": 3877 }, { "epoch": 15.82857142857143, "grad_norm": 7.902211666107178, "learning_rate": 4.209795918367347e-05, "loss": 3.184, "step": 3878 }, { "epoch": 15.83265306122449, "grad_norm": 4.894566059112549, "learning_rate": 4.209591836734694e-05, "loss": 3.6822, "step": 3879 }, { "epoch": 15.83673469387755, "grad_norm": 7.255113124847412, "learning_rate": 4.209387755102041e-05, "loss": 3.3707, "step": 3880 }, { "epoch": 15.840816326530613, "grad_norm": 6.678956508636475, "learning_rate": 4.209183673469388e-05, "loss": 3.4425, "step": 3881 }, { "epoch": 15.844897959183674, "grad_norm": 11.277192115783691, "learning_rate": 4.208979591836735e-05, "loss": 2.8737, "step": 3882 }, { "epoch": 15.848979591836734, "grad_norm": 10.067131996154785, "learning_rate": 4.2087755102040816e-05, "loss": 2.79, "step": 3883 }, { "epoch": 15.853061224489796, "grad_norm": 6.2686686515808105, "learning_rate": 4.208571428571429e-05, "loss": 3.6921, "step": 3884 }, { "epoch": 15.857142857142858, "grad_norm": 4.140767574310303, "learning_rate": 4.208367346938776e-05, "loss": 3.7823, "step": 3885 }, { "epoch": 15.861224489795918, "grad_norm": 4.985367298126221, "learning_rate": 4.2081632653061225e-05, "loss": 3.4742, "step": 3886 }, { "epoch": 15.86530612244898, "grad_norm": 5.817669868469238, "learning_rate": 4.207959183673469e-05, "loss": 3.5565, "step": 3887 }, { "epoch": 15.869387755102041, "grad_norm": 7.144381523132324, "learning_rate": 4.2077551020408166e-05, "loss": 3.3651, "step": 3888 }, { "epoch": 15.873469387755103, "grad_norm": 4.926249027252197, "learning_rate": 4.207551020408164e-05, "loss": 3.5588, "step": 3889 }, { "epoch": 15.877551020408163, "grad_norm": 6.76177978515625, "learning_rate": 4.207346938775511e-05, "loss": 3.5887, "step": 3890 }, { "epoch": 15.881632653061224, "grad_norm": 4.863776206970215, "learning_rate": 4.2071428571428574e-05, "loss": 3.6681, "step": 3891 }, { "epoch": 15.885714285714286, "grad_norm": 6.012524604797363, "learning_rate": 4.206938775510204e-05, "loss": 3.4522, "step": 3892 }, { "epoch": 15.889795918367348, "grad_norm": 7.348855018615723, "learning_rate": 4.2067346938775515e-05, "loss": 2.9195, "step": 3893 }, { "epoch": 15.893877551020408, "grad_norm": 6.731104850769043, "learning_rate": 4.206530612244898e-05, "loss": 3.3318, "step": 3894 }, { "epoch": 15.89795918367347, "grad_norm": 14.159850120544434, "learning_rate": 4.206326530612245e-05, "loss": 2.1624, "step": 3895 }, { "epoch": 15.902040816326531, "grad_norm": 7.78592586517334, "learning_rate": 4.206122448979592e-05, "loss": 3.6254, "step": 3896 }, { "epoch": 15.906122448979591, "grad_norm": 5.498477935791016, "learning_rate": 4.205918367346939e-05, "loss": 3.4902, "step": 3897 }, { "epoch": 15.910204081632653, "grad_norm": 7.2132697105407715, "learning_rate": 4.2057142857142864e-05, "loss": 3.4995, "step": 3898 }, { "epoch": 15.914285714285715, "grad_norm": 6.7053656578063965, "learning_rate": 4.205510204081633e-05, "loss": 3.3917, "step": 3899 }, { "epoch": 15.918367346938776, "grad_norm": 4.333471298217773, "learning_rate": 4.20530612244898e-05, "loss": 3.8382, "step": 3900 }, { "epoch": 15.922448979591836, "grad_norm": 9.356537818908691, "learning_rate": 4.2051020408163265e-05, "loss": 3.0692, "step": 3901 }, { "epoch": 15.926530612244898, "grad_norm": 5.620856761932373, "learning_rate": 4.204897959183673e-05, "loss": 3.7212, "step": 3902 }, { "epoch": 15.93061224489796, "grad_norm": 6.1018900871276855, "learning_rate": 4.2046938775510206e-05, "loss": 3.3936, "step": 3903 }, { "epoch": 15.93469387755102, "grad_norm": 13.142126083374023, "learning_rate": 4.2044897959183673e-05, "loss": 3.0096, "step": 3904 }, { "epoch": 15.938775510204081, "grad_norm": 7.824909687042236, "learning_rate": 4.204285714285715e-05, "loss": 2.8274, "step": 3905 }, { "epoch": 15.942857142857143, "grad_norm": 6.954948902130127, "learning_rate": 4.2040816326530615e-05, "loss": 3.6132, "step": 3906 }, { "epoch": 15.946938775510205, "grad_norm": 6.065127849578857, "learning_rate": 4.203877551020408e-05, "loss": 3.4701, "step": 3907 }, { "epoch": 15.951020408163265, "grad_norm": 10.666455268859863, "learning_rate": 4.2036734693877556e-05, "loss": 2.4194, "step": 3908 }, { "epoch": 15.955102040816326, "grad_norm": 5.527215957641602, "learning_rate": 4.203469387755102e-05, "loss": 3.685, "step": 3909 }, { "epoch": 15.959183673469388, "grad_norm": 6.494544982910156, "learning_rate": 4.203265306122449e-05, "loss": 3.6445, "step": 3910 }, { "epoch": 15.963265306122448, "grad_norm": 7.449856758117676, "learning_rate": 4.203061224489796e-05, "loss": 3.2565, "step": 3911 }, { "epoch": 15.96734693877551, "grad_norm": 8.684380531311035, "learning_rate": 4.202857142857143e-05, "loss": 3.2831, "step": 3912 }, { "epoch": 15.971428571428572, "grad_norm": 8.991203308105469, "learning_rate": 4.2026530612244905e-05, "loss": 2.9253, "step": 3913 }, { "epoch": 15.975510204081633, "grad_norm": 6.173135757446289, "learning_rate": 4.202448979591837e-05, "loss": 3.6009, "step": 3914 }, { "epoch": 15.979591836734693, "grad_norm": 5.586538791656494, "learning_rate": 4.202244897959184e-05, "loss": 3.4806, "step": 3915 }, { "epoch": 15.983673469387755, "grad_norm": 3.3847250938415527, "learning_rate": 4.2020408163265306e-05, "loss": 3.7832, "step": 3916 }, { "epoch": 15.987755102040817, "grad_norm": 6.4341864585876465, "learning_rate": 4.201836734693877e-05, "loss": 3.6855, "step": 3917 }, { "epoch": 15.991836734693878, "grad_norm": 10.658267974853516, "learning_rate": 4.201632653061225e-05, "loss": 2.8858, "step": 3918 }, { "epoch": 15.995918367346938, "grad_norm": 7.934282302856445, "learning_rate": 4.2014285714285714e-05, "loss": 3.4791, "step": 3919 }, { "epoch": 16.0, "grad_norm": 10.644411087036133, "learning_rate": 4.201224489795919e-05, "loss": 2.4895, "step": 3920 }, { "epoch": 16.00408163265306, "grad_norm": 6.024831295013428, "learning_rate": 4.2010204081632655e-05, "loss": 3.5233, "step": 3921 }, { "epoch": 16.008163265306123, "grad_norm": 8.550165176391602, "learning_rate": 4.200816326530612e-05, "loss": 3.1688, "step": 3922 }, { "epoch": 16.012244897959185, "grad_norm": 10.245054244995117, "learning_rate": 4.2006122448979596e-05, "loss": 2.6827, "step": 3923 }, { "epoch": 16.016326530612243, "grad_norm": 7.458313465118408, "learning_rate": 4.2004081632653063e-05, "loss": 3.2518, "step": 3924 }, { "epoch": 16.020408163265305, "grad_norm": 11.184043884277344, "learning_rate": 4.200204081632653e-05, "loss": 2.1904, "step": 3925 }, { "epoch": 16.024489795918367, "grad_norm": 7.115199089050293, "learning_rate": 4.2e-05, "loss": 3.4786, "step": 3926 }, { "epoch": 16.02857142857143, "grad_norm": 6.448376655578613, "learning_rate": 4.199795918367347e-05, "loss": 3.3177, "step": 3927 }, { "epoch": 16.03265306122449, "grad_norm": 8.64349365234375, "learning_rate": 4.1995918367346945e-05, "loss": 3.4388, "step": 3928 }, { "epoch": 16.036734693877552, "grad_norm": 4.376633167266846, "learning_rate": 4.199387755102041e-05, "loss": 3.7891, "step": 3929 }, { "epoch": 16.040816326530614, "grad_norm": 5.558321475982666, "learning_rate": 4.199183673469388e-05, "loss": 3.555, "step": 3930 }, { "epoch": 16.044897959183672, "grad_norm": 14.148963928222656, "learning_rate": 4.198979591836735e-05, "loss": 2.1938, "step": 3931 }, { "epoch": 16.048979591836734, "grad_norm": 7.80745267868042, "learning_rate": 4.198775510204082e-05, "loss": 3.5501, "step": 3932 }, { "epoch": 16.053061224489795, "grad_norm": 8.03702449798584, "learning_rate": 4.198571428571429e-05, "loss": 3.232, "step": 3933 }, { "epoch": 16.057142857142857, "grad_norm": 5.998245716094971, "learning_rate": 4.1983673469387755e-05, "loss": 3.5456, "step": 3934 }, { "epoch": 16.06122448979592, "grad_norm": 9.365114212036133, "learning_rate": 4.198163265306123e-05, "loss": 3.3246, "step": 3935 }, { "epoch": 16.06530612244898, "grad_norm": 7.541627407073975, "learning_rate": 4.1979591836734696e-05, "loss": 3.2631, "step": 3936 }, { "epoch": 16.069387755102042, "grad_norm": 9.779339790344238, "learning_rate": 4.197755102040817e-05, "loss": 3.3003, "step": 3937 }, { "epoch": 16.073469387755104, "grad_norm": 5.059908866882324, "learning_rate": 4.197551020408164e-05, "loss": 3.5658, "step": 3938 }, { "epoch": 16.077551020408162, "grad_norm": 10.173091888427734, "learning_rate": 4.1973469387755104e-05, "loss": 3.205, "step": 3939 }, { "epoch": 16.081632653061224, "grad_norm": 4.217624187469482, "learning_rate": 4.197142857142857e-05, "loss": 3.7506, "step": 3940 }, { "epoch": 16.085714285714285, "grad_norm": 5.574347972869873, "learning_rate": 4.196938775510204e-05, "loss": 3.5169, "step": 3941 }, { "epoch": 16.089795918367347, "grad_norm": 6.373685359954834, "learning_rate": 4.196734693877551e-05, "loss": 3.3411, "step": 3942 }, { "epoch": 16.09387755102041, "grad_norm": 13.445562362670898, "learning_rate": 4.1965306122448986e-05, "loss": 2.4035, "step": 3943 }, { "epoch": 16.09795918367347, "grad_norm": 5.223499298095703, "learning_rate": 4.196326530612245e-05, "loss": 3.6478, "step": 3944 }, { "epoch": 16.102040816326532, "grad_norm": 6.261787414550781, "learning_rate": 4.196122448979592e-05, "loss": 3.5146, "step": 3945 }, { "epoch": 16.10612244897959, "grad_norm": 8.20046615600586, "learning_rate": 4.195918367346939e-05, "loss": 3.2983, "step": 3946 }, { "epoch": 16.110204081632652, "grad_norm": 14.824597358703613, "learning_rate": 4.195714285714286e-05, "loss": 2.1916, "step": 3947 }, { "epoch": 16.114285714285714, "grad_norm": 6.597584247589111, "learning_rate": 4.195510204081633e-05, "loss": 3.4964, "step": 3948 }, { "epoch": 16.118367346938776, "grad_norm": 6.268973350524902, "learning_rate": 4.1953061224489796e-05, "loss": 3.4587, "step": 3949 }, { "epoch": 16.122448979591837, "grad_norm": 9.433354377746582, "learning_rate": 4.195102040816326e-05, "loss": 3.195, "step": 3950 }, { "epoch": 16.1265306122449, "grad_norm": 4.477566242218018, "learning_rate": 4.194897959183674e-05, "loss": 3.774, "step": 3951 }, { "epoch": 16.13061224489796, "grad_norm": 7.356500625610352, "learning_rate": 4.194693877551021e-05, "loss": 3.1055, "step": 3952 }, { "epoch": 16.13469387755102, "grad_norm": 9.6848783493042, "learning_rate": 4.194489795918368e-05, "loss": 2.8964, "step": 3953 }, { "epoch": 16.13877551020408, "grad_norm": 8.881000518798828, "learning_rate": 4.1942857142857145e-05, "loss": 3.1081, "step": 3954 }, { "epoch": 16.142857142857142, "grad_norm": 8.629100799560547, "learning_rate": 4.194081632653061e-05, "loss": 3.3545, "step": 3955 }, { "epoch": 16.146938775510204, "grad_norm": 15.108983039855957, "learning_rate": 4.193877551020408e-05, "loss": 2.7183, "step": 3956 }, { "epoch": 16.151020408163266, "grad_norm": 6.447962284088135, "learning_rate": 4.193673469387755e-05, "loss": 3.4248, "step": 3957 }, { "epoch": 16.155102040816328, "grad_norm": 11.638772010803223, "learning_rate": 4.193469387755102e-05, "loss": 3.1191, "step": 3958 }, { "epoch": 16.15918367346939, "grad_norm": 5.212494850158691, "learning_rate": 4.1932653061224494e-05, "loss": 3.6058, "step": 3959 }, { "epoch": 16.163265306122447, "grad_norm": 13.244291305541992, "learning_rate": 4.193061224489796e-05, "loss": 2.7605, "step": 3960 }, { "epoch": 16.16734693877551, "grad_norm": 35.000064849853516, "learning_rate": 4.192857142857143e-05, "loss": 1.1213, "step": 3961 }, { "epoch": 16.17142857142857, "grad_norm": 10.862931251525879, "learning_rate": 4.19265306122449e-05, "loss": 3.2403, "step": 3962 }, { "epoch": 16.175510204081633, "grad_norm": 7.533414840698242, "learning_rate": 4.192448979591837e-05, "loss": 3.2781, "step": 3963 }, { "epoch": 16.179591836734694, "grad_norm": 6.358275890350342, "learning_rate": 4.1922448979591837e-05, "loss": 3.3206, "step": 3964 }, { "epoch": 16.183673469387756, "grad_norm": 8.254201889038086, "learning_rate": 4.1920408163265304e-05, "loss": 2.9413, "step": 3965 }, { "epoch": 16.187755102040818, "grad_norm": 4.685081958770752, "learning_rate": 4.191836734693878e-05, "loss": 3.3823, "step": 3966 }, { "epoch": 16.191836734693876, "grad_norm": 11.593937873840332, "learning_rate": 4.191632653061225e-05, "loss": 2.4041, "step": 3967 }, { "epoch": 16.195918367346938, "grad_norm": 8.782296180725098, "learning_rate": 4.191428571428572e-05, "loss": 2.8728, "step": 3968 }, { "epoch": 16.2, "grad_norm": 4.755122184753418, "learning_rate": 4.1912244897959186e-05, "loss": 3.7263, "step": 3969 }, { "epoch": 16.20408163265306, "grad_norm": 7.902053356170654, "learning_rate": 4.191020408163265e-05, "loss": 3.5725, "step": 3970 }, { "epoch": 16.208163265306123, "grad_norm": 10.345752716064453, "learning_rate": 4.190816326530612e-05, "loss": 2.81, "step": 3971 }, { "epoch": 16.212244897959184, "grad_norm": 7.928689479827881, "learning_rate": 4.1906122448979594e-05, "loss": 3.1913, "step": 3972 }, { "epoch": 16.216326530612246, "grad_norm": 5.832350254058838, "learning_rate": 4.190408163265306e-05, "loss": 3.6015, "step": 3973 }, { "epoch": 16.220408163265308, "grad_norm": 5.673258304595947, "learning_rate": 4.1902040816326535e-05, "loss": 3.4086, "step": 3974 }, { "epoch": 16.224489795918366, "grad_norm": 6.094400405883789, "learning_rate": 4.19e-05, "loss": 3.6373, "step": 3975 }, { "epoch": 16.228571428571428, "grad_norm": 7.914774417877197, "learning_rate": 4.1897959183673476e-05, "loss": 3.1369, "step": 3976 }, { "epoch": 16.23265306122449, "grad_norm": 7.427877426147461, "learning_rate": 4.189591836734694e-05, "loss": 3.6003, "step": 3977 }, { "epoch": 16.23673469387755, "grad_norm": 4.746922016143799, "learning_rate": 4.189387755102041e-05, "loss": 3.7937, "step": 3978 }, { "epoch": 16.240816326530613, "grad_norm": 9.395139694213867, "learning_rate": 4.189183673469388e-05, "loss": 3.0698, "step": 3979 }, { "epoch": 16.244897959183675, "grad_norm": 4.914482116699219, "learning_rate": 4.1889795918367344e-05, "loss": 3.5735, "step": 3980 }, { "epoch": 16.248979591836736, "grad_norm": 8.195609092712402, "learning_rate": 4.188775510204082e-05, "loss": 3.4271, "step": 3981 }, { "epoch": 16.253061224489795, "grad_norm": 11.966465950012207, "learning_rate": 4.188571428571429e-05, "loss": 2.8336, "step": 3982 }, { "epoch": 16.257142857142856, "grad_norm": 8.685897827148438, "learning_rate": 4.188367346938776e-05, "loss": 3.4804, "step": 3983 }, { "epoch": 16.261224489795918, "grad_norm": 6.437585353851318, "learning_rate": 4.1881632653061226e-05, "loss": 3.3417, "step": 3984 }, { "epoch": 16.26530612244898, "grad_norm": 7.2671918869018555, "learning_rate": 4.1879591836734694e-05, "loss": 3.3732, "step": 3985 }, { "epoch": 16.26938775510204, "grad_norm": 5.4529194831848145, "learning_rate": 4.187755102040817e-05, "loss": 3.7187, "step": 3986 }, { "epoch": 16.273469387755103, "grad_norm": 4.945472717285156, "learning_rate": 4.1875510204081635e-05, "loss": 3.9526, "step": 3987 }, { "epoch": 16.277551020408165, "grad_norm": 6.308182716369629, "learning_rate": 4.18734693877551e-05, "loss": 3.6935, "step": 3988 }, { "epoch": 16.281632653061223, "grad_norm": 12.613551139831543, "learning_rate": 4.1871428571428576e-05, "loss": 2.3688, "step": 3989 }, { "epoch": 16.285714285714285, "grad_norm": 10.12554931640625, "learning_rate": 4.186938775510204e-05, "loss": 2.8648, "step": 3990 }, { "epoch": 16.289795918367346, "grad_norm": 5.553479194641113, "learning_rate": 4.186734693877552e-05, "loss": 3.7609, "step": 3991 }, { "epoch": 16.293877551020408, "grad_norm": 9.027463912963867, "learning_rate": 4.1865306122448984e-05, "loss": 2.9748, "step": 3992 }, { "epoch": 16.29795918367347, "grad_norm": 3.9271020889282227, "learning_rate": 4.186326530612245e-05, "loss": 4.0006, "step": 3993 }, { "epoch": 16.30204081632653, "grad_norm": 4.175640106201172, "learning_rate": 4.186122448979592e-05, "loss": 3.721, "step": 3994 }, { "epoch": 16.306122448979593, "grad_norm": 10.432573318481445, "learning_rate": 4.1859183673469385e-05, "loss": 3.0211, "step": 3995 }, { "epoch": 16.31020408163265, "grad_norm": 8.003342628479004, "learning_rate": 4.185714285714286e-05, "loss": 3.2252, "step": 3996 }, { "epoch": 16.314285714285713, "grad_norm": 9.9658784866333, "learning_rate": 4.185510204081633e-05, "loss": 3.2206, "step": 3997 }, { "epoch": 16.318367346938775, "grad_norm": 4.790897369384766, "learning_rate": 4.18530612244898e-05, "loss": 3.5195, "step": 3998 }, { "epoch": 16.322448979591837, "grad_norm": 12.246454238891602, "learning_rate": 4.185102040816327e-05, "loss": 2.7625, "step": 3999 }, { "epoch": 16.3265306122449, "grad_norm": 6.127351760864258, "learning_rate": 4.1848979591836734e-05, "loss": 3.5215, "step": 4000 }, { "epoch": 16.33061224489796, "grad_norm": 9.7698974609375, "learning_rate": 4.184693877551021e-05, "loss": 2.9566, "step": 4001 }, { "epoch": 16.33469387755102, "grad_norm": 6.821934700012207, "learning_rate": 4.1844897959183675e-05, "loss": 3.6088, "step": 4002 }, { "epoch": 16.33877551020408, "grad_norm": 5.537286281585693, "learning_rate": 4.184285714285714e-05, "loss": 3.7526, "step": 4003 }, { "epoch": 16.34285714285714, "grad_norm": 5.639070987701416, "learning_rate": 4.184081632653061e-05, "loss": 3.3659, "step": 4004 }, { "epoch": 16.346938775510203, "grad_norm": 15.66285514831543, "learning_rate": 4.1838775510204084e-05, "loss": 2.9545, "step": 4005 }, { "epoch": 16.351020408163265, "grad_norm": 7.498091220855713, "learning_rate": 4.183673469387756e-05, "loss": 3.1261, "step": 4006 }, { "epoch": 16.355102040816327, "grad_norm": 10.752720832824707, "learning_rate": 4.1834693877551025e-05, "loss": 2.5276, "step": 4007 }, { "epoch": 16.35918367346939, "grad_norm": 7.47431755065918, "learning_rate": 4.183265306122449e-05, "loss": 3.3588, "step": 4008 }, { "epoch": 16.36326530612245, "grad_norm": 13.149115562438965, "learning_rate": 4.183061224489796e-05, "loss": 2.3539, "step": 4009 }, { "epoch": 16.367346938775512, "grad_norm": 5.201915740966797, "learning_rate": 4.1828571428571426e-05, "loss": 3.8013, "step": 4010 }, { "epoch": 16.37142857142857, "grad_norm": 3.766279697418213, "learning_rate": 4.18265306122449e-05, "loss": 4.0208, "step": 4011 }, { "epoch": 16.375510204081632, "grad_norm": 11.594082832336426, "learning_rate": 4.182448979591837e-05, "loss": 2.3497, "step": 4012 }, { "epoch": 16.379591836734694, "grad_norm": 6.603479862213135, "learning_rate": 4.182244897959184e-05, "loss": 3.4885, "step": 4013 }, { "epoch": 16.383673469387755, "grad_norm": 6.9477715492248535, "learning_rate": 4.182040816326531e-05, "loss": 3.6107, "step": 4014 }, { "epoch": 16.387755102040817, "grad_norm": 4.198768138885498, "learning_rate": 4.1818367346938775e-05, "loss": 3.9102, "step": 4015 }, { "epoch": 16.39183673469388, "grad_norm": 9.50190258026123, "learning_rate": 4.181632653061225e-05, "loss": 2.3704, "step": 4016 }, { "epoch": 16.39591836734694, "grad_norm": 8.740411758422852, "learning_rate": 4.1814285714285716e-05, "loss": 2.9638, "step": 4017 }, { "epoch": 16.4, "grad_norm": 7.149326801300049, "learning_rate": 4.181224489795918e-05, "loss": 3.2886, "step": 4018 }, { "epoch": 16.40408163265306, "grad_norm": 8.169761657714844, "learning_rate": 4.181020408163265e-05, "loss": 3.3047, "step": 4019 }, { "epoch": 16.408163265306122, "grad_norm": 6.117996692657471, "learning_rate": 4.1808163265306124e-05, "loss": 3.5898, "step": 4020 }, { "epoch": 16.412244897959184, "grad_norm": 13.38272762298584, "learning_rate": 4.18061224489796e-05, "loss": 3.4316, "step": 4021 }, { "epoch": 16.416326530612245, "grad_norm": 7.871743679046631, "learning_rate": 4.1804081632653065e-05, "loss": 3.3985, "step": 4022 }, { "epoch": 16.420408163265307, "grad_norm": 4.555209636688232, "learning_rate": 4.180204081632653e-05, "loss": 3.7248, "step": 4023 }, { "epoch": 16.42448979591837, "grad_norm": 10.520393371582031, "learning_rate": 4.18e-05, "loss": 2.3132, "step": 4024 }, { "epoch": 16.428571428571427, "grad_norm": 6.571114540100098, "learning_rate": 4.1797959183673473e-05, "loss": 3.3541, "step": 4025 }, { "epoch": 16.43265306122449, "grad_norm": 6.803560256958008, "learning_rate": 4.179591836734694e-05, "loss": 3.3108, "step": 4026 }, { "epoch": 16.43673469387755, "grad_norm": 7.75968074798584, "learning_rate": 4.179387755102041e-05, "loss": 2.7365, "step": 4027 }, { "epoch": 16.440816326530612, "grad_norm": 8.32282829284668, "learning_rate": 4.179183673469388e-05, "loss": 3.3099, "step": 4028 }, { "epoch": 16.444897959183674, "grad_norm": 6.752836227416992, "learning_rate": 4.178979591836735e-05, "loss": 3.3144, "step": 4029 }, { "epoch": 16.448979591836736, "grad_norm": 4.1318888664245605, "learning_rate": 4.178775510204082e-05, "loss": 3.9353, "step": 4030 }, { "epoch": 16.453061224489797, "grad_norm": 6.050978183746338, "learning_rate": 4.178571428571429e-05, "loss": 3.5014, "step": 4031 }, { "epoch": 16.457142857142856, "grad_norm": 6.703257083892822, "learning_rate": 4.178367346938776e-05, "loss": 3.4945, "step": 4032 }, { "epoch": 16.461224489795917, "grad_norm": 6.685251712799072, "learning_rate": 4.1781632653061224e-05, "loss": 3.4538, "step": 4033 }, { "epoch": 16.46530612244898, "grad_norm": 6.5300211906433105, "learning_rate": 4.177959183673469e-05, "loss": 3.476, "step": 4034 }, { "epoch": 16.46938775510204, "grad_norm": 7.882908821105957, "learning_rate": 4.1777551020408165e-05, "loss": 3.3915, "step": 4035 }, { "epoch": 16.473469387755102, "grad_norm": 5.467289447784424, "learning_rate": 4.177551020408164e-05, "loss": 3.8053, "step": 4036 }, { "epoch": 16.477551020408164, "grad_norm": 8.798498153686523, "learning_rate": 4.1773469387755106e-05, "loss": 2.9888, "step": 4037 }, { "epoch": 16.481632653061226, "grad_norm": 6.651452541351318, "learning_rate": 4.177142857142857e-05, "loss": 3.6005, "step": 4038 }, { "epoch": 16.485714285714284, "grad_norm": 4.641428470611572, "learning_rate": 4.176938775510204e-05, "loss": 3.6046, "step": 4039 }, { "epoch": 16.489795918367346, "grad_norm": 7.414203643798828, "learning_rate": 4.1767346938775514e-05, "loss": 3.5183, "step": 4040 }, { "epoch": 16.493877551020407, "grad_norm": 7.832083225250244, "learning_rate": 4.176530612244898e-05, "loss": 3.4071, "step": 4041 }, { "epoch": 16.49795918367347, "grad_norm": 7.086422920227051, "learning_rate": 4.176326530612245e-05, "loss": 3.3532, "step": 4042 }, { "epoch": 16.50204081632653, "grad_norm": 8.46577262878418, "learning_rate": 4.176122448979592e-05, "loss": 3.5874, "step": 4043 }, { "epoch": 16.506122448979593, "grad_norm": 4.738289833068848, "learning_rate": 4.175918367346939e-05, "loss": 3.6375, "step": 4044 }, { "epoch": 16.510204081632654, "grad_norm": 9.822088241577148, "learning_rate": 4.1757142857142863e-05, "loss": 3.3498, "step": 4045 }, { "epoch": 16.514285714285712, "grad_norm": 8.177180290222168, "learning_rate": 4.175510204081633e-05, "loss": 3.0693, "step": 4046 }, { "epoch": 16.518367346938774, "grad_norm": 8.804930686950684, "learning_rate": 4.17530612244898e-05, "loss": 3.0507, "step": 4047 }, { "epoch": 16.522448979591836, "grad_norm": 9.468120574951172, "learning_rate": 4.1751020408163265e-05, "loss": 2.7963, "step": 4048 }, { "epoch": 16.526530612244898, "grad_norm": 10.449769020080566, "learning_rate": 4.174897959183673e-05, "loss": 2.7082, "step": 4049 }, { "epoch": 16.53061224489796, "grad_norm": 5.1058478355407715, "learning_rate": 4.1746938775510206e-05, "loss": 3.6512, "step": 4050 }, { "epoch": 16.53469387755102, "grad_norm": 6.267096519470215, "learning_rate": 4.174489795918368e-05, "loss": 3.5838, "step": 4051 }, { "epoch": 16.538775510204083, "grad_norm": 5.697884559631348, "learning_rate": 4.174285714285715e-05, "loss": 3.3954, "step": 4052 }, { "epoch": 16.542857142857144, "grad_norm": 4.844966411590576, "learning_rate": 4.1740816326530614e-05, "loss": 3.7492, "step": 4053 }, { "epoch": 16.546938775510203, "grad_norm": 9.353960037231445, "learning_rate": 4.173877551020408e-05, "loss": 3.0851, "step": 4054 }, { "epoch": 16.551020408163264, "grad_norm": 5.604300498962402, "learning_rate": 4.1736734693877555e-05, "loss": 3.6903, "step": 4055 }, { "epoch": 16.555102040816326, "grad_norm": 6.2815070152282715, "learning_rate": 4.173469387755102e-05, "loss": 3.4443, "step": 4056 }, { "epoch": 16.559183673469388, "grad_norm": 17.194252014160156, "learning_rate": 4.173265306122449e-05, "loss": 2.0169, "step": 4057 }, { "epoch": 16.56326530612245, "grad_norm": 5.367269515991211, "learning_rate": 4.1730612244897956e-05, "loss": 3.4256, "step": 4058 }, { "epoch": 16.56734693877551, "grad_norm": 5.950901031494141, "learning_rate": 4.172857142857143e-05, "loss": 3.7877, "step": 4059 }, { "epoch": 16.571428571428573, "grad_norm": 6.464088439941406, "learning_rate": 4.1726530612244904e-05, "loss": 3.6744, "step": 4060 }, { "epoch": 16.57551020408163, "grad_norm": 6.722716331481934, "learning_rate": 4.172448979591837e-05, "loss": 3.3583, "step": 4061 }, { "epoch": 16.579591836734693, "grad_norm": 4.746799945831299, "learning_rate": 4.172244897959184e-05, "loss": 3.5596, "step": 4062 }, { "epoch": 16.583673469387755, "grad_norm": 9.702999114990234, "learning_rate": 4.1720408163265306e-05, "loss": 3.209, "step": 4063 }, { "epoch": 16.587755102040816, "grad_norm": 9.03414249420166, "learning_rate": 4.171836734693877e-05, "loss": 3.0546, "step": 4064 }, { "epoch": 16.591836734693878, "grad_norm": 5.661082744598389, "learning_rate": 4.1716326530612247e-05, "loss": 3.6444, "step": 4065 }, { "epoch": 16.59591836734694, "grad_norm": 10.083551406860352, "learning_rate": 4.1714285714285714e-05, "loss": 2.752, "step": 4066 }, { "epoch": 16.6, "grad_norm": 5.696998119354248, "learning_rate": 4.171224489795919e-05, "loss": 3.8151, "step": 4067 }, { "epoch": 16.60408163265306, "grad_norm": 5.982105255126953, "learning_rate": 4.1710204081632655e-05, "loss": 3.6434, "step": 4068 }, { "epoch": 16.60816326530612, "grad_norm": 5.450875759124756, "learning_rate": 4.170816326530613e-05, "loss": 3.6523, "step": 4069 }, { "epoch": 16.612244897959183, "grad_norm": 9.50920581817627, "learning_rate": 4.1706122448979596e-05, "loss": 3.0254, "step": 4070 }, { "epoch": 16.616326530612245, "grad_norm": 6.472507476806641, "learning_rate": 4.170408163265306e-05, "loss": 3.7167, "step": 4071 }, { "epoch": 16.620408163265306, "grad_norm": 7.462554931640625, "learning_rate": 4.170204081632653e-05, "loss": 3.5338, "step": 4072 }, { "epoch": 16.624489795918368, "grad_norm": 7.213229656219482, "learning_rate": 4.17e-05, "loss": 3.3895, "step": 4073 }, { "epoch": 16.62857142857143, "grad_norm": 15.132692337036133, "learning_rate": 4.169795918367347e-05, "loss": 1.9812, "step": 4074 }, { "epoch": 16.632653061224488, "grad_norm": 9.12622356414795, "learning_rate": 4.1695918367346945e-05, "loss": 3.4024, "step": 4075 }, { "epoch": 16.63673469387755, "grad_norm": 10.455944061279297, "learning_rate": 4.169387755102041e-05, "loss": 2.9613, "step": 4076 }, { "epoch": 16.64081632653061, "grad_norm": 4.678838729858398, "learning_rate": 4.169183673469388e-05, "loss": 3.8871, "step": 4077 }, { "epoch": 16.644897959183673, "grad_norm": 11.224335670471191, "learning_rate": 4.1689795918367346e-05, "loss": 2.7963, "step": 4078 }, { "epoch": 16.648979591836735, "grad_norm": 3.2575058937072754, "learning_rate": 4.168775510204082e-05, "loss": 3.9624, "step": 4079 }, { "epoch": 16.653061224489797, "grad_norm": 4.849247932434082, "learning_rate": 4.168571428571429e-05, "loss": 3.5743, "step": 4080 }, { "epoch": 16.65714285714286, "grad_norm": 7.1298322677612305, "learning_rate": 4.1683673469387754e-05, "loss": 3.6048, "step": 4081 }, { "epoch": 16.66122448979592, "grad_norm": 6.147975444793701, "learning_rate": 4.168163265306123e-05, "loss": 3.5078, "step": 4082 }, { "epoch": 16.66530612244898, "grad_norm": 8.67895793914795, "learning_rate": 4.1679591836734696e-05, "loss": 3.0158, "step": 4083 }, { "epoch": 16.66938775510204, "grad_norm": 8.964072227478027, "learning_rate": 4.167755102040817e-05, "loss": 3.2064, "step": 4084 }, { "epoch": 16.6734693877551, "grad_norm": 5.57827091217041, "learning_rate": 4.1675510204081637e-05, "loss": 3.6187, "step": 4085 }, { "epoch": 16.677551020408163, "grad_norm": 10.049473762512207, "learning_rate": 4.1673469387755104e-05, "loss": 3.0492, "step": 4086 }, { "epoch": 16.681632653061225, "grad_norm": 6.400760173797607, "learning_rate": 4.167142857142857e-05, "loss": 3.3256, "step": 4087 }, { "epoch": 16.685714285714287, "grad_norm": 5.671912670135498, "learning_rate": 4.166938775510204e-05, "loss": 3.593, "step": 4088 }, { "epoch": 16.68979591836735, "grad_norm": 11.750972747802734, "learning_rate": 4.166734693877551e-05, "loss": 2.9361, "step": 4089 }, { "epoch": 16.693877551020407, "grad_norm": 8.124093055725098, "learning_rate": 4.1665306122448986e-05, "loss": 3.4683, "step": 4090 }, { "epoch": 16.69795918367347, "grad_norm": 12.937429428100586, "learning_rate": 4.166326530612245e-05, "loss": 2.0388, "step": 4091 }, { "epoch": 16.70204081632653, "grad_norm": 9.810417175292969, "learning_rate": 4.166122448979592e-05, "loss": 3.0738, "step": 4092 }, { "epoch": 16.706122448979592, "grad_norm": 7.120839595794678, "learning_rate": 4.165918367346939e-05, "loss": 3.377, "step": 4093 }, { "epoch": 16.710204081632654, "grad_norm": 4.560178279876709, "learning_rate": 4.165714285714286e-05, "loss": 3.9623, "step": 4094 }, { "epoch": 16.714285714285715, "grad_norm": 13.691122055053711, "learning_rate": 4.165510204081633e-05, "loss": 3.285, "step": 4095 }, { "epoch": 16.718367346938777, "grad_norm": 6.013572692871094, "learning_rate": 4.1653061224489795e-05, "loss": 3.4448, "step": 4096 }, { "epoch": 16.722448979591835, "grad_norm": 6.100953102111816, "learning_rate": 4.165102040816327e-05, "loss": 3.5565, "step": 4097 }, { "epoch": 16.726530612244897, "grad_norm": 10.492430686950684, "learning_rate": 4.1648979591836736e-05, "loss": 3.0723, "step": 4098 }, { "epoch": 16.73061224489796, "grad_norm": 6.437997341156006, "learning_rate": 4.164693877551021e-05, "loss": 3.4759, "step": 4099 }, { "epoch": 16.73469387755102, "grad_norm": 5.2599005699157715, "learning_rate": 4.164489795918368e-05, "loss": 3.5726, "step": 4100 }, { "epoch": 16.738775510204082, "grad_norm": 6.2099103927612305, "learning_rate": 4.1642857142857144e-05, "loss": 3.6333, "step": 4101 }, { "epoch": 16.742857142857144, "grad_norm": 8.020271301269531, "learning_rate": 4.164081632653061e-05, "loss": 3.1486, "step": 4102 }, { "epoch": 16.746938775510205, "grad_norm": 5.634275913238525, "learning_rate": 4.163877551020408e-05, "loss": 3.5597, "step": 4103 }, { "epoch": 16.751020408163264, "grad_norm": 4.12169075012207, "learning_rate": 4.163673469387755e-05, "loss": 3.797, "step": 4104 }, { "epoch": 16.755102040816325, "grad_norm": 5.1881422996521, "learning_rate": 4.1634693877551026e-05, "loss": 3.5836, "step": 4105 }, { "epoch": 16.759183673469387, "grad_norm": 3.5543746948242188, "learning_rate": 4.1632653061224494e-05, "loss": 3.7648, "step": 4106 }, { "epoch": 16.76326530612245, "grad_norm": 6.551626205444336, "learning_rate": 4.163061224489796e-05, "loss": 3.5362, "step": 4107 }, { "epoch": 16.76734693877551, "grad_norm": 13.565595626831055, "learning_rate": 4.162857142857143e-05, "loss": 2.79, "step": 4108 }, { "epoch": 16.771428571428572, "grad_norm": 6.738400936126709, "learning_rate": 4.16265306122449e-05, "loss": 3.2808, "step": 4109 }, { "epoch": 16.775510204081634, "grad_norm": 5.7737812995910645, "learning_rate": 4.162448979591837e-05, "loss": 3.3112, "step": 4110 }, { "epoch": 16.779591836734696, "grad_norm": 9.702133178710938, "learning_rate": 4.1622448979591836e-05, "loss": 2.8076, "step": 4111 }, { "epoch": 16.783673469387754, "grad_norm": 5.530475616455078, "learning_rate": 4.16204081632653e-05, "loss": 3.6414, "step": 4112 }, { "epoch": 16.787755102040816, "grad_norm": 9.254486083984375, "learning_rate": 4.161836734693878e-05, "loss": 3.411, "step": 4113 }, { "epoch": 16.791836734693877, "grad_norm": 4.838490009307861, "learning_rate": 4.161632653061225e-05, "loss": 3.8137, "step": 4114 }, { "epoch": 16.79591836734694, "grad_norm": 13.853372573852539, "learning_rate": 4.161428571428572e-05, "loss": 2.505, "step": 4115 }, { "epoch": 16.8, "grad_norm": 7.684218406677246, "learning_rate": 4.1612244897959185e-05, "loss": 3.2282, "step": 4116 }, { "epoch": 16.804081632653062, "grad_norm": 9.634309768676758, "learning_rate": 4.161020408163265e-05, "loss": 3.1568, "step": 4117 }, { "epoch": 16.808163265306124, "grad_norm": 5.6069722175598145, "learning_rate": 4.1608163265306126e-05, "loss": 3.7243, "step": 4118 }, { "epoch": 16.812244897959182, "grad_norm": 15.18747329711914, "learning_rate": 4.160612244897959e-05, "loss": 2.26, "step": 4119 }, { "epoch": 16.816326530612244, "grad_norm": 6.4060564041137695, "learning_rate": 4.160408163265306e-05, "loss": 3.5919, "step": 4120 }, { "epoch": 16.820408163265306, "grad_norm": 11.263026237487793, "learning_rate": 4.1602040816326534e-05, "loss": 3.2281, "step": 4121 }, { "epoch": 16.824489795918367, "grad_norm": 7.451514720916748, "learning_rate": 4.16e-05, "loss": 3.0066, "step": 4122 }, { "epoch": 16.82857142857143, "grad_norm": 4.0540266036987305, "learning_rate": 4.1597959183673475e-05, "loss": 4.1568, "step": 4123 }, { "epoch": 16.83265306122449, "grad_norm": 9.625056266784668, "learning_rate": 4.159591836734694e-05, "loss": 2.8264, "step": 4124 }, { "epoch": 16.836734693877553, "grad_norm": 10.251328468322754, "learning_rate": 4.159387755102041e-05, "loss": 2.9148, "step": 4125 }, { "epoch": 16.84081632653061, "grad_norm": 5.216225624084473, "learning_rate": 4.159183673469388e-05, "loss": 3.4939, "step": 4126 }, { "epoch": 16.844897959183672, "grad_norm": 7.723496913909912, "learning_rate": 4.1589795918367344e-05, "loss": 3.2979, "step": 4127 }, { "epoch": 16.848979591836734, "grad_norm": 5.316748142242432, "learning_rate": 4.158775510204082e-05, "loss": 3.6068, "step": 4128 }, { "epoch": 16.853061224489796, "grad_norm": 5.813505172729492, "learning_rate": 4.158571428571429e-05, "loss": 3.7124, "step": 4129 }, { "epoch": 16.857142857142858, "grad_norm": 4.436227798461914, "learning_rate": 4.158367346938776e-05, "loss": 3.724, "step": 4130 }, { "epoch": 16.86122448979592, "grad_norm": 7.528432846069336, "learning_rate": 4.1581632653061226e-05, "loss": 2.8156, "step": 4131 }, { "epoch": 16.86530612244898, "grad_norm": 5.662804126739502, "learning_rate": 4.157959183673469e-05, "loss": 3.8232, "step": 4132 }, { "epoch": 16.86938775510204, "grad_norm": 11.435988426208496, "learning_rate": 4.157755102040817e-05, "loss": 2.8959, "step": 4133 }, { "epoch": 16.8734693877551, "grad_norm": 10.724874496459961, "learning_rate": 4.1575510204081634e-05, "loss": 3.1206, "step": 4134 }, { "epoch": 16.877551020408163, "grad_norm": 11.539169311523438, "learning_rate": 4.15734693877551e-05, "loss": 2.8863, "step": 4135 }, { "epoch": 16.881632653061224, "grad_norm": 7.604029178619385, "learning_rate": 4.1571428571428575e-05, "loss": 3.3303, "step": 4136 }, { "epoch": 16.885714285714286, "grad_norm": 7.121152400970459, "learning_rate": 4.156938775510204e-05, "loss": 3.4732, "step": 4137 }, { "epoch": 16.889795918367348, "grad_norm": 8.162343978881836, "learning_rate": 4.1567346938775516e-05, "loss": 3.2825, "step": 4138 }, { "epoch": 16.89387755102041, "grad_norm": 8.798535346984863, "learning_rate": 4.156530612244898e-05, "loss": 3.2628, "step": 4139 }, { "epoch": 16.897959183673468, "grad_norm": 7.2282209396362305, "learning_rate": 4.156326530612245e-05, "loss": 3.511, "step": 4140 }, { "epoch": 16.90204081632653, "grad_norm": 11.374988555908203, "learning_rate": 4.156122448979592e-05, "loss": 2.5044, "step": 4141 }, { "epoch": 16.90612244897959, "grad_norm": 6.847060203552246, "learning_rate": 4.1559183673469385e-05, "loss": 3.3159, "step": 4142 }, { "epoch": 16.910204081632653, "grad_norm": 16.747652053833008, "learning_rate": 4.155714285714286e-05, "loss": 2.2477, "step": 4143 }, { "epoch": 16.914285714285715, "grad_norm": 5.180291652679443, "learning_rate": 4.155510204081633e-05, "loss": 3.7821, "step": 4144 }, { "epoch": 16.918367346938776, "grad_norm": 6.761234760284424, "learning_rate": 4.15530612244898e-05, "loss": 3.5689, "step": 4145 }, { "epoch": 16.922448979591838, "grad_norm": 7.109291076660156, "learning_rate": 4.155102040816327e-05, "loss": 3.5478, "step": 4146 }, { "epoch": 16.926530612244896, "grad_norm": 6.404953479766846, "learning_rate": 4.1548979591836734e-05, "loss": 3.4529, "step": 4147 }, { "epoch": 16.930612244897958, "grad_norm": 5.937655925750732, "learning_rate": 4.154693877551021e-05, "loss": 3.7169, "step": 4148 }, { "epoch": 16.93469387755102, "grad_norm": 4.761525630950928, "learning_rate": 4.1544897959183675e-05, "loss": 3.9257, "step": 4149 }, { "epoch": 16.93877551020408, "grad_norm": 6.562135696411133, "learning_rate": 4.154285714285714e-05, "loss": 3.3251, "step": 4150 }, { "epoch": 16.942857142857143, "grad_norm": 9.341806411743164, "learning_rate": 4.1540816326530616e-05, "loss": 2.7127, "step": 4151 }, { "epoch": 16.946938775510205, "grad_norm": 6.940940856933594, "learning_rate": 4.153877551020408e-05, "loss": 3.7284, "step": 4152 }, { "epoch": 16.951020408163266, "grad_norm": 8.125648498535156, "learning_rate": 4.153673469387756e-05, "loss": 3.1466, "step": 4153 }, { "epoch": 16.955102040816328, "grad_norm": 12.169755935668945, "learning_rate": 4.1534693877551024e-05, "loss": 2.9167, "step": 4154 }, { "epoch": 16.959183673469386, "grad_norm": 4.7314453125, "learning_rate": 4.153265306122449e-05, "loss": 3.8651, "step": 4155 }, { "epoch": 16.963265306122448, "grad_norm": 5.708140850067139, "learning_rate": 4.153061224489796e-05, "loss": 3.5469, "step": 4156 }, { "epoch": 16.96734693877551, "grad_norm": 6.212392330169678, "learning_rate": 4.1528571428571425e-05, "loss": 3.2463, "step": 4157 }, { "epoch": 16.97142857142857, "grad_norm": 9.324410438537598, "learning_rate": 4.15265306122449e-05, "loss": 2.7837, "step": 4158 }, { "epoch": 16.975510204081633, "grad_norm": 7.808959484100342, "learning_rate": 4.152448979591837e-05, "loss": 3.5315, "step": 4159 }, { "epoch": 16.979591836734695, "grad_norm": 5.1286091804504395, "learning_rate": 4.152244897959184e-05, "loss": 3.56, "step": 4160 }, { "epoch": 16.983673469387757, "grad_norm": 10.87668514251709, "learning_rate": 4.152040816326531e-05, "loss": 2.7692, "step": 4161 }, { "epoch": 16.987755102040815, "grad_norm": 5.955293655395508, "learning_rate": 4.151836734693878e-05, "loss": 3.5967, "step": 4162 }, { "epoch": 16.991836734693877, "grad_norm": 11.437288284301758, "learning_rate": 4.151632653061225e-05, "loss": 2.8145, "step": 4163 }, { "epoch": 16.99591836734694, "grad_norm": 9.56639575958252, "learning_rate": 4.1514285714285716e-05, "loss": 3.0905, "step": 4164 }, { "epoch": 17.0, "grad_norm": 11.348514556884766, "learning_rate": 4.151224489795918e-05, "loss": 3.3555, "step": 4165 }, { "epoch": 17.00408163265306, "grad_norm": 5.262394428253174, "learning_rate": 4.151020408163265e-05, "loss": 3.7521, "step": 4166 }, { "epoch": 17.008163265306123, "grad_norm": 5.624878406524658, "learning_rate": 4.1508163265306124e-05, "loss": 3.4902, "step": 4167 }, { "epoch": 17.012244897959185, "grad_norm": 6.98193883895874, "learning_rate": 4.15061224489796e-05, "loss": 3.2598, "step": 4168 }, { "epoch": 17.016326530612243, "grad_norm": 10.363992691040039, "learning_rate": 4.1504081632653065e-05, "loss": 2.8513, "step": 4169 }, { "epoch": 17.020408163265305, "grad_norm": 6.905411720275879, "learning_rate": 4.150204081632653e-05, "loss": 3.3079, "step": 4170 }, { "epoch": 17.024489795918367, "grad_norm": 5.575463771820068, "learning_rate": 4.15e-05, "loss": 3.4616, "step": 4171 }, { "epoch": 17.02857142857143, "grad_norm": 8.37132740020752, "learning_rate": 4.149795918367347e-05, "loss": 3.1523, "step": 4172 }, { "epoch": 17.03265306122449, "grad_norm": 5.4872002601623535, "learning_rate": 4.149591836734694e-05, "loss": 3.5461, "step": 4173 }, { "epoch": 17.036734693877552, "grad_norm": 3.8639748096466064, "learning_rate": 4.149387755102041e-05, "loss": 3.7162, "step": 4174 }, { "epoch": 17.040816326530614, "grad_norm": 5.476118564605713, "learning_rate": 4.149183673469388e-05, "loss": 3.7284, "step": 4175 }, { "epoch": 17.044897959183672, "grad_norm": 6.8882246017456055, "learning_rate": 4.148979591836735e-05, "loss": 3.4239, "step": 4176 }, { "epoch": 17.048979591836734, "grad_norm": 10.106562614440918, "learning_rate": 4.148775510204082e-05, "loss": 3.1179, "step": 4177 }, { "epoch": 17.053061224489795, "grad_norm": 11.24638557434082, "learning_rate": 4.148571428571429e-05, "loss": 2.9392, "step": 4178 }, { "epoch": 17.057142857142857, "grad_norm": 15.929670333862305, "learning_rate": 4.1483673469387756e-05, "loss": 2.2207, "step": 4179 }, { "epoch": 17.06122448979592, "grad_norm": 4.524440288543701, "learning_rate": 4.1481632653061224e-05, "loss": 3.7573, "step": 4180 }, { "epoch": 17.06530612244898, "grad_norm": 9.499162673950195, "learning_rate": 4.147959183673469e-05, "loss": 3.1804, "step": 4181 }, { "epoch": 17.069387755102042, "grad_norm": 5.4250030517578125, "learning_rate": 4.1477551020408165e-05, "loss": 3.4877, "step": 4182 }, { "epoch": 17.073469387755104, "grad_norm": 5.386284828186035, "learning_rate": 4.147551020408164e-05, "loss": 3.6007, "step": 4183 }, { "epoch": 17.077551020408162, "grad_norm": 8.24742603302002, "learning_rate": 4.1473469387755106e-05, "loss": 3.0957, "step": 4184 }, { "epoch": 17.081632653061224, "grad_norm": 8.863967895507812, "learning_rate": 4.147142857142857e-05, "loss": 2.6374, "step": 4185 }, { "epoch": 17.085714285714285, "grad_norm": 8.497513771057129, "learning_rate": 4.146938775510204e-05, "loss": 3.3088, "step": 4186 }, { "epoch": 17.089795918367347, "grad_norm": 13.977148056030273, "learning_rate": 4.1467346938775514e-05, "loss": 2.6989, "step": 4187 }, { "epoch": 17.09387755102041, "grad_norm": 4.969851493835449, "learning_rate": 4.146530612244898e-05, "loss": 3.8983, "step": 4188 }, { "epoch": 17.09795918367347, "grad_norm": 7.117897033691406, "learning_rate": 4.146326530612245e-05, "loss": 3.5572, "step": 4189 }, { "epoch": 17.102040816326532, "grad_norm": 7.255463600158691, "learning_rate": 4.146122448979592e-05, "loss": 3.3198, "step": 4190 }, { "epoch": 17.10612244897959, "grad_norm": 9.267828941345215, "learning_rate": 4.145918367346939e-05, "loss": 2.5483, "step": 4191 }, { "epoch": 17.110204081632652, "grad_norm": 5.01167106628418, "learning_rate": 4.145714285714286e-05, "loss": 3.5376, "step": 4192 }, { "epoch": 17.114285714285714, "grad_norm": 8.817034721374512, "learning_rate": 4.145510204081633e-05, "loss": 2.9188, "step": 4193 }, { "epoch": 17.118367346938776, "grad_norm": 3.8457233905792236, "learning_rate": 4.14530612244898e-05, "loss": 3.9922, "step": 4194 }, { "epoch": 17.122448979591837, "grad_norm": 8.525518417358398, "learning_rate": 4.1451020408163264e-05, "loss": 3.2278, "step": 4195 }, { "epoch": 17.1265306122449, "grad_norm": 9.162701606750488, "learning_rate": 4.144897959183673e-05, "loss": 2.9555, "step": 4196 }, { "epoch": 17.13061224489796, "grad_norm": 5.593794345855713, "learning_rate": 4.1446938775510205e-05, "loss": 3.3974, "step": 4197 }, { "epoch": 17.13469387755102, "grad_norm": 15.3726224899292, "learning_rate": 4.144489795918368e-05, "loss": 2.6174, "step": 4198 }, { "epoch": 17.13877551020408, "grad_norm": 9.943063735961914, "learning_rate": 4.1442857142857146e-05, "loss": 2.9983, "step": 4199 }, { "epoch": 17.142857142857142, "grad_norm": 5.350536346435547, "learning_rate": 4.1440816326530613e-05, "loss": 3.4321, "step": 4200 }, { "epoch": 17.146938775510204, "grad_norm": 6.316948413848877, "learning_rate": 4.143877551020408e-05, "loss": 3.6112, "step": 4201 }, { "epoch": 17.151020408163266, "grad_norm": 9.722343444824219, "learning_rate": 4.1436734693877555e-05, "loss": 3.2034, "step": 4202 }, { "epoch": 17.155102040816328, "grad_norm": 10.999874114990234, "learning_rate": 4.143469387755102e-05, "loss": 3.3047, "step": 4203 }, { "epoch": 17.15918367346939, "grad_norm": 12.231301307678223, "learning_rate": 4.143265306122449e-05, "loss": 2.6725, "step": 4204 }, { "epoch": 17.163265306122447, "grad_norm": 6.416522026062012, "learning_rate": 4.143061224489796e-05, "loss": 3.363, "step": 4205 }, { "epoch": 17.16734693877551, "grad_norm": 6.782208442687988, "learning_rate": 4.1428571428571437e-05, "loss": 3.4647, "step": 4206 }, { "epoch": 17.17142857142857, "grad_norm": 7.15505838394165, "learning_rate": 4.1426530612244904e-05, "loss": 3.5941, "step": 4207 }, { "epoch": 17.175510204081633, "grad_norm": 8.180856704711914, "learning_rate": 4.142448979591837e-05, "loss": 2.8868, "step": 4208 }, { "epoch": 17.179591836734694, "grad_norm": 11.238040924072266, "learning_rate": 4.142244897959184e-05, "loss": 2.7916, "step": 4209 }, { "epoch": 17.183673469387756, "grad_norm": 7.061032772064209, "learning_rate": 4.1420408163265305e-05, "loss": 3.5012, "step": 4210 }, { "epoch": 17.187755102040818, "grad_norm": 4.622450828552246, "learning_rate": 4.141836734693878e-05, "loss": 3.6775, "step": 4211 }, { "epoch": 17.191836734693876, "grad_norm": 7.80316686630249, "learning_rate": 4.1416326530612246e-05, "loss": 3.4928, "step": 4212 }, { "epoch": 17.195918367346938, "grad_norm": 5.238960266113281, "learning_rate": 4.141428571428571e-05, "loss": 3.5334, "step": 4213 }, { "epoch": 17.2, "grad_norm": 6.211671352386475, "learning_rate": 4.141224489795919e-05, "loss": 3.6639, "step": 4214 }, { "epoch": 17.20408163265306, "grad_norm": 6.835175037384033, "learning_rate": 4.1410204081632654e-05, "loss": 3.5781, "step": 4215 }, { "epoch": 17.208163265306123, "grad_norm": 12.463888168334961, "learning_rate": 4.140816326530613e-05, "loss": 2.203, "step": 4216 }, { "epoch": 17.212244897959184, "grad_norm": 8.03374195098877, "learning_rate": 4.1406122448979595e-05, "loss": 3.3769, "step": 4217 }, { "epoch": 17.216326530612246, "grad_norm": 5.919369697570801, "learning_rate": 4.140408163265306e-05, "loss": 3.5958, "step": 4218 }, { "epoch": 17.220408163265308, "grad_norm": 9.239035606384277, "learning_rate": 4.140204081632653e-05, "loss": 3.2294, "step": 4219 }, { "epoch": 17.224489795918366, "grad_norm": 10.184101104736328, "learning_rate": 4.14e-05, "loss": 3.3715, "step": 4220 }, { "epoch": 17.228571428571428, "grad_norm": 13.524157524108887, "learning_rate": 4.139795918367347e-05, "loss": 2.765, "step": 4221 }, { "epoch": 17.23265306122449, "grad_norm": 10.150224685668945, "learning_rate": 4.1395918367346944e-05, "loss": 3.1583, "step": 4222 }, { "epoch": 17.23673469387755, "grad_norm": 4.418004035949707, "learning_rate": 4.139387755102041e-05, "loss": 3.6705, "step": 4223 }, { "epoch": 17.240816326530613, "grad_norm": 7.350397109985352, "learning_rate": 4.139183673469388e-05, "loss": 3.2996, "step": 4224 }, { "epoch": 17.244897959183675, "grad_norm": 8.596534729003906, "learning_rate": 4.1389795918367346e-05, "loss": 3.5135, "step": 4225 }, { "epoch": 17.248979591836736, "grad_norm": 8.403135299682617, "learning_rate": 4.138775510204082e-05, "loss": 2.9601, "step": 4226 }, { "epoch": 17.253061224489795, "grad_norm": 15.796133041381836, "learning_rate": 4.138571428571429e-05, "loss": 2.7506, "step": 4227 }, { "epoch": 17.257142857142856, "grad_norm": 6.363978862762451, "learning_rate": 4.1383673469387754e-05, "loss": 3.3762, "step": 4228 }, { "epoch": 17.261224489795918, "grad_norm": 6.586868762969971, "learning_rate": 4.138163265306123e-05, "loss": 3.5323, "step": 4229 }, { "epoch": 17.26530612244898, "grad_norm": 11.479798316955566, "learning_rate": 4.1379591836734695e-05, "loss": 2.8463, "step": 4230 }, { "epoch": 17.26938775510204, "grad_norm": 4.582498073577881, "learning_rate": 4.137755102040817e-05, "loss": 3.7518, "step": 4231 }, { "epoch": 17.273469387755103, "grad_norm": 7.481479644775391, "learning_rate": 4.1375510204081636e-05, "loss": 3.5399, "step": 4232 }, { "epoch": 17.277551020408165, "grad_norm": 6.098112106323242, "learning_rate": 4.13734693877551e-05, "loss": 3.5681, "step": 4233 }, { "epoch": 17.281632653061223, "grad_norm": 7.251554489135742, "learning_rate": 4.137142857142857e-05, "loss": 3.5305, "step": 4234 }, { "epoch": 17.285714285714285, "grad_norm": 7.17779541015625, "learning_rate": 4.136938775510204e-05, "loss": 3.4717, "step": 4235 }, { "epoch": 17.289795918367346, "grad_norm": 14.23650074005127, "learning_rate": 4.136734693877551e-05, "loss": 2.8122, "step": 4236 }, { "epoch": 17.293877551020408, "grad_norm": 8.979472160339355, "learning_rate": 4.1365306122448985e-05, "loss": 3.3614, "step": 4237 }, { "epoch": 17.29795918367347, "grad_norm": 8.35159969329834, "learning_rate": 4.136326530612245e-05, "loss": 3.363, "step": 4238 }, { "epoch": 17.30204081632653, "grad_norm": 5.309518814086914, "learning_rate": 4.136122448979592e-05, "loss": 3.7123, "step": 4239 }, { "epoch": 17.306122448979593, "grad_norm": 12.89745807647705, "learning_rate": 4.1359183673469387e-05, "loss": 3.166, "step": 4240 }, { "epoch": 17.31020408163265, "grad_norm": 11.003671646118164, "learning_rate": 4.135714285714286e-05, "loss": 3.745, "step": 4241 }, { "epoch": 17.314285714285713, "grad_norm": 8.512412071228027, "learning_rate": 4.135510204081633e-05, "loss": 2.8915, "step": 4242 }, { "epoch": 17.318367346938775, "grad_norm": 6.036011695861816, "learning_rate": 4.1353061224489795e-05, "loss": 3.4882, "step": 4243 }, { "epoch": 17.322448979591837, "grad_norm": 5.512811183929443, "learning_rate": 4.135102040816327e-05, "loss": 3.4162, "step": 4244 }, { "epoch": 17.3265306122449, "grad_norm": 5.188508033752441, "learning_rate": 4.1348979591836736e-05, "loss": 3.5483, "step": 4245 }, { "epoch": 17.33061224489796, "grad_norm": 6.845926761627197, "learning_rate": 4.134693877551021e-05, "loss": 3.3069, "step": 4246 }, { "epoch": 17.33469387755102, "grad_norm": 5.656836986541748, "learning_rate": 4.134489795918368e-05, "loss": 3.6374, "step": 4247 }, { "epoch": 17.33877551020408, "grad_norm": 7.274719715118408, "learning_rate": 4.1342857142857144e-05, "loss": 3.3338, "step": 4248 }, { "epoch": 17.34285714285714, "grad_norm": 7.106010913848877, "learning_rate": 4.134081632653061e-05, "loss": 3.2092, "step": 4249 }, { "epoch": 17.346938775510203, "grad_norm": 11.18237590789795, "learning_rate": 4.1338775510204085e-05, "loss": 3.2005, "step": 4250 }, { "epoch": 17.351020408163265, "grad_norm": 6.745845317840576, "learning_rate": 4.133673469387755e-05, "loss": 3.2545, "step": 4251 }, { "epoch": 17.355102040816327, "grad_norm": 15.989096641540527, "learning_rate": 4.1334693877551026e-05, "loss": 3.233, "step": 4252 }, { "epoch": 17.35918367346939, "grad_norm": 6.363494873046875, "learning_rate": 4.133265306122449e-05, "loss": 3.5445, "step": 4253 }, { "epoch": 17.36326530612245, "grad_norm": 6.595459461212158, "learning_rate": 4.133061224489796e-05, "loss": 3.4678, "step": 4254 }, { "epoch": 17.367346938775512, "grad_norm": 7.818910598754883, "learning_rate": 4.1328571428571434e-05, "loss": 3.3029, "step": 4255 }, { "epoch": 17.37142857142857, "grad_norm": 4.7563323974609375, "learning_rate": 4.13265306122449e-05, "loss": 3.388, "step": 4256 }, { "epoch": 17.375510204081632, "grad_norm": 5.9872941970825195, "learning_rate": 4.132448979591837e-05, "loss": 3.7697, "step": 4257 }, { "epoch": 17.379591836734694, "grad_norm": 7.11539888381958, "learning_rate": 4.1322448979591836e-05, "loss": 3.6224, "step": 4258 }, { "epoch": 17.383673469387755, "grad_norm": 5.255215167999268, "learning_rate": 4.132040816326531e-05, "loss": 3.7513, "step": 4259 }, { "epoch": 17.387755102040817, "grad_norm": 9.007308959960938, "learning_rate": 4.131836734693878e-05, "loss": 2.9544, "step": 4260 }, { "epoch": 17.39183673469388, "grad_norm": 6.037494659423828, "learning_rate": 4.131632653061225e-05, "loss": 3.3337, "step": 4261 }, { "epoch": 17.39591836734694, "grad_norm": 13.96403980255127, "learning_rate": 4.131428571428572e-05, "loss": 2.1625, "step": 4262 }, { "epoch": 17.4, "grad_norm": 8.03799819946289, "learning_rate": 4.1312244897959185e-05, "loss": 3.3722, "step": 4263 }, { "epoch": 17.40408163265306, "grad_norm": 5.601916790008545, "learning_rate": 4.131020408163265e-05, "loss": 3.6403, "step": 4264 }, { "epoch": 17.408163265306122, "grad_norm": 7.11585807800293, "learning_rate": 4.1308163265306126e-05, "loss": 3.3503, "step": 4265 }, { "epoch": 17.412244897959184, "grad_norm": 7.720278739929199, "learning_rate": 4.130612244897959e-05, "loss": 3.4559, "step": 4266 }, { "epoch": 17.416326530612245, "grad_norm": 11.360321044921875, "learning_rate": 4.130408163265306e-05, "loss": 3.3896, "step": 4267 }, { "epoch": 17.420408163265307, "grad_norm": 14.511202812194824, "learning_rate": 4.1302040816326534e-05, "loss": 2.129, "step": 4268 }, { "epoch": 17.42448979591837, "grad_norm": 7.690535068511963, "learning_rate": 4.13e-05, "loss": 3.5318, "step": 4269 }, { "epoch": 17.428571428571427, "grad_norm": 6.273187160491943, "learning_rate": 4.1297959183673475e-05, "loss": 3.5415, "step": 4270 }, { "epoch": 17.43265306122449, "grad_norm": 5.9149956703186035, "learning_rate": 4.129591836734694e-05, "loss": 3.5363, "step": 4271 }, { "epoch": 17.43673469387755, "grad_norm": 6.14265251159668, "learning_rate": 4.129387755102041e-05, "loss": 3.738, "step": 4272 }, { "epoch": 17.440816326530612, "grad_norm": 6.111799240112305, "learning_rate": 4.1291836734693876e-05, "loss": 3.7272, "step": 4273 }, { "epoch": 17.444897959183674, "grad_norm": 8.86442756652832, "learning_rate": 4.1289795918367343e-05, "loss": 3.2389, "step": 4274 }, { "epoch": 17.448979591836736, "grad_norm": 10.893828392028809, "learning_rate": 4.128775510204082e-05, "loss": 2.6409, "step": 4275 }, { "epoch": 17.453061224489797, "grad_norm": 10.559571266174316, "learning_rate": 4.128571428571429e-05, "loss": 2.9785, "step": 4276 }, { "epoch": 17.457142857142856, "grad_norm": 15.93860149383545, "learning_rate": 4.128367346938776e-05, "loss": 2.0647, "step": 4277 }, { "epoch": 17.461224489795917, "grad_norm": 5.4166059494018555, "learning_rate": 4.1281632653061225e-05, "loss": 3.7241, "step": 4278 }, { "epoch": 17.46530612244898, "grad_norm": 7.911605358123779, "learning_rate": 4.127959183673469e-05, "loss": 3.5413, "step": 4279 }, { "epoch": 17.46938775510204, "grad_norm": 17.11079978942871, "learning_rate": 4.1277551020408166e-05, "loss": 2.5246, "step": 4280 }, { "epoch": 17.473469387755102, "grad_norm": 8.414958953857422, "learning_rate": 4.1275510204081634e-05, "loss": 3.3234, "step": 4281 }, { "epoch": 17.477551020408164, "grad_norm": 17.86404800415039, "learning_rate": 4.12734693877551e-05, "loss": 1.9016, "step": 4282 }, { "epoch": 17.481632653061226, "grad_norm": 9.087015151977539, "learning_rate": 4.1271428571428575e-05, "loss": 2.9786, "step": 4283 }, { "epoch": 17.485714285714284, "grad_norm": 9.936727523803711, "learning_rate": 4.126938775510204e-05, "loss": 3.0614, "step": 4284 }, { "epoch": 17.489795918367346, "grad_norm": 4.811816215515137, "learning_rate": 4.1267346938775516e-05, "loss": 3.9279, "step": 4285 }, { "epoch": 17.493877551020407, "grad_norm": 7.493943214416504, "learning_rate": 4.126530612244898e-05, "loss": 3.2877, "step": 4286 }, { "epoch": 17.49795918367347, "grad_norm": 4.884336471557617, "learning_rate": 4.126326530612245e-05, "loss": 3.7619, "step": 4287 }, { "epoch": 17.50204081632653, "grad_norm": 5.576732635498047, "learning_rate": 4.126122448979592e-05, "loss": 3.581, "step": 4288 }, { "epoch": 17.506122448979593, "grad_norm": 8.641743659973145, "learning_rate": 4.1259183673469384e-05, "loss": 3.1492, "step": 4289 }, { "epoch": 17.510204081632654, "grad_norm": 12.475286483764648, "learning_rate": 4.125714285714286e-05, "loss": 2.359, "step": 4290 }, { "epoch": 17.514285714285712, "grad_norm": 5.78947114944458, "learning_rate": 4.125510204081633e-05, "loss": 3.7005, "step": 4291 }, { "epoch": 17.518367346938774, "grad_norm": 3.6038925647735596, "learning_rate": 4.12530612244898e-05, "loss": 3.9357, "step": 4292 }, { "epoch": 17.522448979591836, "grad_norm": 13.737896919250488, "learning_rate": 4.1251020408163266e-05, "loss": 2.7221, "step": 4293 }, { "epoch": 17.526530612244898, "grad_norm": 10.769160270690918, "learning_rate": 4.124897959183674e-05, "loss": 2.7395, "step": 4294 }, { "epoch": 17.53061224489796, "grad_norm": 12.762588500976562, "learning_rate": 4.124693877551021e-05, "loss": 3.4084, "step": 4295 }, { "epoch": 17.53469387755102, "grad_norm": 7.88714075088501, "learning_rate": 4.1244897959183674e-05, "loss": 3.2406, "step": 4296 }, { "epoch": 17.538775510204083, "grad_norm": 7.339934349060059, "learning_rate": 4.124285714285714e-05, "loss": 3.5788, "step": 4297 }, { "epoch": 17.542857142857144, "grad_norm": 10.401579856872559, "learning_rate": 4.1240816326530615e-05, "loss": 3.3002, "step": 4298 }, { "epoch": 17.546938775510203, "grad_norm": 10.3444185256958, "learning_rate": 4.123877551020409e-05, "loss": 2.9199, "step": 4299 }, { "epoch": 17.551020408163264, "grad_norm": 6.2295732498168945, "learning_rate": 4.1236734693877556e-05, "loss": 3.4023, "step": 4300 }, { "epoch": 17.555102040816326, "grad_norm": 4.216312885284424, "learning_rate": 4.1234693877551024e-05, "loss": 3.8945, "step": 4301 }, { "epoch": 17.559183673469388, "grad_norm": 7.9064106941223145, "learning_rate": 4.123265306122449e-05, "loss": 3.2569, "step": 4302 }, { "epoch": 17.56326530612245, "grad_norm": 12.93154239654541, "learning_rate": 4.123061224489796e-05, "loss": 2.7118, "step": 4303 }, { "epoch": 17.56734693877551, "grad_norm": 6.142505168914795, "learning_rate": 4.122857142857143e-05, "loss": 3.4303, "step": 4304 }, { "epoch": 17.571428571428573, "grad_norm": 12.080927848815918, "learning_rate": 4.12265306122449e-05, "loss": 2.8007, "step": 4305 }, { "epoch": 17.57551020408163, "grad_norm": 12.7671537399292, "learning_rate": 4.122448979591837e-05, "loss": 2.7576, "step": 4306 }, { "epoch": 17.579591836734693, "grad_norm": 9.013983726501465, "learning_rate": 4.122244897959184e-05, "loss": 3.2005, "step": 4307 }, { "epoch": 17.583673469387755, "grad_norm": 7.688212871551514, "learning_rate": 4.122040816326531e-05, "loss": 3.3872, "step": 4308 }, { "epoch": 17.587755102040816, "grad_norm": 9.132650375366211, "learning_rate": 4.121836734693878e-05, "loss": 2.9767, "step": 4309 }, { "epoch": 17.591836734693878, "grad_norm": 6.177831172943115, "learning_rate": 4.121632653061225e-05, "loss": 3.45, "step": 4310 }, { "epoch": 17.59591836734694, "grad_norm": 10.980340957641602, "learning_rate": 4.1214285714285715e-05, "loss": 2.7162, "step": 4311 }, { "epoch": 17.6, "grad_norm": 6.629461765289307, "learning_rate": 4.121224489795918e-05, "loss": 3.5752, "step": 4312 }, { "epoch": 17.60408163265306, "grad_norm": 5.846560955047607, "learning_rate": 4.1210204081632656e-05, "loss": 3.6074, "step": 4313 }, { "epoch": 17.60816326530612, "grad_norm": 19.73516273498535, "learning_rate": 4.120816326530613e-05, "loss": 2.1857, "step": 4314 }, { "epoch": 17.612244897959183, "grad_norm": 5.944655418395996, "learning_rate": 4.12061224489796e-05, "loss": 3.3984, "step": 4315 }, { "epoch": 17.616326530612245, "grad_norm": 6.891174793243408, "learning_rate": 4.1204081632653064e-05, "loss": 3.4735, "step": 4316 }, { "epoch": 17.620408163265306, "grad_norm": 11.145424842834473, "learning_rate": 4.120204081632653e-05, "loss": 2.9101, "step": 4317 }, { "epoch": 17.624489795918368, "grad_norm": 8.437417030334473, "learning_rate": 4.12e-05, "loss": 3.4535, "step": 4318 }, { "epoch": 17.62857142857143, "grad_norm": 12.711320877075195, "learning_rate": 4.119795918367347e-05, "loss": 2.3402, "step": 4319 }, { "epoch": 17.632653061224488, "grad_norm": 6.910150051116943, "learning_rate": 4.119591836734694e-05, "loss": 3.2626, "step": 4320 }, { "epoch": 17.63673469387755, "grad_norm": 14.485648155212402, "learning_rate": 4.119387755102041e-05, "loss": 2.3639, "step": 4321 }, { "epoch": 17.64081632653061, "grad_norm": 10.982671737670898, "learning_rate": 4.119183673469388e-05, "loss": 2.9257, "step": 4322 }, { "epoch": 17.644897959183673, "grad_norm": 4.716424942016602, "learning_rate": 4.118979591836735e-05, "loss": 3.6845, "step": 4323 }, { "epoch": 17.648979591836735, "grad_norm": 9.163541793823242, "learning_rate": 4.118775510204082e-05, "loss": 3.2795, "step": 4324 }, { "epoch": 17.653061224489797, "grad_norm": 11.129416465759277, "learning_rate": 4.118571428571429e-05, "loss": 2.7829, "step": 4325 }, { "epoch": 17.65714285714286, "grad_norm": 16.343996047973633, "learning_rate": 4.1183673469387756e-05, "loss": 3.014, "step": 4326 }, { "epoch": 17.66122448979592, "grad_norm": 7.023854732513428, "learning_rate": 4.118163265306122e-05, "loss": 3.4229, "step": 4327 }, { "epoch": 17.66530612244898, "grad_norm": 5.5448174476623535, "learning_rate": 4.117959183673469e-05, "loss": 3.821, "step": 4328 }, { "epoch": 17.66938775510204, "grad_norm": 12.590147018432617, "learning_rate": 4.1177551020408164e-05, "loss": 2.7092, "step": 4329 }, { "epoch": 17.6734693877551, "grad_norm": 8.238351821899414, "learning_rate": 4.117551020408164e-05, "loss": 3.2596, "step": 4330 }, { "epoch": 17.677551020408163, "grad_norm": 7.9371418952941895, "learning_rate": 4.1173469387755105e-05, "loss": 3.0527, "step": 4331 }, { "epoch": 17.681632653061225, "grad_norm": 14.800952911376953, "learning_rate": 4.117142857142857e-05, "loss": 2.8856, "step": 4332 }, { "epoch": 17.685714285714287, "grad_norm": 5.062203884124756, "learning_rate": 4.116938775510204e-05, "loss": 3.6201, "step": 4333 }, { "epoch": 17.68979591836735, "grad_norm": 9.548643112182617, "learning_rate": 4.116734693877551e-05, "loss": 2.8966, "step": 4334 }, { "epoch": 17.693877551020407, "grad_norm": 5.90824556350708, "learning_rate": 4.116530612244898e-05, "loss": 3.5799, "step": 4335 }, { "epoch": 17.69795918367347, "grad_norm": 7.514692306518555, "learning_rate": 4.116326530612245e-05, "loss": 3.0525, "step": 4336 }, { "epoch": 17.70204081632653, "grad_norm": 6.15176248550415, "learning_rate": 4.116122448979592e-05, "loss": 3.673, "step": 4337 }, { "epoch": 17.706122448979592, "grad_norm": 11.239384651184082, "learning_rate": 4.115918367346939e-05, "loss": 2.3241, "step": 4338 }, { "epoch": 17.710204081632654, "grad_norm": 7.371608257293701, "learning_rate": 4.115714285714286e-05, "loss": 3.565, "step": 4339 }, { "epoch": 17.714285714285715, "grad_norm": 6.961215019226074, "learning_rate": 4.115510204081633e-05, "loss": 3.4277, "step": 4340 }, { "epoch": 17.718367346938777, "grad_norm": 7.255846977233887, "learning_rate": 4.11530612244898e-05, "loss": 3.4351, "step": 4341 }, { "epoch": 17.722448979591835, "grad_norm": 7.123993873596191, "learning_rate": 4.1151020408163264e-05, "loss": 3.4114, "step": 4342 }, { "epoch": 17.726530612244897, "grad_norm": 5.919724464416504, "learning_rate": 4.114897959183674e-05, "loss": 3.6687, "step": 4343 }, { "epoch": 17.73061224489796, "grad_norm": 7.610337734222412, "learning_rate": 4.1146938775510205e-05, "loss": 3.4967, "step": 4344 }, { "epoch": 17.73469387755102, "grad_norm": 5.420987129211426, "learning_rate": 4.114489795918368e-05, "loss": 3.5124, "step": 4345 }, { "epoch": 17.738775510204082, "grad_norm": 8.304402351379395, "learning_rate": 4.1142857142857146e-05, "loss": 3.2916, "step": 4346 }, { "epoch": 17.742857142857144, "grad_norm": 11.889551162719727, "learning_rate": 4.114081632653061e-05, "loss": 2.8227, "step": 4347 }, { "epoch": 17.746938775510205, "grad_norm": 5.056840419769287, "learning_rate": 4.113877551020409e-05, "loss": 3.5329, "step": 4348 }, { "epoch": 17.751020408163264, "grad_norm": 8.685151100158691, "learning_rate": 4.1136734693877554e-05, "loss": 3.0994, "step": 4349 }, { "epoch": 17.755102040816325, "grad_norm": 7.84678316116333, "learning_rate": 4.113469387755102e-05, "loss": 3.2194, "step": 4350 }, { "epoch": 17.759183673469387, "grad_norm": 6.633584976196289, "learning_rate": 4.113265306122449e-05, "loss": 3.2036, "step": 4351 }, { "epoch": 17.76326530612245, "grad_norm": 7.129715442657471, "learning_rate": 4.113061224489796e-05, "loss": 3.382, "step": 4352 }, { "epoch": 17.76734693877551, "grad_norm": 9.59860897064209, "learning_rate": 4.1128571428571436e-05, "loss": 2.9862, "step": 4353 }, { "epoch": 17.771428571428572, "grad_norm": 10.130983352661133, "learning_rate": 4.11265306122449e-05, "loss": 3.1334, "step": 4354 }, { "epoch": 17.775510204081634, "grad_norm": 6.491892337799072, "learning_rate": 4.112448979591837e-05, "loss": 3.654, "step": 4355 }, { "epoch": 17.779591836734696, "grad_norm": 10.902695655822754, "learning_rate": 4.112244897959184e-05, "loss": 3.3205, "step": 4356 }, { "epoch": 17.783673469387754, "grad_norm": 11.851686477661133, "learning_rate": 4.1120408163265305e-05, "loss": 2.2185, "step": 4357 }, { "epoch": 17.787755102040816, "grad_norm": 5.177953720092773, "learning_rate": 4.111836734693878e-05, "loss": 3.8884, "step": 4358 }, { "epoch": 17.791836734693877, "grad_norm": 16.52167510986328, "learning_rate": 4.1116326530612246e-05, "loss": 1.0328, "step": 4359 }, { "epoch": 17.79591836734694, "grad_norm": 12.279694557189941, "learning_rate": 4.111428571428572e-05, "loss": 2.7809, "step": 4360 }, { "epoch": 17.8, "grad_norm": 4.2457098960876465, "learning_rate": 4.1112244897959187e-05, "loss": 3.98, "step": 4361 }, { "epoch": 17.804081632653062, "grad_norm": 9.774164199829102, "learning_rate": 4.1110204081632654e-05, "loss": 3.1875, "step": 4362 }, { "epoch": 17.808163265306124, "grad_norm": 14.66677474975586, "learning_rate": 4.110816326530613e-05, "loss": 2.3097, "step": 4363 }, { "epoch": 17.812244897959182, "grad_norm": 6.266210556030273, "learning_rate": 4.1106122448979595e-05, "loss": 3.5874, "step": 4364 }, { "epoch": 17.816326530612244, "grad_norm": 8.898356437683105, "learning_rate": 4.110408163265306e-05, "loss": 2.7639, "step": 4365 }, { "epoch": 17.820408163265306, "grad_norm": 8.726364135742188, "learning_rate": 4.110204081632653e-05, "loss": 3.3718, "step": 4366 }, { "epoch": 17.824489795918367, "grad_norm": 13.531410217285156, "learning_rate": 4.11e-05, "loss": 2.3136, "step": 4367 }, { "epoch": 17.82857142857143, "grad_norm": 6.2839436531066895, "learning_rate": 4.109795918367348e-05, "loss": 3.6378, "step": 4368 }, { "epoch": 17.83265306122449, "grad_norm": 9.428226470947266, "learning_rate": 4.1095918367346944e-05, "loss": 3.3181, "step": 4369 }, { "epoch": 17.836734693877553, "grad_norm": 5.625243663787842, "learning_rate": 4.109387755102041e-05, "loss": 3.7151, "step": 4370 }, { "epoch": 17.84081632653061, "grad_norm": 9.034750938415527, "learning_rate": 4.109183673469388e-05, "loss": 3.5434, "step": 4371 }, { "epoch": 17.844897959183672, "grad_norm": 7.403808116912842, "learning_rate": 4.1089795918367345e-05, "loss": 3.4572, "step": 4372 }, { "epoch": 17.848979591836734, "grad_norm": 7.337512969970703, "learning_rate": 4.108775510204082e-05, "loss": 3.2555, "step": 4373 }, { "epoch": 17.853061224489796, "grad_norm": 12.287592887878418, "learning_rate": 4.1085714285714286e-05, "loss": 2.6909, "step": 4374 }, { "epoch": 17.857142857142858, "grad_norm": 12.437918663024902, "learning_rate": 4.1083673469387753e-05, "loss": 2.7547, "step": 4375 }, { "epoch": 17.86122448979592, "grad_norm": 6.169220924377441, "learning_rate": 4.108163265306123e-05, "loss": 3.5219, "step": 4376 }, { "epoch": 17.86530612244898, "grad_norm": 7.068182945251465, "learning_rate": 4.1079591836734694e-05, "loss": 3.2551, "step": 4377 }, { "epoch": 17.86938775510204, "grad_norm": 12.044967651367188, "learning_rate": 4.107755102040817e-05, "loss": 3.208, "step": 4378 }, { "epoch": 17.8734693877551, "grad_norm": 4.342226505279541, "learning_rate": 4.1075510204081636e-05, "loss": 3.7694, "step": 4379 }, { "epoch": 17.877551020408163, "grad_norm": 10.915560722351074, "learning_rate": 4.10734693877551e-05, "loss": 2.8074, "step": 4380 }, { "epoch": 17.881632653061224, "grad_norm": 6.113248348236084, "learning_rate": 4.107142857142857e-05, "loss": 3.4229, "step": 4381 }, { "epoch": 17.885714285714286, "grad_norm": 12.649880409240723, "learning_rate": 4.106938775510204e-05, "loss": 2.6683, "step": 4382 }, { "epoch": 17.889795918367348, "grad_norm": 6.150980472564697, "learning_rate": 4.106734693877551e-05, "loss": 3.5779, "step": 4383 }, { "epoch": 17.89387755102041, "grad_norm": 11.137123107910156, "learning_rate": 4.1065306122448985e-05, "loss": 3.1162, "step": 4384 }, { "epoch": 17.897959183673468, "grad_norm": 4.9658427238464355, "learning_rate": 4.106326530612245e-05, "loss": 3.7922, "step": 4385 }, { "epoch": 17.90204081632653, "grad_norm": 4.245107173919678, "learning_rate": 4.106122448979592e-05, "loss": 4.1149, "step": 4386 }, { "epoch": 17.90612244897959, "grad_norm": 20.022369384765625, "learning_rate": 4.105918367346939e-05, "loss": 2.0309, "step": 4387 }, { "epoch": 17.910204081632653, "grad_norm": 9.639101028442383, "learning_rate": 4.105714285714286e-05, "loss": 3.5356, "step": 4388 }, { "epoch": 17.914285714285715, "grad_norm": 7.995589256286621, "learning_rate": 4.105510204081633e-05, "loss": 3.4044, "step": 4389 }, { "epoch": 17.918367346938776, "grad_norm": 5.163209438323975, "learning_rate": 4.1053061224489794e-05, "loss": 3.7215, "step": 4390 }, { "epoch": 17.922448979591838, "grad_norm": 7.747063159942627, "learning_rate": 4.105102040816327e-05, "loss": 3.2529, "step": 4391 }, { "epoch": 17.926530612244896, "grad_norm": 5.5345778465271, "learning_rate": 4.104897959183674e-05, "loss": 3.6058, "step": 4392 }, { "epoch": 17.930612244897958, "grad_norm": 4.348935604095459, "learning_rate": 4.104693877551021e-05, "loss": 3.8953, "step": 4393 }, { "epoch": 17.93469387755102, "grad_norm": 10.444552421569824, "learning_rate": 4.1044897959183676e-05, "loss": 3.1282, "step": 4394 }, { "epoch": 17.93877551020408, "grad_norm": 12.844624519348145, "learning_rate": 4.1042857142857143e-05, "loss": 3.0273, "step": 4395 }, { "epoch": 17.942857142857143, "grad_norm": 9.771836280822754, "learning_rate": 4.104081632653061e-05, "loss": 3.0387, "step": 4396 }, { "epoch": 17.946938775510205, "grad_norm": 7.4795098304748535, "learning_rate": 4.1038775510204084e-05, "loss": 3.5396, "step": 4397 }, { "epoch": 17.951020408163266, "grad_norm": 8.72095775604248, "learning_rate": 4.103673469387755e-05, "loss": 3.1823, "step": 4398 }, { "epoch": 17.955102040816328, "grad_norm": 5.027658462524414, "learning_rate": 4.1034693877551025e-05, "loss": 3.5883, "step": 4399 }, { "epoch": 17.959183673469386, "grad_norm": 9.832587242126465, "learning_rate": 4.103265306122449e-05, "loss": 3.1712, "step": 4400 }, { "epoch": 17.963265306122448, "grad_norm": 9.341269493103027, "learning_rate": 4.103061224489796e-05, "loss": 3.1983, "step": 4401 }, { "epoch": 17.96734693877551, "grad_norm": 11.1034574508667, "learning_rate": 4.1028571428571434e-05, "loss": 3.165, "step": 4402 }, { "epoch": 17.97142857142857, "grad_norm": 15.5703763961792, "learning_rate": 4.10265306122449e-05, "loss": 1.9109, "step": 4403 }, { "epoch": 17.975510204081633, "grad_norm": 13.813108444213867, "learning_rate": 4.102448979591837e-05, "loss": 2.4229, "step": 4404 }, { "epoch": 17.979591836734695, "grad_norm": 8.047402381896973, "learning_rate": 4.1022448979591835e-05, "loss": 3.2483, "step": 4405 }, { "epoch": 17.983673469387757, "grad_norm": 4.7350172996521, "learning_rate": 4.102040816326531e-05, "loss": 3.8701, "step": 4406 }, { "epoch": 17.987755102040815, "grad_norm": 9.003375053405762, "learning_rate": 4.101836734693878e-05, "loss": 3.4883, "step": 4407 }, { "epoch": 17.991836734693877, "grad_norm": 7.1081318855285645, "learning_rate": 4.101632653061225e-05, "loss": 3.5844, "step": 4408 }, { "epoch": 17.99591836734694, "grad_norm": 11.935099601745605, "learning_rate": 4.101428571428572e-05, "loss": 3.0671, "step": 4409 }, { "epoch": 18.0, "grad_norm": 10.444748878479004, "learning_rate": 4.1012244897959184e-05, "loss": 3.4856, "step": 4410 }, { "epoch": 18.00408163265306, "grad_norm": 5.583410263061523, "learning_rate": 4.101020408163265e-05, "loss": 3.7181, "step": 4411 }, { "epoch": 18.008163265306123, "grad_norm": 8.496129989624023, "learning_rate": 4.1008163265306125e-05, "loss": 3.1094, "step": 4412 }, { "epoch": 18.012244897959185, "grad_norm": 4.801305770874023, "learning_rate": 4.100612244897959e-05, "loss": 3.8633, "step": 4413 }, { "epoch": 18.016326530612243, "grad_norm": 9.557676315307617, "learning_rate": 4.1004081632653066e-05, "loss": 3.1549, "step": 4414 }, { "epoch": 18.020408163265305, "grad_norm": 5.7621846199035645, "learning_rate": 4.100204081632653e-05, "loss": 3.4856, "step": 4415 }, { "epoch": 18.024489795918367, "grad_norm": 6.224963188171387, "learning_rate": 4.1e-05, "loss": 3.3573, "step": 4416 }, { "epoch": 18.02857142857143, "grad_norm": 9.166388511657715, "learning_rate": 4.0997959183673474e-05, "loss": 3.174, "step": 4417 }, { "epoch": 18.03265306122449, "grad_norm": 8.447712898254395, "learning_rate": 4.099591836734694e-05, "loss": 3.0697, "step": 4418 }, { "epoch": 18.036734693877552, "grad_norm": 7.682389736175537, "learning_rate": 4.099387755102041e-05, "loss": 3.1655, "step": 4419 }, { "epoch": 18.040816326530614, "grad_norm": 18.172224044799805, "learning_rate": 4.0991836734693876e-05, "loss": 1.8404, "step": 4420 }, { "epoch": 18.044897959183672, "grad_norm": 6.571361064910889, "learning_rate": 4.098979591836735e-05, "loss": 3.5551, "step": 4421 }, { "epoch": 18.048979591836734, "grad_norm": 9.924276351928711, "learning_rate": 4.098775510204082e-05, "loss": 2.6318, "step": 4422 }, { "epoch": 18.053061224489795, "grad_norm": 10.684654235839844, "learning_rate": 4.098571428571429e-05, "loss": 3.4469, "step": 4423 }, { "epoch": 18.057142857142857, "grad_norm": 6.344960689544678, "learning_rate": 4.098367346938776e-05, "loss": 3.5493, "step": 4424 }, { "epoch": 18.06122448979592, "grad_norm": 6.511331081390381, "learning_rate": 4.0981632653061225e-05, "loss": 3.5385, "step": 4425 }, { "epoch": 18.06530612244898, "grad_norm": 8.309009552001953, "learning_rate": 4.097959183673469e-05, "loss": 3.3191, "step": 4426 }, { "epoch": 18.069387755102042, "grad_norm": 12.988744735717773, "learning_rate": 4.0977551020408166e-05, "loss": 3.1994, "step": 4427 }, { "epoch": 18.073469387755104, "grad_norm": 11.929080963134766, "learning_rate": 4.097551020408163e-05, "loss": 2.2198, "step": 4428 }, { "epoch": 18.077551020408162, "grad_norm": 6.4336137771606445, "learning_rate": 4.09734693877551e-05, "loss": 3.6326, "step": 4429 }, { "epoch": 18.081632653061224, "grad_norm": 7.035803318023682, "learning_rate": 4.0971428571428574e-05, "loss": 3.4926, "step": 4430 }, { "epoch": 18.085714285714285, "grad_norm": 8.068435668945312, "learning_rate": 4.096938775510205e-05, "loss": 3.5052, "step": 4431 }, { "epoch": 18.089795918367347, "grad_norm": 9.008257865905762, "learning_rate": 4.0967346938775515e-05, "loss": 3.1773, "step": 4432 }, { "epoch": 18.09387755102041, "grad_norm": 8.91015911102295, "learning_rate": 4.096530612244898e-05, "loss": 3.3512, "step": 4433 }, { "epoch": 18.09795918367347, "grad_norm": 5.61093282699585, "learning_rate": 4.096326530612245e-05, "loss": 3.641, "step": 4434 }, { "epoch": 18.102040816326532, "grad_norm": 6.516190528869629, "learning_rate": 4.0961224489795917e-05, "loss": 3.4578, "step": 4435 }, { "epoch": 18.10612244897959, "grad_norm": 9.18908977508545, "learning_rate": 4.095918367346939e-05, "loss": 3.4012, "step": 4436 }, { "epoch": 18.110204081632652, "grad_norm": 10.999966621398926, "learning_rate": 4.095714285714286e-05, "loss": 2.553, "step": 4437 }, { "epoch": 18.114285714285714, "grad_norm": 7.38931131362915, "learning_rate": 4.095510204081633e-05, "loss": 3.4264, "step": 4438 }, { "epoch": 18.118367346938776, "grad_norm": 6.8388776779174805, "learning_rate": 4.09530612244898e-05, "loss": 3.4774, "step": 4439 }, { "epoch": 18.122448979591837, "grad_norm": 9.786705017089844, "learning_rate": 4.0951020408163266e-05, "loss": 2.8926, "step": 4440 }, { "epoch": 18.1265306122449, "grad_norm": 8.077984809875488, "learning_rate": 4.094897959183674e-05, "loss": 3.15, "step": 4441 }, { "epoch": 18.13061224489796, "grad_norm": 7.5984930992126465, "learning_rate": 4.094693877551021e-05, "loss": 3.3004, "step": 4442 }, { "epoch": 18.13469387755102, "grad_norm": 6.286343574523926, "learning_rate": 4.0944897959183674e-05, "loss": 3.6096, "step": 4443 }, { "epoch": 18.13877551020408, "grad_norm": 13.418109893798828, "learning_rate": 4.094285714285714e-05, "loss": 2.4083, "step": 4444 }, { "epoch": 18.142857142857142, "grad_norm": 12.51771068572998, "learning_rate": 4.0940816326530615e-05, "loss": 2.6451, "step": 4445 }, { "epoch": 18.146938775510204, "grad_norm": 7.632861614227295, "learning_rate": 4.093877551020409e-05, "loss": 3.5288, "step": 4446 }, { "epoch": 18.151020408163266, "grad_norm": 17.26099395751953, "learning_rate": 4.0936734693877556e-05, "loss": 1.9418, "step": 4447 }, { "epoch": 18.155102040816328, "grad_norm": 4.541133403778076, "learning_rate": 4.093469387755102e-05, "loss": 4.0666, "step": 4448 }, { "epoch": 18.15918367346939, "grad_norm": 10.47582721710205, "learning_rate": 4.093265306122449e-05, "loss": 2.8593, "step": 4449 }, { "epoch": 18.163265306122447, "grad_norm": 6.635553359985352, "learning_rate": 4.093061224489796e-05, "loss": 3.7209, "step": 4450 }, { "epoch": 18.16734693877551, "grad_norm": 12.105584144592285, "learning_rate": 4.092857142857143e-05, "loss": 3.1646, "step": 4451 }, { "epoch": 18.17142857142857, "grad_norm": 14.37861156463623, "learning_rate": 4.09265306122449e-05, "loss": 2.8055, "step": 4452 }, { "epoch": 18.175510204081633, "grad_norm": 15.192977905273438, "learning_rate": 4.092448979591837e-05, "loss": 2.0047, "step": 4453 }, { "epoch": 18.179591836734694, "grad_norm": 7.82846212387085, "learning_rate": 4.092244897959184e-05, "loss": 3.3145, "step": 4454 }, { "epoch": 18.183673469387756, "grad_norm": 6.043039798736572, "learning_rate": 4.0920408163265306e-05, "loss": 3.5777, "step": 4455 }, { "epoch": 18.187755102040818, "grad_norm": 6.862060070037842, "learning_rate": 4.091836734693878e-05, "loss": 3.5469, "step": 4456 }, { "epoch": 18.191836734693876, "grad_norm": 12.533777236938477, "learning_rate": 4.091632653061225e-05, "loss": 3.0316, "step": 4457 }, { "epoch": 18.195918367346938, "grad_norm": 7.790749549865723, "learning_rate": 4.0914285714285715e-05, "loss": 3.2982, "step": 4458 }, { "epoch": 18.2, "grad_norm": 6.287365913391113, "learning_rate": 4.091224489795918e-05, "loss": 3.4943, "step": 4459 }, { "epoch": 18.20408163265306, "grad_norm": 6.853413105010986, "learning_rate": 4.0910204081632656e-05, "loss": 3.4879, "step": 4460 }, { "epoch": 18.208163265306123, "grad_norm": 7.355722904205322, "learning_rate": 4.090816326530613e-05, "loss": 3.4208, "step": 4461 }, { "epoch": 18.212244897959184, "grad_norm": 20.33177375793457, "learning_rate": 4.09061224489796e-05, "loss": 0.9103, "step": 4462 }, { "epoch": 18.216326530612246, "grad_norm": 4.955240249633789, "learning_rate": 4.0904081632653064e-05, "loss": 3.653, "step": 4463 }, { "epoch": 18.220408163265308, "grad_norm": 5.782406806945801, "learning_rate": 4.090204081632653e-05, "loss": 3.7496, "step": 4464 }, { "epoch": 18.224489795918366, "grad_norm": 4.946937561035156, "learning_rate": 4.09e-05, "loss": 3.6338, "step": 4465 }, { "epoch": 18.228571428571428, "grad_norm": 5.929367542266846, "learning_rate": 4.089795918367347e-05, "loss": 3.6353, "step": 4466 }, { "epoch": 18.23265306122449, "grad_norm": 7.636821746826172, "learning_rate": 4.089591836734694e-05, "loss": 3.1535, "step": 4467 }, { "epoch": 18.23673469387755, "grad_norm": 5.863283157348633, "learning_rate": 4.089387755102041e-05, "loss": 3.3456, "step": 4468 }, { "epoch": 18.240816326530613, "grad_norm": 7.764771461486816, "learning_rate": 4.089183673469388e-05, "loss": 3.3084, "step": 4469 }, { "epoch": 18.244897959183675, "grad_norm": 18.92487907409668, "learning_rate": 4.088979591836735e-05, "loss": 2.107, "step": 4470 }, { "epoch": 18.248979591836736, "grad_norm": 6.354405879974365, "learning_rate": 4.088775510204082e-05, "loss": 3.6195, "step": 4471 }, { "epoch": 18.253061224489795, "grad_norm": 6.577188014984131, "learning_rate": 4.088571428571429e-05, "loss": 3.4067, "step": 4472 }, { "epoch": 18.257142857142856, "grad_norm": 9.072300910949707, "learning_rate": 4.0883673469387755e-05, "loss": 3.1122, "step": 4473 }, { "epoch": 18.261224489795918, "grad_norm": 10.453378677368164, "learning_rate": 4.088163265306122e-05, "loss": 3.0622, "step": 4474 }, { "epoch": 18.26530612244898, "grad_norm": 9.384031295776367, "learning_rate": 4.0879591836734696e-05, "loss": 3.2072, "step": 4475 }, { "epoch": 18.26938775510204, "grad_norm": 9.053131103515625, "learning_rate": 4.0877551020408164e-05, "loss": 3.2675, "step": 4476 }, { "epoch": 18.273469387755103, "grad_norm": 5.8414225578308105, "learning_rate": 4.087551020408164e-05, "loss": 3.6711, "step": 4477 }, { "epoch": 18.277551020408165, "grad_norm": 13.186163902282715, "learning_rate": 4.0873469387755105e-05, "loss": 2.5621, "step": 4478 }, { "epoch": 18.281632653061223, "grad_norm": 9.294694900512695, "learning_rate": 4.087142857142857e-05, "loss": 3.1442, "step": 4479 }, { "epoch": 18.285714285714285, "grad_norm": 5.206625938415527, "learning_rate": 4.0869387755102046e-05, "loss": 3.7063, "step": 4480 }, { "epoch": 18.289795918367346, "grad_norm": 4.759476661682129, "learning_rate": 4.086734693877551e-05, "loss": 3.664, "step": 4481 }, { "epoch": 18.293877551020408, "grad_norm": 13.326446533203125, "learning_rate": 4.086530612244898e-05, "loss": 3.0876, "step": 4482 }, { "epoch": 18.29795918367347, "grad_norm": 6.985692977905273, "learning_rate": 4.086326530612245e-05, "loss": 3.4123, "step": 4483 }, { "epoch": 18.30204081632653, "grad_norm": 6.974443435668945, "learning_rate": 4.086122448979592e-05, "loss": 3.518, "step": 4484 }, { "epoch": 18.306122448979593, "grad_norm": 7.257748126983643, "learning_rate": 4.0859183673469395e-05, "loss": 3.3828, "step": 4485 }, { "epoch": 18.31020408163265, "grad_norm": 5.529037952423096, "learning_rate": 4.085714285714286e-05, "loss": 3.5058, "step": 4486 }, { "epoch": 18.314285714285713, "grad_norm": 16.435216903686523, "learning_rate": 4.085510204081633e-05, "loss": 2.5788, "step": 4487 }, { "epoch": 18.318367346938775, "grad_norm": 9.61273193359375, "learning_rate": 4.0853061224489796e-05, "loss": 3.2182, "step": 4488 }, { "epoch": 18.322448979591837, "grad_norm": 7.338489532470703, "learning_rate": 4.085102040816326e-05, "loss": 3.4778, "step": 4489 }, { "epoch": 18.3265306122449, "grad_norm": 13.056675910949707, "learning_rate": 4.084897959183674e-05, "loss": 2.5656, "step": 4490 }, { "epoch": 18.33061224489796, "grad_norm": 9.66441822052002, "learning_rate": 4.0846938775510204e-05, "loss": 3.3452, "step": 4491 }, { "epoch": 18.33469387755102, "grad_norm": 6.330323219299316, "learning_rate": 4.084489795918368e-05, "loss": 3.5045, "step": 4492 }, { "epoch": 18.33877551020408, "grad_norm": 11.622649192810059, "learning_rate": 4.0842857142857145e-05, "loss": 2.9306, "step": 4493 }, { "epoch": 18.34285714285714, "grad_norm": 17.34473419189453, "learning_rate": 4.084081632653061e-05, "loss": 2.6367, "step": 4494 }, { "epoch": 18.346938775510203, "grad_norm": 6.376004695892334, "learning_rate": 4.0838775510204086e-05, "loss": 3.4253, "step": 4495 }, { "epoch": 18.351020408163265, "grad_norm": 8.58779239654541, "learning_rate": 4.0836734693877553e-05, "loss": 3.4641, "step": 4496 }, { "epoch": 18.355102040816327, "grad_norm": 9.294252395629883, "learning_rate": 4.083469387755102e-05, "loss": 2.6801, "step": 4497 }, { "epoch": 18.35918367346939, "grad_norm": 8.316059112548828, "learning_rate": 4.083265306122449e-05, "loss": 3.298, "step": 4498 }, { "epoch": 18.36326530612245, "grad_norm": 11.962706565856934, "learning_rate": 4.083061224489796e-05, "loss": 3.2914, "step": 4499 }, { "epoch": 18.367346938775512, "grad_norm": 9.728150367736816, "learning_rate": 4.0828571428571436e-05, "loss": 2.6621, "step": 4500 }, { "epoch": 18.37142857142857, "grad_norm": 5.898555755615234, "learning_rate": 4.08265306122449e-05, "loss": 3.6719, "step": 4501 }, { "epoch": 18.375510204081632, "grad_norm": 6.908792018890381, "learning_rate": 4.082448979591837e-05, "loss": 3.538, "step": 4502 }, { "epoch": 18.379591836734694, "grad_norm": 6.032003402709961, "learning_rate": 4.082244897959184e-05, "loss": 3.3486, "step": 4503 }, { "epoch": 18.383673469387755, "grad_norm": 10.37144947052002, "learning_rate": 4.0820408163265304e-05, "loss": 3.2331, "step": 4504 }, { "epoch": 18.387755102040817, "grad_norm": 7.854357719421387, "learning_rate": 4.081836734693878e-05, "loss": 3.4751, "step": 4505 }, { "epoch": 18.39183673469388, "grad_norm": 8.535985946655273, "learning_rate": 4.0816326530612245e-05, "loss": 3.1997, "step": 4506 }, { "epoch": 18.39591836734694, "grad_norm": 12.26395320892334, "learning_rate": 4.081428571428572e-05, "loss": 2.9043, "step": 4507 }, { "epoch": 18.4, "grad_norm": 12.79742431640625, "learning_rate": 4.0812244897959186e-05, "loss": 2.8365, "step": 4508 }, { "epoch": 18.40408163265306, "grad_norm": 5.531776428222656, "learning_rate": 4.081020408163265e-05, "loss": 3.5797, "step": 4509 }, { "epoch": 18.408163265306122, "grad_norm": 11.785428047180176, "learning_rate": 4.080816326530613e-05, "loss": 2.931, "step": 4510 }, { "epoch": 18.412244897959184, "grad_norm": 9.464578628540039, "learning_rate": 4.0806122448979594e-05, "loss": 3.0021, "step": 4511 }, { "epoch": 18.416326530612245, "grad_norm": 6.270713806152344, "learning_rate": 4.080408163265306e-05, "loss": 3.4843, "step": 4512 }, { "epoch": 18.420408163265307, "grad_norm": 5.35970401763916, "learning_rate": 4.080204081632653e-05, "loss": 3.5401, "step": 4513 }, { "epoch": 18.42448979591837, "grad_norm": 13.85045337677002, "learning_rate": 4.08e-05, "loss": 2.786, "step": 4514 }, { "epoch": 18.428571428571427, "grad_norm": 11.187712669372559, "learning_rate": 4.0797959183673476e-05, "loss": 2.835, "step": 4515 }, { "epoch": 18.43265306122449, "grad_norm": 8.529922485351562, "learning_rate": 4.0795918367346943e-05, "loss": 3.4459, "step": 4516 }, { "epoch": 18.43673469387755, "grad_norm": 13.658329010009766, "learning_rate": 4.079387755102041e-05, "loss": 2.1155, "step": 4517 }, { "epoch": 18.440816326530612, "grad_norm": 6.754324436187744, "learning_rate": 4.079183673469388e-05, "loss": 3.5245, "step": 4518 }, { "epoch": 18.444897959183674, "grad_norm": 8.424922943115234, "learning_rate": 4.0789795918367345e-05, "loss": 3.1523, "step": 4519 }, { "epoch": 18.448979591836736, "grad_norm": 8.448745727539062, "learning_rate": 4.078775510204082e-05, "loss": 3.5368, "step": 4520 }, { "epoch": 18.453061224489797, "grad_norm": 8.023260116577148, "learning_rate": 4.0785714285714286e-05, "loss": 3.4931, "step": 4521 }, { "epoch": 18.457142857142856, "grad_norm": 12.73546028137207, "learning_rate": 4.078367346938776e-05, "loss": 3.0561, "step": 4522 }, { "epoch": 18.461224489795917, "grad_norm": 14.113677978515625, "learning_rate": 4.078163265306123e-05, "loss": 2.2216, "step": 4523 }, { "epoch": 18.46530612244898, "grad_norm": 10.060356140136719, "learning_rate": 4.07795918367347e-05, "loss": 2.9125, "step": 4524 }, { "epoch": 18.46938775510204, "grad_norm": 9.50307559967041, "learning_rate": 4.077755102040817e-05, "loss": 3.1358, "step": 4525 }, { "epoch": 18.473469387755102, "grad_norm": 5.376185417175293, "learning_rate": 4.0775510204081635e-05, "loss": 3.5093, "step": 4526 }, { "epoch": 18.477551020408164, "grad_norm": 9.495634078979492, "learning_rate": 4.07734693877551e-05, "loss": 2.9807, "step": 4527 }, { "epoch": 18.481632653061226, "grad_norm": 10.427177429199219, "learning_rate": 4.077142857142857e-05, "loss": 2.963, "step": 4528 }, { "epoch": 18.485714285714284, "grad_norm": 8.799001693725586, "learning_rate": 4.076938775510204e-05, "loss": 3.3968, "step": 4529 }, { "epoch": 18.489795918367346, "grad_norm": 17.81943702697754, "learning_rate": 4.076734693877551e-05, "loss": 2.1214, "step": 4530 }, { "epoch": 18.493877551020407, "grad_norm": 9.458839416503906, "learning_rate": 4.0765306122448984e-05, "loss": 3.1379, "step": 4531 }, { "epoch": 18.49795918367347, "grad_norm": 7.285837650299072, "learning_rate": 4.076326530612245e-05, "loss": 3.3559, "step": 4532 }, { "epoch": 18.50204081632653, "grad_norm": 5.8538289070129395, "learning_rate": 4.076122448979592e-05, "loss": 3.5319, "step": 4533 }, { "epoch": 18.506122448979593, "grad_norm": 5.94509744644165, "learning_rate": 4.075918367346939e-05, "loss": 3.5322, "step": 4534 }, { "epoch": 18.510204081632654, "grad_norm": 5.008271217346191, "learning_rate": 4.075714285714286e-05, "loss": 3.7285, "step": 4535 }, { "epoch": 18.514285714285712, "grad_norm": 8.124932289123535, "learning_rate": 4.0755102040816327e-05, "loss": 3.2495, "step": 4536 }, { "epoch": 18.518367346938774, "grad_norm": 13.33531379699707, "learning_rate": 4.0753061224489794e-05, "loss": 2.5437, "step": 4537 }, { "epoch": 18.522448979591836, "grad_norm": 17.205041885375977, "learning_rate": 4.075102040816327e-05, "loss": 2.7902, "step": 4538 }, { "epoch": 18.526530612244898, "grad_norm": 9.26437759399414, "learning_rate": 4.074897959183674e-05, "loss": 3.3731, "step": 4539 }, { "epoch": 18.53061224489796, "grad_norm": 6.709195137023926, "learning_rate": 4.074693877551021e-05, "loss": 3.785, "step": 4540 }, { "epoch": 18.53469387755102, "grad_norm": 8.432697296142578, "learning_rate": 4.0744897959183676e-05, "loss": 3.6229, "step": 4541 }, { "epoch": 18.538775510204083, "grad_norm": 6.896304130554199, "learning_rate": 4.074285714285714e-05, "loss": 3.7123, "step": 4542 }, { "epoch": 18.542857142857144, "grad_norm": 4.15034818649292, "learning_rate": 4.074081632653061e-05, "loss": 3.9839, "step": 4543 }, { "epoch": 18.546938775510203, "grad_norm": 7.034171104431152, "learning_rate": 4.0738775510204084e-05, "loss": 3.4309, "step": 4544 }, { "epoch": 18.551020408163264, "grad_norm": 5.752840042114258, "learning_rate": 4.073673469387755e-05, "loss": 3.6065, "step": 4545 }, { "epoch": 18.555102040816326, "grad_norm": 9.745903968811035, "learning_rate": 4.0734693877551025e-05, "loss": 2.888, "step": 4546 }, { "epoch": 18.559183673469388, "grad_norm": 4.826035499572754, "learning_rate": 4.073265306122449e-05, "loss": 3.856, "step": 4547 }, { "epoch": 18.56326530612245, "grad_norm": 15.543059349060059, "learning_rate": 4.073061224489796e-05, "loss": 2.2614, "step": 4548 }, { "epoch": 18.56734693877551, "grad_norm": 11.581957817077637, "learning_rate": 4.072857142857143e-05, "loss": 2.8909, "step": 4549 }, { "epoch": 18.571428571428573, "grad_norm": 11.981266021728516, "learning_rate": 4.07265306122449e-05, "loss": 3.0907, "step": 4550 }, { "epoch": 18.57551020408163, "grad_norm": 6.120373725891113, "learning_rate": 4.072448979591837e-05, "loss": 3.7379, "step": 4551 }, { "epoch": 18.579591836734693, "grad_norm": 9.362563133239746, "learning_rate": 4.0722448979591834e-05, "loss": 3.102, "step": 4552 }, { "epoch": 18.583673469387755, "grad_norm": 10.080269813537598, "learning_rate": 4.072040816326531e-05, "loss": 3.4524, "step": 4553 }, { "epoch": 18.587755102040816, "grad_norm": 7.3106608390808105, "learning_rate": 4.071836734693878e-05, "loss": 3.6078, "step": 4554 }, { "epoch": 18.591836734693878, "grad_norm": 10.490774154663086, "learning_rate": 4.071632653061225e-05, "loss": 3.1123, "step": 4555 }, { "epoch": 18.59591836734694, "grad_norm": 5.689733505249023, "learning_rate": 4.0714285714285717e-05, "loss": 3.483, "step": 4556 }, { "epoch": 18.6, "grad_norm": 4.903782844543457, "learning_rate": 4.0712244897959184e-05, "loss": 3.3797, "step": 4557 }, { "epoch": 18.60408163265306, "grad_norm": 5.133344650268555, "learning_rate": 4.071020408163265e-05, "loss": 3.8336, "step": 4558 }, { "epoch": 18.60816326530612, "grad_norm": 11.041606903076172, "learning_rate": 4.0708163265306125e-05, "loss": 3.1687, "step": 4559 }, { "epoch": 18.612244897959183, "grad_norm": 13.143675804138184, "learning_rate": 4.070612244897959e-05, "loss": 2.2643, "step": 4560 }, { "epoch": 18.616326530612245, "grad_norm": 6.307034492492676, "learning_rate": 4.0704081632653066e-05, "loss": 3.3471, "step": 4561 }, { "epoch": 18.620408163265306, "grad_norm": 4.791107177734375, "learning_rate": 4.070204081632653e-05, "loss": 3.7504, "step": 4562 }, { "epoch": 18.624489795918368, "grad_norm": 7.953855037689209, "learning_rate": 4.07e-05, "loss": 3.2729, "step": 4563 }, { "epoch": 18.62857142857143, "grad_norm": 7.94935941696167, "learning_rate": 4.0697959183673474e-05, "loss": 3.2659, "step": 4564 }, { "epoch": 18.632653061224488, "grad_norm": 8.920024871826172, "learning_rate": 4.069591836734694e-05, "loss": 3.3372, "step": 4565 }, { "epoch": 18.63673469387755, "grad_norm": 9.15135383605957, "learning_rate": 4.069387755102041e-05, "loss": 3.2133, "step": 4566 }, { "epoch": 18.64081632653061, "grad_norm": 7.821528434753418, "learning_rate": 4.0691836734693875e-05, "loss": 3.3831, "step": 4567 }, { "epoch": 18.644897959183673, "grad_norm": 14.721960067749023, "learning_rate": 4.068979591836735e-05, "loss": 1.7568, "step": 4568 }, { "epoch": 18.648979591836735, "grad_norm": 7.781970024108887, "learning_rate": 4.068775510204082e-05, "loss": 3.1657, "step": 4569 }, { "epoch": 18.653061224489797, "grad_norm": 8.180522918701172, "learning_rate": 4.068571428571429e-05, "loss": 3.208, "step": 4570 }, { "epoch": 18.65714285714286, "grad_norm": 10.595446586608887, "learning_rate": 4.068367346938776e-05, "loss": 3.0257, "step": 4571 }, { "epoch": 18.66122448979592, "grad_norm": 12.26594066619873, "learning_rate": 4.0681632653061224e-05, "loss": 2.9327, "step": 4572 }, { "epoch": 18.66530612244898, "grad_norm": 5.717643737792969, "learning_rate": 4.06795918367347e-05, "loss": 3.6821, "step": 4573 }, { "epoch": 18.66938775510204, "grad_norm": 15.247447967529297, "learning_rate": 4.0677551020408165e-05, "loss": 2.6917, "step": 4574 }, { "epoch": 18.6734693877551, "grad_norm": 13.211181640625, "learning_rate": 4.067551020408163e-05, "loss": 2.7255, "step": 4575 }, { "epoch": 18.677551020408163, "grad_norm": 9.37104320526123, "learning_rate": 4.0673469387755106e-05, "loss": 3.1171, "step": 4576 }, { "epoch": 18.681632653061225, "grad_norm": 10.337584495544434, "learning_rate": 4.0671428571428574e-05, "loss": 2.8455, "step": 4577 }, { "epoch": 18.685714285714287, "grad_norm": 15.344024658203125, "learning_rate": 4.066938775510205e-05, "loss": 2.067, "step": 4578 }, { "epoch": 18.68979591836735, "grad_norm": 12.577190399169922, "learning_rate": 4.0667346938775515e-05, "loss": 2.7412, "step": 4579 }, { "epoch": 18.693877551020407, "grad_norm": 3.9333999156951904, "learning_rate": 4.066530612244898e-05, "loss": 3.9095, "step": 4580 }, { "epoch": 18.69795918367347, "grad_norm": 12.680216789245605, "learning_rate": 4.066326530612245e-05, "loss": 3.1889, "step": 4581 }, { "epoch": 18.70204081632653, "grad_norm": 10.472064971923828, "learning_rate": 4.0661224489795916e-05, "loss": 3.1803, "step": 4582 }, { "epoch": 18.706122448979592, "grad_norm": 7.3587164878845215, "learning_rate": 4.065918367346939e-05, "loss": 3.1533, "step": 4583 }, { "epoch": 18.710204081632654, "grad_norm": 8.848609924316406, "learning_rate": 4.065714285714286e-05, "loss": 3.3676, "step": 4584 }, { "epoch": 18.714285714285715, "grad_norm": 9.52843189239502, "learning_rate": 4.065510204081633e-05, "loss": 3.4744, "step": 4585 }, { "epoch": 18.718367346938777, "grad_norm": 5.300967216491699, "learning_rate": 4.06530612244898e-05, "loss": 3.7403, "step": 4586 }, { "epoch": 18.722448979591835, "grad_norm": 4.149491786956787, "learning_rate": 4.0651020408163265e-05, "loss": 3.734, "step": 4587 }, { "epoch": 18.726530612244897, "grad_norm": 20.066631317138672, "learning_rate": 4.064897959183674e-05, "loss": 2.3372, "step": 4588 }, { "epoch": 18.73061224489796, "grad_norm": 12.2880220413208, "learning_rate": 4.0646938775510206e-05, "loss": 2.9961, "step": 4589 }, { "epoch": 18.73469387755102, "grad_norm": 6.812994956970215, "learning_rate": 4.064489795918367e-05, "loss": 3.3603, "step": 4590 }, { "epoch": 18.738775510204082, "grad_norm": 9.804471015930176, "learning_rate": 4.064285714285714e-05, "loss": 3.323, "step": 4591 }, { "epoch": 18.742857142857144, "grad_norm": 12.557943344116211, "learning_rate": 4.0640816326530614e-05, "loss": 2.2777, "step": 4592 }, { "epoch": 18.746938775510205, "grad_norm": 13.197333335876465, "learning_rate": 4.063877551020409e-05, "loss": 2.7514, "step": 4593 }, { "epoch": 18.751020408163264, "grad_norm": 5.50238037109375, "learning_rate": 4.0636734693877555e-05, "loss": 3.8469, "step": 4594 }, { "epoch": 18.755102040816325, "grad_norm": 4.393853664398193, "learning_rate": 4.063469387755102e-05, "loss": 3.9377, "step": 4595 }, { "epoch": 18.759183673469387, "grad_norm": 13.130724906921387, "learning_rate": 4.063265306122449e-05, "loss": 2.6552, "step": 4596 }, { "epoch": 18.76326530612245, "grad_norm": 8.228973388671875, "learning_rate": 4.063061224489796e-05, "loss": 3.0044, "step": 4597 }, { "epoch": 18.76734693877551, "grad_norm": 11.050023078918457, "learning_rate": 4.062857142857143e-05, "loss": 2.8518, "step": 4598 }, { "epoch": 18.771428571428572, "grad_norm": 6.105250835418701, "learning_rate": 4.06265306122449e-05, "loss": 3.6203, "step": 4599 }, { "epoch": 18.775510204081634, "grad_norm": 6.479179382324219, "learning_rate": 4.062448979591837e-05, "loss": 3.2915, "step": 4600 }, { "epoch": 18.779591836734696, "grad_norm": 5.829867362976074, "learning_rate": 4.062244897959184e-05, "loss": 3.8958, "step": 4601 }, { "epoch": 18.783673469387754, "grad_norm": 13.930352210998535, "learning_rate": 4.0620408163265306e-05, "loss": 3.3225, "step": 4602 }, { "epoch": 18.787755102040816, "grad_norm": 14.803834915161133, "learning_rate": 4.061836734693878e-05, "loss": 2.7086, "step": 4603 }, { "epoch": 18.791836734693877, "grad_norm": 12.37070369720459, "learning_rate": 4.061632653061225e-05, "loss": 2.7474, "step": 4604 }, { "epoch": 18.79591836734694, "grad_norm": 12.842610359191895, "learning_rate": 4.0614285714285714e-05, "loss": 2.783, "step": 4605 }, { "epoch": 18.8, "grad_norm": 12.75222110748291, "learning_rate": 4.061224489795918e-05, "loss": 2.7318, "step": 4606 }, { "epoch": 18.804081632653062, "grad_norm": 8.039068222045898, "learning_rate": 4.0610204081632655e-05, "loss": 3.2919, "step": 4607 }, { "epoch": 18.808163265306124, "grad_norm": 8.482644081115723, "learning_rate": 4.060816326530613e-05, "loss": 3.3047, "step": 4608 }, { "epoch": 18.812244897959182, "grad_norm": 6.9114789962768555, "learning_rate": 4.0606122448979596e-05, "loss": 3.4826, "step": 4609 }, { "epoch": 18.816326530612244, "grad_norm": 9.518494606018066, "learning_rate": 4.060408163265306e-05, "loss": 2.9379, "step": 4610 }, { "epoch": 18.820408163265306, "grad_norm": 5.305342674255371, "learning_rate": 4.060204081632653e-05, "loss": 3.477, "step": 4611 }, { "epoch": 18.824489795918367, "grad_norm": 5.562684535980225, "learning_rate": 4.0600000000000004e-05, "loss": 3.7055, "step": 4612 }, { "epoch": 18.82857142857143, "grad_norm": 12.713770866394043, "learning_rate": 4.059795918367347e-05, "loss": 2.9322, "step": 4613 }, { "epoch": 18.83265306122449, "grad_norm": 5.912574768066406, "learning_rate": 4.059591836734694e-05, "loss": 3.4917, "step": 4614 }, { "epoch": 18.836734693877553, "grad_norm": 14.931072235107422, "learning_rate": 4.059387755102041e-05, "loss": 3.3532, "step": 4615 }, { "epoch": 18.84081632653061, "grad_norm": 6.490711212158203, "learning_rate": 4.059183673469388e-05, "loss": 3.4448, "step": 4616 }, { "epoch": 18.844897959183672, "grad_norm": 7.859009742736816, "learning_rate": 4.0589795918367353e-05, "loss": 3.4708, "step": 4617 }, { "epoch": 18.848979591836734, "grad_norm": 4.825067520141602, "learning_rate": 4.058775510204082e-05, "loss": 3.7647, "step": 4618 }, { "epoch": 18.853061224489796, "grad_norm": 10.042527198791504, "learning_rate": 4.058571428571429e-05, "loss": 3.1273, "step": 4619 }, { "epoch": 18.857142857142858, "grad_norm": 10.442119598388672, "learning_rate": 4.0583673469387755e-05, "loss": 2.7912, "step": 4620 }, { "epoch": 18.86122448979592, "grad_norm": 12.843647956848145, "learning_rate": 4.058163265306122e-05, "loss": 2.1593, "step": 4621 }, { "epoch": 18.86530612244898, "grad_norm": 6.725691795349121, "learning_rate": 4.0579591836734696e-05, "loss": 3.2052, "step": 4622 }, { "epoch": 18.86938775510204, "grad_norm": 6.505337238311768, "learning_rate": 4.057755102040817e-05, "loss": 3.4565, "step": 4623 }, { "epoch": 18.8734693877551, "grad_norm": 11.541594505310059, "learning_rate": 4.057551020408164e-05, "loss": 3.343, "step": 4624 }, { "epoch": 18.877551020408163, "grad_norm": 6.247659683227539, "learning_rate": 4.0573469387755104e-05, "loss": 3.6008, "step": 4625 }, { "epoch": 18.881632653061224, "grad_norm": 6.797972679138184, "learning_rate": 4.057142857142857e-05, "loss": 3.368, "step": 4626 }, { "epoch": 18.885714285714286, "grad_norm": 9.847247123718262, "learning_rate": 4.0569387755102045e-05, "loss": 3.3478, "step": 4627 }, { "epoch": 18.889795918367348, "grad_norm": 10.74323844909668, "learning_rate": 4.056734693877551e-05, "loss": 2.6358, "step": 4628 }, { "epoch": 18.89387755102041, "grad_norm": 7.12266731262207, "learning_rate": 4.056530612244898e-05, "loss": 3.4931, "step": 4629 }, { "epoch": 18.897959183673468, "grad_norm": 6.457979679107666, "learning_rate": 4.056326530612245e-05, "loss": 3.5998, "step": 4630 }, { "epoch": 18.90204081632653, "grad_norm": 9.535665512084961, "learning_rate": 4.056122448979592e-05, "loss": 3.0003, "step": 4631 }, { "epoch": 18.90612244897959, "grad_norm": 7.034969806671143, "learning_rate": 4.0559183673469394e-05, "loss": 3.2296, "step": 4632 }, { "epoch": 18.910204081632653, "grad_norm": 4.693009853363037, "learning_rate": 4.055714285714286e-05, "loss": 3.8639, "step": 4633 }, { "epoch": 18.914285714285715, "grad_norm": 4.641902446746826, "learning_rate": 4.055510204081633e-05, "loss": 3.747, "step": 4634 }, { "epoch": 18.918367346938776, "grad_norm": 6.196042537689209, "learning_rate": 4.0553061224489796e-05, "loss": 3.3783, "step": 4635 }, { "epoch": 18.922448979591838, "grad_norm": 12.932909965515137, "learning_rate": 4.055102040816326e-05, "loss": 2.7458, "step": 4636 }, { "epoch": 18.926530612244896, "grad_norm": 7.298468112945557, "learning_rate": 4.054897959183674e-05, "loss": 3.4952, "step": 4637 }, { "epoch": 18.930612244897958, "grad_norm": 12.294219017028809, "learning_rate": 4.0546938775510204e-05, "loss": 2.8547, "step": 4638 }, { "epoch": 18.93469387755102, "grad_norm": 12.7113037109375, "learning_rate": 4.054489795918368e-05, "loss": 2.6892, "step": 4639 }, { "epoch": 18.93877551020408, "grad_norm": 8.454566955566406, "learning_rate": 4.0542857142857145e-05, "loss": 3.4816, "step": 4640 }, { "epoch": 18.942857142857143, "grad_norm": 7.465996742248535, "learning_rate": 4.054081632653061e-05, "loss": 3.2716, "step": 4641 }, { "epoch": 18.946938775510205, "grad_norm": 8.008447647094727, "learning_rate": 4.0538775510204086e-05, "loss": 3.5499, "step": 4642 }, { "epoch": 18.951020408163266, "grad_norm": 13.168207168579102, "learning_rate": 4.053673469387755e-05, "loss": 2.7253, "step": 4643 }, { "epoch": 18.955102040816328, "grad_norm": 6.098853588104248, "learning_rate": 4.053469387755102e-05, "loss": 3.374, "step": 4644 }, { "epoch": 18.959183673469386, "grad_norm": 5.437079906463623, "learning_rate": 4.053265306122449e-05, "loss": 3.4959, "step": 4645 }, { "epoch": 18.963265306122448, "grad_norm": 9.813836097717285, "learning_rate": 4.053061224489796e-05, "loss": 3.2528, "step": 4646 }, { "epoch": 18.96734693877551, "grad_norm": 8.209534645080566, "learning_rate": 4.0528571428571435e-05, "loss": 3.3044, "step": 4647 }, { "epoch": 18.97142857142857, "grad_norm": 5.402306079864502, "learning_rate": 4.05265306122449e-05, "loss": 3.5506, "step": 4648 }, { "epoch": 18.975510204081633, "grad_norm": 9.480337142944336, "learning_rate": 4.052448979591837e-05, "loss": 2.9039, "step": 4649 }, { "epoch": 18.979591836734695, "grad_norm": 8.274988174438477, "learning_rate": 4.0522448979591836e-05, "loss": 3.0095, "step": 4650 }, { "epoch": 18.983673469387757, "grad_norm": 9.09642505645752, "learning_rate": 4.0520408163265304e-05, "loss": 2.8596, "step": 4651 }, { "epoch": 18.987755102040815, "grad_norm": 9.834221839904785, "learning_rate": 4.051836734693878e-05, "loss": 2.9403, "step": 4652 }, { "epoch": 18.991836734693877, "grad_norm": 15.249787330627441, "learning_rate": 4.0516326530612245e-05, "loss": 2.2224, "step": 4653 }, { "epoch": 18.99591836734694, "grad_norm": 8.010138511657715, "learning_rate": 4.051428571428572e-05, "loss": 3.5549, "step": 4654 }, { "epoch": 19.0, "grad_norm": 13.795144081115723, "learning_rate": 4.0512244897959186e-05, "loss": 1.8742, "step": 4655 }, { "epoch": 19.00408163265306, "grad_norm": 11.553279876708984, "learning_rate": 4.051020408163265e-05, "loss": 2.7276, "step": 4656 }, { "epoch": 19.008163265306123, "grad_norm": 10.823563575744629, "learning_rate": 4.0508163265306127e-05, "loss": 3.1985, "step": 4657 }, { "epoch": 19.012244897959185, "grad_norm": 3.874253988265991, "learning_rate": 4.0506122448979594e-05, "loss": 3.8671, "step": 4658 }, { "epoch": 19.016326530612243, "grad_norm": 8.078750610351562, "learning_rate": 4.050408163265306e-05, "loss": 3.4234, "step": 4659 }, { "epoch": 19.020408163265305, "grad_norm": 8.50280475616455, "learning_rate": 4.050204081632653e-05, "loss": 3.2865, "step": 4660 }, { "epoch": 19.024489795918367, "grad_norm": 6.609981536865234, "learning_rate": 4.05e-05, "loss": 3.3281, "step": 4661 }, { "epoch": 19.02857142857143, "grad_norm": 6.905981540679932, "learning_rate": 4.0497959183673476e-05, "loss": 3.4899, "step": 4662 }, { "epoch": 19.03265306122449, "grad_norm": 11.640661239624023, "learning_rate": 4.049591836734694e-05, "loss": 3.5654, "step": 4663 }, { "epoch": 19.036734693877552, "grad_norm": 11.304819107055664, "learning_rate": 4.049387755102041e-05, "loss": 3.0615, "step": 4664 }, { "epoch": 19.040816326530614, "grad_norm": 9.867202758789062, "learning_rate": 4.049183673469388e-05, "loss": 3.3668, "step": 4665 }, { "epoch": 19.044897959183672, "grad_norm": 5.752011299133301, "learning_rate": 4.048979591836735e-05, "loss": 3.4303, "step": 4666 }, { "epoch": 19.048979591836734, "grad_norm": 13.455141067504883, "learning_rate": 4.048775510204082e-05, "loss": 2.4881, "step": 4667 }, { "epoch": 19.053061224489795, "grad_norm": 11.662710189819336, "learning_rate": 4.0485714285714285e-05, "loss": 2.439, "step": 4668 }, { "epoch": 19.057142857142857, "grad_norm": 9.715202331542969, "learning_rate": 4.048367346938776e-05, "loss": 3.0032, "step": 4669 }, { "epoch": 19.06122448979592, "grad_norm": 7.414673328399658, "learning_rate": 4.0481632653061226e-05, "loss": 3.6438, "step": 4670 }, { "epoch": 19.06530612244898, "grad_norm": 7.531999111175537, "learning_rate": 4.04795918367347e-05, "loss": 3.4364, "step": 4671 }, { "epoch": 19.069387755102042, "grad_norm": 6.143050670623779, "learning_rate": 4.047755102040817e-05, "loss": 3.8109, "step": 4672 }, { "epoch": 19.073469387755104, "grad_norm": 5.300349712371826, "learning_rate": 4.0475510204081634e-05, "loss": 3.6532, "step": 4673 }, { "epoch": 19.077551020408162, "grad_norm": 9.579927444458008, "learning_rate": 4.04734693877551e-05, "loss": 3.4602, "step": 4674 }, { "epoch": 19.081632653061224, "grad_norm": 4.6583356857299805, "learning_rate": 4.047142857142857e-05, "loss": 3.8632, "step": 4675 }, { "epoch": 19.085714285714285, "grad_norm": 5.610012054443359, "learning_rate": 4.046938775510204e-05, "loss": 3.855, "step": 4676 }, { "epoch": 19.089795918367347, "grad_norm": 17.299848556518555, "learning_rate": 4.0467346938775517e-05, "loss": 2.6345, "step": 4677 }, { "epoch": 19.09387755102041, "grad_norm": 10.325855255126953, "learning_rate": 4.0465306122448984e-05, "loss": 3.3268, "step": 4678 }, { "epoch": 19.09795918367347, "grad_norm": 8.820560455322266, "learning_rate": 4.046326530612245e-05, "loss": 3.1995, "step": 4679 }, { "epoch": 19.102040816326532, "grad_norm": 6.020683765411377, "learning_rate": 4.046122448979592e-05, "loss": 3.4126, "step": 4680 }, { "epoch": 19.10612244897959, "grad_norm": 8.92653751373291, "learning_rate": 4.045918367346939e-05, "loss": 3.1802, "step": 4681 }, { "epoch": 19.110204081632652, "grad_norm": 6.937068462371826, "learning_rate": 4.045714285714286e-05, "loss": 3.2867, "step": 4682 }, { "epoch": 19.114285714285714, "grad_norm": 7.653066635131836, "learning_rate": 4.0455102040816326e-05, "loss": 3.1045, "step": 4683 }, { "epoch": 19.118367346938776, "grad_norm": 5.319581508636475, "learning_rate": 4.04530612244898e-05, "loss": 3.6261, "step": 4684 }, { "epoch": 19.122448979591837, "grad_norm": 11.90207290649414, "learning_rate": 4.045102040816327e-05, "loss": 3.0699, "step": 4685 }, { "epoch": 19.1265306122449, "grad_norm": 7.2172017097473145, "learning_rate": 4.044897959183674e-05, "loss": 3.3786, "step": 4686 }, { "epoch": 19.13061224489796, "grad_norm": 5.082103252410889, "learning_rate": 4.044693877551021e-05, "loss": 3.7069, "step": 4687 }, { "epoch": 19.13469387755102, "grad_norm": 9.8975248336792, "learning_rate": 4.0444897959183675e-05, "loss": 2.7565, "step": 4688 }, { "epoch": 19.13877551020408, "grad_norm": 6.4265289306640625, "learning_rate": 4.044285714285714e-05, "loss": 3.5074, "step": 4689 }, { "epoch": 19.142857142857142, "grad_norm": 20.788684844970703, "learning_rate": 4.044081632653061e-05, "loss": 2.3443, "step": 4690 }, { "epoch": 19.146938775510204, "grad_norm": 10.767343521118164, "learning_rate": 4.0438775510204083e-05, "loss": 2.7574, "step": 4691 }, { "epoch": 19.151020408163266, "grad_norm": 8.876450538635254, "learning_rate": 4.043673469387755e-05, "loss": 3.3205, "step": 4692 }, { "epoch": 19.155102040816328, "grad_norm": 8.60230827331543, "learning_rate": 4.0434693877551024e-05, "loss": 3.3178, "step": 4693 }, { "epoch": 19.15918367346939, "grad_norm": 8.18294620513916, "learning_rate": 4.043265306122449e-05, "loss": 3.4663, "step": 4694 }, { "epoch": 19.163265306122447, "grad_norm": 14.893074989318848, "learning_rate": 4.043061224489796e-05, "loss": 1.6559, "step": 4695 }, { "epoch": 19.16734693877551, "grad_norm": 6.669955730438232, "learning_rate": 4.042857142857143e-05, "loss": 3.7052, "step": 4696 }, { "epoch": 19.17142857142857, "grad_norm": 4.655916690826416, "learning_rate": 4.04265306122449e-05, "loss": 3.6652, "step": 4697 }, { "epoch": 19.175510204081633, "grad_norm": 5.234415531158447, "learning_rate": 4.042448979591837e-05, "loss": 3.6236, "step": 4698 }, { "epoch": 19.179591836734694, "grad_norm": 9.5327730178833, "learning_rate": 4.0422448979591834e-05, "loss": 2.7931, "step": 4699 }, { "epoch": 19.183673469387756, "grad_norm": 8.874227523803711, "learning_rate": 4.042040816326531e-05, "loss": 3.1595, "step": 4700 }, { "epoch": 19.187755102040818, "grad_norm": 10.430251121520996, "learning_rate": 4.041836734693878e-05, "loss": 3.2397, "step": 4701 }, { "epoch": 19.191836734693876, "grad_norm": 11.315118789672852, "learning_rate": 4.041632653061225e-05, "loss": 2.9502, "step": 4702 }, { "epoch": 19.195918367346938, "grad_norm": 11.39794921875, "learning_rate": 4.0414285714285716e-05, "loss": 3.1613, "step": 4703 }, { "epoch": 19.2, "grad_norm": 11.35049819946289, "learning_rate": 4.041224489795918e-05, "loss": 3.0035, "step": 4704 }, { "epoch": 19.20408163265306, "grad_norm": 14.40557861328125, "learning_rate": 4.041020408163266e-05, "loss": 2.986, "step": 4705 }, { "epoch": 19.208163265306123, "grad_norm": 5.140707015991211, "learning_rate": 4.0408163265306124e-05, "loss": 3.7017, "step": 4706 }, { "epoch": 19.212244897959184, "grad_norm": 6.666274547576904, "learning_rate": 4.040612244897959e-05, "loss": 3.6241, "step": 4707 }, { "epoch": 19.216326530612246, "grad_norm": 8.515361785888672, "learning_rate": 4.0404081632653065e-05, "loss": 3.2021, "step": 4708 }, { "epoch": 19.220408163265308, "grad_norm": 7.812678337097168, "learning_rate": 4.040204081632653e-05, "loss": 3.4839, "step": 4709 }, { "epoch": 19.224489795918366, "grad_norm": 7.605022430419922, "learning_rate": 4.0400000000000006e-05, "loss": 3.4563, "step": 4710 }, { "epoch": 19.228571428571428, "grad_norm": 5.256904602050781, "learning_rate": 4.039795918367347e-05, "loss": 3.4006, "step": 4711 }, { "epoch": 19.23265306122449, "grad_norm": 7.968411445617676, "learning_rate": 4.039591836734694e-05, "loss": 3.3599, "step": 4712 }, { "epoch": 19.23673469387755, "grad_norm": 4.915560722351074, "learning_rate": 4.039387755102041e-05, "loss": 3.6241, "step": 4713 }, { "epoch": 19.240816326530613, "grad_norm": 4.329973220825195, "learning_rate": 4.0391836734693875e-05, "loss": 4.0522, "step": 4714 }, { "epoch": 19.244897959183675, "grad_norm": 8.87285041809082, "learning_rate": 4.038979591836735e-05, "loss": 3.1225, "step": 4715 }, { "epoch": 19.248979591836736, "grad_norm": 6.833964824676514, "learning_rate": 4.038775510204082e-05, "loss": 3.4895, "step": 4716 }, { "epoch": 19.253061224489795, "grad_norm": 8.077726364135742, "learning_rate": 4.038571428571429e-05, "loss": 3.083, "step": 4717 }, { "epoch": 19.257142857142856, "grad_norm": 17.55934715270996, "learning_rate": 4.038367346938776e-05, "loss": 2.5563, "step": 4718 }, { "epoch": 19.261224489795918, "grad_norm": 10.873769760131836, "learning_rate": 4.0381632653061224e-05, "loss": 3.1578, "step": 4719 }, { "epoch": 19.26530612244898, "grad_norm": 7.209128379821777, "learning_rate": 4.03795918367347e-05, "loss": 3.3103, "step": 4720 }, { "epoch": 19.26938775510204, "grad_norm": 18.373275756835938, "learning_rate": 4.0377551020408165e-05, "loss": 2.5464, "step": 4721 }, { "epoch": 19.273469387755103, "grad_norm": 12.74618911743164, "learning_rate": 4.037551020408163e-05, "loss": 2.8881, "step": 4722 }, { "epoch": 19.277551020408165, "grad_norm": 13.818059921264648, "learning_rate": 4.0373469387755106e-05, "loss": 2.7509, "step": 4723 }, { "epoch": 19.281632653061223, "grad_norm": 6.695840358734131, "learning_rate": 4.037142857142857e-05, "loss": 3.455, "step": 4724 }, { "epoch": 19.285714285714285, "grad_norm": 7.32656717300415, "learning_rate": 4.036938775510205e-05, "loss": 3.3908, "step": 4725 }, { "epoch": 19.289795918367346, "grad_norm": 8.152809143066406, "learning_rate": 4.0367346938775514e-05, "loss": 3.1031, "step": 4726 }, { "epoch": 19.293877551020408, "grad_norm": 7.131932258605957, "learning_rate": 4.036530612244898e-05, "loss": 3.2872, "step": 4727 }, { "epoch": 19.29795918367347, "grad_norm": 14.104764938354492, "learning_rate": 4.036326530612245e-05, "loss": 2.1245, "step": 4728 }, { "epoch": 19.30204081632653, "grad_norm": 7.502751350402832, "learning_rate": 4.0361224489795915e-05, "loss": 3.1343, "step": 4729 }, { "epoch": 19.306122448979593, "grad_norm": 10.420393943786621, "learning_rate": 4.035918367346939e-05, "loss": 3.2405, "step": 4730 }, { "epoch": 19.31020408163265, "grad_norm": 14.738826751708984, "learning_rate": 4.035714285714286e-05, "loss": 2.2317, "step": 4731 }, { "epoch": 19.314285714285713, "grad_norm": 13.781756401062012, "learning_rate": 4.035510204081633e-05, "loss": 2.5891, "step": 4732 }, { "epoch": 19.318367346938775, "grad_norm": 14.848628044128418, "learning_rate": 4.03530612244898e-05, "loss": 1.9806, "step": 4733 }, { "epoch": 19.322448979591837, "grad_norm": 10.684454917907715, "learning_rate": 4.0351020408163265e-05, "loss": 2.808, "step": 4734 }, { "epoch": 19.3265306122449, "grad_norm": 12.8388090133667, "learning_rate": 4.034897959183674e-05, "loss": 2.5924, "step": 4735 }, { "epoch": 19.33061224489796, "grad_norm": 8.539111137390137, "learning_rate": 4.0346938775510206e-05, "loss": 3.4412, "step": 4736 }, { "epoch": 19.33469387755102, "grad_norm": 25.93454933166504, "learning_rate": 4.034489795918367e-05, "loss": 1.9027, "step": 4737 }, { "epoch": 19.33877551020408, "grad_norm": 9.306153297424316, "learning_rate": 4.034285714285715e-05, "loss": 3.0896, "step": 4738 }, { "epoch": 19.34285714285714, "grad_norm": 12.904692649841309, "learning_rate": 4.0340816326530614e-05, "loss": 3.0294, "step": 4739 }, { "epoch": 19.346938775510203, "grad_norm": 5.275241374969482, "learning_rate": 4.033877551020409e-05, "loss": 3.81, "step": 4740 }, { "epoch": 19.351020408163265, "grad_norm": 8.09461498260498, "learning_rate": 4.0336734693877555e-05, "loss": 3.3698, "step": 4741 }, { "epoch": 19.355102040816327, "grad_norm": 6.42604398727417, "learning_rate": 4.033469387755102e-05, "loss": 3.5276, "step": 4742 }, { "epoch": 19.35918367346939, "grad_norm": 6.81157112121582, "learning_rate": 4.033265306122449e-05, "loss": 3.492, "step": 4743 }, { "epoch": 19.36326530612245, "grad_norm": 10.709258079528809, "learning_rate": 4.0330612244897956e-05, "loss": 3.3152, "step": 4744 }, { "epoch": 19.367346938775512, "grad_norm": 4.126951694488525, "learning_rate": 4.032857142857143e-05, "loss": 3.6853, "step": 4745 }, { "epoch": 19.37142857142857, "grad_norm": 21.832691192626953, "learning_rate": 4.03265306122449e-05, "loss": 2.2304, "step": 4746 }, { "epoch": 19.375510204081632, "grad_norm": 12.112852096557617, "learning_rate": 4.032448979591837e-05, "loss": 2.6629, "step": 4747 }, { "epoch": 19.379591836734694, "grad_norm": 17.689605712890625, "learning_rate": 4.032244897959184e-05, "loss": 2.2191, "step": 4748 }, { "epoch": 19.383673469387755, "grad_norm": 12.746465682983398, "learning_rate": 4.032040816326531e-05, "loss": 3.0958, "step": 4749 }, { "epoch": 19.387755102040817, "grad_norm": 9.790987968444824, "learning_rate": 4.031836734693878e-05, "loss": 3.0683, "step": 4750 }, { "epoch": 19.39183673469388, "grad_norm": 10.189799308776855, "learning_rate": 4.0316326530612246e-05, "loss": 2.8346, "step": 4751 }, { "epoch": 19.39591836734694, "grad_norm": 14.414178848266602, "learning_rate": 4.0314285714285714e-05, "loss": 2.5513, "step": 4752 }, { "epoch": 19.4, "grad_norm": 14.024194717407227, "learning_rate": 4.031224489795918e-05, "loss": 2.6153, "step": 4753 }, { "epoch": 19.40408163265306, "grad_norm": 6.836727142333984, "learning_rate": 4.0310204081632655e-05, "loss": 3.4441, "step": 4754 }, { "epoch": 19.408163265306122, "grad_norm": 12.255256652832031, "learning_rate": 4.030816326530613e-05, "loss": 2.7119, "step": 4755 }, { "epoch": 19.412244897959184, "grad_norm": 6.461723327636719, "learning_rate": 4.0306122448979596e-05, "loss": 3.4832, "step": 4756 }, { "epoch": 19.416326530612245, "grad_norm": 9.194925308227539, "learning_rate": 4.030408163265306e-05, "loss": 2.9432, "step": 4757 }, { "epoch": 19.420408163265307, "grad_norm": 10.596412658691406, "learning_rate": 4.030204081632653e-05, "loss": 3.079, "step": 4758 }, { "epoch": 19.42448979591837, "grad_norm": 14.914515495300293, "learning_rate": 4.0300000000000004e-05, "loss": 3.2432, "step": 4759 }, { "epoch": 19.428571428571427, "grad_norm": 7.629842758178711, "learning_rate": 4.029795918367347e-05, "loss": 3.4647, "step": 4760 }, { "epoch": 19.43265306122449, "grad_norm": 8.309290885925293, "learning_rate": 4.029591836734694e-05, "loss": 3.1525, "step": 4761 }, { "epoch": 19.43673469387755, "grad_norm": 6.921653747558594, "learning_rate": 4.029387755102041e-05, "loss": 3.5814, "step": 4762 }, { "epoch": 19.440816326530612, "grad_norm": 14.259243965148926, "learning_rate": 4.029183673469388e-05, "loss": 3.2331, "step": 4763 }, { "epoch": 19.444897959183674, "grad_norm": 8.429927825927734, "learning_rate": 4.028979591836735e-05, "loss": 3.1637, "step": 4764 }, { "epoch": 19.448979591836736, "grad_norm": 5.762011528015137, "learning_rate": 4.028775510204082e-05, "loss": 3.7998, "step": 4765 }, { "epoch": 19.453061224489797, "grad_norm": 7.238094806671143, "learning_rate": 4.028571428571429e-05, "loss": 3.3066, "step": 4766 }, { "epoch": 19.457142857142856, "grad_norm": 4.791992664337158, "learning_rate": 4.0283673469387754e-05, "loss": 3.8122, "step": 4767 }, { "epoch": 19.461224489795917, "grad_norm": 7.163934230804443, "learning_rate": 4.028163265306122e-05, "loss": 3.4717, "step": 4768 }, { "epoch": 19.46530612244898, "grad_norm": 8.204178810119629, "learning_rate": 4.0279591836734695e-05, "loss": 3.3537, "step": 4769 }, { "epoch": 19.46938775510204, "grad_norm": 9.413778305053711, "learning_rate": 4.027755102040817e-05, "loss": 3.1645, "step": 4770 }, { "epoch": 19.473469387755102, "grad_norm": 12.346132278442383, "learning_rate": 4.0275510204081636e-05, "loss": 2.1703, "step": 4771 }, { "epoch": 19.477551020408164, "grad_norm": 11.102118492126465, "learning_rate": 4.0273469387755104e-05, "loss": 2.8527, "step": 4772 }, { "epoch": 19.481632653061226, "grad_norm": 5.099610805511475, "learning_rate": 4.027142857142857e-05, "loss": 3.3234, "step": 4773 }, { "epoch": 19.485714285714284, "grad_norm": 6.199872970581055, "learning_rate": 4.0269387755102045e-05, "loss": 3.4891, "step": 4774 }, { "epoch": 19.489795918367346, "grad_norm": 5.463296413421631, "learning_rate": 4.026734693877551e-05, "loss": 3.5304, "step": 4775 }, { "epoch": 19.493877551020407, "grad_norm": 12.153287887573242, "learning_rate": 4.026530612244898e-05, "loss": 2.7266, "step": 4776 }, { "epoch": 19.49795918367347, "grad_norm": 6.489790916442871, "learning_rate": 4.026326530612245e-05, "loss": 3.6309, "step": 4777 }, { "epoch": 19.50204081632653, "grad_norm": 10.296860694885254, "learning_rate": 4.026122448979592e-05, "loss": 3.0261, "step": 4778 }, { "epoch": 19.506122448979593, "grad_norm": 10.709298133850098, "learning_rate": 4.0259183673469394e-05, "loss": 3.1662, "step": 4779 }, { "epoch": 19.510204081632654, "grad_norm": 21.102745056152344, "learning_rate": 4.025714285714286e-05, "loss": 1.7781, "step": 4780 }, { "epoch": 19.514285714285712, "grad_norm": 7.420272350311279, "learning_rate": 4.025510204081633e-05, "loss": 3.3792, "step": 4781 }, { "epoch": 19.518367346938774, "grad_norm": 13.269105911254883, "learning_rate": 4.0253061224489795e-05, "loss": 2.8884, "step": 4782 }, { "epoch": 19.522448979591836, "grad_norm": 6.381002426147461, "learning_rate": 4.025102040816326e-05, "loss": 3.3359, "step": 4783 }, { "epoch": 19.526530612244898, "grad_norm": 9.562445640563965, "learning_rate": 4.0248979591836736e-05, "loss": 2.9515, "step": 4784 }, { "epoch": 19.53061224489796, "grad_norm": 7.821256160736084, "learning_rate": 4.024693877551021e-05, "loss": 3.4572, "step": 4785 }, { "epoch": 19.53469387755102, "grad_norm": 9.172015190124512, "learning_rate": 4.024489795918368e-05, "loss": 3.259, "step": 4786 }, { "epoch": 19.538775510204083, "grad_norm": 11.551191329956055, "learning_rate": 4.0242857142857144e-05, "loss": 2.8541, "step": 4787 }, { "epoch": 19.542857142857144, "grad_norm": 9.56283187866211, "learning_rate": 4.024081632653061e-05, "loss": 3.3605, "step": 4788 }, { "epoch": 19.546938775510203, "grad_norm": 6.585773468017578, "learning_rate": 4.0238775510204085e-05, "loss": 3.6422, "step": 4789 }, { "epoch": 19.551020408163264, "grad_norm": 19.719287872314453, "learning_rate": 4.023673469387755e-05, "loss": 2.0318, "step": 4790 }, { "epoch": 19.555102040816326, "grad_norm": 7.301337242126465, "learning_rate": 4.023469387755102e-05, "loss": 3.2773, "step": 4791 }, { "epoch": 19.559183673469388, "grad_norm": 13.472296714782715, "learning_rate": 4.023265306122449e-05, "loss": 3.0532, "step": 4792 }, { "epoch": 19.56326530612245, "grad_norm": 17.04705047607422, "learning_rate": 4.023061224489796e-05, "loss": 2.0273, "step": 4793 }, { "epoch": 19.56734693877551, "grad_norm": 11.584127426147461, "learning_rate": 4.0228571428571434e-05, "loss": 3.1333, "step": 4794 }, { "epoch": 19.571428571428573, "grad_norm": 7.4030656814575195, "learning_rate": 4.02265306122449e-05, "loss": 3.3872, "step": 4795 }, { "epoch": 19.57551020408163, "grad_norm": 14.471546173095703, "learning_rate": 4.022448979591837e-05, "loss": 2.945, "step": 4796 }, { "epoch": 19.579591836734693, "grad_norm": 6.339555740356445, "learning_rate": 4.0222448979591836e-05, "loss": 3.6463, "step": 4797 }, { "epoch": 19.583673469387755, "grad_norm": 11.144245147705078, "learning_rate": 4.022040816326531e-05, "loss": 3.0365, "step": 4798 }, { "epoch": 19.587755102040816, "grad_norm": 14.358169555664062, "learning_rate": 4.021836734693878e-05, "loss": 3.0913, "step": 4799 }, { "epoch": 19.591836734693878, "grad_norm": 7.95171594619751, "learning_rate": 4.0216326530612244e-05, "loss": 3.3117, "step": 4800 }, { "epoch": 19.59591836734694, "grad_norm": 7.26198673248291, "learning_rate": 4.021428571428572e-05, "loss": 3.3715, "step": 4801 }, { "epoch": 19.6, "grad_norm": 9.36670970916748, "learning_rate": 4.0212244897959185e-05, "loss": 3.4539, "step": 4802 }, { "epoch": 19.60408163265306, "grad_norm": 13.285537719726562, "learning_rate": 4.021020408163266e-05, "loss": 2.6099, "step": 4803 }, { "epoch": 19.60816326530612, "grad_norm": 5.642730712890625, "learning_rate": 4.0208163265306126e-05, "loss": 3.7557, "step": 4804 }, { "epoch": 19.612244897959183, "grad_norm": 9.823885917663574, "learning_rate": 4.020612244897959e-05, "loss": 2.7492, "step": 4805 }, { "epoch": 19.616326530612245, "grad_norm": 9.527442932128906, "learning_rate": 4.020408163265306e-05, "loss": 2.8124, "step": 4806 }, { "epoch": 19.620408163265306, "grad_norm": 7.925368309020996, "learning_rate": 4.020204081632653e-05, "loss": 3.1082, "step": 4807 }, { "epoch": 19.624489795918368, "grad_norm": 5.516655445098877, "learning_rate": 4.02e-05, "loss": 3.6948, "step": 4808 }, { "epoch": 19.62857142857143, "grad_norm": 6.154831409454346, "learning_rate": 4.0197959183673475e-05, "loss": 3.6731, "step": 4809 }, { "epoch": 19.632653061224488, "grad_norm": 9.625448226928711, "learning_rate": 4.019591836734694e-05, "loss": 2.9543, "step": 4810 }, { "epoch": 19.63673469387755, "grad_norm": 10.021682739257812, "learning_rate": 4.019387755102041e-05, "loss": 3.26, "step": 4811 }, { "epoch": 19.64081632653061, "grad_norm": 7.826672077178955, "learning_rate": 4.019183673469388e-05, "loss": 3.3272, "step": 4812 }, { "epoch": 19.644897959183673, "grad_norm": 10.44068431854248, "learning_rate": 4.018979591836735e-05, "loss": 3.1817, "step": 4813 }, { "epoch": 19.648979591836735, "grad_norm": 7.182497978210449, "learning_rate": 4.018775510204082e-05, "loss": 3.4956, "step": 4814 }, { "epoch": 19.653061224489797, "grad_norm": 7.186047077178955, "learning_rate": 4.0185714285714285e-05, "loss": 3.523, "step": 4815 }, { "epoch": 19.65714285714286, "grad_norm": 5.519247055053711, "learning_rate": 4.018367346938776e-05, "loss": 3.4729, "step": 4816 }, { "epoch": 19.66122448979592, "grad_norm": 9.470468521118164, "learning_rate": 4.0181632653061226e-05, "loss": 3.139, "step": 4817 }, { "epoch": 19.66530612244898, "grad_norm": 7.233153343200684, "learning_rate": 4.01795918367347e-05, "loss": 3.4387, "step": 4818 }, { "epoch": 19.66938775510204, "grad_norm": 7.337748050689697, "learning_rate": 4.017755102040817e-05, "loss": 3.5536, "step": 4819 }, { "epoch": 19.6734693877551, "grad_norm": 6.721665859222412, "learning_rate": 4.0175510204081634e-05, "loss": 3.7042, "step": 4820 }, { "epoch": 19.677551020408163, "grad_norm": 4.245735168457031, "learning_rate": 4.01734693877551e-05, "loss": 3.9664, "step": 4821 }, { "epoch": 19.681632653061225, "grad_norm": 10.821011543273926, "learning_rate": 4.017142857142857e-05, "loss": 2.8842, "step": 4822 }, { "epoch": 19.685714285714287, "grad_norm": 8.794407844543457, "learning_rate": 4.016938775510204e-05, "loss": 3.2188, "step": 4823 }, { "epoch": 19.68979591836735, "grad_norm": 17.942707061767578, "learning_rate": 4.0167346938775516e-05, "loss": 1.9799, "step": 4824 }, { "epoch": 19.693877551020407, "grad_norm": 9.576798439025879, "learning_rate": 4.016530612244898e-05, "loss": 2.5909, "step": 4825 }, { "epoch": 19.69795918367347, "grad_norm": 10.913355827331543, "learning_rate": 4.016326530612245e-05, "loss": 3.3508, "step": 4826 }, { "epoch": 19.70204081632653, "grad_norm": 14.89827823638916, "learning_rate": 4.016122448979592e-05, "loss": 2.6314, "step": 4827 }, { "epoch": 19.706122448979592, "grad_norm": 5.435293197631836, "learning_rate": 4.015918367346939e-05, "loss": 3.6468, "step": 4828 }, { "epoch": 19.710204081632654, "grad_norm": 9.561431884765625, "learning_rate": 4.015714285714286e-05, "loss": 3.29, "step": 4829 }, { "epoch": 19.714285714285715, "grad_norm": 13.18609619140625, "learning_rate": 4.0155102040816326e-05, "loss": 3.0423, "step": 4830 }, { "epoch": 19.718367346938777, "grad_norm": 9.64344310760498, "learning_rate": 4.01530612244898e-05, "loss": 3.1736, "step": 4831 }, { "epoch": 19.722448979591835, "grad_norm": 7.1121087074279785, "learning_rate": 4.0151020408163267e-05, "loss": 3.5159, "step": 4832 }, { "epoch": 19.726530612244897, "grad_norm": 12.170665740966797, "learning_rate": 4.014897959183674e-05, "loss": 2.5555, "step": 4833 }, { "epoch": 19.73061224489796, "grad_norm": 9.783438682556152, "learning_rate": 4.014693877551021e-05, "loss": 3.1813, "step": 4834 }, { "epoch": 19.73469387755102, "grad_norm": 12.373865127563477, "learning_rate": 4.0144897959183675e-05, "loss": 2.8596, "step": 4835 }, { "epoch": 19.738775510204082, "grad_norm": 12.75149917602539, "learning_rate": 4.014285714285714e-05, "loss": 2.5852, "step": 4836 }, { "epoch": 19.742857142857144, "grad_norm": 6.259061813354492, "learning_rate": 4.014081632653061e-05, "loss": 3.4508, "step": 4837 }, { "epoch": 19.746938775510205, "grad_norm": 5.344852447509766, "learning_rate": 4.013877551020408e-05, "loss": 3.8045, "step": 4838 }, { "epoch": 19.751020408163264, "grad_norm": 13.512680053710938, "learning_rate": 4.013673469387756e-05, "loss": 2.8581, "step": 4839 }, { "epoch": 19.755102040816325, "grad_norm": 5.643545150756836, "learning_rate": 4.0134693877551024e-05, "loss": 3.4228, "step": 4840 }, { "epoch": 19.759183673469387, "grad_norm": 9.906487464904785, "learning_rate": 4.013265306122449e-05, "loss": 3.248, "step": 4841 }, { "epoch": 19.76326530612245, "grad_norm": 14.522542953491211, "learning_rate": 4.0130612244897965e-05, "loss": 2.8745, "step": 4842 }, { "epoch": 19.76734693877551, "grad_norm": 9.309378623962402, "learning_rate": 4.012857142857143e-05, "loss": 3.4205, "step": 4843 }, { "epoch": 19.771428571428572, "grad_norm": 6.4688720703125, "learning_rate": 4.01265306122449e-05, "loss": 3.3201, "step": 4844 }, { "epoch": 19.775510204081634, "grad_norm": 11.420817375183105, "learning_rate": 4.0124489795918366e-05, "loss": 2.7633, "step": 4845 }, { "epoch": 19.779591836734696, "grad_norm": 7.419734954833984, "learning_rate": 4.0122448979591833e-05, "loss": 3.4413, "step": 4846 }, { "epoch": 19.783673469387754, "grad_norm": 8.17078685760498, "learning_rate": 4.012040816326531e-05, "loss": 3.4881, "step": 4847 }, { "epoch": 19.787755102040816, "grad_norm": 14.732267379760742, "learning_rate": 4.011836734693878e-05, "loss": 1.9811, "step": 4848 }, { "epoch": 19.791836734693877, "grad_norm": 9.266944885253906, "learning_rate": 4.011632653061225e-05, "loss": 2.8669, "step": 4849 }, { "epoch": 19.79591836734694, "grad_norm": 15.039298057556152, "learning_rate": 4.0114285714285715e-05, "loss": 2.6991, "step": 4850 }, { "epoch": 19.8, "grad_norm": 11.646677017211914, "learning_rate": 4.011224489795918e-05, "loss": 2.6894, "step": 4851 }, { "epoch": 19.804081632653062, "grad_norm": 6.479663372039795, "learning_rate": 4.0110204081632657e-05, "loss": 3.7119, "step": 4852 }, { "epoch": 19.808163265306124, "grad_norm": 11.6280517578125, "learning_rate": 4.0108163265306124e-05, "loss": 3.1079, "step": 4853 }, { "epoch": 19.812244897959182, "grad_norm": 15.2257080078125, "learning_rate": 4.010612244897959e-05, "loss": 2.1119, "step": 4854 }, { "epoch": 19.816326530612244, "grad_norm": 6.273597240447998, "learning_rate": 4.0104081632653065e-05, "loss": 3.587, "step": 4855 }, { "epoch": 19.820408163265306, "grad_norm": 8.068650245666504, "learning_rate": 4.010204081632653e-05, "loss": 3.5536, "step": 4856 }, { "epoch": 19.824489795918367, "grad_norm": 30.046632766723633, "learning_rate": 4.0100000000000006e-05, "loss": 0.9556, "step": 4857 }, { "epoch": 19.82857142857143, "grad_norm": 6.398153781890869, "learning_rate": 4.009795918367347e-05, "loss": 3.5991, "step": 4858 }, { "epoch": 19.83265306122449, "grad_norm": 7.7891926765441895, "learning_rate": 4.009591836734694e-05, "loss": 3.5347, "step": 4859 }, { "epoch": 19.836734693877553, "grad_norm": 12.616893768310547, "learning_rate": 4.009387755102041e-05, "loss": 2.6313, "step": 4860 }, { "epoch": 19.84081632653061, "grad_norm": 13.733794212341309, "learning_rate": 4.0091836734693874e-05, "loss": 3.2463, "step": 4861 }, { "epoch": 19.844897959183672, "grad_norm": 6.895021438598633, "learning_rate": 4.008979591836735e-05, "loss": 3.261, "step": 4862 }, { "epoch": 19.848979591836734, "grad_norm": 8.219829559326172, "learning_rate": 4.008775510204082e-05, "loss": 3.4646, "step": 4863 }, { "epoch": 19.853061224489796, "grad_norm": 6.2234954833984375, "learning_rate": 4.008571428571429e-05, "loss": 3.5636, "step": 4864 }, { "epoch": 19.857142857142858, "grad_norm": 14.158602714538574, "learning_rate": 4.0083673469387756e-05, "loss": 2.4941, "step": 4865 }, { "epoch": 19.86122448979592, "grad_norm": 7.338642120361328, "learning_rate": 4.008163265306122e-05, "loss": 3.586, "step": 4866 }, { "epoch": 19.86530612244898, "grad_norm": 8.751904487609863, "learning_rate": 4.00795918367347e-05, "loss": 3.5882, "step": 4867 }, { "epoch": 19.86938775510204, "grad_norm": 15.956904411315918, "learning_rate": 4.0077551020408164e-05, "loss": 2.7754, "step": 4868 }, { "epoch": 19.8734693877551, "grad_norm": 20.963134765625, "learning_rate": 4.007551020408163e-05, "loss": 2.7751, "step": 4869 }, { "epoch": 19.877551020408163, "grad_norm": 9.690105438232422, "learning_rate": 4.0073469387755105e-05, "loss": 3.2795, "step": 4870 }, { "epoch": 19.881632653061224, "grad_norm": 5.728886127471924, "learning_rate": 4.007142857142857e-05, "loss": 3.7059, "step": 4871 }, { "epoch": 19.885714285714286, "grad_norm": 9.522072792053223, "learning_rate": 4.0069387755102046e-05, "loss": 3.5009, "step": 4872 }, { "epoch": 19.889795918367348, "grad_norm": 6.530374050140381, "learning_rate": 4.0067346938775514e-05, "loss": 3.5442, "step": 4873 }, { "epoch": 19.89387755102041, "grad_norm": 7.5130205154418945, "learning_rate": 4.006530612244898e-05, "loss": 3.1442, "step": 4874 }, { "epoch": 19.897959183673468, "grad_norm": 14.814940452575684, "learning_rate": 4.006326530612245e-05, "loss": 2.0907, "step": 4875 }, { "epoch": 19.90204081632653, "grad_norm": 9.0040283203125, "learning_rate": 4.0061224489795915e-05, "loss": 3.3182, "step": 4876 }, { "epoch": 19.90612244897959, "grad_norm": 7.838642120361328, "learning_rate": 4.005918367346939e-05, "loss": 3.2659, "step": 4877 }, { "epoch": 19.910204081632653, "grad_norm": 9.105779647827148, "learning_rate": 4.005714285714286e-05, "loss": 3.4856, "step": 4878 }, { "epoch": 19.914285714285715, "grad_norm": 12.627882957458496, "learning_rate": 4.005510204081633e-05, "loss": 2.8429, "step": 4879 }, { "epoch": 19.918367346938776, "grad_norm": 8.342013359069824, "learning_rate": 4.00530612244898e-05, "loss": 3.2798, "step": 4880 }, { "epoch": 19.922448979591838, "grad_norm": 15.092728614807129, "learning_rate": 4.0051020408163264e-05, "loss": 2.582, "step": 4881 }, { "epoch": 19.926530612244896, "grad_norm": 7.5983357429504395, "learning_rate": 4.004897959183674e-05, "loss": 3.3798, "step": 4882 }, { "epoch": 19.930612244897958, "grad_norm": 20.87086296081543, "learning_rate": 4.0046938775510205e-05, "loss": 2.7513, "step": 4883 }, { "epoch": 19.93469387755102, "grad_norm": 8.96458625793457, "learning_rate": 4.004489795918367e-05, "loss": 3.2361, "step": 4884 }, { "epoch": 19.93877551020408, "grad_norm": 7.795864105224609, "learning_rate": 4.0042857142857146e-05, "loss": 3.3461, "step": 4885 }, { "epoch": 19.942857142857143, "grad_norm": 8.435327529907227, "learning_rate": 4.004081632653062e-05, "loss": 3.4162, "step": 4886 }, { "epoch": 19.946938775510205, "grad_norm": 10.954415321350098, "learning_rate": 4.003877551020409e-05, "loss": 2.6551, "step": 4887 }, { "epoch": 19.951020408163266, "grad_norm": 5.849365234375, "learning_rate": 4.0036734693877554e-05, "loss": 3.4711, "step": 4888 }, { "epoch": 19.955102040816328, "grad_norm": 5.93666934967041, "learning_rate": 4.003469387755102e-05, "loss": 3.6503, "step": 4889 }, { "epoch": 19.959183673469386, "grad_norm": 8.00197696685791, "learning_rate": 4.003265306122449e-05, "loss": 3.2063, "step": 4890 }, { "epoch": 19.963265306122448, "grad_norm": 15.374115943908691, "learning_rate": 4.003061224489796e-05, "loss": 2.6732, "step": 4891 }, { "epoch": 19.96734693877551, "grad_norm": 9.360815048217773, "learning_rate": 4.002857142857143e-05, "loss": 3.2147, "step": 4892 }, { "epoch": 19.97142857142857, "grad_norm": 7.395462512969971, "learning_rate": 4.0026530612244904e-05, "loss": 3.6023, "step": 4893 }, { "epoch": 19.975510204081633, "grad_norm": 18.18115234375, "learning_rate": 4.002448979591837e-05, "loss": 1.6961, "step": 4894 }, { "epoch": 19.979591836734695, "grad_norm": 16.438879013061523, "learning_rate": 4.002244897959184e-05, "loss": 2.2919, "step": 4895 }, { "epoch": 19.983673469387757, "grad_norm": 9.718236923217773, "learning_rate": 4.002040816326531e-05, "loss": 3.0253, "step": 4896 }, { "epoch": 19.987755102040815, "grad_norm": 10.833646774291992, "learning_rate": 4.001836734693878e-05, "loss": 2.9011, "step": 4897 }, { "epoch": 19.991836734693877, "grad_norm": 6.659721851348877, "learning_rate": 4.0016326530612246e-05, "loss": 3.4802, "step": 4898 }, { "epoch": 19.99591836734694, "grad_norm": 9.230568885803223, "learning_rate": 4.001428571428571e-05, "loss": 3.1605, "step": 4899 }, { "epoch": 20.0, "grad_norm": 14.102110862731934, "learning_rate": 4.001224489795918e-05, "loss": 3.0811, "step": 4900 }, { "epoch": 20.00408163265306, "grad_norm": 12.773348808288574, "learning_rate": 4.0010204081632654e-05, "loss": 2.6887, "step": 4901 }, { "epoch": 20.008163265306123, "grad_norm": 20.383609771728516, "learning_rate": 4.000816326530613e-05, "loss": 1.9126, "step": 4902 }, { "epoch": 20.012244897959185, "grad_norm": 9.516345977783203, "learning_rate": 4.0006122448979595e-05, "loss": 3.0014, "step": 4903 }, { "epoch": 20.016326530612243, "grad_norm": 6.153237342834473, "learning_rate": 4.000408163265306e-05, "loss": 3.4631, "step": 4904 }, { "epoch": 20.020408163265305, "grad_norm": 6.007094860076904, "learning_rate": 4.000204081632653e-05, "loss": 3.2214, "step": 4905 }, { "epoch": 20.024489795918367, "grad_norm": 6.100944995880127, "learning_rate": 4e-05, "loss": 3.4203, "step": 4906 }, { "epoch": 20.02857142857143, "grad_norm": 15.820619583129883, "learning_rate": 3.999795918367347e-05, "loss": 2.1096, "step": 4907 }, { "epoch": 20.03265306122449, "grad_norm": 8.482933044433594, "learning_rate": 3.999591836734694e-05, "loss": 3.4033, "step": 4908 }, { "epoch": 20.036734693877552, "grad_norm": 6.967326641082764, "learning_rate": 3.999387755102041e-05, "loss": 3.415, "step": 4909 }, { "epoch": 20.040816326530614, "grad_norm": 8.757939338684082, "learning_rate": 3.999183673469388e-05, "loss": 3.1338, "step": 4910 }, { "epoch": 20.044897959183672, "grad_norm": 10.982855796813965, "learning_rate": 3.998979591836735e-05, "loss": 2.7113, "step": 4911 }, { "epoch": 20.048979591836734, "grad_norm": 11.3953275680542, "learning_rate": 3.998775510204082e-05, "loss": 2.6031, "step": 4912 }, { "epoch": 20.053061224489795, "grad_norm": 10.272797584533691, "learning_rate": 3.998571428571429e-05, "loss": 3.0699, "step": 4913 }, { "epoch": 20.057142857142857, "grad_norm": 15.553187370300293, "learning_rate": 3.9983673469387754e-05, "loss": 2.5475, "step": 4914 }, { "epoch": 20.06122448979592, "grad_norm": 6.844090938568115, "learning_rate": 3.998163265306122e-05, "loss": 3.444, "step": 4915 }, { "epoch": 20.06530612244898, "grad_norm": 14.992759704589844, "learning_rate": 3.9979591836734695e-05, "loss": 2.6027, "step": 4916 }, { "epoch": 20.069387755102042, "grad_norm": 17.88313865661621, "learning_rate": 3.997755102040817e-05, "loss": 1.4434, "step": 4917 }, { "epoch": 20.073469387755104, "grad_norm": 12.933293342590332, "learning_rate": 3.9975510204081636e-05, "loss": 2.4934, "step": 4918 }, { "epoch": 20.077551020408162, "grad_norm": 8.498187065124512, "learning_rate": 3.99734693877551e-05, "loss": 3.0386, "step": 4919 }, { "epoch": 20.081632653061224, "grad_norm": 19.48438262939453, "learning_rate": 3.997142857142857e-05, "loss": 2.5888, "step": 4920 }, { "epoch": 20.085714285714285, "grad_norm": 12.141260147094727, "learning_rate": 3.9969387755102044e-05, "loss": 2.9439, "step": 4921 }, { "epoch": 20.089795918367347, "grad_norm": 11.01434326171875, "learning_rate": 3.996734693877551e-05, "loss": 3.2629, "step": 4922 }, { "epoch": 20.09387755102041, "grad_norm": 9.139379501342773, "learning_rate": 3.996530612244898e-05, "loss": 3.4027, "step": 4923 }, { "epoch": 20.09795918367347, "grad_norm": 15.009608268737793, "learning_rate": 3.996326530612245e-05, "loss": 2.6431, "step": 4924 }, { "epoch": 20.102040816326532, "grad_norm": 7.297417640686035, "learning_rate": 3.996122448979592e-05, "loss": 3.478, "step": 4925 }, { "epoch": 20.10612244897959, "grad_norm": 11.106610298156738, "learning_rate": 3.995918367346939e-05, "loss": 3.1317, "step": 4926 }, { "epoch": 20.110204081632652, "grad_norm": 22.86302947998047, "learning_rate": 3.995714285714286e-05, "loss": 3.221, "step": 4927 }, { "epoch": 20.114285714285714, "grad_norm": 14.846989631652832, "learning_rate": 3.995510204081633e-05, "loss": 3.1435, "step": 4928 }, { "epoch": 20.118367346938776, "grad_norm": 12.139775276184082, "learning_rate": 3.9953061224489795e-05, "loss": 2.5727, "step": 4929 }, { "epoch": 20.122448979591837, "grad_norm": 9.61871337890625, "learning_rate": 3.995102040816327e-05, "loss": 3.0367, "step": 4930 }, { "epoch": 20.1265306122449, "grad_norm": 9.945866584777832, "learning_rate": 3.9948979591836736e-05, "loss": 3.2776, "step": 4931 }, { "epoch": 20.13061224489796, "grad_norm": 5.820842266082764, "learning_rate": 3.994693877551021e-05, "loss": 3.6042, "step": 4932 }, { "epoch": 20.13469387755102, "grad_norm": 6.198068141937256, "learning_rate": 3.994489795918368e-05, "loss": 3.4631, "step": 4933 }, { "epoch": 20.13877551020408, "grad_norm": 7.768253803253174, "learning_rate": 3.9942857142857144e-05, "loss": 3.0222, "step": 4934 }, { "epoch": 20.142857142857142, "grad_norm": 6.911296367645264, "learning_rate": 3.994081632653062e-05, "loss": 3.2617, "step": 4935 }, { "epoch": 20.146938775510204, "grad_norm": 25.081512451171875, "learning_rate": 3.9938775510204085e-05, "loss": 0.7803, "step": 4936 }, { "epoch": 20.151020408163266, "grad_norm": 16.209758758544922, "learning_rate": 3.993673469387755e-05, "loss": 2.4263, "step": 4937 }, { "epoch": 20.155102040816328, "grad_norm": 9.776235580444336, "learning_rate": 3.993469387755102e-05, "loss": 3.2701, "step": 4938 }, { "epoch": 20.15918367346939, "grad_norm": 5.454807281494141, "learning_rate": 3.993265306122449e-05, "loss": 3.7405, "step": 4939 }, { "epoch": 20.163265306122447, "grad_norm": 9.52662467956543, "learning_rate": 3.993061224489797e-05, "loss": 3.1072, "step": 4940 }, { "epoch": 20.16734693877551, "grad_norm": 17.0894718170166, "learning_rate": 3.9928571428571434e-05, "loss": 2.5519, "step": 4941 }, { "epoch": 20.17142857142857, "grad_norm": 14.409480094909668, "learning_rate": 3.99265306122449e-05, "loss": 2.7485, "step": 4942 }, { "epoch": 20.175510204081633, "grad_norm": 7.103010177612305, "learning_rate": 3.992448979591837e-05, "loss": 3.6051, "step": 4943 }, { "epoch": 20.179591836734694, "grad_norm": 13.532942771911621, "learning_rate": 3.9922448979591835e-05, "loss": 2.9786, "step": 4944 }, { "epoch": 20.183673469387756, "grad_norm": 10.497352600097656, "learning_rate": 3.992040816326531e-05, "loss": 3.2825, "step": 4945 }, { "epoch": 20.187755102040818, "grad_norm": 5.738491535186768, "learning_rate": 3.9918367346938776e-05, "loss": 3.3791, "step": 4946 }, { "epoch": 20.191836734693876, "grad_norm": 9.047123908996582, "learning_rate": 3.991632653061225e-05, "loss": 3.0509, "step": 4947 }, { "epoch": 20.195918367346938, "grad_norm": 15.307479858398438, "learning_rate": 3.991428571428572e-05, "loss": 3.2138, "step": 4948 }, { "epoch": 20.2, "grad_norm": 25.0079402923584, "learning_rate": 3.9912244897959185e-05, "loss": 1.6703, "step": 4949 }, { "epoch": 20.20408163265306, "grad_norm": 9.81916618347168, "learning_rate": 3.991020408163266e-05, "loss": 3.3574, "step": 4950 }, { "epoch": 20.208163265306123, "grad_norm": 5.888308048248291, "learning_rate": 3.9908163265306126e-05, "loss": 3.388, "step": 4951 }, { "epoch": 20.212244897959184, "grad_norm": 14.094561576843262, "learning_rate": 3.990612244897959e-05, "loss": 2.5005, "step": 4952 }, { "epoch": 20.216326530612246, "grad_norm": 8.168919563293457, "learning_rate": 3.990408163265306e-05, "loss": 3.3644, "step": 4953 }, { "epoch": 20.220408163265308, "grad_norm": 9.379941940307617, "learning_rate": 3.990204081632653e-05, "loss": 3.3668, "step": 4954 }, { "epoch": 20.224489795918366, "grad_norm": 16.496490478515625, "learning_rate": 3.99e-05, "loss": 2.782, "step": 4955 }, { "epoch": 20.228571428571428, "grad_norm": 7.0028767585754395, "learning_rate": 3.9897959183673475e-05, "loss": 3.4282, "step": 4956 }, { "epoch": 20.23265306122449, "grad_norm": 10.433249473571777, "learning_rate": 3.989591836734694e-05, "loss": 3.2473, "step": 4957 }, { "epoch": 20.23673469387755, "grad_norm": 21.83877182006836, "learning_rate": 3.989387755102041e-05, "loss": 1.9668, "step": 4958 }, { "epoch": 20.240816326530613, "grad_norm": 13.664897918701172, "learning_rate": 3.9891836734693876e-05, "loss": 3.0322, "step": 4959 }, { "epoch": 20.244897959183675, "grad_norm": 15.908147811889648, "learning_rate": 3.988979591836735e-05, "loss": 3.0274, "step": 4960 }, { "epoch": 20.248979591836736, "grad_norm": 5.41146993637085, "learning_rate": 3.988775510204082e-05, "loss": 3.8771, "step": 4961 }, { "epoch": 20.253061224489795, "grad_norm": 8.270210266113281, "learning_rate": 3.9885714285714284e-05, "loss": 3.4204, "step": 4962 }, { "epoch": 20.257142857142856, "grad_norm": 9.910472869873047, "learning_rate": 3.988367346938776e-05, "loss": 3.4987, "step": 4963 }, { "epoch": 20.261224489795918, "grad_norm": 7.028512954711914, "learning_rate": 3.9881632653061225e-05, "loss": 3.5887, "step": 4964 }, { "epoch": 20.26530612244898, "grad_norm": 10.998921394348145, "learning_rate": 3.98795918367347e-05, "loss": 2.7102, "step": 4965 }, { "epoch": 20.26938775510204, "grad_norm": 9.398628234863281, "learning_rate": 3.9877551020408166e-05, "loss": 3.1034, "step": 4966 }, { "epoch": 20.273469387755103, "grad_norm": 13.11337947845459, "learning_rate": 3.9875510204081633e-05, "loss": 2.4436, "step": 4967 }, { "epoch": 20.277551020408165, "grad_norm": 20.64214515686035, "learning_rate": 3.98734693877551e-05, "loss": 1.8524, "step": 4968 }, { "epoch": 20.281632653061223, "grad_norm": 17.650583267211914, "learning_rate": 3.987142857142857e-05, "loss": 1.7897, "step": 4969 }, { "epoch": 20.285714285714285, "grad_norm": 9.501489639282227, "learning_rate": 3.986938775510204e-05, "loss": 2.9094, "step": 4970 }, { "epoch": 20.289795918367346, "grad_norm": 7.942456245422363, "learning_rate": 3.9867346938775516e-05, "loss": 3.2855, "step": 4971 }, { "epoch": 20.293877551020408, "grad_norm": 15.676589012145996, "learning_rate": 3.986530612244898e-05, "loss": 2.0437, "step": 4972 }, { "epoch": 20.29795918367347, "grad_norm": 11.123196601867676, "learning_rate": 3.986326530612245e-05, "loss": 2.7741, "step": 4973 }, { "epoch": 20.30204081632653, "grad_norm": 5.240579605102539, "learning_rate": 3.986122448979592e-05, "loss": 4.0322, "step": 4974 }, { "epoch": 20.306122448979593, "grad_norm": 6.4121623039245605, "learning_rate": 3.985918367346939e-05, "loss": 3.4561, "step": 4975 }, { "epoch": 20.31020408163265, "grad_norm": 8.826045989990234, "learning_rate": 3.985714285714286e-05, "loss": 3.2951, "step": 4976 }, { "epoch": 20.314285714285713, "grad_norm": 7.872946262359619, "learning_rate": 3.9855102040816325e-05, "loss": 3.401, "step": 4977 }, { "epoch": 20.318367346938775, "grad_norm": 12.267415046691895, "learning_rate": 3.98530612244898e-05, "loss": 3.1252, "step": 4978 }, { "epoch": 20.322448979591837, "grad_norm": 5.618978023529053, "learning_rate": 3.985102040816327e-05, "loss": 3.7885, "step": 4979 }, { "epoch": 20.3265306122449, "grad_norm": 7.408487796783447, "learning_rate": 3.984897959183674e-05, "loss": 3.6733, "step": 4980 }, { "epoch": 20.33061224489796, "grad_norm": 20.561641693115234, "learning_rate": 3.984693877551021e-05, "loss": 1.9442, "step": 4981 }, { "epoch": 20.33469387755102, "grad_norm": 17.286605834960938, "learning_rate": 3.9844897959183674e-05, "loss": 2.3303, "step": 4982 }, { "epoch": 20.33877551020408, "grad_norm": 12.04388427734375, "learning_rate": 3.984285714285714e-05, "loss": 3.1112, "step": 4983 }, { "epoch": 20.34285714285714, "grad_norm": 5.470860958099365, "learning_rate": 3.9840816326530615e-05, "loss": 3.6725, "step": 4984 }, { "epoch": 20.346938775510203, "grad_norm": 10.002988815307617, "learning_rate": 3.983877551020408e-05, "loss": 3.0701, "step": 4985 }, { "epoch": 20.351020408163265, "grad_norm": 6.39451265335083, "learning_rate": 3.9836734693877556e-05, "loss": 3.471, "step": 4986 }, { "epoch": 20.355102040816327, "grad_norm": 8.130990982055664, "learning_rate": 3.9834693877551023e-05, "loss": 3.4157, "step": 4987 }, { "epoch": 20.35918367346939, "grad_norm": 15.59316349029541, "learning_rate": 3.983265306122449e-05, "loss": 2.4738, "step": 4988 }, { "epoch": 20.36326530612245, "grad_norm": 7.350527763366699, "learning_rate": 3.9830612244897964e-05, "loss": 3.3156, "step": 4989 }, { "epoch": 20.367346938775512, "grad_norm": 6.892255783081055, "learning_rate": 3.982857142857143e-05, "loss": 3.4069, "step": 4990 }, { "epoch": 20.37142857142857, "grad_norm": 7.640811920166016, "learning_rate": 3.98265306122449e-05, "loss": 3.5709, "step": 4991 }, { "epoch": 20.375510204081632, "grad_norm": 12.96036434173584, "learning_rate": 3.9824489795918366e-05, "loss": 3.2121, "step": 4992 }, { "epoch": 20.379591836734694, "grad_norm": 5.6072492599487305, "learning_rate": 3.982244897959184e-05, "loss": 3.6551, "step": 4993 }, { "epoch": 20.383673469387755, "grad_norm": 7.485471725463867, "learning_rate": 3.9820408163265314e-05, "loss": 3.4292, "step": 4994 }, { "epoch": 20.387755102040817, "grad_norm": 13.175899505615234, "learning_rate": 3.981836734693878e-05, "loss": 2.577, "step": 4995 }, { "epoch": 20.39183673469388, "grad_norm": 7.523924827575684, "learning_rate": 3.981632653061225e-05, "loss": 3.4616, "step": 4996 }, { "epoch": 20.39591836734694, "grad_norm": 8.170062065124512, "learning_rate": 3.9814285714285715e-05, "loss": 3.2578, "step": 4997 }, { "epoch": 20.4, "grad_norm": 9.981294631958008, "learning_rate": 3.981224489795918e-05, "loss": 3.2008, "step": 4998 }, { "epoch": 20.40408163265306, "grad_norm": 7.065181255340576, "learning_rate": 3.9810204081632656e-05, "loss": 3.608, "step": 4999 }, { "epoch": 20.408163265306122, "grad_norm": 7.182878017425537, "learning_rate": 3.980816326530612e-05, "loss": 3.4911, "step": 5000 }, { "epoch": 20.412244897959184, "grad_norm": 7.153806686401367, "learning_rate": 3.980612244897959e-05, "loss": 3.5075, "step": 5001 }, { "epoch": 20.416326530612245, "grad_norm": 6.77479362487793, "learning_rate": 3.9804081632653064e-05, "loss": 3.8012, "step": 5002 }, { "epoch": 20.420408163265307, "grad_norm": 10.591020584106445, "learning_rate": 3.980204081632653e-05, "loss": 2.914, "step": 5003 }, { "epoch": 20.42448979591837, "grad_norm": 9.690373420715332, "learning_rate": 3.9800000000000005e-05, "loss": 3.3383, "step": 5004 }, { "epoch": 20.428571428571427, "grad_norm": 11.855111122131348, "learning_rate": 3.979795918367347e-05, "loss": 2.756, "step": 5005 }, { "epoch": 20.43265306122449, "grad_norm": 14.46593952178955, "learning_rate": 3.979591836734694e-05, "loss": 2.8226, "step": 5006 }, { "epoch": 20.43673469387755, "grad_norm": 12.861804962158203, "learning_rate": 3.9793877551020407e-05, "loss": 2.7407, "step": 5007 }, { "epoch": 20.440816326530612, "grad_norm": 10.33735466003418, "learning_rate": 3.9791836734693874e-05, "loss": 2.753, "step": 5008 }, { "epoch": 20.444897959183674, "grad_norm": 9.960040092468262, "learning_rate": 3.978979591836735e-05, "loss": 2.9655, "step": 5009 }, { "epoch": 20.448979591836736, "grad_norm": 7.549472808837891, "learning_rate": 3.978775510204082e-05, "loss": 3.4611, "step": 5010 }, { "epoch": 20.453061224489797, "grad_norm": 16.300565719604492, "learning_rate": 3.978571428571429e-05, "loss": 2.5049, "step": 5011 }, { "epoch": 20.457142857142856, "grad_norm": 6.216301918029785, "learning_rate": 3.9783673469387756e-05, "loss": 3.383, "step": 5012 }, { "epoch": 20.461224489795917, "grad_norm": 11.019254684448242, "learning_rate": 3.978163265306122e-05, "loss": 2.5657, "step": 5013 }, { "epoch": 20.46530612244898, "grad_norm": 15.793111801147461, "learning_rate": 3.97795918367347e-05, "loss": 2.0346, "step": 5014 }, { "epoch": 20.46938775510204, "grad_norm": 6.241787910461426, "learning_rate": 3.9777551020408164e-05, "loss": 3.6931, "step": 5015 }, { "epoch": 20.473469387755102, "grad_norm": 7.129871845245361, "learning_rate": 3.977551020408163e-05, "loss": 3.3769, "step": 5016 }, { "epoch": 20.477551020408164, "grad_norm": 8.84078598022461, "learning_rate": 3.9773469387755105e-05, "loss": 3.2621, "step": 5017 }, { "epoch": 20.481632653061226, "grad_norm": 12.395792007446289, "learning_rate": 3.977142857142857e-05, "loss": 2.9705, "step": 5018 }, { "epoch": 20.485714285714284, "grad_norm": 7.846827507019043, "learning_rate": 3.9769387755102046e-05, "loss": 3.4571, "step": 5019 }, { "epoch": 20.489795918367346, "grad_norm": 15.820357322692871, "learning_rate": 3.976734693877551e-05, "loss": 2.9769, "step": 5020 }, { "epoch": 20.493877551020407, "grad_norm": 11.931794166564941, "learning_rate": 3.976530612244898e-05, "loss": 2.8299, "step": 5021 }, { "epoch": 20.49795918367347, "grad_norm": 16.566904067993164, "learning_rate": 3.976326530612245e-05, "loss": 2.534, "step": 5022 }, { "epoch": 20.50204081632653, "grad_norm": 7.522677898406982, "learning_rate": 3.976122448979592e-05, "loss": 3.3213, "step": 5023 }, { "epoch": 20.506122448979593, "grad_norm": 11.311244010925293, "learning_rate": 3.975918367346939e-05, "loss": 3.1033, "step": 5024 }, { "epoch": 20.510204081632654, "grad_norm": 7.811583042144775, "learning_rate": 3.975714285714286e-05, "loss": 3.3716, "step": 5025 }, { "epoch": 20.514285714285712, "grad_norm": 13.661845207214355, "learning_rate": 3.975510204081633e-05, "loss": 2.7865, "step": 5026 }, { "epoch": 20.518367346938774, "grad_norm": 6.6775031089782715, "learning_rate": 3.9753061224489797e-05, "loss": 3.4186, "step": 5027 }, { "epoch": 20.522448979591836, "grad_norm": 6.711625099182129, "learning_rate": 3.975102040816327e-05, "loss": 3.4539, "step": 5028 }, { "epoch": 20.526530612244898, "grad_norm": 5.946176528930664, "learning_rate": 3.974897959183674e-05, "loss": 3.61, "step": 5029 }, { "epoch": 20.53061224489796, "grad_norm": 6.330472469329834, "learning_rate": 3.9746938775510205e-05, "loss": 3.3804, "step": 5030 }, { "epoch": 20.53469387755102, "grad_norm": 6.009026527404785, "learning_rate": 3.974489795918367e-05, "loss": 3.6968, "step": 5031 }, { "epoch": 20.538775510204083, "grad_norm": 19.700037002563477, "learning_rate": 3.9742857142857146e-05, "loss": 2.6046, "step": 5032 }, { "epoch": 20.542857142857144, "grad_norm": 8.50942611694336, "learning_rate": 3.974081632653062e-05, "loss": 3.4196, "step": 5033 }, { "epoch": 20.546938775510203, "grad_norm": 13.12881851196289, "learning_rate": 3.973877551020409e-05, "loss": 3.1764, "step": 5034 }, { "epoch": 20.551020408163264, "grad_norm": 9.798879623413086, "learning_rate": 3.9736734693877554e-05, "loss": 2.9579, "step": 5035 }, { "epoch": 20.555102040816326, "grad_norm": 8.138628005981445, "learning_rate": 3.973469387755102e-05, "loss": 3.28, "step": 5036 }, { "epoch": 20.559183673469388, "grad_norm": 10.943512916564941, "learning_rate": 3.973265306122449e-05, "loss": 2.8133, "step": 5037 }, { "epoch": 20.56326530612245, "grad_norm": 10.998661994934082, "learning_rate": 3.973061224489796e-05, "loss": 2.884, "step": 5038 }, { "epoch": 20.56734693877551, "grad_norm": 6.883761882781982, "learning_rate": 3.972857142857143e-05, "loss": 3.3867, "step": 5039 }, { "epoch": 20.571428571428573, "grad_norm": 7.3392791748046875, "learning_rate": 3.97265306122449e-05, "loss": 3.1897, "step": 5040 }, { "epoch": 20.57551020408163, "grad_norm": 4.403986930847168, "learning_rate": 3.972448979591837e-05, "loss": 3.922, "step": 5041 }, { "epoch": 20.579591836734693, "grad_norm": 8.313894271850586, "learning_rate": 3.972244897959184e-05, "loss": 3.527, "step": 5042 }, { "epoch": 20.583673469387755, "grad_norm": 6.976861000061035, "learning_rate": 3.972040816326531e-05, "loss": 3.3425, "step": 5043 }, { "epoch": 20.587755102040816, "grad_norm": 9.145329475402832, "learning_rate": 3.971836734693878e-05, "loss": 3.0815, "step": 5044 }, { "epoch": 20.591836734693878, "grad_norm": 13.009130477905273, "learning_rate": 3.9716326530612245e-05, "loss": 2.9708, "step": 5045 }, { "epoch": 20.59591836734694, "grad_norm": 5.278367042541504, "learning_rate": 3.971428571428571e-05, "loss": 3.5956, "step": 5046 }, { "epoch": 20.6, "grad_norm": 5.60724401473999, "learning_rate": 3.9712244897959186e-05, "loss": 3.2821, "step": 5047 }, { "epoch": 20.60408163265306, "grad_norm": 6.582006931304932, "learning_rate": 3.971020408163266e-05, "loss": 3.5267, "step": 5048 }, { "epoch": 20.60816326530612, "grad_norm": 9.931268692016602, "learning_rate": 3.970816326530613e-05, "loss": 2.9332, "step": 5049 }, { "epoch": 20.612244897959183, "grad_norm": 10.999028205871582, "learning_rate": 3.9706122448979595e-05, "loss": 3.2784, "step": 5050 }, { "epoch": 20.616326530612245, "grad_norm": 9.4263334274292, "learning_rate": 3.970408163265306e-05, "loss": 3.1376, "step": 5051 }, { "epoch": 20.620408163265306, "grad_norm": 22.469926834106445, "learning_rate": 3.970204081632653e-05, "loss": 2.5422, "step": 5052 }, { "epoch": 20.624489795918368, "grad_norm": 7.085938930511475, "learning_rate": 3.97e-05, "loss": 3.6218, "step": 5053 }, { "epoch": 20.62857142857143, "grad_norm": 10.206206321716309, "learning_rate": 3.969795918367347e-05, "loss": 3.3919, "step": 5054 }, { "epoch": 20.632653061224488, "grad_norm": 7.681192398071289, "learning_rate": 3.969591836734694e-05, "loss": 3.27, "step": 5055 }, { "epoch": 20.63673469387755, "grad_norm": 6.419294357299805, "learning_rate": 3.969387755102041e-05, "loss": 3.6083, "step": 5056 }, { "epoch": 20.64081632653061, "grad_norm": 17.21001434326172, "learning_rate": 3.969183673469388e-05, "loss": 2.6739, "step": 5057 }, { "epoch": 20.644897959183673, "grad_norm": 10.393528938293457, "learning_rate": 3.968979591836735e-05, "loss": 3.3377, "step": 5058 }, { "epoch": 20.648979591836735, "grad_norm": 12.952502250671387, "learning_rate": 3.968775510204082e-05, "loss": 2.687, "step": 5059 }, { "epoch": 20.653061224489797, "grad_norm": 4.442288875579834, "learning_rate": 3.9685714285714286e-05, "loss": 3.669, "step": 5060 }, { "epoch": 20.65714285714286, "grad_norm": 11.163614273071289, "learning_rate": 3.968367346938775e-05, "loss": 2.5027, "step": 5061 }, { "epoch": 20.66122448979592, "grad_norm": 6.271758079528809, "learning_rate": 3.968163265306122e-05, "loss": 3.777, "step": 5062 }, { "epoch": 20.66530612244898, "grad_norm": 9.573421478271484, "learning_rate": 3.9679591836734694e-05, "loss": 3.0612, "step": 5063 }, { "epoch": 20.66938775510204, "grad_norm": 8.818650245666504, "learning_rate": 3.967755102040817e-05, "loss": 3.4766, "step": 5064 }, { "epoch": 20.6734693877551, "grad_norm": 7.288727760314941, "learning_rate": 3.9675510204081635e-05, "loss": 3.3115, "step": 5065 }, { "epoch": 20.677551020408163, "grad_norm": 7.18410062789917, "learning_rate": 3.96734693877551e-05, "loss": 3.4414, "step": 5066 }, { "epoch": 20.681632653061225, "grad_norm": 5.897848129272461, "learning_rate": 3.9671428571428576e-05, "loss": 3.5988, "step": 5067 }, { "epoch": 20.685714285714287, "grad_norm": 15.103052139282227, "learning_rate": 3.9669387755102044e-05, "loss": 2.5159, "step": 5068 }, { "epoch": 20.68979591836735, "grad_norm": 11.268028259277344, "learning_rate": 3.966734693877551e-05, "loss": 2.7979, "step": 5069 }, { "epoch": 20.693877551020407, "grad_norm": 14.764151573181152, "learning_rate": 3.966530612244898e-05, "loss": 3.0125, "step": 5070 }, { "epoch": 20.69795918367347, "grad_norm": 6.797955513000488, "learning_rate": 3.966326530612245e-05, "loss": 3.6469, "step": 5071 }, { "epoch": 20.70204081632653, "grad_norm": 15.414961814880371, "learning_rate": 3.9661224489795926e-05, "loss": 2.1802, "step": 5072 }, { "epoch": 20.706122448979592, "grad_norm": 5.254593849182129, "learning_rate": 3.965918367346939e-05, "loss": 3.6885, "step": 5073 }, { "epoch": 20.710204081632654, "grad_norm": 7.540963172912598, "learning_rate": 3.965714285714286e-05, "loss": 3.5535, "step": 5074 }, { "epoch": 20.714285714285715, "grad_norm": 12.36397933959961, "learning_rate": 3.965510204081633e-05, "loss": 2.9952, "step": 5075 }, { "epoch": 20.718367346938777, "grad_norm": 10.22012996673584, "learning_rate": 3.9653061224489794e-05, "loss": 2.9219, "step": 5076 }, { "epoch": 20.722448979591835, "grad_norm": 8.368711471557617, "learning_rate": 3.965102040816327e-05, "loss": 3.2014, "step": 5077 }, { "epoch": 20.726530612244897, "grad_norm": 6.688894748687744, "learning_rate": 3.9648979591836735e-05, "loss": 3.528, "step": 5078 }, { "epoch": 20.73061224489796, "grad_norm": 4.496669292449951, "learning_rate": 3.964693877551021e-05, "loss": 3.8744, "step": 5079 }, { "epoch": 20.73469387755102, "grad_norm": 14.523295402526855, "learning_rate": 3.9644897959183676e-05, "loss": 2.5302, "step": 5080 }, { "epoch": 20.738775510204082, "grad_norm": 9.781792640686035, "learning_rate": 3.964285714285714e-05, "loss": 2.7145, "step": 5081 }, { "epoch": 20.742857142857144, "grad_norm": 11.860284805297852, "learning_rate": 3.964081632653062e-05, "loss": 2.5771, "step": 5082 }, { "epoch": 20.746938775510205, "grad_norm": 7.173804759979248, "learning_rate": 3.9638775510204084e-05, "loss": 3.25, "step": 5083 }, { "epoch": 20.751020408163264, "grad_norm": 14.003044128417969, "learning_rate": 3.963673469387755e-05, "loss": 1.8581, "step": 5084 }, { "epoch": 20.755102040816325, "grad_norm": 5.831615447998047, "learning_rate": 3.963469387755102e-05, "loss": 3.4221, "step": 5085 }, { "epoch": 20.759183673469387, "grad_norm": 9.887492179870605, "learning_rate": 3.963265306122449e-05, "loss": 3.1706, "step": 5086 }, { "epoch": 20.76326530612245, "grad_norm": 17.371938705444336, "learning_rate": 3.9630612244897966e-05, "loss": 2.1397, "step": 5087 }, { "epoch": 20.76734693877551, "grad_norm": 9.715625762939453, "learning_rate": 3.9628571428571433e-05, "loss": 3.1955, "step": 5088 }, { "epoch": 20.771428571428572, "grad_norm": 8.519152641296387, "learning_rate": 3.96265306122449e-05, "loss": 3.082, "step": 5089 }, { "epoch": 20.775510204081634, "grad_norm": 8.354763984680176, "learning_rate": 3.962448979591837e-05, "loss": 3.469, "step": 5090 }, { "epoch": 20.779591836734696, "grad_norm": 10.490194320678711, "learning_rate": 3.9622448979591835e-05, "loss": 3.2615, "step": 5091 }, { "epoch": 20.783673469387754, "grad_norm": 9.600502967834473, "learning_rate": 3.962040816326531e-05, "loss": 3.4133, "step": 5092 }, { "epoch": 20.787755102040816, "grad_norm": 8.921932220458984, "learning_rate": 3.9618367346938776e-05, "loss": 3.4362, "step": 5093 }, { "epoch": 20.791836734693877, "grad_norm": 16.831361770629883, "learning_rate": 3.961632653061225e-05, "loss": 2.4135, "step": 5094 }, { "epoch": 20.79591836734694, "grad_norm": 5.8424906730651855, "learning_rate": 3.961428571428572e-05, "loss": 3.6184, "step": 5095 }, { "epoch": 20.8, "grad_norm": 8.470630645751953, "learning_rate": 3.9612244897959184e-05, "loss": 3.433, "step": 5096 }, { "epoch": 20.804081632653062, "grad_norm": 5.66008186340332, "learning_rate": 3.961020408163266e-05, "loss": 3.8441, "step": 5097 }, { "epoch": 20.808163265306124, "grad_norm": 13.533740997314453, "learning_rate": 3.9608163265306125e-05, "loss": 2.8641, "step": 5098 }, { "epoch": 20.812244897959182, "grad_norm": 8.93287467956543, "learning_rate": 3.960612244897959e-05, "loss": 3.1815, "step": 5099 }, { "epoch": 20.816326530612244, "grad_norm": 22.59768295288086, "learning_rate": 3.960408163265306e-05, "loss": 2.5376, "step": 5100 }, { "epoch": 20.820408163265306, "grad_norm": 7.311178207397461, "learning_rate": 3.960204081632653e-05, "loss": 3.2981, "step": 5101 }, { "epoch": 20.824489795918367, "grad_norm": 21.516984939575195, "learning_rate": 3.960000000000001e-05, "loss": 2.06, "step": 5102 }, { "epoch": 20.82857142857143, "grad_norm": 11.359323501586914, "learning_rate": 3.9597959183673474e-05, "loss": 3.1294, "step": 5103 }, { "epoch": 20.83265306122449, "grad_norm": 9.188822746276855, "learning_rate": 3.959591836734694e-05, "loss": 3.3765, "step": 5104 }, { "epoch": 20.836734693877553, "grad_norm": 7.30082368850708, "learning_rate": 3.959387755102041e-05, "loss": 3.3931, "step": 5105 }, { "epoch": 20.84081632653061, "grad_norm": 6.833849906921387, "learning_rate": 3.9591836734693876e-05, "loss": 3.6736, "step": 5106 }, { "epoch": 20.844897959183672, "grad_norm": 10.249411582946777, "learning_rate": 3.958979591836735e-05, "loss": 3.3451, "step": 5107 }, { "epoch": 20.848979591836734, "grad_norm": 11.910697937011719, "learning_rate": 3.958775510204082e-05, "loss": 2.4947, "step": 5108 }, { "epoch": 20.853061224489796, "grad_norm": 8.885964393615723, "learning_rate": 3.9585714285714284e-05, "loss": 3.1895, "step": 5109 }, { "epoch": 20.857142857142858, "grad_norm": 10.389079093933105, "learning_rate": 3.958367346938776e-05, "loss": 3.0759, "step": 5110 }, { "epoch": 20.86122448979592, "grad_norm": 9.044522285461426, "learning_rate": 3.958163265306123e-05, "loss": 3.0526, "step": 5111 }, { "epoch": 20.86530612244898, "grad_norm": 20.89982032775879, "learning_rate": 3.95795918367347e-05, "loss": 2.0086, "step": 5112 }, { "epoch": 20.86938775510204, "grad_norm": 7.981893539428711, "learning_rate": 3.9577551020408166e-05, "loss": 3.3746, "step": 5113 }, { "epoch": 20.8734693877551, "grad_norm": 11.906006813049316, "learning_rate": 3.957551020408163e-05, "loss": 3.3799, "step": 5114 }, { "epoch": 20.877551020408163, "grad_norm": 5.790480613708496, "learning_rate": 3.95734693877551e-05, "loss": 3.6316, "step": 5115 }, { "epoch": 20.881632653061224, "grad_norm": 9.564498901367188, "learning_rate": 3.9571428571428574e-05, "loss": 3.0976, "step": 5116 }, { "epoch": 20.885714285714286, "grad_norm": 10.70400333404541, "learning_rate": 3.956938775510204e-05, "loss": 3.1711, "step": 5117 }, { "epoch": 20.889795918367348, "grad_norm": 8.988455772399902, "learning_rate": 3.9567346938775515e-05, "loss": 3.1842, "step": 5118 }, { "epoch": 20.89387755102041, "grad_norm": 14.515088081359863, "learning_rate": 3.956530612244898e-05, "loss": 2.8206, "step": 5119 }, { "epoch": 20.897959183673468, "grad_norm": 8.955592155456543, "learning_rate": 3.956326530612245e-05, "loss": 3.3658, "step": 5120 }, { "epoch": 20.90204081632653, "grad_norm": 14.654783248901367, "learning_rate": 3.956122448979592e-05, "loss": 3.024, "step": 5121 }, { "epoch": 20.90612244897959, "grad_norm": 13.83609676361084, "learning_rate": 3.955918367346939e-05, "loss": 2.8867, "step": 5122 }, { "epoch": 20.910204081632653, "grad_norm": 11.242286682128906, "learning_rate": 3.955714285714286e-05, "loss": 2.9665, "step": 5123 }, { "epoch": 20.914285714285715, "grad_norm": 7.836981296539307, "learning_rate": 3.9555102040816325e-05, "loss": 3.2166, "step": 5124 }, { "epoch": 20.918367346938776, "grad_norm": 9.647542953491211, "learning_rate": 3.95530612244898e-05, "loss": 3.1516, "step": 5125 }, { "epoch": 20.922448979591838, "grad_norm": 8.085177421569824, "learning_rate": 3.955102040816327e-05, "loss": 3.3534, "step": 5126 }, { "epoch": 20.926530612244896, "grad_norm": 9.771846771240234, "learning_rate": 3.954897959183674e-05, "loss": 3.166, "step": 5127 }, { "epoch": 20.930612244897958, "grad_norm": 9.018810272216797, "learning_rate": 3.9546938775510207e-05, "loss": 3.4168, "step": 5128 }, { "epoch": 20.93469387755102, "grad_norm": 15.435126304626465, "learning_rate": 3.9544897959183674e-05, "loss": 2.1906, "step": 5129 }, { "epoch": 20.93877551020408, "grad_norm": 10.49095344543457, "learning_rate": 3.954285714285714e-05, "loss": 3.0477, "step": 5130 }, { "epoch": 20.942857142857143, "grad_norm": 18.780323028564453, "learning_rate": 3.9540816326530615e-05, "loss": 1.7613, "step": 5131 }, { "epoch": 20.946938775510205, "grad_norm": 14.729034423828125, "learning_rate": 3.953877551020408e-05, "loss": 3.1035, "step": 5132 }, { "epoch": 20.951020408163266, "grad_norm": 5.0393147468566895, "learning_rate": 3.9536734693877556e-05, "loss": 3.7948, "step": 5133 }, { "epoch": 20.955102040816328, "grad_norm": 9.628721237182617, "learning_rate": 3.953469387755102e-05, "loss": 3.2304, "step": 5134 }, { "epoch": 20.959183673469386, "grad_norm": 13.716557502746582, "learning_rate": 3.953265306122449e-05, "loss": 2.5611, "step": 5135 }, { "epoch": 20.963265306122448, "grad_norm": 9.481158256530762, "learning_rate": 3.9530612244897964e-05, "loss": 3.4924, "step": 5136 }, { "epoch": 20.96734693877551, "grad_norm": 18.259857177734375, "learning_rate": 3.952857142857143e-05, "loss": 2.9898, "step": 5137 }, { "epoch": 20.97142857142857, "grad_norm": 10.272977828979492, "learning_rate": 3.95265306122449e-05, "loss": 2.7869, "step": 5138 }, { "epoch": 20.975510204081633, "grad_norm": 6.7304229736328125, "learning_rate": 3.9524489795918365e-05, "loss": 3.6527, "step": 5139 }, { "epoch": 20.979591836734695, "grad_norm": 5.4559149742126465, "learning_rate": 3.952244897959184e-05, "loss": 3.7162, "step": 5140 }, { "epoch": 20.983673469387757, "grad_norm": 8.882341384887695, "learning_rate": 3.952040816326531e-05, "loss": 3.2286, "step": 5141 }, { "epoch": 20.987755102040815, "grad_norm": 14.137483596801758, "learning_rate": 3.951836734693878e-05, "loss": 2.5252, "step": 5142 }, { "epoch": 20.991836734693877, "grad_norm": 8.376768112182617, "learning_rate": 3.951632653061225e-05, "loss": 3.1382, "step": 5143 }, { "epoch": 20.99591836734694, "grad_norm": 12.24028205871582, "learning_rate": 3.9514285714285714e-05, "loss": 2.72, "step": 5144 }, { "epoch": 21.0, "grad_norm": 5.7737627029418945, "learning_rate": 3.951224489795918e-05, "loss": 3.501, "step": 5145 }, { "epoch": 21.00408163265306, "grad_norm": 7.085809707641602, "learning_rate": 3.9510204081632655e-05, "loss": 3.429, "step": 5146 }, { "epoch": 21.008163265306123, "grad_norm": 10.058809280395508, "learning_rate": 3.950816326530612e-05, "loss": 2.6188, "step": 5147 }, { "epoch": 21.012244897959185, "grad_norm": 6.461313724517822, "learning_rate": 3.9506122448979597e-05, "loss": 3.6096, "step": 5148 }, { "epoch": 21.016326530612243, "grad_norm": 10.561075210571289, "learning_rate": 3.9504081632653064e-05, "loss": 3.0008, "step": 5149 }, { "epoch": 21.020408163265305, "grad_norm": 7.045515537261963, "learning_rate": 3.950204081632653e-05, "loss": 3.6229, "step": 5150 }, { "epoch": 21.024489795918367, "grad_norm": 9.266956329345703, "learning_rate": 3.9500000000000005e-05, "loss": 3.043, "step": 5151 }, { "epoch": 21.02857142857143, "grad_norm": 8.7189359664917, "learning_rate": 3.949795918367347e-05, "loss": 3.0274, "step": 5152 }, { "epoch": 21.03265306122449, "grad_norm": 8.931843757629395, "learning_rate": 3.949591836734694e-05, "loss": 3.1186, "step": 5153 }, { "epoch": 21.036734693877552, "grad_norm": 5.091703414916992, "learning_rate": 3.9493877551020406e-05, "loss": 3.9836, "step": 5154 }, { "epoch": 21.040816326530614, "grad_norm": 15.993962287902832, "learning_rate": 3.949183673469388e-05, "loss": 2.3042, "step": 5155 }, { "epoch": 21.044897959183672, "grad_norm": 7.2628912925720215, "learning_rate": 3.9489795918367354e-05, "loss": 3.5094, "step": 5156 }, { "epoch": 21.048979591836734, "grad_norm": 14.329217910766602, "learning_rate": 3.948775510204082e-05, "loss": 2.654, "step": 5157 }, { "epoch": 21.053061224489795, "grad_norm": 13.702263832092285, "learning_rate": 3.948571428571429e-05, "loss": 2.5655, "step": 5158 }, { "epoch": 21.057142857142857, "grad_norm": 12.250945091247559, "learning_rate": 3.9483673469387755e-05, "loss": 2.9146, "step": 5159 }, { "epoch": 21.06122448979592, "grad_norm": 6.447226047515869, "learning_rate": 3.948163265306123e-05, "loss": 3.3899, "step": 5160 }, { "epoch": 21.06530612244898, "grad_norm": 17.96792221069336, "learning_rate": 3.9479591836734696e-05, "loss": 1.7363, "step": 5161 }, { "epoch": 21.069387755102042, "grad_norm": 8.530529975891113, "learning_rate": 3.947755102040816e-05, "loss": 3.4909, "step": 5162 }, { "epoch": 21.073469387755104, "grad_norm": 9.68747329711914, "learning_rate": 3.947551020408163e-05, "loss": 3.3582, "step": 5163 }, { "epoch": 21.077551020408162, "grad_norm": 9.422730445861816, "learning_rate": 3.9473469387755104e-05, "loss": 3.1689, "step": 5164 }, { "epoch": 21.081632653061224, "grad_norm": 6.639805316925049, "learning_rate": 3.947142857142858e-05, "loss": 3.6976, "step": 5165 }, { "epoch": 21.085714285714285, "grad_norm": 7.175825595855713, "learning_rate": 3.9469387755102045e-05, "loss": 3.4151, "step": 5166 }, { "epoch": 21.089795918367347, "grad_norm": 15.869336128234863, "learning_rate": 3.946734693877551e-05, "loss": 2.8913, "step": 5167 }, { "epoch": 21.09387755102041, "grad_norm": 12.126279830932617, "learning_rate": 3.946530612244898e-05, "loss": 2.8196, "step": 5168 }, { "epoch": 21.09795918367347, "grad_norm": 16.201284408569336, "learning_rate": 3.946326530612245e-05, "loss": 2.7344, "step": 5169 }, { "epoch": 21.102040816326532, "grad_norm": 10.927230834960938, "learning_rate": 3.946122448979592e-05, "loss": 2.6499, "step": 5170 }, { "epoch": 21.10612244897959, "grad_norm": 7.500367164611816, "learning_rate": 3.945918367346939e-05, "loss": 3.3821, "step": 5171 }, { "epoch": 21.110204081632652, "grad_norm": 8.582083702087402, "learning_rate": 3.945714285714286e-05, "loss": 3.2807, "step": 5172 }, { "epoch": 21.114285714285714, "grad_norm": 6.905340671539307, "learning_rate": 3.945510204081633e-05, "loss": 3.477, "step": 5173 }, { "epoch": 21.118367346938776, "grad_norm": 10.259950637817383, "learning_rate": 3.9453061224489796e-05, "loss": 3.0676, "step": 5174 }, { "epoch": 21.122448979591837, "grad_norm": 13.795195579528809, "learning_rate": 3.945102040816327e-05, "loss": 2.4728, "step": 5175 }, { "epoch": 21.1265306122449, "grad_norm": 9.31102180480957, "learning_rate": 3.944897959183674e-05, "loss": 3.2496, "step": 5176 }, { "epoch": 21.13061224489796, "grad_norm": 7.069518566131592, "learning_rate": 3.9446938775510204e-05, "loss": 3.5225, "step": 5177 }, { "epoch": 21.13469387755102, "grad_norm": 7.5960211753845215, "learning_rate": 3.944489795918367e-05, "loss": 3.3663, "step": 5178 }, { "epoch": 21.13877551020408, "grad_norm": 8.145946502685547, "learning_rate": 3.9442857142857145e-05, "loss": 3.2762, "step": 5179 }, { "epoch": 21.142857142857142, "grad_norm": 9.540048599243164, "learning_rate": 3.944081632653062e-05, "loss": 3.0431, "step": 5180 }, { "epoch": 21.146938775510204, "grad_norm": 8.014137268066406, "learning_rate": 3.9438775510204086e-05, "loss": 3.4785, "step": 5181 }, { "epoch": 21.151020408163266, "grad_norm": 16.80652618408203, "learning_rate": 3.943673469387755e-05, "loss": 2.4002, "step": 5182 }, { "epoch": 21.155102040816328, "grad_norm": 8.722626686096191, "learning_rate": 3.943469387755102e-05, "loss": 3.0125, "step": 5183 }, { "epoch": 21.15918367346939, "grad_norm": 11.515612602233887, "learning_rate": 3.943265306122449e-05, "loss": 2.9382, "step": 5184 }, { "epoch": 21.163265306122447, "grad_norm": 7.017389297485352, "learning_rate": 3.943061224489796e-05, "loss": 3.451, "step": 5185 }, { "epoch": 21.16734693877551, "grad_norm": 9.44365406036377, "learning_rate": 3.942857142857143e-05, "loss": 3.0661, "step": 5186 }, { "epoch": 21.17142857142857, "grad_norm": 6.66222620010376, "learning_rate": 3.94265306122449e-05, "loss": 3.7597, "step": 5187 }, { "epoch": 21.175510204081633, "grad_norm": 7.622019290924072, "learning_rate": 3.942448979591837e-05, "loss": 3.3135, "step": 5188 }, { "epoch": 21.179591836734694, "grad_norm": 9.023941040039062, "learning_rate": 3.942244897959184e-05, "loss": 3.3506, "step": 5189 }, { "epoch": 21.183673469387756, "grad_norm": 7.321356296539307, "learning_rate": 3.942040816326531e-05, "loss": 3.3814, "step": 5190 }, { "epoch": 21.187755102040818, "grad_norm": 11.80970573425293, "learning_rate": 3.941836734693878e-05, "loss": 3.2287, "step": 5191 }, { "epoch": 21.191836734693876, "grad_norm": 9.291913986206055, "learning_rate": 3.9416326530612245e-05, "loss": 3.3415, "step": 5192 }, { "epoch": 21.195918367346938, "grad_norm": 20.652692794799805, "learning_rate": 3.941428571428571e-05, "loss": 2.4923, "step": 5193 }, { "epoch": 21.2, "grad_norm": 15.312694549560547, "learning_rate": 3.9412244897959186e-05, "loss": 2.7235, "step": 5194 }, { "epoch": 21.20408163265306, "grad_norm": 6.472026348114014, "learning_rate": 3.941020408163266e-05, "loss": 3.5588, "step": 5195 }, { "epoch": 21.208163265306123, "grad_norm": 10.517683982849121, "learning_rate": 3.940816326530613e-05, "loss": 3.0033, "step": 5196 }, { "epoch": 21.212244897959184, "grad_norm": 11.364988327026367, "learning_rate": 3.9406122448979594e-05, "loss": 2.7217, "step": 5197 }, { "epoch": 21.216326530612246, "grad_norm": 11.939435005187988, "learning_rate": 3.940408163265306e-05, "loss": 3.029, "step": 5198 }, { "epoch": 21.220408163265308, "grad_norm": 20.721982955932617, "learning_rate": 3.940204081632653e-05, "loss": 1.8192, "step": 5199 }, { "epoch": 21.224489795918366, "grad_norm": 10.189983367919922, "learning_rate": 3.94e-05, "loss": 3.223, "step": 5200 }, { "epoch": 21.228571428571428, "grad_norm": 9.36852741241455, "learning_rate": 3.939795918367347e-05, "loss": 3.3528, "step": 5201 }, { "epoch": 21.23265306122449, "grad_norm": 5.491027355194092, "learning_rate": 3.939591836734694e-05, "loss": 3.5464, "step": 5202 }, { "epoch": 21.23673469387755, "grad_norm": 17.21194076538086, "learning_rate": 3.939387755102041e-05, "loss": 1.8575, "step": 5203 }, { "epoch": 21.240816326530613, "grad_norm": 11.240957260131836, "learning_rate": 3.9391836734693884e-05, "loss": 3.0212, "step": 5204 }, { "epoch": 21.244897959183675, "grad_norm": 6.401057720184326, "learning_rate": 3.938979591836735e-05, "loss": 3.3611, "step": 5205 }, { "epoch": 21.248979591836736, "grad_norm": 15.9696044921875, "learning_rate": 3.938775510204082e-05, "loss": 2.4761, "step": 5206 }, { "epoch": 21.253061224489795, "grad_norm": 9.775256156921387, "learning_rate": 3.9385714285714286e-05, "loss": 3.0097, "step": 5207 }, { "epoch": 21.257142857142856, "grad_norm": 13.338313102722168, "learning_rate": 3.938367346938775e-05, "loss": 2.9163, "step": 5208 }, { "epoch": 21.261224489795918, "grad_norm": 17.382469177246094, "learning_rate": 3.938163265306123e-05, "loss": 2.3801, "step": 5209 }, { "epoch": 21.26530612244898, "grad_norm": 9.354108810424805, "learning_rate": 3.9379591836734694e-05, "loss": 3.2271, "step": 5210 }, { "epoch": 21.26938775510204, "grad_norm": 13.437894821166992, "learning_rate": 3.937755102040817e-05, "loss": 2.5116, "step": 5211 }, { "epoch": 21.273469387755103, "grad_norm": 10.56302547454834, "learning_rate": 3.9375510204081635e-05, "loss": 3.0054, "step": 5212 }, { "epoch": 21.277551020408165, "grad_norm": 6.359447956085205, "learning_rate": 3.93734693877551e-05, "loss": 3.3441, "step": 5213 }, { "epoch": 21.281632653061223, "grad_norm": 12.685518264770508, "learning_rate": 3.9371428571428576e-05, "loss": 3.2078, "step": 5214 }, { "epoch": 21.285714285714285, "grad_norm": 6.985279560089111, "learning_rate": 3.936938775510204e-05, "loss": 3.3458, "step": 5215 }, { "epoch": 21.289795918367346, "grad_norm": 10.791194915771484, "learning_rate": 3.936734693877551e-05, "loss": 3.1408, "step": 5216 }, { "epoch": 21.293877551020408, "grad_norm": 8.384581565856934, "learning_rate": 3.936530612244898e-05, "loss": 3.4185, "step": 5217 }, { "epoch": 21.29795918367347, "grad_norm": 16.798254013061523, "learning_rate": 3.936326530612245e-05, "loss": 2.1194, "step": 5218 }, { "epoch": 21.30204081632653, "grad_norm": 16.15319061279297, "learning_rate": 3.9361224489795925e-05, "loss": 2.6518, "step": 5219 }, { "epoch": 21.306122448979593, "grad_norm": 7.429584503173828, "learning_rate": 3.935918367346939e-05, "loss": 3.4214, "step": 5220 }, { "epoch": 21.31020408163265, "grad_norm": 16.55104637145996, "learning_rate": 3.935714285714286e-05, "loss": 2.4496, "step": 5221 }, { "epoch": 21.314285714285713, "grad_norm": 8.357048988342285, "learning_rate": 3.9355102040816326e-05, "loss": 3.2584, "step": 5222 }, { "epoch": 21.318367346938775, "grad_norm": 11.128162384033203, "learning_rate": 3.9353061224489794e-05, "loss": 3.1709, "step": 5223 }, { "epoch": 21.322448979591837, "grad_norm": 8.865250587463379, "learning_rate": 3.935102040816327e-05, "loss": 3.0773, "step": 5224 }, { "epoch": 21.3265306122449, "grad_norm": 7.9653000831604, "learning_rate": 3.9348979591836735e-05, "loss": 3.3977, "step": 5225 }, { "epoch": 21.33061224489796, "grad_norm": 8.48056411743164, "learning_rate": 3.934693877551021e-05, "loss": 3.403, "step": 5226 }, { "epoch": 21.33469387755102, "grad_norm": 14.526159286499023, "learning_rate": 3.9344897959183676e-05, "loss": 2.392, "step": 5227 }, { "epoch": 21.33877551020408, "grad_norm": 9.47711181640625, "learning_rate": 3.934285714285714e-05, "loss": 3.3211, "step": 5228 }, { "epoch": 21.34285714285714, "grad_norm": 9.983134269714355, "learning_rate": 3.934081632653062e-05, "loss": 3.106, "step": 5229 }, { "epoch": 21.346938775510203, "grad_norm": 9.168636322021484, "learning_rate": 3.9338775510204084e-05, "loss": 3.1816, "step": 5230 }, { "epoch": 21.351020408163265, "grad_norm": 9.470083236694336, "learning_rate": 3.933673469387755e-05, "loss": 3.3745, "step": 5231 }, { "epoch": 21.355102040816327, "grad_norm": 10.265650749206543, "learning_rate": 3.933469387755102e-05, "loss": 3.1261, "step": 5232 }, { "epoch": 21.35918367346939, "grad_norm": 11.253599166870117, "learning_rate": 3.933265306122449e-05, "loss": 3.2928, "step": 5233 }, { "epoch": 21.36326530612245, "grad_norm": 16.788936614990234, "learning_rate": 3.9330612244897966e-05, "loss": 2.8582, "step": 5234 }, { "epoch": 21.367346938775512, "grad_norm": 9.50517463684082, "learning_rate": 3.932857142857143e-05, "loss": 3.2787, "step": 5235 }, { "epoch": 21.37142857142857, "grad_norm": 9.378486633300781, "learning_rate": 3.93265306122449e-05, "loss": 3.4006, "step": 5236 }, { "epoch": 21.375510204081632, "grad_norm": 7.0178542137146, "learning_rate": 3.932448979591837e-05, "loss": 3.5703, "step": 5237 }, { "epoch": 21.379591836734694, "grad_norm": 10.771141052246094, "learning_rate": 3.9322448979591834e-05, "loss": 3.3165, "step": 5238 }, { "epoch": 21.383673469387755, "grad_norm": 7.739558219909668, "learning_rate": 3.932040816326531e-05, "loss": 3.4955, "step": 5239 }, { "epoch": 21.387755102040817, "grad_norm": 20.662837982177734, "learning_rate": 3.9318367346938775e-05, "loss": 1.9599, "step": 5240 }, { "epoch": 21.39183673469388, "grad_norm": 21.868486404418945, "learning_rate": 3.931632653061225e-05, "loss": 3.292, "step": 5241 }, { "epoch": 21.39591836734694, "grad_norm": 14.777811050415039, "learning_rate": 3.9314285714285716e-05, "loss": 2.6058, "step": 5242 }, { "epoch": 21.4, "grad_norm": 4.125577926635742, "learning_rate": 3.9312244897959184e-05, "loss": 3.8314, "step": 5243 }, { "epoch": 21.40408163265306, "grad_norm": 7.89963436126709, "learning_rate": 3.931020408163266e-05, "loss": 3.3779, "step": 5244 }, { "epoch": 21.408163265306122, "grad_norm": 6.23142147064209, "learning_rate": 3.9308163265306125e-05, "loss": 3.5953, "step": 5245 }, { "epoch": 21.412244897959184, "grad_norm": 11.501220703125, "learning_rate": 3.930612244897959e-05, "loss": 2.8396, "step": 5246 }, { "epoch": 21.416326530612245, "grad_norm": 8.666069984436035, "learning_rate": 3.930408163265306e-05, "loss": 3.2225, "step": 5247 }, { "epoch": 21.420408163265307, "grad_norm": 10.179546356201172, "learning_rate": 3.930204081632653e-05, "loss": 2.8528, "step": 5248 }, { "epoch": 21.42448979591837, "grad_norm": 12.44373893737793, "learning_rate": 3.9300000000000007e-05, "loss": 2.4605, "step": 5249 }, { "epoch": 21.428571428571427, "grad_norm": 15.982176780700684, "learning_rate": 3.9297959183673474e-05, "loss": 2.9231, "step": 5250 }, { "epoch": 21.43265306122449, "grad_norm": 6.863236904144287, "learning_rate": 3.929591836734694e-05, "loss": 3.4298, "step": 5251 }, { "epoch": 21.43673469387755, "grad_norm": 15.171073913574219, "learning_rate": 3.929387755102041e-05, "loss": 2.3818, "step": 5252 }, { "epoch": 21.440816326530612, "grad_norm": 17.31285285949707, "learning_rate": 3.929183673469388e-05, "loss": 2.2063, "step": 5253 }, { "epoch": 21.444897959183674, "grad_norm": 8.878372192382812, "learning_rate": 3.928979591836735e-05, "loss": 3.195, "step": 5254 }, { "epoch": 21.448979591836736, "grad_norm": 13.390290260314941, "learning_rate": 3.9287755102040816e-05, "loss": 2.9849, "step": 5255 }, { "epoch": 21.453061224489797, "grad_norm": 9.350056648254395, "learning_rate": 3.928571428571429e-05, "loss": 2.9533, "step": 5256 }, { "epoch": 21.457142857142856, "grad_norm": 11.038847923278809, "learning_rate": 3.928367346938776e-05, "loss": 3.2033, "step": 5257 }, { "epoch": 21.461224489795917, "grad_norm": 8.198698997497559, "learning_rate": 3.928163265306123e-05, "loss": 3.2819, "step": 5258 }, { "epoch": 21.46530612244898, "grad_norm": 8.465054512023926, "learning_rate": 3.92795918367347e-05, "loss": 3.4001, "step": 5259 }, { "epoch": 21.46938775510204, "grad_norm": 12.866791725158691, "learning_rate": 3.9277551020408165e-05, "loss": 2.9697, "step": 5260 }, { "epoch": 21.473469387755102, "grad_norm": 5.62930154800415, "learning_rate": 3.927551020408163e-05, "loss": 3.6453, "step": 5261 }, { "epoch": 21.477551020408164, "grad_norm": 12.31285572052002, "learning_rate": 3.92734693877551e-05, "loss": 2.7317, "step": 5262 }, { "epoch": 21.481632653061226, "grad_norm": 27.112707138061523, "learning_rate": 3.9271428571428573e-05, "loss": 1.9456, "step": 5263 }, { "epoch": 21.485714285714284, "grad_norm": 10.05698299407959, "learning_rate": 3.926938775510204e-05, "loss": 3.2183, "step": 5264 }, { "epoch": 21.489795918367346, "grad_norm": 5.719160079956055, "learning_rate": 3.9267346938775514e-05, "loss": 3.6685, "step": 5265 }, { "epoch": 21.493877551020407, "grad_norm": 13.887262344360352, "learning_rate": 3.926530612244898e-05, "loss": 2.6835, "step": 5266 }, { "epoch": 21.49795918367347, "grad_norm": 4.882192611694336, "learning_rate": 3.926326530612245e-05, "loss": 3.643, "step": 5267 }, { "epoch": 21.50204081632653, "grad_norm": 8.855864524841309, "learning_rate": 3.926122448979592e-05, "loss": 3.2492, "step": 5268 }, { "epoch": 21.506122448979593, "grad_norm": 15.909682273864746, "learning_rate": 3.925918367346939e-05, "loss": 2.5761, "step": 5269 }, { "epoch": 21.510204081632654, "grad_norm": 6.723417282104492, "learning_rate": 3.925714285714286e-05, "loss": 3.4528, "step": 5270 }, { "epoch": 21.514285714285712, "grad_norm": 6.322744369506836, "learning_rate": 3.9255102040816324e-05, "loss": 3.5515, "step": 5271 }, { "epoch": 21.518367346938774, "grad_norm": 10.02153491973877, "learning_rate": 3.92530612244898e-05, "loss": 2.9778, "step": 5272 }, { "epoch": 21.522448979591836, "grad_norm": 11.696873664855957, "learning_rate": 3.925102040816327e-05, "loss": 2.7017, "step": 5273 }, { "epoch": 21.526530612244898, "grad_norm": 15.913138389587402, "learning_rate": 3.924897959183674e-05, "loss": 2.7522, "step": 5274 }, { "epoch": 21.53061224489796, "grad_norm": 5.90537166595459, "learning_rate": 3.9246938775510206e-05, "loss": 3.5858, "step": 5275 }, { "epoch": 21.53469387755102, "grad_norm": 7.678041934967041, "learning_rate": 3.924489795918367e-05, "loss": 3.3365, "step": 5276 }, { "epoch": 21.538775510204083, "grad_norm": 14.079538345336914, "learning_rate": 3.924285714285714e-05, "loss": 2.8109, "step": 5277 }, { "epoch": 21.542857142857144, "grad_norm": 12.93355941772461, "learning_rate": 3.9240816326530614e-05, "loss": 2.7747, "step": 5278 }, { "epoch": 21.546938775510203, "grad_norm": 11.21134090423584, "learning_rate": 3.923877551020408e-05, "loss": 2.4971, "step": 5279 }, { "epoch": 21.551020408163264, "grad_norm": 5.806981086730957, "learning_rate": 3.9236734693877555e-05, "loss": 3.7522, "step": 5280 }, { "epoch": 21.555102040816326, "grad_norm": 5.838054180145264, "learning_rate": 3.923469387755102e-05, "loss": 3.7864, "step": 5281 }, { "epoch": 21.559183673469388, "grad_norm": 12.042131423950195, "learning_rate": 3.923265306122449e-05, "loss": 3.0036, "step": 5282 }, { "epoch": 21.56326530612245, "grad_norm": 11.034103393554688, "learning_rate": 3.9230612244897963e-05, "loss": 3.0654, "step": 5283 }, { "epoch": 21.56734693877551, "grad_norm": 10.090615272521973, "learning_rate": 3.922857142857143e-05, "loss": 3.0086, "step": 5284 }, { "epoch": 21.571428571428573, "grad_norm": 7.708790302276611, "learning_rate": 3.92265306122449e-05, "loss": 3.4534, "step": 5285 }, { "epoch": 21.57551020408163, "grad_norm": 14.202064514160156, "learning_rate": 3.9224489795918365e-05, "loss": 2.7651, "step": 5286 }, { "epoch": 21.579591836734693, "grad_norm": 7.004822731018066, "learning_rate": 3.922244897959184e-05, "loss": 3.2508, "step": 5287 }, { "epoch": 21.583673469387755, "grad_norm": 6.754845142364502, "learning_rate": 3.922040816326531e-05, "loss": 3.3571, "step": 5288 }, { "epoch": 21.587755102040816, "grad_norm": 9.880568504333496, "learning_rate": 3.921836734693878e-05, "loss": 3.1331, "step": 5289 }, { "epoch": 21.591836734693878, "grad_norm": 7.6317362785339355, "learning_rate": 3.921632653061225e-05, "loss": 3.3581, "step": 5290 }, { "epoch": 21.59591836734694, "grad_norm": 9.780532836914062, "learning_rate": 3.9214285714285714e-05, "loss": 3.0908, "step": 5291 }, { "epoch": 21.6, "grad_norm": 6.325939178466797, "learning_rate": 3.921224489795919e-05, "loss": 3.4402, "step": 5292 }, { "epoch": 21.60408163265306, "grad_norm": 5.09993839263916, "learning_rate": 3.9210204081632655e-05, "loss": 3.7618, "step": 5293 }, { "epoch": 21.60816326530612, "grad_norm": 10.832161903381348, "learning_rate": 3.920816326530612e-05, "loss": 2.7301, "step": 5294 }, { "epoch": 21.612244897959183, "grad_norm": 5.584571361541748, "learning_rate": 3.9206122448979596e-05, "loss": 3.6416, "step": 5295 }, { "epoch": 21.616326530612245, "grad_norm": 6.65135383605957, "learning_rate": 3.920408163265306e-05, "loss": 3.5554, "step": 5296 }, { "epoch": 21.620408163265306, "grad_norm": 6.633152961730957, "learning_rate": 3.920204081632654e-05, "loss": 3.2646, "step": 5297 }, { "epoch": 21.624489795918368, "grad_norm": 9.509200096130371, "learning_rate": 3.9200000000000004e-05, "loss": 3.4487, "step": 5298 }, { "epoch": 21.62857142857143, "grad_norm": 7.090089797973633, "learning_rate": 3.919795918367347e-05, "loss": 3.5964, "step": 5299 }, { "epoch": 21.632653061224488, "grad_norm": 16.12508773803711, "learning_rate": 3.919591836734694e-05, "loss": 2.5842, "step": 5300 }, { "epoch": 21.63673469387755, "grad_norm": 19.341529846191406, "learning_rate": 3.9193877551020406e-05, "loss": 2.5901, "step": 5301 }, { "epoch": 21.64081632653061, "grad_norm": 6.179440498352051, "learning_rate": 3.919183673469388e-05, "loss": 3.6209, "step": 5302 }, { "epoch": 21.644897959183673, "grad_norm": 7.643826961517334, "learning_rate": 3.918979591836735e-05, "loss": 3.3802, "step": 5303 }, { "epoch": 21.648979591836735, "grad_norm": 16.671611785888672, "learning_rate": 3.918775510204082e-05, "loss": 2.5193, "step": 5304 }, { "epoch": 21.653061224489797, "grad_norm": 6.457637786865234, "learning_rate": 3.918571428571429e-05, "loss": 3.403, "step": 5305 }, { "epoch": 21.65714285714286, "grad_norm": 8.661864280700684, "learning_rate": 3.9183673469387755e-05, "loss": 3.0803, "step": 5306 }, { "epoch": 21.66122448979592, "grad_norm": 12.244818687438965, "learning_rate": 3.918163265306123e-05, "loss": 2.9152, "step": 5307 }, { "epoch": 21.66530612244898, "grad_norm": 13.681102752685547, "learning_rate": 3.9179591836734696e-05, "loss": 3.0073, "step": 5308 }, { "epoch": 21.66938775510204, "grad_norm": 12.588359832763672, "learning_rate": 3.917755102040816e-05, "loss": 2.9205, "step": 5309 }, { "epoch": 21.6734693877551, "grad_norm": 17.547563552856445, "learning_rate": 3.917551020408164e-05, "loss": 2.3148, "step": 5310 }, { "epoch": 21.677551020408163, "grad_norm": 8.170877456665039, "learning_rate": 3.9173469387755104e-05, "loss": 3.6006, "step": 5311 }, { "epoch": 21.681632653061225, "grad_norm": 10.410049438476562, "learning_rate": 3.917142857142858e-05, "loss": 3.4265, "step": 5312 }, { "epoch": 21.685714285714287, "grad_norm": 19.375429153442383, "learning_rate": 3.9169387755102045e-05, "loss": 2.5504, "step": 5313 }, { "epoch": 21.68979591836735, "grad_norm": 6.907191753387451, "learning_rate": 3.916734693877551e-05, "loss": 3.4575, "step": 5314 }, { "epoch": 21.693877551020407, "grad_norm": 11.27939510345459, "learning_rate": 3.916530612244898e-05, "loss": 2.9895, "step": 5315 }, { "epoch": 21.69795918367347, "grad_norm": 27.210289001464844, "learning_rate": 3.9163265306122446e-05, "loss": 1.5815, "step": 5316 }, { "epoch": 21.70204081632653, "grad_norm": 28.265457153320312, "learning_rate": 3.916122448979592e-05, "loss": 1.6434, "step": 5317 }, { "epoch": 21.706122448979592, "grad_norm": 10.283126831054688, "learning_rate": 3.915918367346939e-05, "loss": 3.0891, "step": 5318 }, { "epoch": 21.710204081632654, "grad_norm": 7.346092700958252, "learning_rate": 3.915714285714286e-05, "loss": 3.2516, "step": 5319 }, { "epoch": 21.714285714285715, "grad_norm": 7.318119049072266, "learning_rate": 3.915510204081633e-05, "loss": 3.504, "step": 5320 }, { "epoch": 21.718367346938777, "grad_norm": 16.068937301635742, "learning_rate": 3.9153061224489795e-05, "loss": 2.9803, "step": 5321 }, { "epoch": 21.722448979591835, "grad_norm": 12.64255428314209, "learning_rate": 3.915102040816327e-05, "loss": 2.9135, "step": 5322 }, { "epoch": 21.726530612244897, "grad_norm": 16.029735565185547, "learning_rate": 3.9148979591836737e-05, "loss": 3.1232, "step": 5323 }, { "epoch": 21.73061224489796, "grad_norm": 19.86552619934082, "learning_rate": 3.9146938775510204e-05, "loss": 2.1422, "step": 5324 }, { "epoch": 21.73469387755102, "grad_norm": 7.080488204956055, "learning_rate": 3.914489795918367e-05, "loss": 3.313, "step": 5325 }, { "epoch": 21.738775510204082, "grad_norm": 14.531201362609863, "learning_rate": 3.9142857142857145e-05, "loss": 2.7388, "step": 5326 }, { "epoch": 21.742857142857144, "grad_norm": 15.178973197937012, "learning_rate": 3.914081632653062e-05, "loss": 1.984, "step": 5327 }, { "epoch": 21.746938775510205, "grad_norm": 9.421892166137695, "learning_rate": 3.9138775510204086e-05, "loss": 3.5009, "step": 5328 }, { "epoch": 21.751020408163264, "grad_norm": 14.2135591506958, "learning_rate": 3.913673469387755e-05, "loss": 3.2339, "step": 5329 }, { "epoch": 21.755102040816325, "grad_norm": 12.57523250579834, "learning_rate": 3.913469387755102e-05, "loss": 3.094, "step": 5330 }, { "epoch": 21.759183673469387, "grad_norm": 15.722180366516113, "learning_rate": 3.913265306122449e-05, "loss": 2.5232, "step": 5331 }, { "epoch": 21.76326530612245, "grad_norm": 41.29357147216797, "learning_rate": 3.913061224489796e-05, "loss": 0.9096, "step": 5332 }, { "epoch": 21.76734693877551, "grad_norm": 13.161029815673828, "learning_rate": 3.912857142857143e-05, "loss": 2.7536, "step": 5333 }, { "epoch": 21.771428571428572, "grad_norm": 7.035447597503662, "learning_rate": 3.91265306122449e-05, "loss": 3.3646, "step": 5334 }, { "epoch": 21.775510204081634, "grad_norm": 17.111448287963867, "learning_rate": 3.912448979591837e-05, "loss": 3.1577, "step": 5335 }, { "epoch": 21.779591836734696, "grad_norm": 13.152811050415039, "learning_rate": 3.9122448979591836e-05, "loss": 2.9676, "step": 5336 }, { "epoch": 21.783673469387754, "grad_norm": 13.979843139648438, "learning_rate": 3.912040816326531e-05, "loss": 2.4376, "step": 5337 }, { "epoch": 21.787755102040816, "grad_norm": 7.271454811096191, "learning_rate": 3.911836734693878e-05, "loss": 3.5895, "step": 5338 }, { "epoch": 21.791836734693877, "grad_norm": 19.09225082397461, "learning_rate": 3.9116326530612244e-05, "loss": 1.6682, "step": 5339 }, { "epoch": 21.79591836734694, "grad_norm": 16.96795082092285, "learning_rate": 3.911428571428571e-05, "loss": 2.4658, "step": 5340 }, { "epoch": 21.8, "grad_norm": 9.39088249206543, "learning_rate": 3.9112244897959185e-05, "loss": 3.0435, "step": 5341 }, { "epoch": 21.804081632653062, "grad_norm": 8.497851371765137, "learning_rate": 3.911020408163266e-05, "loss": 3.3444, "step": 5342 }, { "epoch": 21.808163265306124, "grad_norm": 18.004783630371094, "learning_rate": 3.9108163265306126e-05, "loss": 2.5, "step": 5343 }, { "epoch": 21.812244897959182, "grad_norm": 19.098968505859375, "learning_rate": 3.9106122448979594e-05, "loss": 2.6172, "step": 5344 }, { "epoch": 21.816326530612244, "grad_norm": 21.566274642944336, "learning_rate": 3.910408163265306e-05, "loss": 2.3922, "step": 5345 }, { "epoch": 21.820408163265306, "grad_norm": 4.695374488830566, "learning_rate": 3.9102040816326535e-05, "loss": 3.9028, "step": 5346 }, { "epoch": 21.824489795918367, "grad_norm": 10.307470321655273, "learning_rate": 3.91e-05, "loss": 3.0624, "step": 5347 }, { "epoch": 21.82857142857143, "grad_norm": 12.918807029724121, "learning_rate": 3.909795918367347e-05, "loss": 2.9252, "step": 5348 }, { "epoch": 21.83265306122449, "grad_norm": 24.792572021484375, "learning_rate": 3.909591836734694e-05, "loss": 2.0836, "step": 5349 }, { "epoch": 21.836734693877553, "grad_norm": 10.35031795501709, "learning_rate": 3.909387755102041e-05, "loss": 3.095, "step": 5350 }, { "epoch": 21.84081632653061, "grad_norm": 9.068787574768066, "learning_rate": 3.9091836734693884e-05, "loss": 3.3179, "step": 5351 }, { "epoch": 21.844897959183672, "grad_norm": 8.352224349975586, "learning_rate": 3.908979591836735e-05, "loss": 3.2494, "step": 5352 }, { "epoch": 21.848979591836734, "grad_norm": 10.63367748260498, "learning_rate": 3.908775510204082e-05, "loss": 2.8704, "step": 5353 }, { "epoch": 21.853061224489796, "grad_norm": 17.959697723388672, "learning_rate": 3.9085714285714285e-05, "loss": 2.0639, "step": 5354 }, { "epoch": 21.857142857142858, "grad_norm": 6.2079243659973145, "learning_rate": 3.908367346938775e-05, "loss": 3.6051, "step": 5355 }, { "epoch": 21.86122448979592, "grad_norm": 5.927908420562744, "learning_rate": 3.9081632653061226e-05, "loss": 3.7588, "step": 5356 }, { "epoch": 21.86530612244898, "grad_norm": 18.41315269470215, "learning_rate": 3.90795918367347e-05, "loss": 2.0087, "step": 5357 }, { "epoch": 21.86938775510204, "grad_norm": 5.8262410163879395, "learning_rate": 3.907755102040817e-05, "loss": 3.6947, "step": 5358 }, { "epoch": 21.8734693877551, "grad_norm": 10.834977149963379, "learning_rate": 3.9075510204081634e-05, "loss": 3.2186, "step": 5359 }, { "epoch": 21.877551020408163, "grad_norm": 12.157873153686523, "learning_rate": 3.90734693877551e-05, "loss": 3.2962, "step": 5360 }, { "epoch": 21.881632653061224, "grad_norm": 8.507820129394531, "learning_rate": 3.9071428571428575e-05, "loss": 3.2982, "step": 5361 }, { "epoch": 21.885714285714286, "grad_norm": 8.00412368774414, "learning_rate": 3.906938775510204e-05, "loss": 3.34, "step": 5362 }, { "epoch": 21.889795918367348, "grad_norm": 14.705574989318848, "learning_rate": 3.906734693877551e-05, "loss": 2.7698, "step": 5363 }, { "epoch": 21.89387755102041, "grad_norm": 9.602391242980957, "learning_rate": 3.9065306122448984e-05, "loss": 3.266, "step": 5364 }, { "epoch": 21.897959183673468, "grad_norm": 18.66460609436035, "learning_rate": 3.906326530612245e-05, "loss": 2.5028, "step": 5365 }, { "epoch": 21.90204081632653, "grad_norm": 8.110833168029785, "learning_rate": 3.9061224489795925e-05, "loss": 3.3365, "step": 5366 }, { "epoch": 21.90612244897959, "grad_norm": 15.527217864990234, "learning_rate": 3.905918367346939e-05, "loss": 2.5242, "step": 5367 }, { "epoch": 21.910204081632653, "grad_norm": 21.78234100341797, "learning_rate": 3.905714285714286e-05, "loss": 1.8235, "step": 5368 }, { "epoch": 21.914285714285715, "grad_norm": 8.198573112487793, "learning_rate": 3.9055102040816326e-05, "loss": 3.2646, "step": 5369 }, { "epoch": 21.918367346938776, "grad_norm": 16.48544692993164, "learning_rate": 3.905306122448979e-05, "loss": 2.6256, "step": 5370 }, { "epoch": 21.922448979591838, "grad_norm": 9.790444374084473, "learning_rate": 3.905102040816327e-05, "loss": 3.0689, "step": 5371 }, { "epoch": 21.926530612244896, "grad_norm": 5.271088123321533, "learning_rate": 3.9048979591836734e-05, "loss": 3.8781, "step": 5372 }, { "epoch": 21.930612244897958, "grad_norm": 9.107704162597656, "learning_rate": 3.904693877551021e-05, "loss": 3.0627, "step": 5373 }, { "epoch": 21.93469387755102, "grad_norm": 7.146377086639404, "learning_rate": 3.9044897959183675e-05, "loss": 3.4354, "step": 5374 }, { "epoch": 21.93877551020408, "grad_norm": 11.747342109680176, "learning_rate": 3.904285714285714e-05, "loss": 3.2192, "step": 5375 }, { "epoch": 21.942857142857143, "grad_norm": 8.165575981140137, "learning_rate": 3.9040816326530616e-05, "loss": 3.205, "step": 5376 }, { "epoch": 21.946938775510205, "grad_norm": 19.23051643371582, "learning_rate": 3.903877551020408e-05, "loss": 2.1218, "step": 5377 }, { "epoch": 21.951020408163266, "grad_norm": 11.646354675292969, "learning_rate": 3.903673469387755e-05, "loss": 3.0431, "step": 5378 }, { "epoch": 21.955102040816328, "grad_norm": 10.40970230102539, "learning_rate": 3.903469387755102e-05, "loss": 3.3573, "step": 5379 }, { "epoch": 21.959183673469386, "grad_norm": 12.7279691696167, "learning_rate": 3.903265306122449e-05, "loss": 2.6765, "step": 5380 }, { "epoch": 21.963265306122448, "grad_norm": 12.859862327575684, "learning_rate": 3.9030612244897965e-05, "loss": 2.804, "step": 5381 }, { "epoch": 21.96734693877551, "grad_norm": 8.810935020446777, "learning_rate": 3.902857142857143e-05, "loss": 3.5039, "step": 5382 }, { "epoch": 21.97142857142857, "grad_norm": 9.051169395446777, "learning_rate": 3.90265306122449e-05, "loss": 3.3741, "step": 5383 }, { "epoch": 21.975510204081633, "grad_norm": 10.555389404296875, "learning_rate": 3.902448979591837e-05, "loss": 2.9109, "step": 5384 }, { "epoch": 21.979591836734695, "grad_norm": 7.6695661544799805, "learning_rate": 3.902244897959184e-05, "loss": 3.6536, "step": 5385 }, { "epoch": 21.983673469387757, "grad_norm": 25.995616912841797, "learning_rate": 3.902040816326531e-05, "loss": 1.8187, "step": 5386 }, { "epoch": 21.987755102040815, "grad_norm": 7.816768169403076, "learning_rate": 3.9018367346938775e-05, "loss": 3.1708, "step": 5387 }, { "epoch": 21.991836734693877, "grad_norm": 7.4928364753723145, "learning_rate": 3.901632653061225e-05, "loss": 3.4478, "step": 5388 }, { "epoch": 21.99591836734694, "grad_norm": 11.348310470581055, "learning_rate": 3.9014285714285716e-05, "loss": 3.2802, "step": 5389 }, { "epoch": 22.0, "grad_norm": 10.033151626586914, "learning_rate": 3.901224489795919e-05, "loss": 2.8673, "step": 5390 }, { "epoch": 22.00408163265306, "grad_norm": 10.837406158447266, "learning_rate": 3.901020408163266e-05, "loss": 2.9167, "step": 5391 }, { "epoch": 22.008163265306123, "grad_norm": 6.6716742515563965, "learning_rate": 3.9008163265306124e-05, "loss": 3.2246, "step": 5392 }, { "epoch": 22.012244897959185, "grad_norm": 5.4990363121032715, "learning_rate": 3.900612244897959e-05, "loss": 3.5919, "step": 5393 }, { "epoch": 22.016326530612243, "grad_norm": 6.148000240325928, "learning_rate": 3.900408163265306e-05, "loss": 3.5472, "step": 5394 }, { "epoch": 22.020408163265305, "grad_norm": 5.597961902618408, "learning_rate": 3.900204081632653e-05, "loss": 3.7137, "step": 5395 }, { "epoch": 22.024489795918367, "grad_norm": 7.195807456970215, "learning_rate": 3.9000000000000006e-05, "loss": 3.4217, "step": 5396 }, { "epoch": 22.02857142857143, "grad_norm": 15.714385032653809, "learning_rate": 3.899795918367347e-05, "loss": 2.8939, "step": 5397 }, { "epoch": 22.03265306122449, "grad_norm": 29.67262077331543, "learning_rate": 3.899591836734694e-05, "loss": 0.7441, "step": 5398 }, { "epoch": 22.036734693877552, "grad_norm": 6.040183067321777, "learning_rate": 3.899387755102041e-05, "loss": 3.5134, "step": 5399 }, { "epoch": 22.040816326530614, "grad_norm": 5.909049034118652, "learning_rate": 3.899183673469388e-05, "loss": 3.6017, "step": 5400 }, { "epoch": 22.044897959183672, "grad_norm": 8.162402153015137, "learning_rate": 3.898979591836735e-05, "loss": 3.2814, "step": 5401 }, { "epoch": 22.048979591836734, "grad_norm": 11.694894790649414, "learning_rate": 3.8987755102040816e-05, "loss": 3.1746, "step": 5402 }, { "epoch": 22.053061224489795, "grad_norm": 16.34869956970215, "learning_rate": 3.898571428571429e-05, "loss": 2.6632, "step": 5403 }, { "epoch": 22.057142857142857, "grad_norm": 6.883347988128662, "learning_rate": 3.898367346938776e-05, "loss": 3.715, "step": 5404 }, { "epoch": 22.06122448979592, "grad_norm": 6.782951354980469, "learning_rate": 3.898163265306123e-05, "loss": 3.3444, "step": 5405 }, { "epoch": 22.06530612244898, "grad_norm": 14.438286781311035, "learning_rate": 3.89795918367347e-05, "loss": 2.2504, "step": 5406 }, { "epoch": 22.069387755102042, "grad_norm": 7.280511856079102, "learning_rate": 3.8977551020408165e-05, "loss": 3.3387, "step": 5407 }, { "epoch": 22.073469387755104, "grad_norm": 11.728118896484375, "learning_rate": 3.897551020408163e-05, "loss": 2.821, "step": 5408 }, { "epoch": 22.077551020408162, "grad_norm": 10.161273956298828, "learning_rate": 3.89734693877551e-05, "loss": 2.8929, "step": 5409 }, { "epoch": 22.081632653061224, "grad_norm": 8.12052059173584, "learning_rate": 3.897142857142857e-05, "loss": 3.33, "step": 5410 }, { "epoch": 22.085714285714285, "grad_norm": 16.80866241455078, "learning_rate": 3.896938775510205e-05, "loss": 2.8213, "step": 5411 }, { "epoch": 22.089795918367347, "grad_norm": 11.020188331604004, "learning_rate": 3.8967346938775514e-05, "loss": 2.8516, "step": 5412 }, { "epoch": 22.09387755102041, "grad_norm": 12.705037117004395, "learning_rate": 3.896530612244898e-05, "loss": 2.5855, "step": 5413 }, { "epoch": 22.09795918367347, "grad_norm": 12.87465763092041, "learning_rate": 3.896326530612245e-05, "loss": 2.9924, "step": 5414 }, { "epoch": 22.102040816326532, "grad_norm": 11.170294761657715, "learning_rate": 3.896122448979592e-05, "loss": 3.1584, "step": 5415 }, { "epoch": 22.10612244897959, "grad_norm": 15.506930351257324, "learning_rate": 3.895918367346939e-05, "loss": 2.4336, "step": 5416 }, { "epoch": 22.110204081632652, "grad_norm": 15.327917098999023, "learning_rate": 3.8957142857142856e-05, "loss": 2.6582, "step": 5417 }, { "epoch": 22.114285714285714, "grad_norm": 10.187525749206543, "learning_rate": 3.895510204081633e-05, "loss": 3.1204, "step": 5418 }, { "epoch": 22.118367346938776, "grad_norm": 7.795262813568115, "learning_rate": 3.89530612244898e-05, "loss": 3.4947, "step": 5419 }, { "epoch": 22.122448979591837, "grad_norm": 9.46252727508545, "learning_rate": 3.895102040816327e-05, "loss": 3.2647, "step": 5420 }, { "epoch": 22.1265306122449, "grad_norm": 21.659496307373047, "learning_rate": 3.894897959183674e-05, "loss": 1.9524, "step": 5421 }, { "epoch": 22.13061224489796, "grad_norm": 8.548308372497559, "learning_rate": 3.8946938775510206e-05, "loss": 3.0409, "step": 5422 }, { "epoch": 22.13469387755102, "grad_norm": 21.056142807006836, "learning_rate": 3.894489795918367e-05, "loss": 2.1309, "step": 5423 }, { "epoch": 22.13877551020408, "grad_norm": 5.146143913269043, "learning_rate": 3.894285714285714e-05, "loss": 3.6016, "step": 5424 }, { "epoch": 22.142857142857142, "grad_norm": 6.866601467132568, "learning_rate": 3.8940816326530614e-05, "loss": 3.3806, "step": 5425 }, { "epoch": 22.146938775510204, "grad_norm": 14.72608470916748, "learning_rate": 3.893877551020408e-05, "loss": 2.6146, "step": 5426 }, { "epoch": 22.151020408163266, "grad_norm": 10.4614896774292, "learning_rate": 3.8936734693877555e-05, "loss": 3.3794, "step": 5427 }, { "epoch": 22.155102040816328, "grad_norm": 7.464935779571533, "learning_rate": 3.893469387755102e-05, "loss": 3.4976, "step": 5428 }, { "epoch": 22.15918367346939, "grad_norm": 22.175901412963867, "learning_rate": 3.8932653061224496e-05, "loss": 2.4441, "step": 5429 }, { "epoch": 22.163265306122447, "grad_norm": 5.199661731719971, "learning_rate": 3.893061224489796e-05, "loss": 3.9613, "step": 5430 }, { "epoch": 22.16734693877551, "grad_norm": 20.399717330932617, "learning_rate": 3.892857142857143e-05, "loss": 3.0611, "step": 5431 }, { "epoch": 22.17142857142857, "grad_norm": 6.866021633148193, "learning_rate": 3.89265306122449e-05, "loss": 3.302, "step": 5432 }, { "epoch": 22.175510204081633, "grad_norm": 12.41447639465332, "learning_rate": 3.8924489795918364e-05, "loss": 2.3693, "step": 5433 }, { "epoch": 22.179591836734694, "grad_norm": 18.523149490356445, "learning_rate": 3.892244897959184e-05, "loss": 1.7868, "step": 5434 }, { "epoch": 22.183673469387756, "grad_norm": 9.9772310256958, "learning_rate": 3.892040816326531e-05, "loss": 3.2545, "step": 5435 }, { "epoch": 22.187755102040818, "grad_norm": 17.64374351501465, "learning_rate": 3.891836734693878e-05, "loss": 3.0671, "step": 5436 }, { "epoch": 22.191836734693876, "grad_norm": 6.288527488708496, "learning_rate": 3.8916326530612246e-05, "loss": 3.4742, "step": 5437 }, { "epoch": 22.195918367346938, "grad_norm": 6.477043151855469, "learning_rate": 3.8914285714285713e-05, "loss": 3.3279, "step": 5438 }, { "epoch": 22.2, "grad_norm": 14.593412399291992, "learning_rate": 3.891224489795919e-05, "loss": 2.573, "step": 5439 }, { "epoch": 22.20408163265306, "grad_norm": 11.25446891784668, "learning_rate": 3.8910204081632654e-05, "loss": 3.1166, "step": 5440 }, { "epoch": 22.208163265306123, "grad_norm": 7.765546798706055, "learning_rate": 3.890816326530612e-05, "loss": 3.4048, "step": 5441 }, { "epoch": 22.212244897959184, "grad_norm": 11.135490417480469, "learning_rate": 3.8906122448979595e-05, "loss": 2.957, "step": 5442 }, { "epoch": 22.216326530612246, "grad_norm": 23.01011848449707, "learning_rate": 3.890408163265306e-05, "loss": 1.5601, "step": 5443 }, { "epoch": 22.220408163265308, "grad_norm": 9.274351119995117, "learning_rate": 3.8902040816326537e-05, "loss": 3.3353, "step": 5444 }, { "epoch": 22.224489795918366, "grad_norm": 16.58902359008789, "learning_rate": 3.8900000000000004e-05, "loss": 2.3422, "step": 5445 }, { "epoch": 22.228571428571428, "grad_norm": 10.966280937194824, "learning_rate": 3.889795918367347e-05, "loss": 3.1553, "step": 5446 }, { "epoch": 22.23265306122449, "grad_norm": 11.232818603515625, "learning_rate": 3.889591836734694e-05, "loss": 2.8888, "step": 5447 }, { "epoch": 22.23673469387755, "grad_norm": 11.62741470336914, "learning_rate": 3.8893877551020405e-05, "loss": 2.9747, "step": 5448 }, { "epoch": 22.240816326530613, "grad_norm": 12.497214317321777, "learning_rate": 3.889183673469388e-05, "loss": 3.0061, "step": 5449 }, { "epoch": 22.244897959183675, "grad_norm": 16.550153732299805, "learning_rate": 3.888979591836735e-05, "loss": 3.08, "step": 5450 }, { "epoch": 22.248979591836736, "grad_norm": 8.540329933166504, "learning_rate": 3.888775510204082e-05, "loss": 3.46, "step": 5451 }, { "epoch": 22.253061224489795, "grad_norm": 4.78341817855835, "learning_rate": 3.888571428571429e-05, "loss": 3.81, "step": 5452 }, { "epoch": 22.257142857142856, "grad_norm": 18.899433135986328, "learning_rate": 3.8883673469387754e-05, "loss": 1.6991, "step": 5453 }, { "epoch": 22.261224489795918, "grad_norm": 8.785179138183594, "learning_rate": 3.888163265306123e-05, "loss": 3.2252, "step": 5454 }, { "epoch": 22.26530612244898, "grad_norm": 11.368565559387207, "learning_rate": 3.8879591836734695e-05, "loss": 2.7415, "step": 5455 }, { "epoch": 22.26938775510204, "grad_norm": 11.095187187194824, "learning_rate": 3.887755102040816e-05, "loss": 3.2452, "step": 5456 }, { "epoch": 22.273469387755103, "grad_norm": 7.604832172393799, "learning_rate": 3.8875510204081636e-05, "loss": 3.4889, "step": 5457 }, { "epoch": 22.277551020408165, "grad_norm": 12.503345489501953, "learning_rate": 3.88734693877551e-05, "loss": 2.9177, "step": 5458 }, { "epoch": 22.281632653061223, "grad_norm": 19.124013900756836, "learning_rate": 3.887142857142858e-05, "loss": 2.175, "step": 5459 }, { "epoch": 22.285714285714285, "grad_norm": 12.571282386779785, "learning_rate": 3.8869387755102044e-05, "loss": 2.9936, "step": 5460 }, { "epoch": 22.289795918367346, "grad_norm": 12.379569053649902, "learning_rate": 3.886734693877551e-05, "loss": 2.8052, "step": 5461 }, { "epoch": 22.293877551020408, "grad_norm": 23.118629455566406, "learning_rate": 3.886530612244898e-05, "loss": 1.9862, "step": 5462 }, { "epoch": 22.29795918367347, "grad_norm": 6.610323429107666, "learning_rate": 3.8863265306122446e-05, "loss": 3.4062, "step": 5463 }, { "epoch": 22.30204081632653, "grad_norm": 9.348610877990723, "learning_rate": 3.886122448979592e-05, "loss": 3.2569, "step": 5464 }, { "epoch": 22.306122448979593, "grad_norm": 16.087486267089844, "learning_rate": 3.8859183673469394e-05, "loss": 2.5972, "step": 5465 }, { "epoch": 22.31020408163265, "grad_norm": 12.920024871826172, "learning_rate": 3.885714285714286e-05, "loss": 3.0866, "step": 5466 }, { "epoch": 22.314285714285713, "grad_norm": 19.64531707763672, "learning_rate": 3.885510204081633e-05, "loss": 2.5329, "step": 5467 }, { "epoch": 22.318367346938775, "grad_norm": 9.896714210510254, "learning_rate": 3.8853061224489795e-05, "loss": 3.3231, "step": 5468 }, { "epoch": 22.322448979591837, "grad_norm": 13.798032760620117, "learning_rate": 3.885102040816327e-05, "loss": 2.6062, "step": 5469 }, { "epoch": 22.3265306122449, "grad_norm": 25.993684768676758, "learning_rate": 3.8848979591836736e-05, "loss": 1.6007, "step": 5470 }, { "epoch": 22.33061224489796, "grad_norm": 9.332524299621582, "learning_rate": 3.88469387755102e-05, "loss": 3.0179, "step": 5471 }, { "epoch": 22.33469387755102, "grad_norm": 7.638887405395508, "learning_rate": 3.884489795918368e-05, "loss": 3.5521, "step": 5472 }, { "epoch": 22.33877551020408, "grad_norm": 8.227090835571289, "learning_rate": 3.8842857142857144e-05, "loss": 3.3772, "step": 5473 }, { "epoch": 22.34285714285714, "grad_norm": 14.276363372802734, "learning_rate": 3.884081632653062e-05, "loss": 2.9837, "step": 5474 }, { "epoch": 22.346938775510203, "grad_norm": 7.850384712219238, "learning_rate": 3.8838775510204085e-05, "loss": 3.4571, "step": 5475 }, { "epoch": 22.351020408163265, "grad_norm": 13.854061126708984, "learning_rate": 3.883673469387755e-05, "loss": 2.6791, "step": 5476 }, { "epoch": 22.355102040816327, "grad_norm": 8.069331169128418, "learning_rate": 3.883469387755102e-05, "loss": 3.2632, "step": 5477 }, { "epoch": 22.35918367346939, "grad_norm": 5.521214485168457, "learning_rate": 3.883265306122449e-05, "loss": 3.788, "step": 5478 }, { "epoch": 22.36326530612245, "grad_norm": 9.298871040344238, "learning_rate": 3.883061224489796e-05, "loss": 3.1927, "step": 5479 }, { "epoch": 22.367346938775512, "grad_norm": 10.878087997436523, "learning_rate": 3.882857142857143e-05, "loss": 3.279, "step": 5480 }, { "epoch": 22.37142857142857, "grad_norm": 8.695112228393555, "learning_rate": 3.88265306122449e-05, "loss": 3.4323, "step": 5481 }, { "epoch": 22.375510204081632, "grad_norm": 17.84671974182129, "learning_rate": 3.882448979591837e-05, "loss": 1.7993, "step": 5482 }, { "epoch": 22.379591836734694, "grad_norm": 7.679091453552246, "learning_rate": 3.882244897959184e-05, "loss": 3.5483, "step": 5483 }, { "epoch": 22.383673469387755, "grad_norm": 11.205924987792969, "learning_rate": 3.882040816326531e-05, "loss": 3.1294, "step": 5484 }, { "epoch": 22.387755102040817, "grad_norm": 10.622922897338867, "learning_rate": 3.881836734693878e-05, "loss": 3.0849, "step": 5485 }, { "epoch": 22.39183673469388, "grad_norm": 23.17711067199707, "learning_rate": 3.8816326530612244e-05, "loss": 1.6814, "step": 5486 }, { "epoch": 22.39591836734694, "grad_norm": 7.765395164489746, "learning_rate": 3.881428571428571e-05, "loss": 3.4258, "step": 5487 }, { "epoch": 22.4, "grad_norm": 10.336729049682617, "learning_rate": 3.8812244897959185e-05, "loss": 2.8865, "step": 5488 }, { "epoch": 22.40408163265306, "grad_norm": 7.864658832550049, "learning_rate": 3.881020408163266e-05, "loss": 3.5059, "step": 5489 }, { "epoch": 22.408163265306122, "grad_norm": 8.95151138305664, "learning_rate": 3.8808163265306126e-05, "loss": 3.4131, "step": 5490 }, { "epoch": 22.412244897959184, "grad_norm": 16.264421463012695, "learning_rate": 3.880612244897959e-05, "loss": 2.4915, "step": 5491 }, { "epoch": 22.416326530612245, "grad_norm": 8.017352104187012, "learning_rate": 3.880408163265306e-05, "loss": 3.1716, "step": 5492 }, { "epoch": 22.420408163265307, "grad_norm": 18.828388214111328, "learning_rate": 3.8802040816326534e-05, "loss": 2.4885, "step": 5493 }, { "epoch": 22.42448979591837, "grad_norm": 8.14694881439209, "learning_rate": 3.88e-05, "loss": 3.1959, "step": 5494 }, { "epoch": 22.428571428571427, "grad_norm": 9.294734954833984, "learning_rate": 3.879795918367347e-05, "loss": 3.2531, "step": 5495 }, { "epoch": 22.43265306122449, "grad_norm": 10.244837760925293, "learning_rate": 3.879591836734694e-05, "loss": 2.9555, "step": 5496 }, { "epoch": 22.43673469387755, "grad_norm": 9.185967445373535, "learning_rate": 3.879387755102041e-05, "loss": 3.2398, "step": 5497 }, { "epoch": 22.440816326530612, "grad_norm": 8.199723243713379, "learning_rate": 3.879183673469388e-05, "loss": 3.5391, "step": 5498 }, { "epoch": 22.444897959183674, "grad_norm": 7.505916118621826, "learning_rate": 3.878979591836735e-05, "loss": 3.3117, "step": 5499 }, { "epoch": 22.448979591836736, "grad_norm": 19.72258186340332, "learning_rate": 3.878775510204082e-05, "loss": 2.5036, "step": 5500 }, { "epoch": 22.453061224489797, "grad_norm": 9.093313217163086, "learning_rate": 3.8785714285714285e-05, "loss": 3.236, "step": 5501 }, { "epoch": 22.457142857142856, "grad_norm": 10.810147285461426, "learning_rate": 3.878367346938775e-05, "loss": 3.3699, "step": 5502 }, { "epoch": 22.461224489795917, "grad_norm": 23.584402084350586, "learning_rate": 3.8781632653061226e-05, "loss": 2.0375, "step": 5503 }, { "epoch": 22.46530612244898, "grad_norm": 23.69069480895996, "learning_rate": 3.87795918367347e-05, "loss": 2.523, "step": 5504 }, { "epoch": 22.46938775510204, "grad_norm": 19.64825439453125, "learning_rate": 3.877755102040817e-05, "loss": 1.9143, "step": 5505 }, { "epoch": 22.473469387755102, "grad_norm": 19.143857955932617, "learning_rate": 3.8775510204081634e-05, "loss": 2.3879, "step": 5506 }, { "epoch": 22.477551020408164, "grad_norm": 6.587261199951172, "learning_rate": 3.87734693877551e-05, "loss": 3.3269, "step": 5507 }, { "epoch": 22.481632653061226, "grad_norm": 19.969257354736328, "learning_rate": 3.8771428571428575e-05, "loss": 1.4555, "step": 5508 }, { "epoch": 22.485714285714284, "grad_norm": 9.941447257995605, "learning_rate": 3.876938775510204e-05, "loss": 3.3675, "step": 5509 }, { "epoch": 22.489795918367346, "grad_norm": 9.146453857421875, "learning_rate": 3.876734693877551e-05, "loss": 3.4767, "step": 5510 }, { "epoch": 22.493877551020407, "grad_norm": 10.401275634765625, "learning_rate": 3.876530612244898e-05, "loss": 3.1018, "step": 5511 }, { "epoch": 22.49795918367347, "grad_norm": 9.224689483642578, "learning_rate": 3.876326530612245e-05, "loss": 3.4928, "step": 5512 }, { "epoch": 22.50204081632653, "grad_norm": 6.240593433380127, "learning_rate": 3.8761224489795924e-05, "loss": 3.5484, "step": 5513 }, { "epoch": 22.506122448979593, "grad_norm": 12.133464813232422, "learning_rate": 3.875918367346939e-05, "loss": 3.0342, "step": 5514 }, { "epoch": 22.510204081632654, "grad_norm": 8.54173755645752, "learning_rate": 3.875714285714286e-05, "loss": 3.1746, "step": 5515 }, { "epoch": 22.514285714285712, "grad_norm": 6.523205757141113, "learning_rate": 3.8755102040816325e-05, "loss": 3.4049, "step": 5516 }, { "epoch": 22.518367346938774, "grad_norm": 7.196131706237793, "learning_rate": 3.875306122448979e-05, "loss": 3.6376, "step": 5517 }, { "epoch": 22.522448979591836, "grad_norm": 10.435738563537598, "learning_rate": 3.8751020408163266e-05, "loss": 3.1201, "step": 5518 }, { "epoch": 22.526530612244898, "grad_norm": 9.563190460205078, "learning_rate": 3.874897959183674e-05, "loss": 2.9808, "step": 5519 }, { "epoch": 22.53061224489796, "grad_norm": 8.851167678833008, "learning_rate": 3.874693877551021e-05, "loss": 3.2462, "step": 5520 }, { "epoch": 22.53469387755102, "grad_norm": 11.916755676269531, "learning_rate": 3.8744897959183675e-05, "loss": 2.447, "step": 5521 }, { "epoch": 22.538775510204083, "grad_norm": 6.490126609802246, "learning_rate": 3.874285714285715e-05, "loss": 3.3476, "step": 5522 }, { "epoch": 22.542857142857144, "grad_norm": 6.242536544799805, "learning_rate": 3.8740816326530616e-05, "loss": 3.5432, "step": 5523 }, { "epoch": 22.546938775510203, "grad_norm": 7.95475435256958, "learning_rate": 3.873877551020408e-05, "loss": 3.3219, "step": 5524 }, { "epoch": 22.551020408163264, "grad_norm": 8.23968505859375, "learning_rate": 3.873673469387755e-05, "loss": 3.2782, "step": 5525 }, { "epoch": 22.555102040816326, "grad_norm": 12.923823356628418, "learning_rate": 3.8734693877551024e-05, "loss": 2.6414, "step": 5526 }, { "epoch": 22.559183673469388, "grad_norm": 11.859283447265625, "learning_rate": 3.873265306122449e-05, "loss": 2.9893, "step": 5527 }, { "epoch": 22.56326530612245, "grad_norm": 14.689285278320312, "learning_rate": 3.8730612244897965e-05, "loss": 3.1397, "step": 5528 }, { "epoch": 22.56734693877551, "grad_norm": 8.858403205871582, "learning_rate": 3.872857142857143e-05, "loss": 3.1954, "step": 5529 }, { "epoch": 22.571428571428573, "grad_norm": 6.485034942626953, "learning_rate": 3.87265306122449e-05, "loss": 3.5702, "step": 5530 }, { "epoch": 22.57551020408163, "grad_norm": 9.169476509094238, "learning_rate": 3.8724489795918366e-05, "loss": 3.2899, "step": 5531 }, { "epoch": 22.579591836734693, "grad_norm": 10.61047649383545, "learning_rate": 3.872244897959184e-05, "loss": 2.8049, "step": 5532 }, { "epoch": 22.583673469387755, "grad_norm": 10.649260520935059, "learning_rate": 3.872040816326531e-05, "loss": 2.977, "step": 5533 }, { "epoch": 22.587755102040816, "grad_norm": 12.104839324951172, "learning_rate": 3.8718367346938774e-05, "loss": 2.9109, "step": 5534 }, { "epoch": 22.591836734693878, "grad_norm": 6.990994930267334, "learning_rate": 3.871632653061225e-05, "loss": 3.3467, "step": 5535 }, { "epoch": 22.59591836734694, "grad_norm": 16.220529556274414, "learning_rate": 3.8714285714285715e-05, "loss": 2.4073, "step": 5536 }, { "epoch": 22.6, "grad_norm": 5.09710168838501, "learning_rate": 3.871224489795919e-05, "loss": 3.8812, "step": 5537 }, { "epoch": 22.60408163265306, "grad_norm": 9.472790718078613, "learning_rate": 3.8710204081632656e-05, "loss": 3.1346, "step": 5538 }, { "epoch": 22.60816326530612, "grad_norm": 9.213616371154785, "learning_rate": 3.8708163265306123e-05, "loss": 3.2359, "step": 5539 }, { "epoch": 22.612244897959183, "grad_norm": 10.093656539916992, "learning_rate": 3.870612244897959e-05, "loss": 3.2493, "step": 5540 }, { "epoch": 22.616326530612245, "grad_norm": 9.896926879882812, "learning_rate": 3.870408163265306e-05, "loss": 3.4776, "step": 5541 }, { "epoch": 22.620408163265306, "grad_norm": 10.495248794555664, "learning_rate": 3.870204081632653e-05, "loss": 3.3039, "step": 5542 }, { "epoch": 22.624489795918368, "grad_norm": 9.963119506835938, "learning_rate": 3.8700000000000006e-05, "loss": 3.2942, "step": 5543 }, { "epoch": 22.62857142857143, "grad_norm": 19.064098358154297, "learning_rate": 3.869795918367347e-05, "loss": 2.2912, "step": 5544 }, { "epoch": 22.632653061224488, "grad_norm": 7.982792377471924, "learning_rate": 3.869591836734694e-05, "loss": 3.219, "step": 5545 }, { "epoch": 22.63673469387755, "grad_norm": 12.995180130004883, "learning_rate": 3.869387755102041e-05, "loss": 2.4305, "step": 5546 }, { "epoch": 22.64081632653061, "grad_norm": 8.050731658935547, "learning_rate": 3.869183673469388e-05, "loss": 3.2098, "step": 5547 }, { "epoch": 22.644897959183673, "grad_norm": 8.182747840881348, "learning_rate": 3.868979591836735e-05, "loss": 3.391, "step": 5548 }, { "epoch": 22.648979591836735, "grad_norm": 10.805578231811523, "learning_rate": 3.8687755102040815e-05, "loss": 2.9482, "step": 5549 }, { "epoch": 22.653061224489797, "grad_norm": 7.343017578125, "learning_rate": 3.868571428571429e-05, "loss": 3.3583, "step": 5550 }, { "epoch": 22.65714285714286, "grad_norm": 25.311840057373047, "learning_rate": 3.8683673469387756e-05, "loss": 2.446, "step": 5551 }, { "epoch": 22.66122448979592, "grad_norm": 16.362646102905273, "learning_rate": 3.868163265306123e-05, "loss": 2.7212, "step": 5552 }, { "epoch": 22.66530612244898, "grad_norm": 16.16511344909668, "learning_rate": 3.86795918367347e-05, "loss": 2.4381, "step": 5553 }, { "epoch": 22.66938775510204, "grad_norm": 17.097524642944336, "learning_rate": 3.8677551020408164e-05, "loss": 2.5825, "step": 5554 }, { "epoch": 22.6734693877551, "grad_norm": 13.985696792602539, "learning_rate": 3.867551020408163e-05, "loss": 2.5544, "step": 5555 }, { "epoch": 22.677551020408163, "grad_norm": 20.492172241210938, "learning_rate": 3.86734693877551e-05, "loss": 2.3528, "step": 5556 }, { "epoch": 22.681632653061225, "grad_norm": 14.763449668884277, "learning_rate": 3.867142857142857e-05, "loss": 2.9189, "step": 5557 }, { "epoch": 22.685714285714287, "grad_norm": 16.026609420776367, "learning_rate": 3.8669387755102046e-05, "loss": 2.4206, "step": 5558 }, { "epoch": 22.68979591836735, "grad_norm": 14.600273132324219, "learning_rate": 3.8667346938775513e-05, "loss": 2.8214, "step": 5559 }, { "epoch": 22.693877551020407, "grad_norm": 8.108211517333984, "learning_rate": 3.866530612244898e-05, "loss": 3.581, "step": 5560 }, { "epoch": 22.69795918367347, "grad_norm": 5.962648391723633, "learning_rate": 3.866326530612245e-05, "loss": 3.6399, "step": 5561 }, { "epoch": 22.70204081632653, "grad_norm": 8.470389366149902, "learning_rate": 3.866122448979592e-05, "loss": 3.3485, "step": 5562 }, { "epoch": 22.706122448979592, "grad_norm": 9.417022705078125, "learning_rate": 3.865918367346939e-05, "loss": 3.1535, "step": 5563 }, { "epoch": 22.710204081632654, "grad_norm": 8.416001319885254, "learning_rate": 3.8657142857142856e-05, "loss": 3.3523, "step": 5564 }, { "epoch": 22.714285714285715, "grad_norm": 9.002419471740723, "learning_rate": 3.865510204081633e-05, "loss": 3.4071, "step": 5565 }, { "epoch": 22.718367346938777, "grad_norm": 15.010438919067383, "learning_rate": 3.8653061224489804e-05, "loss": 2.6765, "step": 5566 }, { "epoch": 22.722448979591835, "grad_norm": 8.974737167358398, "learning_rate": 3.865102040816327e-05, "loss": 3.0113, "step": 5567 }, { "epoch": 22.726530612244897, "grad_norm": 9.29326343536377, "learning_rate": 3.864897959183674e-05, "loss": 3.1461, "step": 5568 }, { "epoch": 22.73061224489796, "grad_norm": 22.62070655822754, "learning_rate": 3.8646938775510205e-05, "loss": 2.8362, "step": 5569 }, { "epoch": 22.73469387755102, "grad_norm": 13.511926651000977, "learning_rate": 3.864489795918367e-05, "loss": 2.6829, "step": 5570 }, { "epoch": 22.738775510204082, "grad_norm": 11.448036193847656, "learning_rate": 3.8642857142857146e-05, "loss": 3.2764, "step": 5571 }, { "epoch": 22.742857142857144, "grad_norm": 5.677359104156494, "learning_rate": 3.864081632653061e-05, "loss": 3.7353, "step": 5572 }, { "epoch": 22.746938775510205, "grad_norm": 19.515945434570312, "learning_rate": 3.863877551020409e-05, "loss": 2.2494, "step": 5573 }, { "epoch": 22.751020408163264, "grad_norm": 9.921796798706055, "learning_rate": 3.8636734693877554e-05, "loss": 3.1062, "step": 5574 }, { "epoch": 22.755102040816325, "grad_norm": 12.322837829589844, "learning_rate": 3.863469387755102e-05, "loss": 2.6546, "step": 5575 }, { "epoch": 22.759183673469387, "grad_norm": 10.030616760253906, "learning_rate": 3.8632653061224495e-05, "loss": 3.2635, "step": 5576 }, { "epoch": 22.76326530612245, "grad_norm": 8.62817668914795, "learning_rate": 3.863061224489796e-05, "loss": 3.6107, "step": 5577 }, { "epoch": 22.76734693877551, "grad_norm": 14.637382507324219, "learning_rate": 3.862857142857143e-05, "loss": 3.2225, "step": 5578 }, { "epoch": 22.771428571428572, "grad_norm": 27.961402893066406, "learning_rate": 3.8626530612244897e-05, "loss": 1.5618, "step": 5579 }, { "epoch": 22.775510204081634, "grad_norm": 11.735939025878906, "learning_rate": 3.862448979591837e-05, "loss": 3.1266, "step": 5580 }, { "epoch": 22.779591836734696, "grad_norm": 8.291421890258789, "learning_rate": 3.862244897959184e-05, "loss": 3.3905, "step": 5581 }, { "epoch": 22.783673469387754, "grad_norm": 11.617823600769043, "learning_rate": 3.862040816326531e-05, "loss": 3.4662, "step": 5582 }, { "epoch": 22.787755102040816, "grad_norm": 10.742053985595703, "learning_rate": 3.861836734693878e-05, "loss": 3.3498, "step": 5583 }, { "epoch": 22.791836734693877, "grad_norm": 22.07794952392578, "learning_rate": 3.8616326530612246e-05, "loss": 1.7408, "step": 5584 }, { "epoch": 22.79591836734694, "grad_norm": 7.34246826171875, "learning_rate": 3.861428571428571e-05, "loss": 3.7335, "step": 5585 }, { "epoch": 22.8, "grad_norm": 6.388539791107178, "learning_rate": 3.861224489795919e-05, "loss": 3.2316, "step": 5586 }, { "epoch": 22.804081632653062, "grad_norm": 10.570444107055664, "learning_rate": 3.8610204081632654e-05, "loss": 3.082, "step": 5587 }, { "epoch": 22.808163265306124, "grad_norm": 6.202775001525879, "learning_rate": 3.860816326530612e-05, "loss": 3.7358, "step": 5588 }, { "epoch": 22.812244897959182, "grad_norm": 12.353095054626465, "learning_rate": 3.8606122448979595e-05, "loss": 2.6218, "step": 5589 }, { "epoch": 22.816326530612244, "grad_norm": 18.60797882080078, "learning_rate": 3.860408163265306e-05, "loss": 1.9562, "step": 5590 }, { "epoch": 22.820408163265306, "grad_norm": 5.908120632171631, "learning_rate": 3.8602040816326536e-05, "loss": 3.6156, "step": 5591 }, { "epoch": 22.824489795918367, "grad_norm": 17.523900985717773, "learning_rate": 3.86e-05, "loss": 2.3647, "step": 5592 }, { "epoch": 22.82857142857143, "grad_norm": 17.431791305541992, "learning_rate": 3.859795918367347e-05, "loss": 2.3691, "step": 5593 }, { "epoch": 22.83265306122449, "grad_norm": 5.349156379699707, "learning_rate": 3.859591836734694e-05, "loss": 3.7076, "step": 5594 }, { "epoch": 22.836734693877553, "grad_norm": 18.58393669128418, "learning_rate": 3.8593877551020405e-05, "loss": 2.5169, "step": 5595 }, { "epoch": 22.84081632653061, "grad_norm": 11.199836730957031, "learning_rate": 3.859183673469388e-05, "loss": 2.6828, "step": 5596 }, { "epoch": 22.844897959183672, "grad_norm": 15.644664764404297, "learning_rate": 3.858979591836735e-05, "loss": 3.1281, "step": 5597 }, { "epoch": 22.848979591836734, "grad_norm": 10.57599925994873, "learning_rate": 3.858775510204082e-05, "loss": 2.7733, "step": 5598 }, { "epoch": 22.853061224489796, "grad_norm": 8.048310279846191, "learning_rate": 3.8585714285714287e-05, "loss": 3.6165, "step": 5599 }, { "epoch": 22.857142857142858, "grad_norm": 15.133027076721191, "learning_rate": 3.8583673469387754e-05, "loss": 2.4974, "step": 5600 }, { "epoch": 22.86122448979592, "grad_norm": 16.06246566772461, "learning_rate": 3.858163265306123e-05, "loss": 2.8989, "step": 5601 }, { "epoch": 22.86530612244898, "grad_norm": 10.955923080444336, "learning_rate": 3.8579591836734695e-05, "loss": 3.0932, "step": 5602 }, { "epoch": 22.86938775510204, "grad_norm": 12.532604217529297, "learning_rate": 3.857755102040816e-05, "loss": 2.8595, "step": 5603 }, { "epoch": 22.8734693877551, "grad_norm": 12.37504768371582, "learning_rate": 3.8575510204081636e-05, "loss": 2.6691, "step": 5604 }, { "epoch": 22.877551020408163, "grad_norm": 13.674872398376465, "learning_rate": 3.85734693877551e-05, "loss": 2.84, "step": 5605 }, { "epoch": 22.881632653061224, "grad_norm": 9.635394096374512, "learning_rate": 3.857142857142858e-05, "loss": 3.0321, "step": 5606 }, { "epoch": 22.885714285714286, "grad_norm": 19.831981658935547, "learning_rate": 3.8569387755102044e-05, "loss": 3.1824, "step": 5607 }, { "epoch": 22.889795918367348, "grad_norm": 13.55347728729248, "learning_rate": 3.856734693877551e-05, "loss": 2.9107, "step": 5608 }, { "epoch": 22.89387755102041, "grad_norm": 10.96756649017334, "learning_rate": 3.856530612244898e-05, "loss": 3.0528, "step": 5609 }, { "epoch": 22.897959183673468, "grad_norm": 19.622316360473633, "learning_rate": 3.856326530612245e-05, "loss": 2.4262, "step": 5610 }, { "epoch": 22.90204081632653, "grad_norm": 10.675850868225098, "learning_rate": 3.856122448979592e-05, "loss": 3.147, "step": 5611 }, { "epoch": 22.90612244897959, "grad_norm": 13.90103816986084, "learning_rate": 3.855918367346939e-05, "loss": 2.7424, "step": 5612 }, { "epoch": 22.910204081632653, "grad_norm": 10.229608535766602, "learning_rate": 3.855714285714286e-05, "loss": 3.0854, "step": 5613 }, { "epoch": 22.914285714285715, "grad_norm": 12.025068283081055, "learning_rate": 3.855510204081633e-05, "loss": 2.6553, "step": 5614 }, { "epoch": 22.918367346938776, "grad_norm": 13.05787467956543, "learning_rate": 3.85530612244898e-05, "loss": 3.2008, "step": 5615 }, { "epoch": 22.922448979591838, "grad_norm": 16.826265335083008, "learning_rate": 3.855102040816327e-05, "loss": 1.9237, "step": 5616 }, { "epoch": 22.926530612244896, "grad_norm": 12.329187393188477, "learning_rate": 3.8548979591836735e-05, "loss": 2.8203, "step": 5617 }, { "epoch": 22.930612244897958, "grad_norm": 12.482162475585938, "learning_rate": 3.85469387755102e-05, "loss": 3.1694, "step": 5618 }, { "epoch": 22.93469387755102, "grad_norm": 9.237112045288086, "learning_rate": 3.8544897959183676e-05, "loss": 3.1474, "step": 5619 }, { "epoch": 22.93877551020408, "grad_norm": 11.511005401611328, "learning_rate": 3.854285714285715e-05, "loss": 3.263, "step": 5620 }, { "epoch": 22.942857142857143, "grad_norm": 10.7545747756958, "learning_rate": 3.854081632653062e-05, "loss": 3.0804, "step": 5621 }, { "epoch": 22.946938775510205, "grad_norm": 17.498685836791992, "learning_rate": 3.8538775510204085e-05, "loss": 2.874, "step": 5622 }, { "epoch": 22.951020408163266, "grad_norm": 14.761513710021973, "learning_rate": 3.853673469387755e-05, "loss": 2.5469, "step": 5623 }, { "epoch": 22.955102040816328, "grad_norm": 15.767457008361816, "learning_rate": 3.853469387755102e-05, "loss": 2.489, "step": 5624 }, { "epoch": 22.959183673469386, "grad_norm": 19.570377349853516, "learning_rate": 3.853265306122449e-05, "loss": 2.4823, "step": 5625 }, { "epoch": 22.963265306122448, "grad_norm": 8.501166343688965, "learning_rate": 3.853061224489796e-05, "loss": 3.3586, "step": 5626 }, { "epoch": 22.96734693877551, "grad_norm": 14.037040710449219, "learning_rate": 3.8528571428571434e-05, "loss": 2.7094, "step": 5627 }, { "epoch": 22.97142857142857, "grad_norm": 10.634845733642578, "learning_rate": 3.85265306122449e-05, "loss": 3.2471, "step": 5628 }, { "epoch": 22.975510204081633, "grad_norm": 20.60276985168457, "learning_rate": 3.852448979591837e-05, "loss": 1.8586, "step": 5629 }, { "epoch": 22.979591836734695, "grad_norm": 13.041250228881836, "learning_rate": 3.852244897959184e-05, "loss": 2.8848, "step": 5630 }, { "epoch": 22.983673469387757, "grad_norm": 9.25121784210205, "learning_rate": 3.852040816326531e-05, "loss": 3.3775, "step": 5631 }, { "epoch": 22.987755102040815, "grad_norm": 10.949396133422852, "learning_rate": 3.8518367346938776e-05, "loss": 3.06, "step": 5632 }, { "epoch": 22.991836734693877, "grad_norm": 8.674039840698242, "learning_rate": 3.851632653061224e-05, "loss": 2.9908, "step": 5633 }, { "epoch": 22.99591836734694, "grad_norm": 13.52514362335205, "learning_rate": 3.851428571428571e-05, "loss": 2.8486, "step": 5634 }, { "epoch": 23.0, "grad_norm": 13.644238471984863, "learning_rate": 3.8512244897959184e-05, "loss": 2.96, "step": 5635 }, { "epoch": 23.00408163265306, "grad_norm": 20.996034622192383, "learning_rate": 3.851020408163266e-05, "loss": 2.2746, "step": 5636 }, { "epoch": 23.008163265306123, "grad_norm": 12.873703002929688, "learning_rate": 3.8508163265306125e-05, "loss": 2.6139, "step": 5637 }, { "epoch": 23.012244897959185, "grad_norm": 10.173751831054688, "learning_rate": 3.850612244897959e-05, "loss": 3.3006, "step": 5638 }, { "epoch": 23.016326530612243, "grad_norm": 15.729249000549316, "learning_rate": 3.850408163265306e-05, "loss": 1.7316, "step": 5639 }, { "epoch": 23.020408163265305, "grad_norm": 8.458544731140137, "learning_rate": 3.8502040816326534e-05, "loss": 3.5192, "step": 5640 }, { "epoch": 23.024489795918367, "grad_norm": 12.179272651672363, "learning_rate": 3.85e-05, "loss": 2.9461, "step": 5641 }, { "epoch": 23.02857142857143, "grad_norm": 17.62708854675293, "learning_rate": 3.849795918367347e-05, "loss": 2.5126, "step": 5642 }, { "epoch": 23.03265306122449, "grad_norm": 11.047697067260742, "learning_rate": 3.849591836734694e-05, "loss": 2.9867, "step": 5643 }, { "epoch": 23.036734693877552, "grad_norm": 9.62039566040039, "learning_rate": 3.849387755102041e-05, "loss": 3.2817, "step": 5644 }, { "epoch": 23.040816326530614, "grad_norm": 14.439020156860352, "learning_rate": 3.849183673469388e-05, "loss": 2.9283, "step": 5645 }, { "epoch": 23.044897959183672, "grad_norm": 16.02446174621582, "learning_rate": 3.848979591836735e-05, "loss": 2.1038, "step": 5646 }, { "epoch": 23.048979591836734, "grad_norm": 7.364044189453125, "learning_rate": 3.848775510204082e-05, "loss": 3.2059, "step": 5647 }, { "epoch": 23.053061224489795, "grad_norm": 13.508021354675293, "learning_rate": 3.8485714285714284e-05, "loss": 2.7587, "step": 5648 }, { "epoch": 23.057142857142857, "grad_norm": 8.791210174560547, "learning_rate": 3.848367346938775e-05, "loss": 3.1453, "step": 5649 }, { "epoch": 23.06122448979592, "grad_norm": 9.618719100952148, "learning_rate": 3.8481632653061225e-05, "loss": 3.127, "step": 5650 }, { "epoch": 23.06530612244898, "grad_norm": 7.667047023773193, "learning_rate": 3.84795918367347e-05, "loss": 3.3083, "step": 5651 }, { "epoch": 23.069387755102042, "grad_norm": 12.009617805480957, "learning_rate": 3.8477551020408166e-05, "loss": 3.188, "step": 5652 }, { "epoch": 23.073469387755104, "grad_norm": 9.517509460449219, "learning_rate": 3.847551020408163e-05, "loss": 3.2509, "step": 5653 }, { "epoch": 23.077551020408162, "grad_norm": 20.80876350402832, "learning_rate": 3.84734693877551e-05, "loss": 1.7949, "step": 5654 }, { "epoch": 23.081632653061224, "grad_norm": 10.024742126464844, "learning_rate": 3.8471428571428574e-05, "loss": 3.2212, "step": 5655 }, { "epoch": 23.085714285714285, "grad_norm": 9.459818840026855, "learning_rate": 3.846938775510204e-05, "loss": 3.233, "step": 5656 }, { "epoch": 23.089795918367347, "grad_norm": 16.898889541625977, "learning_rate": 3.846734693877551e-05, "loss": 2.3506, "step": 5657 }, { "epoch": 23.09387755102041, "grad_norm": 14.723546981811523, "learning_rate": 3.846530612244898e-05, "loss": 2.5663, "step": 5658 }, { "epoch": 23.09795918367347, "grad_norm": 8.904290199279785, "learning_rate": 3.8463265306122456e-05, "loss": 3.2878, "step": 5659 }, { "epoch": 23.102040816326532, "grad_norm": 8.480947494506836, "learning_rate": 3.8461224489795924e-05, "loss": 3.4745, "step": 5660 }, { "epoch": 23.10612244897959, "grad_norm": 7.109222888946533, "learning_rate": 3.845918367346939e-05, "loss": 3.2556, "step": 5661 }, { "epoch": 23.110204081632652, "grad_norm": 10.55693531036377, "learning_rate": 3.845714285714286e-05, "loss": 3.3281, "step": 5662 }, { "epoch": 23.114285714285714, "grad_norm": 15.62253189086914, "learning_rate": 3.8455102040816325e-05, "loss": 2.9585, "step": 5663 }, { "epoch": 23.118367346938776, "grad_norm": 24.984134674072266, "learning_rate": 3.84530612244898e-05, "loss": 1.6218, "step": 5664 }, { "epoch": 23.122448979591837, "grad_norm": 13.304535865783691, "learning_rate": 3.8451020408163266e-05, "loss": 3.0229, "step": 5665 }, { "epoch": 23.1265306122449, "grad_norm": 14.81531810760498, "learning_rate": 3.844897959183674e-05, "loss": 2.4167, "step": 5666 }, { "epoch": 23.13061224489796, "grad_norm": 8.80210018157959, "learning_rate": 3.844693877551021e-05, "loss": 3.268, "step": 5667 }, { "epoch": 23.13469387755102, "grad_norm": 18.273237228393555, "learning_rate": 3.8444897959183674e-05, "loss": 2.8122, "step": 5668 }, { "epoch": 23.13877551020408, "grad_norm": 7.5885329246521, "learning_rate": 3.844285714285715e-05, "loss": 3.2447, "step": 5669 }, { "epoch": 23.142857142857142, "grad_norm": 11.833998680114746, "learning_rate": 3.8440816326530615e-05, "loss": 3.369, "step": 5670 }, { "epoch": 23.146938775510204, "grad_norm": 9.639688491821289, "learning_rate": 3.843877551020408e-05, "loss": 3.2205, "step": 5671 }, { "epoch": 23.151020408163266, "grad_norm": 27.933475494384766, "learning_rate": 3.843673469387755e-05, "loss": 1.5437, "step": 5672 }, { "epoch": 23.155102040816328, "grad_norm": 14.618447303771973, "learning_rate": 3.843469387755102e-05, "loss": 2.3346, "step": 5673 }, { "epoch": 23.15918367346939, "grad_norm": 8.521385192871094, "learning_rate": 3.84326530612245e-05, "loss": 3.1472, "step": 5674 }, { "epoch": 23.163265306122447, "grad_norm": 8.393470764160156, "learning_rate": 3.8430612244897964e-05, "loss": 3.317, "step": 5675 }, { "epoch": 23.16734693877551, "grad_norm": 20.407024383544922, "learning_rate": 3.842857142857143e-05, "loss": 2.3592, "step": 5676 }, { "epoch": 23.17142857142857, "grad_norm": 14.812117576599121, "learning_rate": 3.84265306122449e-05, "loss": 2.5403, "step": 5677 }, { "epoch": 23.175510204081633, "grad_norm": 10.578495025634766, "learning_rate": 3.8424489795918366e-05, "loss": 2.9, "step": 5678 }, { "epoch": 23.179591836734694, "grad_norm": 18.036779403686523, "learning_rate": 3.842244897959184e-05, "loss": 2.3952, "step": 5679 }, { "epoch": 23.183673469387756, "grad_norm": 21.223979949951172, "learning_rate": 3.842040816326531e-05, "loss": 1.6267, "step": 5680 }, { "epoch": 23.187755102040818, "grad_norm": 9.525248527526855, "learning_rate": 3.841836734693878e-05, "loss": 3.2363, "step": 5681 }, { "epoch": 23.191836734693876, "grad_norm": 17.975431442260742, "learning_rate": 3.841632653061225e-05, "loss": 2.5626, "step": 5682 }, { "epoch": 23.195918367346938, "grad_norm": 6.660527229309082, "learning_rate": 3.8414285714285715e-05, "loss": 3.5139, "step": 5683 }, { "epoch": 23.2, "grad_norm": 14.278305053710938, "learning_rate": 3.841224489795919e-05, "loss": 2.5396, "step": 5684 }, { "epoch": 23.20408163265306, "grad_norm": 15.039762496948242, "learning_rate": 3.8410204081632656e-05, "loss": 2.522, "step": 5685 }, { "epoch": 23.208163265306123, "grad_norm": 16.261917114257812, "learning_rate": 3.840816326530612e-05, "loss": 2.2439, "step": 5686 }, { "epoch": 23.212244897959184, "grad_norm": 9.302846908569336, "learning_rate": 3.840612244897959e-05, "loss": 3.0728, "step": 5687 }, { "epoch": 23.216326530612246, "grad_norm": 14.154882431030273, "learning_rate": 3.840408163265306e-05, "loss": 2.3231, "step": 5688 }, { "epoch": 23.220408163265308, "grad_norm": 7.666555404663086, "learning_rate": 3.840204081632653e-05, "loss": 3.4736, "step": 5689 }, { "epoch": 23.224489795918366, "grad_norm": 6.140222072601318, "learning_rate": 3.8400000000000005e-05, "loss": 3.7801, "step": 5690 }, { "epoch": 23.228571428571428, "grad_norm": 9.931879043579102, "learning_rate": 3.839795918367347e-05, "loss": 3.2329, "step": 5691 }, { "epoch": 23.23265306122449, "grad_norm": 11.32555103302002, "learning_rate": 3.839591836734694e-05, "loss": 2.7083, "step": 5692 }, { "epoch": 23.23673469387755, "grad_norm": 13.11875057220459, "learning_rate": 3.8393877551020406e-05, "loss": 2.8657, "step": 5693 }, { "epoch": 23.240816326530613, "grad_norm": 13.633179664611816, "learning_rate": 3.839183673469388e-05, "loss": 3.0896, "step": 5694 }, { "epoch": 23.244897959183675, "grad_norm": 11.062881469726562, "learning_rate": 3.838979591836735e-05, "loss": 3.294, "step": 5695 }, { "epoch": 23.248979591836736, "grad_norm": 11.617423057556152, "learning_rate": 3.8387755102040815e-05, "loss": 2.9034, "step": 5696 }, { "epoch": 23.253061224489795, "grad_norm": 14.643780708312988, "learning_rate": 3.838571428571429e-05, "loss": 2.9558, "step": 5697 }, { "epoch": 23.257142857142856, "grad_norm": 20.38959312438965, "learning_rate": 3.8383673469387756e-05, "loss": 1.6591, "step": 5698 }, { "epoch": 23.261224489795918, "grad_norm": 11.907057762145996, "learning_rate": 3.838163265306123e-05, "loss": 3.0221, "step": 5699 }, { "epoch": 23.26530612244898, "grad_norm": 16.473051071166992, "learning_rate": 3.83795918367347e-05, "loss": 2.6204, "step": 5700 }, { "epoch": 23.26938775510204, "grad_norm": 6.404569149017334, "learning_rate": 3.8377551020408164e-05, "loss": 3.6146, "step": 5701 }, { "epoch": 23.273469387755103, "grad_norm": 18.26311492919922, "learning_rate": 3.837551020408163e-05, "loss": 2.2527, "step": 5702 }, { "epoch": 23.277551020408165, "grad_norm": 9.086907386779785, "learning_rate": 3.8373469387755105e-05, "loss": 3.1484, "step": 5703 }, { "epoch": 23.281632653061223, "grad_norm": 12.38737964630127, "learning_rate": 3.837142857142857e-05, "loss": 2.7835, "step": 5704 }, { "epoch": 23.285714285714285, "grad_norm": 5.863259792327881, "learning_rate": 3.8369387755102046e-05, "loss": 3.6776, "step": 5705 }, { "epoch": 23.289795918367346, "grad_norm": 7.420604228973389, "learning_rate": 3.836734693877551e-05, "loss": 3.3616, "step": 5706 }, { "epoch": 23.293877551020408, "grad_norm": 8.396024703979492, "learning_rate": 3.836530612244898e-05, "loss": 3.2258, "step": 5707 }, { "epoch": 23.29795918367347, "grad_norm": 11.287688255310059, "learning_rate": 3.8363265306122454e-05, "loss": 2.717, "step": 5708 }, { "epoch": 23.30204081632653, "grad_norm": 7.1641154289245605, "learning_rate": 3.836122448979592e-05, "loss": 3.3551, "step": 5709 }, { "epoch": 23.306122448979593, "grad_norm": 7.796696186065674, "learning_rate": 3.835918367346939e-05, "loss": 3.4106, "step": 5710 }, { "epoch": 23.31020408163265, "grad_norm": 8.233626365661621, "learning_rate": 3.8357142857142855e-05, "loss": 3.3858, "step": 5711 }, { "epoch": 23.314285714285713, "grad_norm": 12.878535270690918, "learning_rate": 3.835510204081633e-05, "loss": 2.58, "step": 5712 }, { "epoch": 23.318367346938775, "grad_norm": 10.822005271911621, "learning_rate": 3.83530612244898e-05, "loss": 3.1986, "step": 5713 }, { "epoch": 23.322448979591837, "grad_norm": 8.472188949584961, "learning_rate": 3.835102040816327e-05, "loss": 3.4059, "step": 5714 }, { "epoch": 23.3265306122449, "grad_norm": 7.271556377410889, "learning_rate": 3.834897959183674e-05, "loss": 3.2634, "step": 5715 }, { "epoch": 23.33061224489796, "grad_norm": 14.840344429016113, "learning_rate": 3.8346938775510205e-05, "loss": 2.5674, "step": 5716 }, { "epoch": 23.33469387755102, "grad_norm": 20.175050735473633, "learning_rate": 3.834489795918367e-05, "loss": 2.8025, "step": 5717 }, { "epoch": 23.33877551020408, "grad_norm": 16.837310791015625, "learning_rate": 3.8342857142857146e-05, "loss": 2.4702, "step": 5718 }, { "epoch": 23.34285714285714, "grad_norm": 11.003643035888672, "learning_rate": 3.834081632653061e-05, "loss": 3.1086, "step": 5719 }, { "epoch": 23.346938775510203, "grad_norm": 12.555583953857422, "learning_rate": 3.8338775510204087e-05, "loss": 2.8084, "step": 5720 }, { "epoch": 23.351020408163265, "grad_norm": 7.620269775390625, "learning_rate": 3.8336734693877554e-05, "loss": 3.348, "step": 5721 }, { "epoch": 23.355102040816327, "grad_norm": 34.49384689331055, "learning_rate": 3.833469387755102e-05, "loss": 1.6677, "step": 5722 }, { "epoch": 23.35918367346939, "grad_norm": 11.730320930480957, "learning_rate": 3.8332653061224495e-05, "loss": 2.8781, "step": 5723 }, { "epoch": 23.36326530612245, "grad_norm": 10.686736106872559, "learning_rate": 3.833061224489796e-05, "loss": 2.8913, "step": 5724 }, { "epoch": 23.367346938775512, "grad_norm": 6.1230926513671875, "learning_rate": 3.832857142857143e-05, "loss": 3.5029, "step": 5725 }, { "epoch": 23.37142857142857, "grad_norm": 9.735239028930664, "learning_rate": 3.8326530612244896e-05, "loss": 3.0362, "step": 5726 }, { "epoch": 23.375510204081632, "grad_norm": 9.115250587463379, "learning_rate": 3.832448979591837e-05, "loss": 3.2464, "step": 5727 }, { "epoch": 23.379591836734694, "grad_norm": 12.27773666381836, "learning_rate": 3.8322448979591844e-05, "loss": 2.8773, "step": 5728 }, { "epoch": 23.383673469387755, "grad_norm": 9.480838775634766, "learning_rate": 3.832040816326531e-05, "loss": 3.2799, "step": 5729 }, { "epoch": 23.387755102040817, "grad_norm": 13.615025520324707, "learning_rate": 3.831836734693878e-05, "loss": 2.9691, "step": 5730 }, { "epoch": 23.39183673469388, "grad_norm": 8.096269607543945, "learning_rate": 3.8316326530612245e-05, "loss": 3.392, "step": 5731 }, { "epoch": 23.39591836734694, "grad_norm": 7.079988479614258, "learning_rate": 3.831428571428571e-05, "loss": 3.7201, "step": 5732 }, { "epoch": 23.4, "grad_norm": 14.301004409790039, "learning_rate": 3.8312244897959186e-05, "loss": 2.7293, "step": 5733 }, { "epoch": 23.40408163265306, "grad_norm": 12.45488452911377, "learning_rate": 3.8310204081632653e-05, "loss": 2.8781, "step": 5734 }, { "epoch": 23.408163265306122, "grad_norm": 11.739832878112793, "learning_rate": 3.830816326530613e-05, "loss": 3.043, "step": 5735 }, { "epoch": 23.412244897959184, "grad_norm": 17.21050262451172, "learning_rate": 3.8306122448979594e-05, "loss": 2.5674, "step": 5736 }, { "epoch": 23.416326530612245, "grad_norm": 5.159763813018799, "learning_rate": 3.830408163265306e-05, "loss": 3.5953, "step": 5737 }, { "epoch": 23.420408163265307, "grad_norm": 6.606707572937012, "learning_rate": 3.8302040816326535e-05, "loss": 3.1695, "step": 5738 }, { "epoch": 23.42448979591837, "grad_norm": 9.820398330688477, "learning_rate": 3.83e-05, "loss": 2.9474, "step": 5739 }, { "epoch": 23.428571428571427, "grad_norm": 11.369953155517578, "learning_rate": 3.829795918367347e-05, "loss": 3.0207, "step": 5740 }, { "epoch": 23.43265306122449, "grad_norm": 17.73191261291504, "learning_rate": 3.829591836734694e-05, "loss": 2.3125, "step": 5741 }, { "epoch": 23.43673469387755, "grad_norm": 17.95063018798828, "learning_rate": 3.8293877551020404e-05, "loss": 2.6325, "step": 5742 }, { "epoch": 23.440816326530612, "grad_norm": 7.530145168304443, "learning_rate": 3.829183673469388e-05, "loss": 3.7059, "step": 5743 }, { "epoch": 23.444897959183674, "grad_norm": 33.01496887207031, "learning_rate": 3.828979591836735e-05, "loss": 1.4701, "step": 5744 }, { "epoch": 23.448979591836736, "grad_norm": 6.818145751953125, "learning_rate": 3.828775510204082e-05, "loss": 3.5404, "step": 5745 }, { "epoch": 23.453061224489797, "grad_norm": 14.10399055480957, "learning_rate": 3.8285714285714286e-05, "loss": 2.2847, "step": 5746 }, { "epoch": 23.457142857142856, "grad_norm": 12.112871170043945, "learning_rate": 3.828367346938776e-05, "loss": 3.1101, "step": 5747 }, { "epoch": 23.461224489795917, "grad_norm": 11.157268524169922, "learning_rate": 3.828163265306123e-05, "loss": 3.239, "step": 5748 }, { "epoch": 23.46530612244898, "grad_norm": 8.646574974060059, "learning_rate": 3.8279591836734694e-05, "loss": 3.1845, "step": 5749 }, { "epoch": 23.46938775510204, "grad_norm": 8.387974739074707, "learning_rate": 3.827755102040816e-05, "loss": 3.2422, "step": 5750 }, { "epoch": 23.473469387755102, "grad_norm": 7.2028961181640625, "learning_rate": 3.8275510204081635e-05, "loss": 3.5147, "step": 5751 }, { "epoch": 23.477551020408164, "grad_norm": 20.27781867980957, "learning_rate": 3.827346938775511e-05, "loss": 3.1371, "step": 5752 }, { "epoch": 23.481632653061226, "grad_norm": 7.422042369842529, "learning_rate": 3.8271428571428576e-05, "loss": 3.5105, "step": 5753 }, { "epoch": 23.485714285714284, "grad_norm": 8.393872261047363, "learning_rate": 3.826938775510204e-05, "loss": 3.1738, "step": 5754 }, { "epoch": 23.489795918367346, "grad_norm": 6.883426666259766, "learning_rate": 3.826734693877551e-05, "loss": 3.4903, "step": 5755 }, { "epoch": 23.493877551020407, "grad_norm": 7.407438278198242, "learning_rate": 3.826530612244898e-05, "loss": 3.5746, "step": 5756 }, { "epoch": 23.49795918367347, "grad_norm": 21.325498580932617, "learning_rate": 3.826326530612245e-05, "loss": 1.9209, "step": 5757 }, { "epoch": 23.50204081632653, "grad_norm": 27.283586502075195, "learning_rate": 3.826122448979592e-05, "loss": 2.4744, "step": 5758 }, { "epoch": 23.506122448979593, "grad_norm": 10.872682571411133, "learning_rate": 3.825918367346939e-05, "loss": 2.9578, "step": 5759 }, { "epoch": 23.510204081632654, "grad_norm": 21.38298225402832, "learning_rate": 3.825714285714286e-05, "loss": 1.8343, "step": 5760 }, { "epoch": 23.514285714285712, "grad_norm": 29.028709411621094, "learning_rate": 3.825510204081633e-05, "loss": 1.5606, "step": 5761 }, { "epoch": 23.518367346938774, "grad_norm": 17.5439510345459, "learning_rate": 3.82530612244898e-05, "loss": 3.0307, "step": 5762 }, { "epoch": 23.522448979591836, "grad_norm": 12.359702110290527, "learning_rate": 3.825102040816327e-05, "loss": 3.208, "step": 5763 }, { "epoch": 23.526530612244898, "grad_norm": 12.544378280639648, "learning_rate": 3.8248979591836735e-05, "loss": 3.1365, "step": 5764 }, { "epoch": 23.53061224489796, "grad_norm": 15.80369758605957, "learning_rate": 3.82469387755102e-05, "loss": 2.4006, "step": 5765 }, { "epoch": 23.53469387755102, "grad_norm": 8.188284873962402, "learning_rate": 3.8244897959183676e-05, "loss": 3.4276, "step": 5766 }, { "epoch": 23.538775510204083, "grad_norm": 9.422491073608398, "learning_rate": 3.824285714285715e-05, "loss": 3.0557, "step": 5767 }, { "epoch": 23.542857142857144, "grad_norm": 8.391226768493652, "learning_rate": 3.824081632653062e-05, "loss": 3.1527, "step": 5768 }, { "epoch": 23.546938775510203, "grad_norm": 15.246956825256348, "learning_rate": 3.8238775510204084e-05, "loss": 2.7955, "step": 5769 }, { "epoch": 23.551020408163264, "grad_norm": 20.125364303588867, "learning_rate": 3.823673469387755e-05, "loss": 1.9441, "step": 5770 }, { "epoch": 23.555102040816326, "grad_norm": 16.210391998291016, "learning_rate": 3.823469387755102e-05, "loss": 3.1332, "step": 5771 }, { "epoch": 23.559183673469388, "grad_norm": 11.600960731506348, "learning_rate": 3.823265306122449e-05, "loss": 2.7007, "step": 5772 }, { "epoch": 23.56326530612245, "grad_norm": 10.023338317871094, "learning_rate": 3.823061224489796e-05, "loss": 3.2684, "step": 5773 }, { "epoch": 23.56734693877551, "grad_norm": 19.681795120239258, "learning_rate": 3.822857142857143e-05, "loss": 2.1504, "step": 5774 }, { "epoch": 23.571428571428573, "grad_norm": 12.357096672058105, "learning_rate": 3.82265306122449e-05, "loss": 3.1319, "step": 5775 }, { "epoch": 23.57551020408163, "grad_norm": 13.089616775512695, "learning_rate": 3.822448979591837e-05, "loss": 2.9025, "step": 5776 }, { "epoch": 23.579591836734693, "grad_norm": 14.155936241149902, "learning_rate": 3.822244897959184e-05, "loss": 2.333, "step": 5777 }, { "epoch": 23.583673469387755, "grad_norm": 7.971196174621582, "learning_rate": 3.822040816326531e-05, "loss": 3.3885, "step": 5778 }, { "epoch": 23.587755102040816, "grad_norm": 6.10043478012085, "learning_rate": 3.8218367346938776e-05, "loss": 3.5903, "step": 5779 }, { "epoch": 23.591836734693878, "grad_norm": 21.556957244873047, "learning_rate": 3.821632653061224e-05, "loss": 2.4152, "step": 5780 }, { "epoch": 23.59591836734694, "grad_norm": 16.629003524780273, "learning_rate": 3.821428571428572e-05, "loss": 3.027, "step": 5781 }, { "epoch": 23.6, "grad_norm": 6.202086448669434, "learning_rate": 3.821224489795919e-05, "loss": 3.6707, "step": 5782 }, { "epoch": 23.60408163265306, "grad_norm": 20.64980697631836, "learning_rate": 3.821020408163266e-05, "loss": 2.3727, "step": 5783 }, { "epoch": 23.60816326530612, "grad_norm": 18.518800735473633, "learning_rate": 3.8208163265306125e-05, "loss": 2.0822, "step": 5784 }, { "epoch": 23.612244897959183, "grad_norm": 12.668755531311035, "learning_rate": 3.820612244897959e-05, "loss": 2.544, "step": 5785 }, { "epoch": 23.616326530612245, "grad_norm": 8.21242618560791, "learning_rate": 3.820408163265306e-05, "loss": 3.5623, "step": 5786 }, { "epoch": 23.620408163265306, "grad_norm": 16.292423248291016, "learning_rate": 3.820204081632653e-05, "loss": 2.4829, "step": 5787 }, { "epoch": 23.624489795918368, "grad_norm": 26.874967575073242, "learning_rate": 3.82e-05, "loss": 1.6558, "step": 5788 }, { "epoch": 23.62857142857143, "grad_norm": 31.175621032714844, "learning_rate": 3.8197959183673474e-05, "loss": 0.6944, "step": 5789 }, { "epoch": 23.632653061224488, "grad_norm": 14.235799789428711, "learning_rate": 3.819591836734694e-05, "loss": 2.9047, "step": 5790 }, { "epoch": 23.63673469387755, "grad_norm": 6.09254789352417, "learning_rate": 3.8193877551020415e-05, "loss": 3.6131, "step": 5791 }, { "epoch": 23.64081632653061, "grad_norm": 6.913078784942627, "learning_rate": 3.819183673469388e-05, "loss": 3.6463, "step": 5792 }, { "epoch": 23.644897959183673, "grad_norm": 10.719491004943848, "learning_rate": 3.818979591836735e-05, "loss": 3.307, "step": 5793 }, { "epoch": 23.648979591836735, "grad_norm": 12.667725563049316, "learning_rate": 3.8187755102040816e-05, "loss": 2.7632, "step": 5794 }, { "epoch": 23.653061224489797, "grad_norm": 10.897505760192871, "learning_rate": 3.8185714285714284e-05, "loss": 2.9263, "step": 5795 }, { "epoch": 23.65714285714286, "grad_norm": 10.102203369140625, "learning_rate": 3.818367346938776e-05, "loss": 3.2299, "step": 5796 }, { "epoch": 23.66122448979592, "grad_norm": 8.417367935180664, "learning_rate": 3.8181632653061225e-05, "loss": 3.4276, "step": 5797 }, { "epoch": 23.66530612244898, "grad_norm": 13.808554649353027, "learning_rate": 3.81795918367347e-05, "loss": 2.9507, "step": 5798 }, { "epoch": 23.66938775510204, "grad_norm": 19.088735580444336, "learning_rate": 3.8177551020408166e-05, "loss": 2.2777, "step": 5799 }, { "epoch": 23.6734693877551, "grad_norm": 10.79984188079834, "learning_rate": 3.817551020408163e-05, "loss": 2.9624, "step": 5800 }, { "epoch": 23.677551020408163, "grad_norm": 11.963582992553711, "learning_rate": 3.817346938775511e-05, "loss": 3.0197, "step": 5801 }, { "epoch": 23.681632653061225, "grad_norm": 6.3908281326293945, "learning_rate": 3.8171428571428574e-05, "loss": 3.7003, "step": 5802 }, { "epoch": 23.685714285714287, "grad_norm": 11.20394229888916, "learning_rate": 3.816938775510204e-05, "loss": 2.8952, "step": 5803 }, { "epoch": 23.68979591836735, "grad_norm": 17.08698844909668, "learning_rate": 3.816734693877551e-05, "loss": 2.7867, "step": 5804 }, { "epoch": 23.693877551020407, "grad_norm": 10.255784034729004, "learning_rate": 3.816530612244898e-05, "loss": 2.8475, "step": 5805 }, { "epoch": 23.69795918367347, "grad_norm": 5.410593509674072, "learning_rate": 3.8163265306122456e-05, "loss": 3.9455, "step": 5806 }, { "epoch": 23.70204081632653, "grad_norm": 11.151535034179688, "learning_rate": 3.816122448979592e-05, "loss": 3.1567, "step": 5807 }, { "epoch": 23.706122448979592, "grad_norm": 12.922508239746094, "learning_rate": 3.815918367346939e-05, "loss": 2.5797, "step": 5808 }, { "epoch": 23.710204081632654, "grad_norm": 18.531169891357422, "learning_rate": 3.815714285714286e-05, "loss": 2.0973, "step": 5809 }, { "epoch": 23.714285714285715, "grad_norm": 14.20008373260498, "learning_rate": 3.8155102040816324e-05, "loss": 2.789, "step": 5810 }, { "epoch": 23.718367346938777, "grad_norm": 17.240991592407227, "learning_rate": 3.81530612244898e-05, "loss": 2.6588, "step": 5811 }, { "epoch": 23.722448979591835, "grad_norm": 9.292527198791504, "learning_rate": 3.8151020408163265e-05, "loss": 3.3592, "step": 5812 }, { "epoch": 23.726530612244897, "grad_norm": 9.357869148254395, "learning_rate": 3.814897959183674e-05, "loss": 3.3755, "step": 5813 }, { "epoch": 23.73061224489796, "grad_norm": 20.36642837524414, "learning_rate": 3.8146938775510206e-05, "loss": 3.0024, "step": 5814 }, { "epoch": 23.73469387755102, "grad_norm": 16.64046859741211, "learning_rate": 3.8144897959183674e-05, "loss": 2.7848, "step": 5815 }, { "epoch": 23.738775510204082, "grad_norm": 13.310809135437012, "learning_rate": 3.814285714285715e-05, "loss": 2.8449, "step": 5816 }, { "epoch": 23.742857142857144, "grad_norm": 12.765861511230469, "learning_rate": 3.8140816326530615e-05, "loss": 3.1489, "step": 5817 }, { "epoch": 23.746938775510205, "grad_norm": 17.5546875, "learning_rate": 3.813877551020408e-05, "loss": 2.3867, "step": 5818 }, { "epoch": 23.751020408163264, "grad_norm": 9.892720222473145, "learning_rate": 3.813673469387755e-05, "loss": 3.3782, "step": 5819 }, { "epoch": 23.755102040816325, "grad_norm": 21.947114944458008, "learning_rate": 3.813469387755102e-05, "loss": 2.5055, "step": 5820 }, { "epoch": 23.759183673469387, "grad_norm": 6.801471710205078, "learning_rate": 3.81326530612245e-05, "loss": 3.2901, "step": 5821 }, { "epoch": 23.76326530612245, "grad_norm": 12.385820388793945, "learning_rate": 3.8130612244897964e-05, "loss": 2.9988, "step": 5822 }, { "epoch": 23.76734693877551, "grad_norm": 11.540783882141113, "learning_rate": 3.812857142857143e-05, "loss": 3.0843, "step": 5823 }, { "epoch": 23.771428571428572, "grad_norm": 14.280637741088867, "learning_rate": 3.81265306122449e-05, "loss": 3.2514, "step": 5824 }, { "epoch": 23.775510204081634, "grad_norm": 8.222456932067871, "learning_rate": 3.8124489795918365e-05, "loss": 3.5172, "step": 5825 }, { "epoch": 23.779591836734696, "grad_norm": 8.940139770507812, "learning_rate": 3.812244897959184e-05, "loss": 3.0951, "step": 5826 }, { "epoch": 23.783673469387754, "grad_norm": 6.890418529510498, "learning_rate": 3.8120408163265306e-05, "loss": 3.5369, "step": 5827 }, { "epoch": 23.787755102040816, "grad_norm": 7.925544738769531, "learning_rate": 3.811836734693878e-05, "loss": 3.4651, "step": 5828 }, { "epoch": 23.791836734693877, "grad_norm": 11.474687576293945, "learning_rate": 3.811632653061225e-05, "loss": 3.1694, "step": 5829 }, { "epoch": 23.79591836734694, "grad_norm": 10.499903678894043, "learning_rate": 3.8114285714285714e-05, "loss": 3.0216, "step": 5830 }, { "epoch": 23.8, "grad_norm": 8.866511344909668, "learning_rate": 3.811224489795919e-05, "loss": 3.1342, "step": 5831 }, { "epoch": 23.804081632653062, "grad_norm": 20.926708221435547, "learning_rate": 3.8110204081632655e-05, "loss": 2.261, "step": 5832 }, { "epoch": 23.808163265306124, "grad_norm": 6.191143989562988, "learning_rate": 3.810816326530612e-05, "loss": 3.569, "step": 5833 }, { "epoch": 23.812244897959182, "grad_norm": 19.706056594848633, "learning_rate": 3.810612244897959e-05, "loss": 1.9815, "step": 5834 }, { "epoch": 23.816326530612244, "grad_norm": 5.200870037078857, "learning_rate": 3.8104081632653063e-05, "loss": 3.8418, "step": 5835 }, { "epoch": 23.820408163265306, "grad_norm": 23.476472854614258, "learning_rate": 3.810204081632654e-05, "loss": 2.3404, "step": 5836 }, { "epoch": 23.824489795918367, "grad_norm": 14.102449417114258, "learning_rate": 3.8100000000000005e-05, "loss": 2.7591, "step": 5837 }, { "epoch": 23.82857142857143, "grad_norm": 12.78042984008789, "learning_rate": 3.809795918367347e-05, "loss": 3.014, "step": 5838 }, { "epoch": 23.83265306122449, "grad_norm": 12.885252952575684, "learning_rate": 3.809591836734694e-05, "loss": 2.6249, "step": 5839 }, { "epoch": 23.836734693877553, "grad_norm": 14.929351806640625, "learning_rate": 3.809387755102041e-05, "loss": 2.3461, "step": 5840 }, { "epoch": 23.84081632653061, "grad_norm": 10.244983673095703, "learning_rate": 3.809183673469388e-05, "loss": 3.0923, "step": 5841 }, { "epoch": 23.844897959183672, "grad_norm": 7.728078842163086, "learning_rate": 3.808979591836735e-05, "loss": 3.3406, "step": 5842 }, { "epoch": 23.848979591836734, "grad_norm": 8.891605377197266, "learning_rate": 3.8087755102040814e-05, "loss": 3.4361, "step": 5843 }, { "epoch": 23.853061224489796, "grad_norm": 9.296248435974121, "learning_rate": 3.808571428571429e-05, "loss": 3.2225, "step": 5844 }, { "epoch": 23.857142857142858, "grad_norm": 8.387397766113281, "learning_rate": 3.808367346938776e-05, "loss": 3.3357, "step": 5845 }, { "epoch": 23.86122448979592, "grad_norm": 17.172027587890625, "learning_rate": 3.808163265306123e-05, "loss": 2.7712, "step": 5846 }, { "epoch": 23.86530612244898, "grad_norm": 11.103536605834961, "learning_rate": 3.8079591836734696e-05, "loss": 3.2829, "step": 5847 }, { "epoch": 23.86938775510204, "grad_norm": 12.650978088378906, "learning_rate": 3.807755102040816e-05, "loss": 2.5492, "step": 5848 }, { "epoch": 23.8734693877551, "grad_norm": 23.635656356811523, "learning_rate": 3.807551020408163e-05, "loss": 2.8397, "step": 5849 }, { "epoch": 23.877551020408163, "grad_norm": 10.3865327835083, "learning_rate": 3.8073469387755104e-05, "loss": 2.9186, "step": 5850 }, { "epoch": 23.881632653061224, "grad_norm": 10.22407054901123, "learning_rate": 3.807142857142857e-05, "loss": 3.1126, "step": 5851 }, { "epoch": 23.885714285714286, "grad_norm": 7.5531721115112305, "learning_rate": 3.8069387755102045e-05, "loss": 3.3956, "step": 5852 }, { "epoch": 23.889795918367348, "grad_norm": 12.137145042419434, "learning_rate": 3.806734693877551e-05, "loss": 3.2931, "step": 5853 }, { "epoch": 23.89387755102041, "grad_norm": 7.688391208648682, "learning_rate": 3.806530612244898e-05, "loss": 3.3026, "step": 5854 }, { "epoch": 23.897959183673468, "grad_norm": 11.03412914276123, "learning_rate": 3.8063265306122453e-05, "loss": 2.9758, "step": 5855 }, { "epoch": 23.90204081632653, "grad_norm": 8.46595287322998, "learning_rate": 3.806122448979592e-05, "loss": 3.3588, "step": 5856 }, { "epoch": 23.90612244897959, "grad_norm": 12.310522079467773, "learning_rate": 3.805918367346939e-05, "loss": 2.9091, "step": 5857 }, { "epoch": 23.910204081632653, "grad_norm": 19.720375061035156, "learning_rate": 3.8057142857142855e-05, "loss": 2.4047, "step": 5858 }, { "epoch": 23.914285714285715, "grad_norm": 12.94554615020752, "learning_rate": 3.805510204081633e-05, "loss": 3.1504, "step": 5859 }, { "epoch": 23.918367346938776, "grad_norm": 23.748517990112305, "learning_rate": 3.80530612244898e-05, "loss": 1.8576, "step": 5860 }, { "epoch": 23.922448979591838, "grad_norm": 10.637574195861816, "learning_rate": 3.805102040816327e-05, "loss": 2.967, "step": 5861 }, { "epoch": 23.926530612244896, "grad_norm": 24.802566528320312, "learning_rate": 3.804897959183674e-05, "loss": 1.4356, "step": 5862 }, { "epoch": 23.930612244897958, "grad_norm": 16.490192413330078, "learning_rate": 3.8046938775510204e-05, "loss": 3.3259, "step": 5863 }, { "epoch": 23.93469387755102, "grad_norm": 11.068549156188965, "learning_rate": 3.804489795918367e-05, "loss": 3.0849, "step": 5864 }, { "epoch": 23.93877551020408, "grad_norm": 6.544704914093018, "learning_rate": 3.8042857142857145e-05, "loss": 3.5437, "step": 5865 }, { "epoch": 23.942857142857143, "grad_norm": 10.282536506652832, "learning_rate": 3.804081632653061e-05, "loss": 3.014, "step": 5866 }, { "epoch": 23.946938775510205, "grad_norm": 15.507475852966309, "learning_rate": 3.8038775510204086e-05, "loss": 2.4337, "step": 5867 }, { "epoch": 23.951020408163266, "grad_norm": 8.658038139343262, "learning_rate": 3.803673469387755e-05, "loss": 3.4575, "step": 5868 }, { "epoch": 23.955102040816328, "grad_norm": 11.347901344299316, "learning_rate": 3.803469387755102e-05, "loss": 3.4833, "step": 5869 }, { "epoch": 23.959183673469386, "grad_norm": 7.843108654022217, "learning_rate": 3.8032653061224494e-05, "loss": 3.3525, "step": 5870 }, { "epoch": 23.963265306122448, "grad_norm": 16.113319396972656, "learning_rate": 3.803061224489796e-05, "loss": 2.4175, "step": 5871 }, { "epoch": 23.96734693877551, "grad_norm": 15.146787643432617, "learning_rate": 3.802857142857143e-05, "loss": 2.4614, "step": 5872 }, { "epoch": 23.97142857142857, "grad_norm": 10.591307640075684, "learning_rate": 3.8026530612244896e-05, "loss": 3.0792, "step": 5873 }, { "epoch": 23.975510204081633, "grad_norm": 15.883466720581055, "learning_rate": 3.802448979591837e-05, "loss": 2.697, "step": 5874 }, { "epoch": 23.979591836734695, "grad_norm": 11.794107437133789, "learning_rate": 3.802244897959184e-05, "loss": 3.2083, "step": 5875 }, { "epoch": 23.983673469387757, "grad_norm": 19.933034896850586, "learning_rate": 3.802040816326531e-05, "loss": 2.3522, "step": 5876 }, { "epoch": 23.987755102040815, "grad_norm": 9.793352127075195, "learning_rate": 3.801836734693878e-05, "loss": 3.1866, "step": 5877 }, { "epoch": 23.991836734693877, "grad_norm": 7.927567005157471, "learning_rate": 3.8016326530612245e-05, "loss": 3.5041, "step": 5878 }, { "epoch": 23.99591836734694, "grad_norm": 5.40911865234375, "learning_rate": 3.801428571428571e-05, "loss": 3.8225, "step": 5879 }, { "epoch": 24.0, "grad_norm": 9.840526580810547, "learning_rate": 3.8012244897959186e-05, "loss": 3.1698, "step": 5880 }, { "epoch": 24.00408163265306, "grad_norm": 14.363692283630371, "learning_rate": 3.801020408163265e-05, "loss": 2.9109, "step": 5881 }, { "epoch": 24.008163265306123, "grad_norm": 4.1802520751953125, "learning_rate": 3.800816326530613e-05, "loss": 3.7766, "step": 5882 }, { "epoch": 24.012244897959185, "grad_norm": 9.369162559509277, "learning_rate": 3.8006122448979594e-05, "loss": 3.1826, "step": 5883 }, { "epoch": 24.016326530612243, "grad_norm": 9.407499313354492, "learning_rate": 3.800408163265307e-05, "loss": 3.1391, "step": 5884 }, { "epoch": 24.020408163265305, "grad_norm": 8.460071563720703, "learning_rate": 3.8002040816326535e-05, "loss": 3.223, "step": 5885 }, { "epoch": 24.024489795918367, "grad_norm": 20.95940589904785, "learning_rate": 3.8e-05, "loss": 2.2074, "step": 5886 }, { "epoch": 24.02857142857143, "grad_norm": 6.385224342346191, "learning_rate": 3.799795918367347e-05, "loss": 3.5702, "step": 5887 }, { "epoch": 24.03265306122449, "grad_norm": 34.7515983581543, "learning_rate": 3.7995918367346936e-05, "loss": 2.6327, "step": 5888 }, { "epoch": 24.036734693877552, "grad_norm": 13.217780113220215, "learning_rate": 3.799387755102041e-05, "loss": 2.6543, "step": 5889 }, { "epoch": 24.040816326530614, "grad_norm": 23.998998641967773, "learning_rate": 3.7991836734693884e-05, "loss": 1.7102, "step": 5890 }, { "epoch": 24.044897959183672, "grad_norm": 17.436220169067383, "learning_rate": 3.798979591836735e-05, "loss": 2.0894, "step": 5891 }, { "epoch": 24.048979591836734, "grad_norm": 21.2922420501709, "learning_rate": 3.798775510204082e-05, "loss": 1.7754, "step": 5892 }, { "epoch": 24.053061224489795, "grad_norm": 12.285664558410645, "learning_rate": 3.7985714285714286e-05, "loss": 2.8794, "step": 5893 }, { "epoch": 24.057142857142857, "grad_norm": 8.037235260009766, "learning_rate": 3.798367346938776e-05, "loss": 3.4454, "step": 5894 }, { "epoch": 24.06122448979592, "grad_norm": 12.176933288574219, "learning_rate": 3.7981632653061227e-05, "loss": 2.9033, "step": 5895 }, { "epoch": 24.06530612244898, "grad_norm": 14.159093856811523, "learning_rate": 3.7979591836734694e-05, "loss": 3.0367, "step": 5896 }, { "epoch": 24.069387755102042, "grad_norm": 19.768863677978516, "learning_rate": 3.797755102040816e-05, "loss": 2.4655, "step": 5897 }, { "epoch": 24.073469387755104, "grad_norm": 14.750240325927734, "learning_rate": 3.7975510204081635e-05, "loss": 2.7154, "step": 5898 }, { "epoch": 24.077551020408162, "grad_norm": 19.945167541503906, "learning_rate": 3.797346938775511e-05, "loss": 2.3884, "step": 5899 }, { "epoch": 24.081632653061224, "grad_norm": 15.365691184997559, "learning_rate": 3.7971428571428576e-05, "loss": 2.5015, "step": 5900 }, { "epoch": 24.085714285714285, "grad_norm": 8.316901206970215, "learning_rate": 3.796938775510204e-05, "loss": 3.1712, "step": 5901 }, { "epoch": 24.089795918367347, "grad_norm": 13.094791412353516, "learning_rate": 3.796734693877551e-05, "loss": 2.659, "step": 5902 }, { "epoch": 24.09387755102041, "grad_norm": 32.10322189331055, "learning_rate": 3.796530612244898e-05, "loss": 1.4508, "step": 5903 }, { "epoch": 24.09795918367347, "grad_norm": 19.07168960571289, "learning_rate": 3.796326530612245e-05, "loss": 2.364, "step": 5904 }, { "epoch": 24.102040816326532, "grad_norm": 8.278473854064941, "learning_rate": 3.796122448979592e-05, "loss": 3.3123, "step": 5905 }, { "epoch": 24.10612244897959, "grad_norm": 15.822975158691406, "learning_rate": 3.795918367346939e-05, "loss": 2.3345, "step": 5906 }, { "epoch": 24.110204081632652, "grad_norm": 9.651365280151367, "learning_rate": 3.795714285714286e-05, "loss": 3.2752, "step": 5907 }, { "epoch": 24.114285714285714, "grad_norm": 11.796770095825195, "learning_rate": 3.7955102040816326e-05, "loss": 2.6075, "step": 5908 }, { "epoch": 24.118367346938776, "grad_norm": 23.24387550354004, "learning_rate": 3.79530612244898e-05, "loss": 2.5653, "step": 5909 }, { "epoch": 24.122448979591837, "grad_norm": 13.42680835723877, "learning_rate": 3.795102040816327e-05, "loss": 3.0532, "step": 5910 }, { "epoch": 24.1265306122449, "grad_norm": 9.85569953918457, "learning_rate": 3.7948979591836734e-05, "loss": 3.2456, "step": 5911 }, { "epoch": 24.13061224489796, "grad_norm": 10.040945053100586, "learning_rate": 3.79469387755102e-05, "loss": 3.0678, "step": 5912 }, { "epoch": 24.13469387755102, "grad_norm": 7.870799541473389, "learning_rate": 3.7944897959183675e-05, "loss": 3.4551, "step": 5913 }, { "epoch": 24.13877551020408, "grad_norm": 15.50890064239502, "learning_rate": 3.794285714285715e-05, "loss": 2.5086, "step": 5914 }, { "epoch": 24.142857142857142, "grad_norm": 14.670706748962402, "learning_rate": 3.7940816326530616e-05, "loss": 2.9257, "step": 5915 }, { "epoch": 24.146938775510204, "grad_norm": 10.274012565612793, "learning_rate": 3.7938775510204084e-05, "loss": 2.9463, "step": 5916 }, { "epoch": 24.151020408163266, "grad_norm": 18.893659591674805, "learning_rate": 3.793673469387755e-05, "loss": 2.6639, "step": 5917 }, { "epoch": 24.155102040816328, "grad_norm": 13.228147506713867, "learning_rate": 3.793469387755102e-05, "loss": 3.0406, "step": 5918 }, { "epoch": 24.15918367346939, "grad_norm": 10.496131896972656, "learning_rate": 3.793265306122449e-05, "loss": 3.2242, "step": 5919 }, { "epoch": 24.163265306122447, "grad_norm": 18.973474502563477, "learning_rate": 3.793061224489796e-05, "loss": 3.034, "step": 5920 }, { "epoch": 24.16734693877551, "grad_norm": 11.933337211608887, "learning_rate": 3.792857142857143e-05, "loss": 2.8182, "step": 5921 }, { "epoch": 24.17142857142857, "grad_norm": 5.36415958404541, "learning_rate": 3.79265306122449e-05, "loss": 3.5565, "step": 5922 }, { "epoch": 24.175510204081633, "grad_norm": 6.341534614562988, "learning_rate": 3.792448979591837e-05, "loss": 3.4357, "step": 5923 }, { "epoch": 24.179591836734694, "grad_norm": 6.685601711273193, "learning_rate": 3.792244897959184e-05, "loss": 3.7492, "step": 5924 }, { "epoch": 24.183673469387756, "grad_norm": 7.493026256561279, "learning_rate": 3.792040816326531e-05, "loss": 3.571, "step": 5925 }, { "epoch": 24.187755102040818, "grad_norm": 10.854621887207031, "learning_rate": 3.7918367346938775e-05, "loss": 2.9783, "step": 5926 }, { "epoch": 24.191836734693876, "grad_norm": 9.24692153930664, "learning_rate": 3.791632653061224e-05, "loss": 2.8244, "step": 5927 }, { "epoch": 24.195918367346938, "grad_norm": 7.951578140258789, "learning_rate": 3.7914285714285716e-05, "loss": 3.2424, "step": 5928 }, { "epoch": 24.2, "grad_norm": 9.121843338012695, "learning_rate": 3.791224489795919e-05, "loss": 3.0706, "step": 5929 }, { "epoch": 24.20408163265306, "grad_norm": 11.439299583435059, "learning_rate": 3.791020408163266e-05, "loss": 2.9254, "step": 5930 }, { "epoch": 24.208163265306123, "grad_norm": 8.346940040588379, "learning_rate": 3.7908163265306124e-05, "loss": 3.3167, "step": 5931 }, { "epoch": 24.212244897959184, "grad_norm": 31.033037185668945, "learning_rate": 3.790612244897959e-05, "loss": 1.6296, "step": 5932 }, { "epoch": 24.216326530612246, "grad_norm": 8.274601936340332, "learning_rate": 3.7904081632653065e-05, "loss": 3.5376, "step": 5933 }, { "epoch": 24.220408163265308, "grad_norm": 14.816387176513672, "learning_rate": 3.790204081632653e-05, "loss": 2.5068, "step": 5934 }, { "epoch": 24.224489795918366, "grad_norm": 15.95567798614502, "learning_rate": 3.79e-05, "loss": 2.701, "step": 5935 }, { "epoch": 24.228571428571428, "grad_norm": 21.63446044921875, "learning_rate": 3.7897959183673474e-05, "loss": 2.3805, "step": 5936 }, { "epoch": 24.23265306122449, "grad_norm": 8.60581111907959, "learning_rate": 3.789591836734694e-05, "loss": 3.3406, "step": 5937 }, { "epoch": 24.23673469387755, "grad_norm": 6.9753899574279785, "learning_rate": 3.7893877551020415e-05, "loss": 3.3169, "step": 5938 }, { "epoch": 24.240816326530613, "grad_norm": 17.367061614990234, "learning_rate": 3.789183673469388e-05, "loss": 2.3354, "step": 5939 }, { "epoch": 24.244897959183675, "grad_norm": 10.32045841217041, "learning_rate": 3.788979591836735e-05, "loss": 2.8905, "step": 5940 }, { "epoch": 24.248979591836736, "grad_norm": 19.757078170776367, "learning_rate": 3.7887755102040816e-05, "loss": 1.6894, "step": 5941 }, { "epoch": 24.253061224489795, "grad_norm": 10.436251640319824, "learning_rate": 3.788571428571428e-05, "loss": 3.1868, "step": 5942 }, { "epoch": 24.257142857142856, "grad_norm": 12.698736190795898, "learning_rate": 3.788367346938776e-05, "loss": 2.6985, "step": 5943 }, { "epoch": 24.261224489795918, "grad_norm": 13.839469909667969, "learning_rate": 3.788163265306123e-05, "loss": 2.6568, "step": 5944 }, { "epoch": 24.26530612244898, "grad_norm": 14.46320629119873, "learning_rate": 3.78795918367347e-05, "loss": 2.4746, "step": 5945 }, { "epoch": 24.26938775510204, "grad_norm": 20.52033233642578, "learning_rate": 3.7877551020408165e-05, "loss": 2.2026, "step": 5946 }, { "epoch": 24.273469387755103, "grad_norm": 11.97889518737793, "learning_rate": 3.787551020408163e-05, "loss": 2.8728, "step": 5947 }, { "epoch": 24.277551020408165, "grad_norm": 18.061819076538086, "learning_rate": 3.7873469387755106e-05, "loss": 3.0339, "step": 5948 }, { "epoch": 24.281632653061223, "grad_norm": 18.464956283569336, "learning_rate": 3.787142857142857e-05, "loss": 2.2163, "step": 5949 }, { "epoch": 24.285714285714285, "grad_norm": 15.403233528137207, "learning_rate": 3.786938775510204e-05, "loss": 2.3068, "step": 5950 }, { "epoch": 24.289795918367346, "grad_norm": 18.194868087768555, "learning_rate": 3.786734693877551e-05, "loss": 2.2365, "step": 5951 }, { "epoch": 24.293877551020408, "grad_norm": 27.81174659729004, "learning_rate": 3.786530612244898e-05, "loss": 2.1536, "step": 5952 }, { "epoch": 24.29795918367347, "grad_norm": 8.222526550292969, "learning_rate": 3.7863265306122455e-05, "loss": 3.2933, "step": 5953 }, { "epoch": 24.30204081632653, "grad_norm": 11.47107219696045, "learning_rate": 3.786122448979592e-05, "loss": 2.8274, "step": 5954 }, { "epoch": 24.306122448979593, "grad_norm": 5.821232795715332, "learning_rate": 3.785918367346939e-05, "loss": 3.7959, "step": 5955 }, { "epoch": 24.31020408163265, "grad_norm": 12.078981399536133, "learning_rate": 3.785714285714286e-05, "loss": 2.7541, "step": 5956 }, { "epoch": 24.314285714285713, "grad_norm": 10.8734130859375, "learning_rate": 3.7855102040816324e-05, "loss": 2.8876, "step": 5957 }, { "epoch": 24.318367346938775, "grad_norm": 21.481237411499023, "learning_rate": 3.78530612244898e-05, "loss": 2.1105, "step": 5958 }, { "epoch": 24.322448979591837, "grad_norm": 24.946672439575195, "learning_rate": 3.7851020408163265e-05, "loss": 0.5618, "step": 5959 }, { "epoch": 24.3265306122449, "grad_norm": 6.246306896209717, "learning_rate": 3.784897959183674e-05, "loss": 3.6089, "step": 5960 }, { "epoch": 24.33061224489796, "grad_norm": 16.23762321472168, "learning_rate": 3.7846938775510206e-05, "loss": 2.9319, "step": 5961 }, { "epoch": 24.33469387755102, "grad_norm": 11.333280563354492, "learning_rate": 3.784489795918367e-05, "loss": 3.2586, "step": 5962 }, { "epoch": 24.33877551020408, "grad_norm": 11.846768379211426, "learning_rate": 3.784285714285715e-05, "loss": 2.9693, "step": 5963 }, { "epoch": 24.34285714285714, "grad_norm": 10.516962051391602, "learning_rate": 3.7840816326530614e-05, "loss": 3.2908, "step": 5964 }, { "epoch": 24.346938775510203, "grad_norm": 8.003439903259277, "learning_rate": 3.783877551020408e-05, "loss": 3.2398, "step": 5965 }, { "epoch": 24.351020408163265, "grad_norm": 16.10408592224121, "learning_rate": 3.783673469387755e-05, "loss": 2.5097, "step": 5966 }, { "epoch": 24.355102040816327, "grad_norm": 11.685348510742188, "learning_rate": 3.783469387755102e-05, "loss": 2.7526, "step": 5967 }, { "epoch": 24.35918367346939, "grad_norm": 20.193897247314453, "learning_rate": 3.7832653061224496e-05, "loss": 1.813, "step": 5968 }, { "epoch": 24.36326530612245, "grad_norm": 13.687141418457031, "learning_rate": 3.783061224489796e-05, "loss": 2.4848, "step": 5969 }, { "epoch": 24.367346938775512, "grad_norm": 19.810686111450195, "learning_rate": 3.782857142857143e-05, "loss": 1.7175, "step": 5970 }, { "epoch": 24.37142857142857, "grad_norm": 9.418404579162598, "learning_rate": 3.78265306122449e-05, "loss": 3.4508, "step": 5971 }, { "epoch": 24.375510204081632, "grad_norm": 8.4124174118042, "learning_rate": 3.7824489795918365e-05, "loss": 3.3505, "step": 5972 }, { "epoch": 24.379591836734694, "grad_norm": 22.206127166748047, "learning_rate": 3.782244897959184e-05, "loss": 1.2236, "step": 5973 }, { "epoch": 24.383673469387755, "grad_norm": 9.477554321289062, "learning_rate": 3.7820408163265306e-05, "loss": 3.2072, "step": 5974 }, { "epoch": 24.387755102040817, "grad_norm": 22.221059799194336, "learning_rate": 3.781836734693878e-05, "loss": 2.2147, "step": 5975 }, { "epoch": 24.39183673469388, "grad_norm": 10.732254981994629, "learning_rate": 3.781632653061225e-05, "loss": 3.0975, "step": 5976 }, { "epoch": 24.39591836734694, "grad_norm": 7.100422382354736, "learning_rate": 3.781428571428572e-05, "loss": 3.5065, "step": 5977 }, { "epoch": 24.4, "grad_norm": 8.160658836364746, "learning_rate": 3.781224489795919e-05, "loss": 3.2413, "step": 5978 }, { "epoch": 24.40408163265306, "grad_norm": 11.340450286865234, "learning_rate": 3.7810204081632655e-05, "loss": 3.0054, "step": 5979 }, { "epoch": 24.408163265306122, "grad_norm": 8.096067428588867, "learning_rate": 3.780816326530612e-05, "loss": 3.4969, "step": 5980 }, { "epoch": 24.412244897959184, "grad_norm": 5.8948516845703125, "learning_rate": 3.780612244897959e-05, "loss": 3.904, "step": 5981 }, { "epoch": 24.416326530612245, "grad_norm": 14.5794677734375, "learning_rate": 3.780408163265306e-05, "loss": 3.0908, "step": 5982 }, { "epoch": 24.420408163265307, "grad_norm": 10.088412284851074, "learning_rate": 3.780204081632654e-05, "loss": 3.2215, "step": 5983 }, { "epoch": 24.42448979591837, "grad_norm": 20.008493423461914, "learning_rate": 3.7800000000000004e-05, "loss": 2.5066, "step": 5984 }, { "epoch": 24.428571428571427, "grad_norm": 12.275003433227539, "learning_rate": 3.779795918367347e-05, "loss": 2.6967, "step": 5985 }, { "epoch": 24.43265306122449, "grad_norm": 10.74232292175293, "learning_rate": 3.779591836734694e-05, "loss": 3.2078, "step": 5986 }, { "epoch": 24.43673469387755, "grad_norm": 19.972585678100586, "learning_rate": 3.779387755102041e-05, "loss": 2.4464, "step": 5987 }, { "epoch": 24.440816326530612, "grad_norm": 8.958446502685547, "learning_rate": 3.779183673469388e-05, "loss": 3.2454, "step": 5988 }, { "epoch": 24.444897959183674, "grad_norm": 10.652371406555176, "learning_rate": 3.7789795918367346e-05, "loss": 3.1696, "step": 5989 }, { "epoch": 24.448979591836736, "grad_norm": 18.17371940612793, "learning_rate": 3.778775510204082e-05, "loss": 2.8838, "step": 5990 }, { "epoch": 24.453061224489797, "grad_norm": 18.353721618652344, "learning_rate": 3.778571428571429e-05, "loss": 2.5316, "step": 5991 }, { "epoch": 24.457142857142856, "grad_norm": 20.383211135864258, "learning_rate": 3.778367346938776e-05, "loss": 2.3382, "step": 5992 }, { "epoch": 24.461224489795917, "grad_norm": 12.460992813110352, "learning_rate": 3.778163265306123e-05, "loss": 2.8376, "step": 5993 }, { "epoch": 24.46530612244898, "grad_norm": 10.666467666625977, "learning_rate": 3.7779591836734696e-05, "loss": 3.1084, "step": 5994 }, { "epoch": 24.46938775510204, "grad_norm": 16.195892333984375, "learning_rate": 3.777755102040816e-05, "loss": 2.2292, "step": 5995 }, { "epoch": 24.473469387755102, "grad_norm": 12.929513931274414, "learning_rate": 3.777551020408163e-05, "loss": 2.7724, "step": 5996 }, { "epoch": 24.477551020408164, "grad_norm": 19.452308654785156, "learning_rate": 3.7773469387755104e-05, "loss": 2.5454, "step": 5997 }, { "epoch": 24.481632653061226, "grad_norm": 25.265600204467773, "learning_rate": 3.777142857142858e-05, "loss": 1.718, "step": 5998 }, { "epoch": 24.485714285714284, "grad_norm": 18.194934844970703, "learning_rate": 3.7769387755102045e-05, "loss": 2.3078, "step": 5999 }, { "epoch": 24.489795918367346, "grad_norm": 21.05728530883789, "learning_rate": 3.776734693877551e-05, "loss": 2.2818, "step": 6000 }, { "epoch": 24.493877551020407, "grad_norm": 11.20350456237793, "learning_rate": 3.776530612244898e-05, "loss": 3.0022, "step": 6001 }, { "epoch": 24.49795918367347, "grad_norm": 8.838228225708008, "learning_rate": 3.776326530612245e-05, "loss": 3.2649, "step": 6002 }, { "epoch": 24.50204081632653, "grad_norm": 6.75518798828125, "learning_rate": 3.776122448979592e-05, "loss": 3.5346, "step": 6003 }, { "epoch": 24.506122448979593, "grad_norm": 7.763035297393799, "learning_rate": 3.775918367346939e-05, "loss": 3.2585, "step": 6004 }, { "epoch": 24.510204081632654, "grad_norm": 11.695416450500488, "learning_rate": 3.7757142857142854e-05, "loss": 3.09, "step": 6005 }, { "epoch": 24.514285714285712, "grad_norm": 9.477802276611328, "learning_rate": 3.775510204081633e-05, "loss": 3.2003, "step": 6006 }, { "epoch": 24.518367346938774, "grad_norm": 8.255719184875488, "learning_rate": 3.77530612244898e-05, "loss": 3.2242, "step": 6007 }, { "epoch": 24.522448979591836, "grad_norm": 10.633461952209473, "learning_rate": 3.775102040816327e-05, "loss": 3.0074, "step": 6008 }, { "epoch": 24.526530612244898, "grad_norm": 9.058398246765137, "learning_rate": 3.7748979591836736e-05, "loss": 3.3678, "step": 6009 }, { "epoch": 24.53061224489796, "grad_norm": 17.052934646606445, "learning_rate": 3.7746938775510203e-05, "loss": 2.3125, "step": 6010 }, { "epoch": 24.53469387755102, "grad_norm": 8.67896842956543, "learning_rate": 3.774489795918367e-05, "loss": 3.286, "step": 6011 }, { "epoch": 24.538775510204083, "grad_norm": 14.710329055786133, "learning_rate": 3.7742857142857145e-05, "loss": 2.8233, "step": 6012 }, { "epoch": 24.542857142857144, "grad_norm": 10.670089721679688, "learning_rate": 3.774081632653061e-05, "loss": 3.2177, "step": 6013 }, { "epoch": 24.546938775510203, "grad_norm": 6.5815629959106445, "learning_rate": 3.7738775510204086e-05, "loss": 3.6527, "step": 6014 }, { "epoch": 24.551020408163264, "grad_norm": 18.978206634521484, "learning_rate": 3.773673469387755e-05, "loss": 2.7597, "step": 6015 }, { "epoch": 24.555102040816326, "grad_norm": 8.815546035766602, "learning_rate": 3.773469387755102e-05, "loss": 3.1039, "step": 6016 }, { "epoch": 24.559183673469388, "grad_norm": 8.886019706726074, "learning_rate": 3.7732653061224494e-05, "loss": 3.2075, "step": 6017 }, { "epoch": 24.56326530612245, "grad_norm": 11.403627395629883, "learning_rate": 3.773061224489796e-05, "loss": 2.8943, "step": 6018 }, { "epoch": 24.56734693877551, "grad_norm": 12.859882354736328, "learning_rate": 3.772857142857143e-05, "loss": 3.326, "step": 6019 }, { "epoch": 24.571428571428573, "grad_norm": 19.838821411132812, "learning_rate": 3.7726530612244895e-05, "loss": 2.9977, "step": 6020 }, { "epoch": 24.57551020408163, "grad_norm": 6.5848541259765625, "learning_rate": 3.772448979591837e-05, "loss": 3.5744, "step": 6021 }, { "epoch": 24.579591836734693, "grad_norm": 7.169225215911865, "learning_rate": 3.772244897959184e-05, "loss": 3.7046, "step": 6022 }, { "epoch": 24.583673469387755, "grad_norm": 9.899275779724121, "learning_rate": 3.772040816326531e-05, "loss": 3.2126, "step": 6023 }, { "epoch": 24.587755102040816, "grad_norm": 13.990409851074219, "learning_rate": 3.771836734693878e-05, "loss": 2.479, "step": 6024 }, { "epoch": 24.591836734693878, "grad_norm": 12.86701488494873, "learning_rate": 3.7716326530612244e-05, "loss": 3.0222, "step": 6025 }, { "epoch": 24.59591836734694, "grad_norm": 11.49988842010498, "learning_rate": 3.771428571428572e-05, "loss": 3.19, "step": 6026 }, { "epoch": 24.6, "grad_norm": 17.12278175354004, "learning_rate": 3.7712244897959185e-05, "loss": 2.6623, "step": 6027 }, { "epoch": 24.60408163265306, "grad_norm": 11.932971954345703, "learning_rate": 3.771020408163265e-05, "loss": 3.0053, "step": 6028 }, { "epoch": 24.60816326530612, "grad_norm": 10.49734878540039, "learning_rate": 3.7708163265306126e-05, "loss": 3.1556, "step": 6029 }, { "epoch": 24.612244897959183, "grad_norm": 6.758734226226807, "learning_rate": 3.7706122448979593e-05, "loss": 3.1531, "step": 6030 }, { "epoch": 24.616326530612245, "grad_norm": 5.963618755340576, "learning_rate": 3.770408163265307e-05, "loss": 3.6707, "step": 6031 }, { "epoch": 24.620408163265306, "grad_norm": 7.833986759185791, "learning_rate": 3.7702040816326534e-05, "loss": 3.4653, "step": 6032 }, { "epoch": 24.624489795918368, "grad_norm": 23.642837524414062, "learning_rate": 3.77e-05, "loss": 1.5087, "step": 6033 }, { "epoch": 24.62857142857143, "grad_norm": 17.15212631225586, "learning_rate": 3.769795918367347e-05, "loss": 2.7261, "step": 6034 }, { "epoch": 24.632653061224488, "grad_norm": 22.498340606689453, "learning_rate": 3.7695918367346936e-05, "loss": 2.0548, "step": 6035 }, { "epoch": 24.63673469387755, "grad_norm": 21.742490768432617, "learning_rate": 3.769387755102041e-05, "loss": 2.3471, "step": 6036 }, { "epoch": 24.64081632653061, "grad_norm": 8.234953880310059, "learning_rate": 3.7691836734693884e-05, "loss": 3.5397, "step": 6037 }, { "epoch": 24.644897959183673, "grad_norm": 12.457242012023926, "learning_rate": 3.768979591836735e-05, "loss": 2.8513, "step": 6038 }, { "epoch": 24.648979591836735, "grad_norm": 11.605146408081055, "learning_rate": 3.768775510204082e-05, "loss": 3.1687, "step": 6039 }, { "epoch": 24.653061224489797, "grad_norm": 8.044166564941406, "learning_rate": 3.7685714285714285e-05, "loss": 3.4453, "step": 6040 }, { "epoch": 24.65714285714286, "grad_norm": 12.63317584991455, "learning_rate": 3.768367346938776e-05, "loss": 2.6991, "step": 6041 }, { "epoch": 24.66122448979592, "grad_norm": 12.524018287658691, "learning_rate": 3.7681632653061226e-05, "loss": 2.9444, "step": 6042 }, { "epoch": 24.66530612244898, "grad_norm": 12.261632919311523, "learning_rate": 3.767959183673469e-05, "loss": 2.8512, "step": 6043 }, { "epoch": 24.66938775510204, "grad_norm": 16.961668014526367, "learning_rate": 3.767755102040817e-05, "loss": 2.5502, "step": 6044 }, { "epoch": 24.6734693877551, "grad_norm": 11.639568328857422, "learning_rate": 3.7675510204081634e-05, "loss": 3.1975, "step": 6045 }, { "epoch": 24.677551020408163, "grad_norm": 7.935963153839111, "learning_rate": 3.767346938775511e-05, "loss": 3.2949, "step": 6046 }, { "epoch": 24.681632653061225, "grad_norm": 25.272703170776367, "learning_rate": 3.7671428571428575e-05, "loss": 1.5655, "step": 6047 }, { "epoch": 24.685714285714287, "grad_norm": 16.06361198425293, "learning_rate": 3.766938775510204e-05, "loss": 2.5509, "step": 6048 }, { "epoch": 24.68979591836735, "grad_norm": 18.11986541748047, "learning_rate": 3.766734693877551e-05, "loss": 2.4138, "step": 6049 }, { "epoch": 24.693877551020407, "grad_norm": 8.534457206726074, "learning_rate": 3.7665306122448977e-05, "loss": 3.3953, "step": 6050 }, { "epoch": 24.69795918367347, "grad_norm": 13.697205543518066, "learning_rate": 3.766326530612245e-05, "loss": 2.8092, "step": 6051 }, { "epoch": 24.70204081632653, "grad_norm": 11.563554763793945, "learning_rate": 3.766122448979592e-05, "loss": 3.1989, "step": 6052 }, { "epoch": 24.706122448979592, "grad_norm": 10.461934089660645, "learning_rate": 3.765918367346939e-05, "loss": 3.1644, "step": 6053 }, { "epoch": 24.710204081632654, "grad_norm": 10.541679382324219, "learning_rate": 3.765714285714286e-05, "loss": 2.9437, "step": 6054 }, { "epoch": 24.714285714285715, "grad_norm": 9.44731616973877, "learning_rate": 3.7655102040816326e-05, "loss": 3.3787, "step": 6055 }, { "epoch": 24.718367346938777, "grad_norm": 11.15580940246582, "learning_rate": 3.76530612244898e-05, "loss": 2.9583, "step": 6056 }, { "epoch": 24.722448979591835, "grad_norm": 14.062050819396973, "learning_rate": 3.765102040816327e-05, "loss": 2.9444, "step": 6057 }, { "epoch": 24.726530612244897, "grad_norm": 13.189534187316895, "learning_rate": 3.7648979591836734e-05, "loss": 2.7754, "step": 6058 }, { "epoch": 24.73061224489796, "grad_norm": 13.877482414245605, "learning_rate": 3.76469387755102e-05, "loss": 2.7954, "step": 6059 }, { "epoch": 24.73469387755102, "grad_norm": 20.56405258178711, "learning_rate": 3.7644897959183675e-05, "loss": 1.8348, "step": 6060 }, { "epoch": 24.738775510204082, "grad_norm": 27.202241897583008, "learning_rate": 3.764285714285715e-05, "loss": 2.4146, "step": 6061 }, { "epoch": 24.742857142857144, "grad_norm": 12.75212287902832, "learning_rate": 3.7640816326530616e-05, "loss": 3.3893, "step": 6062 }, { "epoch": 24.746938775510205, "grad_norm": 9.668614387512207, "learning_rate": 3.763877551020408e-05, "loss": 3.2012, "step": 6063 }, { "epoch": 24.751020408163264, "grad_norm": 15.379508972167969, "learning_rate": 3.763673469387755e-05, "loss": 2.5187, "step": 6064 }, { "epoch": 24.755102040816325, "grad_norm": 20.79688835144043, "learning_rate": 3.7634693877551024e-05, "loss": 1.617, "step": 6065 }, { "epoch": 24.759183673469387, "grad_norm": 7.93733024597168, "learning_rate": 3.763265306122449e-05, "loss": 3.2545, "step": 6066 }, { "epoch": 24.76326530612245, "grad_norm": 20.54527473449707, "learning_rate": 3.763061224489796e-05, "loss": 2.0038, "step": 6067 }, { "epoch": 24.76734693877551, "grad_norm": 11.964444160461426, "learning_rate": 3.762857142857143e-05, "loss": 3.0636, "step": 6068 }, { "epoch": 24.771428571428572, "grad_norm": 9.416333198547363, "learning_rate": 3.76265306122449e-05, "loss": 3.1749, "step": 6069 }, { "epoch": 24.775510204081634, "grad_norm": 13.515937805175781, "learning_rate": 3.762448979591837e-05, "loss": 3.0839, "step": 6070 }, { "epoch": 24.779591836734696, "grad_norm": 6.017630577087402, "learning_rate": 3.762244897959184e-05, "loss": 3.5586, "step": 6071 }, { "epoch": 24.783673469387754, "grad_norm": 17.248685836791992, "learning_rate": 3.762040816326531e-05, "loss": 2.6271, "step": 6072 }, { "epoch": 24.787755102040816, "grad_norm": 10.815595626831055, "learning_rate": 3.7618367346938775e-05, "loss": 3.3545, "step": 6073 }, { "epoch": 24.791836734693877, "grad_norm": 9.151969909667969, "learning_rate": 3.761632653061224e-05, "loss": 3.4403, "step": 6074 }, { "epoch": 24.79591836734694, "grad_norm": 19.982051849365234, "learning_rate": 3.7614285714285716e-05, "loss": 1.6178, "step": 6075 }, { "epoch": 24.8, "grad_norm": 9.016544342041016, "learning_rate": 3.761224489795919e-05, "loss": 3.0878, "step": 6076 }, { "epoch": 24.804081632653062, "grad_norm": 14.205487251281738, "learning_rate": 3.761020408163266e-05, "loss": 2.9587, "step": 6077 }, { "epoch": 24.808163265306124, "grad_norm": 14.90219497680664, "learning_rate": 3.7608163265306124e-05, "loss": 2.5266, "step": 6078 }, { "epoch": 24.812244897959182, "grad_norm": 15.8873291015625, "learning_rate": 3.760612244897959e-05, "loss": 2.7206, "step": 6079 }, { "epoch": 24.816326530612244, "grad_norm": 23.95646858215332, "learning_rate": 3.7604081632653065e-05, "loss": 2.4535, "step": 6080 }, { "epoch": 24.820408163265306, "grad_norm": 13.748692512512207, "learning_rate": 3.760204081632653e-05, "loss": 2.8455, "step": 6081 }, { "epoch": 24.824489795918367, "grad_norm": 9.719364166259766, "learning_rate": 3.76e-05, "loss": 3.3431, "step": 6082 }, { "epoch": 24.82857142857143, "grad_norm": 6.494349479675293, "learning_rate": 3.759795918367347e-05, "loss": 3.5135, "step": 6083 }, { "epoch": 24.83265306122449, "grad_norm": 16.12993049621582, "learning_rate": 3.759591836734694e-05, "loss": 2.6035, "step": 6084 }, { "epoch": 24.836734693877553, "grad_norm": 10.0618896484375, "learning_rate": 3.7593877551020414e-05, "loss": 3.2469, "step": 6085 }, { "epoch": 24.84081632653061, "grad_norm": 7.922247886657715, "learning_rate": 3.759183673469388e-05, "loss": 3.3479, "step": 6086 }, { "epoch": 24.844897959183672, "grad_norm": 22.494138717651367, "learning_rate": 3.758979591836735e-05, "loss": 2.2145, "step": 6087 }, { "epoch": 24.848979591836734, "grad_norm": 10.409502983093262, "learning_rate": 3.7587755102040815e-05, "loss": 3.1305, "step": 6088 }, { "epoch": 24.853061224489796, "grad_norm": 26.340755462646484, "learning_rate": 3.758571428571428e-05, "loss": 1.9234, "step": 6089 }, { "epoch": 24.857142857142858, "grad_norm": 14.558758735656738, "learning_rate": 3.7583673469387756e-05, "loss": 3.104, "step": 6090 }, { "epoch": 24.86122448979592, "grad_norm": 9.741327285766602, "learning_rate": 3.758163265306123e-05, "loss": 3.3608, "step": 6091 }, { "epoch": 24.86530612244898, "grad_norm": 22.21347427368164, "learning_rate": 3.75795918367347e-05, "loss": 2.2249, "step": 6092 }, { "epoch": 24.86938775510204, "grad_norm": 16.718202590942383, "learning_rate": 3.7577551020408165e-05, "loss": 2.9158, "step": 6093 }, { "epoch": 24.8734693877551, "grad_norm": 8.761621475219727, "learning_rate": 3.757551020408163e-05, "loss": 3.1274, "step": 6094 }, { "epoch": 24.877551020408163, "grad_norm": 22.082395553588867, "learning_rate": 3.7573469387755106e-05, "loss": 1.4508, "step": 6095 }, { "epoch": 24.881632653061224, "grad_norm": 14.102797508239746, "learning_rate": 3.757142857142857e-05, "loss": 3.059, "step": 6096 }, { "epoch": 24.885714285714286, "grad_norm": 8.719642639160156, "learning_rate": 3.756938775510204e-05, "loss": 3.396, "step": 6097 }, { "epoch": 24.889795918367348, "grad_norm": 12.965418815612793, "learning_rate": 3.7567346938775514e-05, "loss": 2.9854, "step": 6098 }, { "epoch": 24.89387755102041, "grad_norm": 30.448240280151367, "learning_rate": 3.756530612244898e-05, "loss": 2.3499, "step": 6099 }, { "epoch": 24.897959183673468, "grad_norm": 10.944938659667969, "learning_rate": 3.7563265306122455e-05, "loss": 3.2048, "step": 6100 }, { "epoch": 24.90204081632653, "grad_norm": 9.738409996032715, "learning_rate": 3.756122448979592e-05, "loss": 3.3777, "step": 6101 }, { "epoch": 24.90612244897959, "grad_norm": 11.901937484741211, "learning_rate": 3.755918367346939e-05, "loss": 3.195, "step": 6102 }, { "epoch": 24.910204081632653, "grad_norm": 8.253056526184082, "learning_rate": 3.7557142857142856e-05, "loss": 3.3932, "step": 6103 }, { "epoch": 24.914285714285715, "grad_norm": 7.049127101898193, "learning_rate": 3.755510204081632e-05, "loss": 3.4614, "step": 6104 }, { "epoch": 24.918367346938776, "grad_norm": 7.923530578613281, "learning_rate": 3.75530612244898e-05, "loss": 3.6857, "step": 6105 }, { "epoch": 24.922448979591838, "grad_norm": 11.885440826416016, "learning_rate": 3.7551020408163264e-05, "loss": 3.02, "step": 6106 }, { "epoch": 24.926530612244896, "grad_norm": 19.81219482421875, "learning_rate": 3.754897959183674e-05, "loss": 2.3534, "step": 6107 }, { "epoch": 24.930612244897958, "grad_norm": 11.84090518951416, "learning_rate": 3.7546938775510205e-05, "loss": 3.1044, "step": 6108 }, { "epoch": 24.93469387755102, "grad_norm": 9.784829139709473, "learning_rate": 3.754489795918368e-05, "loss": 3.1116, "step": 6109 }, { "epoch": 24.93877551020408, "grad_norm": 11.649321556091309, "learning_rate": 3.7542857142857146e-05, "loss": 2.9462, "step": 6110 }, { "epoch": 24.942857142857143, "grad_norm": 6.903984546661377, "learning_rate": 3.7540816326530614e-05, "loss": 3.5073, "step": 6111 }, { "epoch": 24.946938775510205, "grad_norm": 9.010640144348145, "learning_rate": 3.753877551020408e-05, "loss": 3.0203, "step": 6112 }, { "epoch": 24.951020408163266, "grad_norm": 11.02528190612793, "learning_rate": 3.753673469387755e-05, "loss": 2.8808, "step": 6113 }, { "epoch": 24.955102040816328, "grad_norm": 8.834565162658691, "learning_rate": 3.753469387755102e-05, "loss": 3.5552, "step": 6114 }, { "epoch": 24.959183673469386, "grad_norm": 12.963186264038086, "learning_rate": 3.7532653061224496e-05, "loss": 2.8447, "step": 6115 }, { "epoch": 24.963265306122448, "grad_norm": 10.658967018127441, "learning_rate": 3.753061224489796e-05, "loss": 3.2849, "step": 6116 }, { "epoch": 24.96734693877551, "grad_norm": 17.585580825805664, "learning_rate": 3.752857142857143e-05, "loss": 2.2433, "step": 6117 }, { "epoch": 24.97142857142857, "grad_norm": 15.202224731445312, "learning_rate": 3.75265306122449e-05, "loss": 2.6135, "step": 6118 }, { "epoch": 24.975510204081633, "grad_norm": 13.46840763092041, "learning_rate": 3.752448979591837e-05, "loss": 3.0861, "step": 6119 }, { "epoch": 24.979591836734695, "grad_norm": 15.503110885620117, "learning_rate": 3.752244897959184e-05, "loss": 3.0422, "step": 6120 }, { "epoch": 24.983673469387757, "grad_norm": 12.186903953552246, "learning_rate": 3.7520408163265305e-05, "loss": 3.016, "step": 6121 }, { "epoch": 24.987755102040815, "grad_norm": 9.026901245117188, "learning_rate": 3.751836734693878e-05, "loss": 3.3104, "step": 6122 }, { "epoch": 24.991836734693877, "grad_norm": 8.480757713317871, "learning_rate": 3.7516326530612246e-05, "loss": 3.2718, "step": 6123 }, { "epoch": 24.99591836734694, "grad_norm": 8.565447807312012, "learning_rate": 3.751428571428572e-05, "loss": 3.5146, "step": 6124 }, { "epoch": 25.0, "grad_norm": 20.814611434936523, "learning_rate": 3.751224489795919e-05, "loss": 2.3342, "step": 6125 }, { "epoch": 25.00408163265306, "grad_norm": 22.021841049194336, "learning_rate": 3.7510204081632654e-05, "loss": 2.7285, "step": 6126 }, { "epoch": 25.008163265306123, "grad_norm": 15.072834968566895, "learning_rate": 3.750816326530612e-05, "loss": 2.9784, "step": 6127 }, { "epoch": 25.012244897959185, "grad_norm": 12.962369918823242, "learning_rate": 3.750612244897959e-05, "loss": 2.6251, "step": 6128 }, { "epoch": 25.016326530612243, "grad_norm": 14.01209545135498, "learning_rate": 3.750408163265306e-05, "loss": 2.8779, "step": 6129 }, { "epoch": 25.020408163265305, "grad_norm": 20.57238006591797, "learning_rate": 3.7502040816326536e-05, "loss": 2.9261, "step": 6130 }, { "epoch": 25.024489795918367, "grad_norm": 18.289459228515625, "learning_rate": 3.7500000000000003e-05, "loss": 2.2481, "step": 6131 }, { "epoch": 25.02857142857143, "grad_norm": 18.35220718383789, "learning_rate": 3.749795918367347e-05, "loss": 0.5014, "step": 6132 }, { "epoch": 25.03265306122449, "grad_norm": 14.665192604064941, "learning_rate": 3.749591836734694e-05, "loss": 2.2977, "step": 6133 }, { "epoch": 25.036734693877552, "grad_norm": 16.733789443969727, "learning_rate": 3.749387755102041e-05, "loss": 2.7769, "step": 6134 }, { "epoch": 25.040816326530614, "grad_norm": 9.882631301879883, "learning_rate": 3.749183673469388e-05, "loss": 3.3168, "step": 6135 }, { "epoch": 25.044897959183672, "grad_norm": 19.787843704223633, "learning_rate": 3.7489795918367346e-05, "loss": 1.4634, "step": 6136 }, { "epoch": 25.048979591836734, "grad_norm": 8.717400550842285, "learning_rate": 3.748775510204082e-05, "loss": 3.2716, "step": 6137 }, { "epoch": 25.053061224489795, "grad_norm": 12.870079040527344, "learning_rate": 3.748571428571429e-05, "loss": 2.8503, "step": 6138 }, { "epoch": 25.057142857142857, "grad_norm": 15.656563758850098, "learning_rate": 3.748367346938776e-05, "loss": 2.6639, "step": 6139 }, { "epoch": 25.06122448979592, "grad_norm": 14.264935493469238, "learning_rate": 3.748163265306123e-05, "loss": 2.9438, "step": 6140 }, { "epoch": 25.06530612244898, "grad_norm": 16.068559646606445, "learning_rate": 3.7479591836734695e-05, "loss": 2.4613, "step": 6141 }, { "epoch": 25.069387755102042, "grad_norm": 16.426719665527344, "learning_rate": 3.747755102040816e-05, "loss": 2.5978, "step": 6142 }, { "epoch": 25.073469387755104, "grad_norm": 9.163118362426758, "learning_rate": 3.747551020408163e-05, "loss": 3.1472, "step": 6143 }, { "epoch": 25.077551020408162, "grad_norm": 13.110809326171875, "learning_rate": 3.74734693877551e-05, "loss": 3.2673, "step": 6144 }, { "epoch": 25.081632653061224, "grad_norm": 8.962112426757812, "learning_rate": 3.747142857142858e-05, "loss": 3.3266, "step": 6145 }, { "epoch": 25.085714285714285, "grad_norm": 17.466449737548828, "learning_rate": 3.7469387755102044e-05, "loss": 2.7359, "step": 6146 }, { "epoch": 25.089795918367347, "grad_norm": 16.323495864868164, "learning_rate": 3.746734693877551e-05, "loss": 2.6394, "step": 6147 }, { "epoch": 25.09387755102041, "grad_norm": 11.77118968963623, "learning_rate": 3.746530612244898e-05, "loss": 3.0791, "step": 6148 }, { "epoch": 25.09795918367347, "grad_norm": 22.459238052368164, "learning_rate": 3.746326530612245e-05, "loss": 1.7279, "step": 6149 }, { "epoch": 25.102040816326532, "grad_norm": 10.237668991088867, "learning_rate": 3.746122448979592e-05, "loss": 3.1216, "step": 6150 }, { "epoch": 25.10612244897959, "grad_norm": 15.81849479675293, "learning_rate": 3.745918367346939e-05, "loss": 2.6966, "step": 6151 }, { "epoch": 25.110204081632652, "grad_norm": 10.604192733764648, "learning_rate": 3.745714285714286e-05, "loss": 3.1426, "step": 6152 }, { "epoch": 25.114285714285714, "grad_norm": 21.358308792114258, "learning_rate": 3.745510204081633e-05, "loss": 2.1412, "step": 6153 }, { "epoch": 25.118367346938776, "grad_norm": 13.156410217285156, "learning_rate": 3.74530612244898e-05, "loss": 3.0933, "step": 6154 }, { "epoch": 25.122448979591837, "grad_norm": 11.34353256225586, "learning_rate": 3.745102040816327e-05, "loss": 3.139, "step": 6155 }, { "epoch": 25.1265306122449, "grad_norm": 15.731584548950195, "learning_rate": 3.7448979591836736e-05, "loss": 2.2685, "step": 6156 }, { "epoch": 25.13061224489796, "grad_norm": 15.41954517364502, "learning_rate": 3.74469387755102e-05, "loss": 2.4835, "step": 6157 }, { "epoch": 25.13469387755102, "grad_norm": 22.801483154296875, "learning_rate": 3.744489795918368e-05, "loss": 2.0087, "step": 6158 }, { "epoch": 25.13877551020408, "grad_norm": 10.172082901000977, "learning_rate": 3.7442857142857144e-05, "loss": 2.7849, "step": 6159 }, { "epoch": 25.142857142857142, "grad_norm": 23.279504776000977, "learning_rate": 3.744081632653061e-05, "loss": 1.6785, "step": 6160 }, { "epoch": 25.146938775510204, "grad_norm": 28.2279109954834, "learning_rate": 3.7438775510204085e-05, "loss": 2.0585, "step": 6161 }, { "epoch": 25.151020408163266, "grad_norm": 12.906418800354004, "learning_rate": 3.743673469387755e-05, "loss": 2.7697, "step": 6162 }, { "epoch": 25.155102040816328, "grad_norm": 12.432849884033203, "learning_rate": 3.7434693877551026e-05, "loss": 2.7379, "step": 6163 }, { "epoch": 25.15918367346939, "grad_norm": 14.50540828704834, "learning_rate": 3.743265306122449e-05, "loss": 2.8749, "step": 6164 }, { "epoch": 25.163265306122447, "grad_norm": 14.038348197937012, "learning_rate": 3.743061224489796e-05, "loss": 3.0302, "step": 6165 }, { "epoch": 25.16734693877551, "grad_norm": 19.933317184448242, "learning_rate": 3.742857142857143e-05, "loss": 2.8794, "step": 6166 }, { "epoch": 25.17142857142857, "grad_norm": 16.435508728027344, "learning_rate": 3.7426530612244895e-05, "loss": 2.8312, "step": 6167 }, { "epoch": 25.175510204081633, "grad_norm": 11.960067749023438, "learning_rate": 3.742448979591837e-05, "loss": 2.822, "step": 6168 }, { "epoch": 25.179591836734694, "grad_norm": 13.702274322509766, "learning_rate": 3.742244897959184e-05, "loss": 2.9256, "step": 6169 }, { "epoch": 25.183673469387756, "grad_norm": 11.881253242492676, "learning_rate": 3.742040816326531e-05, "loss": 2.832, "step": 6170 }, { "epoch": 25.187755102040818, "grad_norm": 19.640151977539062, "learning_rate": 3.7418367346938777e-05, "loss": 2.2726, "step": 6171 }, { "epoch": 25.191836734693876, "grad_norm": 21.271913528442383, "learning_rate": 3.7416326530612244e-05, "loss": 2.5074, "step": 6172 }, { "epoch": 25.195918367346938, "grad_norm": 14.691140174865723, "learning_rate": 3.741428571428572e-05, "loss": 2.6586, "step": 6173 }, { "epoch": 25.2, "grad_norm": 16.88669204711914, "learning_rate": 3.7412244897959185e-05, "loss": 2.2815, "step": 6174 }, { "epoch": 25.20408163265306, "grad_norm": 8.445369720458984, "learning_rate": 3.741020408163265e-05, "loss": 3.1813, "step": 6175 }, { "epoch": 25.208163265306123, "grad_norm": 8.995348930358887, "learning_rate": 3.7408163265306126e-05, "loss": 2.9632, "step": 6176 }, { "epoch": 25.212244897959184, "grad_norm": 11.268915176391602, "learning_rate": 3.740612244897959e-05, "loss": 3.1017, "step": 6177 }, { "epoch": 25.216326530612246, "grad_norm": 13.186744689941406, "learning_rate": 3.740408163265307e-05, "loss": 3.1808, "step": 6178 }, { "epoch": 25.220408163265308, "grad_norm": 18.580059051513672, "learning_rate": 3.7402040816326534e-05, "loss": 2.4643, "step": 6179 }, { "epoch": 25.224489795918366, "grad_norm": 14.9129056930542, "learning_rate": 3.74e-05, "loss": 3.0632, "step": 6180 }, { "epoch": 25.228571428571428, "grad_norm": 6.60468864440918, "learning_rate": 3.739795918367347e-05, "loss": 3.7136, "step": 6181 }, { "epoch": 25.23265306122449, "grad_norm": 18.28799819946289, "learning_rate": 3.7395918367346935e-05, "loss": 2.1096, "step": 6182 }, { "epoch": 25.23673469387755, "grad_norm": 12.955047607421875, "learning_rate": 3.739387755102041e-05, "loss": 2.8568, "step": 6183 }, { "epoch": 25.240816326530613, "grad_norm": 11.375622749328613, "learning_rate": 3.739183673469388e-05, "loss": 3.0174, "step": 6184 }, { "epoch": 25.244897959183675, "grad_norm": 18.94764518737793, "learning_rate": 3.738979591836735e-05, "loss": 2.2179, "step": 6185 }, { "epoch": 25.248979591836736, "grad_norm": 15.89786148071289, "learning_rate": 3.738775510204082e-05, "loss": 2.8627, "step": 6186 }, { "epoch": 25.253061224489795, "grad_norm": 8.413773536682129, "learning_rate": 3.7385714285714284e-05, "loss": 3.1837, "step": 6187 }, { "epoch": 25.257142857142856, "grad_norm": 21.666006088256836, "learning_rate": 3.738367346938776e-05, "loss": 2.1909, "step": 6188 }, { "epoch": 25.261224489795918, "grad_norm": 6.3285603523254395, "learning_rate": 3.7381632653061226e-05, "loss": 3.5489, "step": 6189 }, { "epoch": 25.26530612244898, "grad_norm": 10.570106506347656, "learning_rate": 3.737959183673469e-05, "loss": 3.1388, "step": 6190 }, { "epoch": 25.26938775510204, "grad_norm": 17.152172088623047, "learning_rate": 3.7377551020408167e-05, "loss": 3.3183, "step": 6191 }, { "epoch": 25.273469387755103, "grad_norm": 8.091960906982422, "learning_rate": 3.7375510204081634e-05, "loss": 3.2912, "step": 6192 }, { "epoch": 25.277551020408165, "grad_norm": 14.370540618896484, "learning_rate": 3.737346938775511e-05, "loss": 2.755, "step": 6193 }, { "epoch": 25.281632653061223, "grad_norm": 17.651491165161133, "learning_rate": 3.7371428571428575e-05, "loss": 2.4854, "step": 6194 }, { "epoch": 25.285714285714285, "grad_norm": 13.031435012817383, "learning_rate": 3.736938775510204e-05, "loss": 2.8833, "step": 6195 }, { "epoch": 25.289795918367346, "grad_norm": 6.369056701660156, "learning_rate": 3.736734693877551e-05, "loss": 3.4965, "step": 6196 }, { "epoch": 25.293877551020408, "grad_norm": 11.645493507385254, "learning_rate": 3.7365306122448976e-05, "loss": 2.9259, "step": 6197 }, { "epoch": 25.29795918367347, "grad_norm": 8.761568069458008, "learning_rate": 3.736326530612245e-05, "loss": 3.031, "step": 6198 }, { "epoch": 25.30204081632653, "grad_norm": 14.55342960357666, "learning_rate": 3.7361224489795924e-05, "loss": 2.3673, "step": 6199 }, { "epoch": 25.306122448979593, "grad_norm": 20.90094566345215, "learning_rate": 3.735918367346939e-05, "loss": 2.1244, "step": 6200 }, { "epoch": 25.31020408163265, "grad_norm": 25.83299446105957, "learning_rate": 3.735714285714286e-05, "loss": 1.6293, "step": 6201 }, { "epoch": 25.314285714285713, "grad_norm": 11.258940696716309, "learning_rate": 3.735510204081633e-05, "loss": 3.1473, "step": 6202 }, { "epoch": 25.318367346938775, "grad_norm": 8.917082786560059, "learning_rate": 3.73530612244898e-05, "loss": 3.3469, "step": 6203 }, { "epoch": 25.322448979591837, "grad_norm": 14.108646392822266, "learning_rate": 3.7351020408163266e-05, "loss": 2.6349, "step": 6204 }, { "epoch": 25.3265306122449, "grad_norm": 11.360673904418945, "learning_rate": 3.7348979591836733e-05, "loss": 3.1892, "step": 6205 }, { "epoch": 25.33061224489796, "grad_norm": 17.77019691467285, "learning_rate": 3.734693877551021e-05, "loss": 2.5154, "step": 6206 }, { "epoch": 25.33469387755102, "grad_norm": 6.453594207763672, "learning_rate": 3.734489795918368e-05, "loss": 3.6231, "step": 6207 }, { "epoch": 25.33877551020408, "grad_norm": 22.024627685546875, "learning_rate": 3.734285714285715e-05, "loss": 3.0108, "step": 6208 }, { "epoch": 25.34285714285714, "grad_norm": 17.906673431396484, "learning_rate": 3.7340816326530615e-05, "loss": 1.6638, "step": 6209 }, { "epoch": 25.346938775510203, "grad_norm": 12.577657699584961, "learning_rate": 3.733877551020408e-05, "loss": 2.8788, "step": 6210 }, { "epoch": 25.351020408163265, "grad_norm": 12.234051704406738, "learning_rate": 3.733673469387755e-05, "loss": 2.9837, "step": 6211 }, { "epoch": 25.355102040816327, "grad_norm": 10.415160179138184, "learning_rate": 3.7334693877551024e-05, "loss": 2.9418, "step": 6212 }, { "epoch": 25.35918367346939, "grad_norm": 11.778045654296875, "learning_rate": 3.733265306122449e-05, "loss": 2.9613, "step": 6213 }, { "epoch": 25.36326530612245, "grad_norm": 9.142240524291992, "learning_rate": 3.733061224489796e-05, "loss": 3.4428, "step": 6214 }, { "epoch": 25.367346938775512, "grad_norm": 8.99400520324707, "learning_rate": 3.732857142857143e-05, "loss": 3.3724, "step": 6215 }, { "epoch": 25.37142857142857, "grad_norm": 23.3885555267334, "learning_rate": 3.73265306122449e-05, "loss": 1.7864, "step": 6216 }, { "epoch": 25.375510204081632, "grad_norm": 20.99660873413086, "learning_rate": 3.732448979591837e-05, "loss": 2.1839, "step": 6217 }, { "epoch": 25.379591836734694, "grad_norm": 14.910802841186523, "learning_rate": 3.732244897959184e-05, "loss": 3.0766, "step": 6218 }, { "epoch": 25.383673469387755, "grad_norm": 9.00574016571045, "learning_rate": 3.732040816326531e-05, "loss": 3.2064, "step": 6219 }, { "epoch": 25.387755102040817, "grad_norm": 7.9548540115356445, "learning_rate": 3.7318367346938774e-05, "loss": 3.4736, "step": 6220 }, { "epoch": 25.39183673469388, "grad_norm": 9.596238136291504, "learning_rate": 3.731632653061224e-05, "loss": 3.2175, "step": 6221 }, { "epoch": 25.39591836734694, "grad_norm": 14.314981460571289, "learning_rate": 3.7314285714285715e-05, "loss": 2.4286, "step": 6222 }, { "epoch": 25.4, "grad_norm": 21.198017120361328, "learning_rate": 3.731224489795919e-05, "loss": 2.651, "step": 6223 }, { "epoch": 25.40408163265306, "grad_norm": 11.476414680480957, "learning_rate": 3.7310204081632656e-05, "loss": 2.6678, "step": 6224 }, { "epoch": 25.408163265306122, "grad_norm": 21.384490966796875, "learning_rate": 3.730816326530612e-05, "loss": 2.7197, "step": 6225 }, { "epoch": 25.412244897959184, "grad_norm": 12.593059539794922, "learning_rate": 3.730612244897959e-05, "loss": 2.8905, "step": 6226 }, { "epoch": 25.416326530612245, "grad_norm": 9.005276679992676, "learning_rate": 3.7304081632653064e-05, "loss": 3.1869, "step": 6227 }, { "epoch": 25.420408163265307, "grad_norm": 15.209820747375488, "learning_rate": 3.730204081632653e-05, "loss": 2.2196, "step": 6228 }, { "epoch": 25.42448979591837, "grad_norm": 14.338330268859863, "learning_rate": 3.73e-05, "loss": 2.858, "step": 6229 }, { "epoch": 25.428571428571427, "grad_norm": 7.929773330688477, "learning_rate": 3.729795918367347e-05, "loss": 3.1482, "step": 6230 }, { "epoch": 25.43265306122449, "grad_norm": 7.266107559204102, "learning_rate": 3.729591836734694e-05, "loss": 3.4699, "step": 6231 }, { "epoch": 25.43673469387755, "grad_norm": 18.204673767089844, "learning_rate": 3.7293877551020414e-05, "loss": 2.4603, "step": 6232 }, { "epoch": 25.440816326530612, "grad_norm": 13.32732105255127, "learning_rate": 3.729183673469388e-05, "loss": 2.7397, "step": 6233 }, { "epoch": 25.444897959183674, "grad_norm": 16.98843765258789, "learning_rate": 3.728979591836735e-05, "loss": 2.5261, "step": 6234 }, { "epoch": 25.448979591836736, "grad_norm": 22.00361442565918, "learning_rate": 3.7287755102040815e-05, "loss": 2.3664, "step": 6235 }, { "epoch": 25.453061224489797, "grad_norm": 11.783785820007324, "learning_rate": 3.728571428571428e-05, "loss": 3.0246, "step": 6236 }, { "epoch": 25.457142857142856, "grad_norm": 26.923450469970703, "learning_rate": 3.7283673469387756e-05, "loss": 1.9637, "step": 6237 }, { "epoch": 25.461224489795917, "grad_norm": 9.470069885253906, "learning_rate": 3.728163265306123e-05, "loss": 3.436, "step": 6238 }, { "epoch": 25.46530612244898, "grad_norm": 11.59900951385498, "learning_rate": 3.72795918367347e-05, "loss": 2.7765, "step": 6239 }, { "epoch": 25.46938775510204, "grad_norm": 11.465776443481445, "learning_rate": 3.7277551020408164e-05, "loss": 2.8201, "step": 6240 }, { "epoch": 25.473469387755102, "grad_norm": 14.494221687316895, "learning_rate": 3.727551020408163e-05, "loss": 2.7726, "step": 6241 }, { "epoch": 25.477551020408164, "grad_norm": 13.091142654418945, "learning_rate": 3.7273469387755105e-05, "loss": 2.8555, "step": 6242 }, { "epoch": 25.481632653061226, "grad_norm": 10.901582717895508, "learning_rate": 3.727142857142857e-05, "loss": 3.2565, "step": 6243 }, { "epoch": 25.485714285714284, "grad_norm": 14.398235321044922, "learning_rate": 3.726938775510204e-05, "loss": 2.9618, "step": 6244 }, { "epoch": 25.489795918367346, "grad_norm": 11.621671676635742, "learning_rate": 3.726734693877551e-05, "loss": 2.9176, "step": 6245 }, { "epoch": 25.493877551020407, "grad_norm": 26.99817657470703, "learning_rate": 3.726530612244899e-05, "loss": 2.1303, "step": 6246 }, { "epoch": 25.49795918367347, "grad_norm": 17.36751365661621, "learning_rate": 3.7263265306122454e-05, "loss": 2.4735, "step": 6247 }, { "epoch": 25.50204081632653, "grad_norm": 17.50054359436035, "learning_rate": 3.726122448979592e-05, "loss": 2.3443, "step": 6248 }, { "epoch": 25.506122448979593, "grad_norm": 7.077019691467285, "learning_rate": 3.725918367346939e-05, "loss": 3.5411, "step": 6249 }, { "epoch": 25.510204081632654, "grad_norm": 6.470124244689941, "learning_rate": 3.7257142857142856e-05, "loss": 3.7674, "step": 6250 }, { "epoch": 25.514285714285712, "grad_norm": 5.969508171081543, "learning_rate": 3.725510204081633e-05, "loss": 3.8722, "step": 6251 }, { "epoch": 25.518367346938774, "grad_norm": 14.24734115600586, "learning_rate": 3.72530612244898e-05, "loss": 2.8574, "step": 6252 }, { "epoch": 25.522448979591836, "grad_norm": 10.524158477783203, "learning_rate": 3.725102040816327e-05, "loss": 3.238, "step": 6253 }, { "epoch": 25.526530612244898, "grad_norm": 9.115243911743164, "learning_rate": 3.724897959183674e-05, "loss": 3.3288, "step": 6254 }, { "epoch": 25.53061224489796, "grad_norm": 23.767974853515625, "learning_rate": 3.7246938775510205e-05, "loss": 1.9619, "step": 6255 }, { "epoch": 25.53469387755102, "grad_norm": 23.097360610961914, "learning_rate": 3.724489795918368e-05, "loss": 2.2274, "step": 6256 }, { "epoch": 25.538775510204083, "grad_norm": 8.341228485107422, "learning_rate": 3.7242857142857146e-05, "loss": 3.3191, "step": 6257 }, { "epoch": 25.542857142857144, "grad_norm": 10.758482933044434, "learning_rate": 3.724081632653061e-05, "loss": 3.2641, "step": 6258 }, { "epoch": 25.546938775510203, "grad_norm": 7.347415924072266, "learning_rate": 3.723877551020408e-05, "loss": 3.5157, "step": 6259 }, { "epoch": 25.551020408163264, "grad_norm": 14.510908126831055, "learning_rate": 3.7236734693877554e-05, "loss": 3.0369, "step": 6260 }, { "epoch": 25.555102040816326, "grad_norm": 22.982114791870117, "learning_rate": 3.723469387755102e-05, "loss": 2.1264, "step": 6261 }, { "epoch": 25.559183673469388, "grad_norm": 15.74643611907959, "learning_rate": 3.7232653061224495e-05, "loss": 3.0252, "step": 6262 }, { "epoch": 25.56326530612245, "grad_norm": 19.15017318725586, "learning_rate": 3.723061224489796e-05, "loss": 2.0342, "step": 6263 }, { "epoch": 25.56734693877551, "grad_norm": 17.76136589050293, "learning_rate": 3.722857142857143e-05, "loss": 2.5393, "step": 6264 }, { "epoch": 25.571428571428573, "grad_norm": 11.00013542175293, "learning_rate": 3.7226530612244896e-05, "loss": 3.1996, "step": 6265 }, { "epoch": 25.57551020408163, "grad_norm": 13.33716869354248, "learning_rate": 3.722448979591837e-05, "loss": 3.1568, "step": 6266 }, { "epoch": 25.579591836734693, "grad_norm": 9.045112609863281, "learning_rate": 3.722244897959184e-05, "loss": 3.0802, "step": 6267 }, { "epoch": 25.583673469387755, "grad_norm": 14.403728485107422, "learning_rate": 3.7220408163265305e-05, "loss": 2.4349, "step": 6268 }, { "epoch": 25.587755102040816, "grad_norm": 7.011876583099365, "learning_rate": 3.721836734693878e-05, "loss": 3.4501, "step": 6269 }, { "epoch": 25.591836734693878, "grad_norm": 21.620141983032227, "learning_rate": 3.7216326530612246e-05, "loss": 2.3085, "step": 6270 }, { "epoch": 25.59591836734694, "grad_norm": 26.939285278320312, "learning_rate": 3.721428571428572e-05, "loss": 2.3682, "step": 6271 }, { "epoch": 25.6, "grad_norm": 11.665283203125, "learning_rate": 3.721224489795919e-05, "loss": 3.1044, "step": 6272 }, { "epoch": 25.60408163265306, "grad_norm": 7.229358196258545, "learning_rate": 3.7210204081632654e-05, "loss": 3.1023, "step": 6273 }, { "epoch": 25.60816326530612, "grad_norm": 8.710824966430664, "learning_rate": 3.720816326530612e-05, "loss": 3.2841, "step": 6274 }, { "epoch": 25.612244897959183, "grad_norm": 7.289457321166992, "learning_rate": 3.720612244897959e-05, "loss": 3.6557, "step": 6275 }, { "epoch": 25.616326530612245, "grad_norm": 21.661495208740234, "learning_rate": 3.720408163265306e-05, "loss": 2.322, "step": 6276 }, { "epoch": 25.620408163265306, "grad_norm": 11.95775318145752, "learning_rate": 3.7202040816326536e-05, "loss": 3.2002, "step": 6277 }, { "epoch": 25.624489795918368, "grad_norm": 7.170121669769287, "learning_rate": 3.72e-05, "loss": 3.5839, "step": 6278 }, { "epoch": 25.62857142857143, "grad_norm": 10.932831764221191, "learning_rate": 3.719795918367347e-05, "loss": 3.1397, "step": 6279 }, { "epoch": 25.632653061224488, "grad_norm": 9.489460945129395, "learning_rate": 3.719591836734694e-05, "loss": 3.2835, "step": 6280 }, { "epoch": 25.63673469387755, "grad_norm": 34.18141555786133, "learning_rate": 3.719387755102041e-05, "loss": 1.4567, "step": 6281 }, { "epoch": 25.64081632653061, "grad_norm": 9.933626174926758, "learning_rate": 3.719183673469388e-05, "loss": 3.0132, "step": 6282 }, { "epoch": 25.644897959183673, "grad_norm": 8.874939918518066, "learning_rate": 3.7189795918367345e-05, "loss": 3.1398, "step": 6283 }, { "epoch": 25.648979591836735, "grad_norm": 7.232555389404297, "learning_rate": 3.718775510204082e-05, "loss": 3.4617, "step": 6284 }, { "epoch": 25.653061224489797, "grad_norm": 9.251986503601074, "learning_rate": 3.7185714285714286e-05, "loss": 3.3427, "step": 6285 }, { "epoch": 25.65714285714286, "grad_norm": 12.12287712097168, "learning_rate": 3.718367346938776e-05, "loss": 2.7325, "step": 6286 }, { "epoch": 25.66122448979592, "grad_norm": 14.56489372253418, "learning_rate": 3.718163265306123e-05, "loss": 2.1469, "step": 6287 }, { "epoch": 25.66530612244898, "grad_norm": 7.9339189529418945, "learning_rate": 3.7179591836734695e-05, "loss": 3.2332, "step": 6288 }, { "epoch": 25.66938775510204, "grad_norm": 9.900714874267578, "learning_rate": 3.717755102040816e-05, "loss": 3.0939, "step": 6289 }, { "epoch": 25.6734693877551, "grad_norm": 14.191561698913574, "learning_rate": 3.7175510204081636e-05, "loss": 2.4185, "step": 6290 }, { "epoch": 25.677551020408163, "grad_norm": 9.447969436645508, "learning_rate": 3.71734693877551e-05, "loss": 3.2979, "step": 6291 }, { "epoch": 25.681632653061225, "grad_norm": 9.419601440429688, "learning_rate": 3.717142857142858e-05, "loss": 3.0853, "step": 6292 }, { "epoch": 25.685714285714287, "grad_norm": 13.13029670715332, "learning_rate": 3.7169387755102044e-05, "loss": 2.9614, "step": 6293 }, { "epoch": 25.68979591836735, "grad_norm": 11.811966896057129, "learning_rate": 3.716734693877551e-05, "loss": 3.5391, "step": 6294 }, { "epoch": 25.693877551020407, "grad_norm": 19.13620376586914, "learning_rate": 3.7165306122448985e-05, "loss": 1.6538, "step": 6295 }, { "epoch": 25.69795918367347, "grad_norm": 9.4580659866333, "learning_rate": 3.716326530612245e-05, "loss": 3.369, "step": 6296 }, { "epoch": 25.70204081632653, "grad_norm": 26.0861873626709, "learning_rate": 3.716122448979592e-05, "loss": 2.3614, "step": 6297 }, { "epoch": 25.706122448979592, "grad_norm": 27.05689811706543, "learning_rate": 3.7159183673469386e-05, "loss": 1.6282, "step": 6298 }, { "epoch": 25.710204081632654, "grad_norm": 10.198637008666992, "learning_rate": 3.715714285714286e-05, "loss": 3.2384, "step": 6299 }, { "epoch": 25.714285714285715, "grad_norm": 12.485481262207031, "learning_rate": 3.7155102040816334e-05, "loss": 2.664, "step": 6300 }, { "epoch": 25.718367346938777, "grad_norm": 9.112170219421387, "learning_rate": 3.71530612244898e-05, "loss": 3.2242, "step": 6301 }, { "epoch": 25.722448979591835, "grad_norm": 7.273221015930176, "learning_rate": 3.715102040816327e-05, "loss": 3.4827, "step": 6302 }, { "epoch": 25.726530612244897, "grad_norm": 14.277087211608887, "learning_rate": 3.7148979591836735e-05, "loss": 3.0035, "step": 6303 }, { "epoch": 25.73061224489796, "grad_norm": 7.6441264152526855, "learning_rate": 3.71469387755102e-05, "loss": 3.3811, "step": 6304 }, { "epoch": 25.73469387755102, "grad_norm": 8.087793350219727, "learning_rate": 3.7144897959183676e-05, "loss": 3.4992, "step": 6305 }, { "epoch": 25.738775510204082, "grad_norm": 6.08420991897583, "learning_rate": 3.7142857142857143e-05, "loss": 3.8011, "step": 6306 }, { "epoch": 25.742857142857144, "grad_norm": 19.753023147583008, "learning_rate": 3.714081632653062e-05, "loss": 2.2944, "step": 6307 }, { "epoch": 25.746938775510205, "grad_norm": 9.00516128540039, "learning_rate": 3.7138775510204085e-05, "loss": 3.2487, "step": 6308 }, { "epoch": 25.751020408163264, "grad_norm": 18.562698364257812, "learning_rate": 3.713673469387755e-05, "loss": 2.7282, "step": 6309 }, { "epoch": 25.755102040816325, "grad_norm": 8.018732070922852, "learning_rate": 3.7134693877551026e-05, "loss": 3.4052, "step": 6310 }, { "epoch": 25.759183673469387, "grad_norm": 19.532793045043945, "learning_rate": 3.713265306122449e-05, "loss": 2.6667, "step": 6311 }, { "epoch": 25.76326530612245, "grad_norm": 12.355335235595703, "learning_rate": 3.713061224489796e-05, "loss": 3.3164, "step": 6312 }, { "epoch": 25.76734693877551, "grad_norm": 16.106966018676758, "learning_rate": 3.712857142857143e-05, "loss": 2.7099, "step": 6313 }, { "epoch": 25.771428571428572, "grad_norm": 15.934677124023438, "learning_rate": 3.71265306122449e-05, "loss": 2.663, "step": 6314 }, { "epoch": 25.775510204081634, "grad_norm": 20.76049041748047, "learning_rate": 3.712448979591837e-05, "loss": 1.5163, "step": 6315 }, { "epoch": 25.779591836734696, "grad_norm": 12.683111190795898, "learning_rate": 3.712244897959184e-05, "loss": 2.6364, "step": 6316 }, { "epoch": 25.783673469387754, "grad_norm": 11.872920989990234, "learning_rate": 3.712040816326531e-05, "loss": 3.1833, "step": 6317 }, { "epoch": 25.787755102040816, "grad_norm": 24.486473083496094, "learning_rate": 3.7118367346938776e-05, "loss": 2.1691, "step": 6318 }, { "epoch": 25.791836734693877, "grad_norm": 35.60346984863281, "learning_rate": 3.711632653061224e-05, "loss": 1.6253, "step": 6319 }, { "epoch": 25.79591836734694, "grad_norm": 8.14885139465332, "learning_rate": 3.711428571428572e-05, "loss": 3.3172, "step": 6320 }, { "epoch": 25.8, "grad_norm": 14.18333911895752, "learning_rate": 3.7112244897959184e-05, "loss": 2.9881, "step": 6321 }, { "epoch": 25.804081632653062, "grad_norm": 9.429261207580566, "learning_rate": 3.711020408163265e-05, "loss": 3.2828, "step": 6322 }, { "epoch": 25.808163265306124, "grad_norm": 22.83118438720703, "learning_rate": 3.7108163265306125e-05, "loss": 1.341, "step": 6323 }, { "epoch": 25.812244897959182, "grad_norm": 14.26904296875, "learning_rate": 3.710612244897959e-05, "loss": 2.813, "step": 6324 }, { "epoch": 25.816326530612244, "grad_norm": 13.152350425720215, "learning_rate": 3.7104081632653066e-05, "loss": 2.9962, "step": 6325 }, { "epoch": 25.820408163265306, "grad_norm": 13.075668334960938, "learning_rate": 3.7102040816326533e-05, "loss": 2.9644, "step": 6326 }, { "epoch": 25.824489795918367, "grad_norm": 16.705869674682617, "learning_rate": 3.71e-05, "loss": 3.0367, "step": 6327 }, { "epoch": 25.82857142857143, "grad_norm": 13.37401008605957, "learning_rate": 3.709795918367347e-05, "loss": 2.9112, "step": 6328 }, { "epoch": 25.83265306122449, "grad_norm": 9.261760711669922, "learning_rate": 3.7095918367346935e-05, "loss": 3.2422, "step": 6329 }, { "epoch": 25.836734693877553, "grad_norm": 8.016461372375488, "learning_rate": 3.709387755102041e-05, "loss": 3.6231, "step": 6330 }, { "epoch": 25.84081632653061, "grad_norm": 10.657309532165527, "learning_rate": 3.709183673469388e-05, "loss": 3.0002, "step": 6331 }, { "epoch": 25.844897959183672, "grad_norm": 8.417279243469238, "learning_rate": 3.708979591836735e-05, "loss": 3.6217, "step": 6332 }, { "epoch": 25.848979591836734, "grad_norm": 7.436276912689209, "learning_rate": 3.708775510204082e-05, "loss": 3.2556, "step": 6333 }, { "epoch": 25.853061224489796, "grad_norm": 17.730226516723633, "learning_rate": 3.7085714285714284e-05, "loss": 2.5405, "step": 6334 }, { "epoch": 25.857142857142858, "grad_norm": 14.2410249710083, "learning_rate": 3.708367346938776e-05, "loss": 2.815, "step": 6335 }, { "epoch": 25.86122448979592, "grad_norm": 19.80784797668457, "learning_rate": 3.7081632653061225e-05, "loss": 2.5072, "step": 6336 }, { "epoch": 25.86530612244898, "grad_norm": 8.613065719604492, "learning_rate": 3.707959183673469e-05, "loss": 3.4124, "step": 6337 }, { "epoch": 25.86938775510204, "grad_norm": 16.87989044189453, "learning_rate": 3.7077551020408166e-05, "loss": 2.2757, "step": 6338 }, { "epoch": 25.8734693877551, "grad_norm": 8.469991683959961, "learning_rate": 3.707551020408164e-05, "loss": 3.4755, "step": 6339 }, { "epoch": 25.877551020408163, "grad_norm": 20.884815216064453, "learning_rate": 3.707346938775511e-05, "loss": 2.4339, "step": 6340 }, { "epoch": 25.881632653061224, "grad_norm": 11.024480819702148, "learning_rate": 3.7071428571428574e-05, "loss": 3.0999, "step": 6341 }, { "epoch": 25.885714285714286, "grad_norm": 6.514104843139648, "learning_rate": 3.706938775510204e-05, "loss": 3.596, "step": 6342 }, { "epoch": 25.889795918367348, "grad_norm": 18.46980857849121, "learning_rate": 3.706734693877551e-05, "loss": 2.4649, "step": 6343 }, { "epoch": 25.89387755102041, "grad_norm": 23.386310577392578, "learning_rate": 3.706530612244898e-05, "loss": 1.7118, "step": 6344 }, { "epoch": 25.897959183673468, "grad_norm": 15.533377647399902, "learning_rate": 3.706326530612245e-05, "loss": 2.6992, "step": 6345 }, { "epoch": 25.90204081632653, "grad_norm": 13.60688304901123, "learning_rate": 3.706122448979592e-05, "loss": 2.7568, "step": 6346 }, { "epoch": 25.90612244897959, "grad_norm": 9.641173362731934, "learning_rate": 3.705918367346939e-05, "loss": 3.4438, "step": 6347 }, { "epoch": 25.910204081632653, "grad_norm": 22.813034057617188, "learning_rate": 3.705714285714286e-05, "loss": 1.4418, "step": 6348 }, { "epoch": 25.914285714285715, "grad_norm": 12.335103034973145, "learning_rate": 3.705510204081633e-05, "loss": 2.6325, "step": 6349 }, { "epoch": 25.918367346938776, "grad_norm": 12.636456489562988, "learning_rate": 3.70530612244898e-05, "loss": 2.8336, "step": 6350 }, { "epoch": 25.922448979591838, "grad_norm": 12.461252212524414, "learning_rate": 3.7051020408163266e-05, "loss": 2.9001, "step": 6351 }, { "epoch": 25.926530612244896, "grad_norm": 9.228386878967285, "learning_rate": 3.704897959183673e-05, "loss": 3.3149, "step": 6352 }, { "epoch": 25.930612244897958, "grad_norm": 12.820366859436035, "learning_rate": 3.704693877551021e-05, "loss": 2.7904, "step": 6353 }, { "epoch": 25.93469387755102, "grad_norm": 26.799457550048828, "learning_rate": 3.704489795918368e-05, "loss": 2.4028, "step": 6354 }, { "epoch": 25.93877551020408, "grad_norm": 12.115894317626953, "learning_rate": 3.704285714285715e-05, "loss": 3.1532, "step": 6355 }, { "epoch": 25.942857142857143, "grad_norm": 10.70714282989502, "learning_rate": 3.7040816326530615e-05, "loss": 3.1007, "step": 6356 }, { "epoch": 25.946938775510205, "grad_norm": 11.453574180603027, "learning_rate": 3.703877551020408e-05, "loss": 3.121, "step": 6357 }, { "epoch": 25.951020408163266, "grad_norm": 8.203521728515625, "learning_rate": 3.703673469387755e-05, "loss": 3.2767, "step": 6358 }, { "epoch": 25.955102040816328, "grad_norm": 11.486546516418457, "learning_rate": 3.703469387755102e-05, "loss": 3.1792, "step": 6359 }, { "epoch": 25.959183673469386, "grad_norm": 6.997166156768799, "learning_rate": 3.703265306122449e-05, "loss": 3.6615, "step": 6360 }, { "epoch": 25.963265306122448, "grad_norm": 13.852197647094727, "learning_rate": 3.7030612244897964e-05, "loss": 2.8746, "step": 6361 }, { "epoch": 25.96734693877551, "grad_norm": 24.06423568725586, "learning_rate": 3.702857142857143e-05, "loss": 2.3055, "step": 6362 }, { "epoch": 25.97142857142857, "grad_norm": 12.572092056274414, "learning_rate": 3.70265306122449e-05, "loss": 3.1458, "step": 6363 }, { "epoch": 25.975510204081633, "grad_norm": 9.434945106506348, "learning_rate": 3.702448979591837e-05, "loss": 3.3192, "step": 6364 }, { "epoch": 25.979591836734695, "grad_norm": 25.478227615356445, "learning_rate": 3.702244897959184e-05, "loss": 1.3133, "step": 6365 }, { "epoch": 25.983673469387757, "grad_norm": 28.513648986816406, "learning_rate": 3.7020408163265307e-05, "loss": 1.191, "step": 6366 }, { "epoch": 25.987755102040815, "grad_norm": 7.972414493560791, "learning_rate": 3.7018367346938774e-05, "loss": 3.2793, "step": 6367 }, { "epoch": 25.991836734693877, "grad_norm": 9.420109748840332, "learning_rate": 3.701632653061225e-05, "loss": 3.1779, "step": 6368 }, { "epoch": 25.99591836734694, "grad_norm": 21.38880729675293, "learning_rate": 3.7014285714285715e-05, "loss": 2.5202, "step": 6369 }, { "epoch": 26.0, "grad_norm": 11.838739395141602, "learning_rate": 3.701224489795919e-05, "loss": 3.2311, "step": 6370 }, { "epoch": 26.00408163265306, "grad_norm": 15.111812591552734, "learning_rate": 3.7010204081632656e-05, "loss": 2.8146, "step": 6371 }, { "epoch": 26.008163265306123, "grad_norm": 7.2861857414245605, "learning_rate": 3.700816326530612e-05, "loss": 3.408, "step": 6372 }, { "epoch": 26.012244897959185, "grad_norm": 9.69088077545166, "learning_rate": 3.700612244897959e-05, "loss": 3.1409, "step": 6373 }, { "epoch": 26.016326530612243, "grad_norm": 8.45211410522461, "learning_rate": 3.7004081632653064e-05, "loss": 3.2117, "step": 6374 }, { "epoch": 26.020408163265305, "grad_norm": 8.711809158325195, "learning_rate": 3.700204081632653e-05, "loss": 3.0915, "step": 6375 }, { "epoch": 26.024489795918367, "grad_norm": 11.456433296203613, "learning_rate": 3.7e-05, "loss": 2.9285, "step": 6376 }, { "epoch": 26.02857142857143, "grad_norm": 25.908395767211914, "learning_rate": 3.699795918367347e-05, "loss": 1.6915, "step": 6377 }, { "epoch": 26.03265306122449, "grad_norm": 14.186384201049805, "learning_rate": 3.699591836734694e-05, "loss": 2.6118, "step": 6378 }, { "epoch": 26.036734693877552, "grad_norm": 17.719953536987305, "learning_rate": 3.699387755102041e-05, "loss": 2.1763, "step": 6379 }, { "epoch": 26.040816326530614, "grad_norm": 21.189380645751953, "learning_rate": 3.699183673469388e-05, "loss": 2.5541, "step": 6380 }, { "epoch": 26.044897959183672, "grad_norm": 8.29588508605957, "learning_rate": 3.698979591836735e-05, "loss": 3.4998, "step": 6381 }, { "epoch": 26.048979591836734, "grad_norm": 15.65904426574707, "learning_rate": 3.6987755102040814e-05, "loss": 2.8753, "step": 6382 }, { "epoch": 26.053061224489795, "grad_norm": NaN, "learning_rate": 3.698571428571429e-05, "loss": 1.2725, "step": 6383 }, { "epoch": 26.057142857142857, "grad_norm": 16.701404571533203, "learning_rate": 3.698571428571429e-05, "loss": 2.3849, "step": 6384 }, { "epoch": 26.06122448979592, "grad_norm": 10.936339378356934, "learning_rate": 3.6983673469387755e-05, "loss": 2.9446, "step": 6385 }, { "epoch": 26.06530612244898, "grad_norm": 13.917464256286621, "learning_rate": 3.698163265306123e-05, "loss": 2.5102, "step": 6386 }, { "epoch": 26.069387755102042, "grad_norm": 10.998281478881836, "learning_rate": 3.6979591836734696e-05, "loss": 2.9083, "step": 6387 }, { "epoch": 26.073469387755104, "grad_norm": 13.976533889770508, "learning_rate": 3.6977551020408164e-05, "loss": 3.1403, "step": 6388 }, { "epoch": 26.077551020408162, "grad_norm": 9.63923168182373, "learning_rate": 3.697551020408164e-05, "loss": 3.3184, "step": 6389 }, { "epoch": 26.081632653061224, "grad_norm": 26.06510353088379, "learning_rate": 3.6973469387755105e-05, "loss": 2.9735, "step": 6390 }, { "epoch": 26.085714285714285, "grad_norm": 23.656869888305664, "learning_rate": 3.697142857142857e-05, "loss": 2.2175, "step": 6391 }, { "epoch": 26.089795918367347, "grad_norm": 10.251351356506348, "learning_rate": 3.696938775510204e-05, "loss": 3.0066, "step": 6392 }, { "epoch": 26.09387755102041, "grad_norm": 16.564573287963867, "learning_rate": 3.696734693877551e-05, "loss": 2.2905, "step": 6393 }, { "epoch": 26.09795918367347, "grad_norm": 16.71468162536621, "learning_rate": 3.696530612244899e-05, "loss": 2.605, "step": 6394 }, { "epoch": 26.102040816326532, "grad_norm": 10.309269905090332, "learning_rate": 3.6963265306122454e-05, "loss": 2.9743, "step": 6395 }, { "epoch": 26.10612244897959, "grad_norm": 12.246085166931152, "learning_rate": 3.696122448979592e-05, "loss": 3.0182, "step": 6396 }, { "epoch": 26.110204081632652, "grad_norm": 27.333749771118164, "learning_rate": 3.695918367346939e-05, "loss": 2.1319, "step": 6397 }, { "epoch": 26.114285714285714, "grad_norm": 9.736226081848145, "learning_rate": 3.6957142857142855e-05, "loss": 3.1854, "step": 6398 }, { "epoch": 26.118367346938776, "grad_norm": 34.788238525390625, "learning_rate": 3.695510204081633e-05, "loss": 2.1468, "step": 6399 }, { "epoch": 26.122448979591837, "grad_norm": 13.46840763092041, "learning_rate": 3.6953061224489796e-05, "loss": 2.869, "step": 6400 }, { "epoch": 26.1265306122449, "grad_norm": 20.855085372924805, "learning_rate": 3.695102040816327e-05, "loss": 1.8828, "step": 6401 }, { "epoch": 26.13061224489796, "grad_norm": 12.234580993652344, "learning_rate": 3.694897959183674e-05, "loss": 2.8153, "step": 6402 }, { "epoch": 26.13469387755102, "grad_norm": 10.059844017028809, "learning_rate": 3.6946938775510204e-05, "loss": 3.1887, "step": 6403 }, { "epoch": 26.13877551020408, "grad_norm": 10.275270462036133, "learning_rate": 3.694489795918368e-05, "loss": 3.2937, "step": 6404 }, { "epoch": 26.142857142857142, "grad_norm": 22.28706932067871, "learning_rate": 3.6942857142857145e-05, "loss": 2.601, "step": 6405 }, { "epoch": 26.146938775510204, "grad_norm": 22.512895584106445, "learning_rate": 3.694081632653061e-05, "loss": 2.1513, "step": 6406 }, { "epoch": 26.151020408163266, "grad_norm": 10.341752052307129, "learning_rate": 3.693877551020408e-05, "loss": 3.3445, "step": 6407 }, { "epoch": 26.155102040816328, "grad_norm": 10.478778839111328, "learning_rate": 3.6936734693877554e-05, "loss": 3.1402, "step": 6408 }, { "epoch": 26.15918367346939, "grad_norm": 20.446537017822266, "learning_rate": 3.693469387755103e-05, "loss": 2.1742, "step": 6409 }, { "epoch": 26.163265306122447, "grad_norm": 19.05763816833496, "learning_rate": 3.6932653061224495e-05, "loss": 2.3038, "step": 6410 }, { "epoch": 26.16734693877551, "grad_norm": 18.287439346313477, "learning_rate": 3.693061224489796e-05, "loss": 2.4439, "step": 6411 }, { "epoch": 26.17142857142857, "grad_norm": 13.804206848144531, "learning_rate": 3.692857142857143e-05, "loss": 2.6358, "step": 6412 }, { "epoch": 26.175510204081633, "grad_norm": 14.865493774414062, "learning_rate": 3.6926530612244896e-05, "loss": 2.6491, "step": 6413 }, { "epoch": 26.179591836734694, "grad_norm": 9.840089797973633, "learning_rate": 3.692448979591837e-05, "loss": 3.0271, "step": 6414 }, { "epoch": 26.183673469387756, "grad_norm": 14.770448684692383, "learning_rate": 3.692244897959184e-05, "loss": 2.6948, "step": 6415 }, { "epoch": 26.187755102040818, "grad_norm": 15.550610542297363, "learning_rate": 3.692040816326531e-05, "loss": 2.8081, "step": 6416 }, { "epoch": 26.191836734693876, "grad_norm": 20.555484771728516, "learning_rate": 3.691836734693878e-05, "loss": 2.2265, "step": 6417 }, { "epoch": 26.195918367346938, "grad_norm": 12.407007217407227, "learning_rate": 3.6916326530612245e-05, "loss": 2.7753, "step": 6418 }, { "epoch": 26.2, "grad_norm": 21.535978317260742, "learning_rate": 3.691428571428572e-05, "loss": 2.049, "step": 6419 }, { "epoch": 26.20408163265306, "grad_norm": 18.484291076660156, "learning_rate": 3.6912244897959186e-05, "loss": 2.569, "step": 6420 }, { "epoch": 26.208163265306123, "grad_norm": 7.706765651702881, "learning_rate": 3.691020408163265e-05, "loss": 3.3784, "step": 6421 }, { "epoch": 26.212244897959184, "grad_norm": 13.447216033935547, "learning_rate": 3.690816326530612e-05, "loss": 2.7754, "step": 6422 }, { "epoch": 26.216326530612246, "grad_norm": 19.508939743041992, "learning_rate": 3.6906122448979594e-05, "loss": 1.4181, "step": 6423 }, { "epoch": 26.220408163265308, "grad_norm": 22.84441566467285, "learning_rate": 3.690408163265306e-05, "loss": 2.0579, "step": 6424 }, { "epoch": 26.224489795918366, "grad_norm": 7.937362194061279, "learning_rate": 3.6902040816326535e-05, "loss": 3.51, "step": 6425 }, { "epoch": 26.228571428571428, "grad_norm": 7.760074615478516, "learning_rate": 3.69e-05, "loss": 3.5714, "step": 6426 }, { "epoch": 26.23265306122449, "grad_norm": 12.267029762268066, "learning_rate": 3.689795918367347e-05, "loss": 3.0923, "step": 6427 }, { "epoch": 26.23673469387755, "grad_norm": 19.159034729003906, "learning_rate": 3.6895918367346943e-05, "loss": 2.2121, "step": 6428 }, { "epoch": 26.240816326530613, "grad_norm": 7.098235607147217, "learning_rate": 3.689387755102041e-05, "loss": 3.4897, "step": 6429 }, { "epoch": 26.244897959183675, "grad_norm": 14.977972984313965, "learning_rate": 3.689183673469388e-05, "loss": 2.7497, "step": 6430 }, { "epoch": 26.248979591836736, "grad_norm": 19.329893112182617, "learning_rate": 3.6889795918367345e-05, "loss": 2.1419, "step": 6431 }, { "epoch": 26.253061224489795, "grad_norm": 12.14674186706543, "learning_rate": 3.688775510204082e-05, "loss": 2.7536, "step": 6432 }, { "epoch": 26.257142857142856, "grad_norm": 10.162967681884766, "learning_rate": 3.688571428571429e-05, "loss": 2.732, "step": 6433 }, { "epoch": 26.261224489795918, "grad_norm": 21.202619552612305, "learning_rate": 3.688367346938776e-05, "loss": 2.1792, "step": 6434 }, { "epoch": 26.26530612244898, "grad_norm": 9.791357040405273, "learning_rate": 3.688163265306123e-05, "loss": 3.3977, "step": 6435 }, { "epoch": 26.26938775510204, "grad_norm": 11.992026329040527, "learning_rate": 3.6879591836734694e-05, "loss": 3.1163, "step": 6436 }, { "epoch": 26.273469387755103, "grad_norm": 17.48114776611328, "learning_rate": 3.687755102040816e-05, "loss": 2.3367, "step": 6437 }, { "epoch": 26.277551020408165, "grad_norm": 10.831780433654785, "learning_rate": 3.6875510204081635e-05, "loss": 3.1119, "step": 6438 }, { "epoch": 26.281632653061223, "grad_norm": 29.215978622436523, "learning_rate": 3.68734693877551e-05, "loss": 1.4859, "step": 6439 }, { "epoch": 26.285714285714285, "grad_norm": 8.653532981872559, "learning_rate": 3.6871428571428576e-05, "loss": 3.1291, "step": 6440 }, { "epoch": 26.289795918367346, "grad_norm": 10.088457107543945, "learning_rate": 3.686938775510204e-05, "loss": 3.1709, "step": 6441 }, { "epoch": 26.293877551020408, "grad_norm": 14.938396453857422, "learning_rate": 3.686734693877551e-05, "loss": 3.0756, "step": 6442 }, { "epoch": 26.29795918367347, "grad_norm": 6.980600357055664, "learning_rate": 3.6865306122448984e-05, "loss": 3.5887, "step": 6443 }, { "epoch": 26.30204081632653, "grad_norm": 31.664762496948242, "learning_rate": 3.686326530612245e-05, "loss": 1.4968, "step": 6444 }, { "epoch": 26.306122448979593, "grad_norm": 16.1339168548584, "learning_rate": 3.686122448979592e-05, "loss": 2.8491, "step": 6445 }, { "epoch": 26.31020408163265, "grad_norm": 25.86298179626465, "learning_rate": 3.6859183673469386e-05, "loss": 1.3825, "step": 6446 }, { "epoch": 26.314285714285713, "grad_norm": 15.23962688446045, "learning_rate": 3.685714285714286e-05, "loss": 2.8152, "step": 6447 }, { "epoch": 26.318367346938775, "grad_norm": 16.502357482910156, "learning_rate": 3.6855102040816333e-05, "loss": 2.6175, "step": 6448 }, { "epoch": 26.322448979591837, "grad_norm": 16.495851516723633, "learning_rate": 3.68530612244898e-05, "loss": 2.1984, "step": 6449 }, { "epoch": 26.3265306122449, "grad_norm": 7.188146114349365, "learning_rate": 3.685102040816327e-05, "loss": 3.3885, "step": 6450 }, { "epoch": 26.33061224489796, "grad_norm": 11.777525901794434, "learning_rate": 3.6848979591836735e-05, "loss": 3.1099, "step": 6451 }, { "epoch": 26.33469387755102, "grad_norm": 8.612537384033203, "learning_rate": 3.68469387755102e-05, "loss": 3.5588, "step": 6452 }, { "epoch": 26.33877551020408, "grad_norm": 7.879613399505615, "learning_rate": 3.6844897959183676e-05, "loss": 3.4063, "step": 6453 }, { "epoch": 26.34285714285714, "grad_norm": 16.668582916259766, "learning_rate": 3.684285714285714e-05, "loss": 2.4555, "step": 6454 }, { "epoch": 26.346938775510203, "grad_norm": 7.398522853851318, "learning_rate": 3.684081632653062e-05, "loss": 3.4034, "step": 6455 }, { "epoch": 26.351020408163265, "grad_norm": 12.843682289123535, "learning_rate": 3.6838775510204084e-05, "loss": 3.0636, "step": 6456 }, { "epoch": 26.355102040816327, "grad_norm": 8.97358226776123, "learning_rate": 3.683673469387755e-05, "loss": 3.3788, "step": 6457 }, { "epoch": 26.35918367346939, "grad_norm": 11.430157661437988, "learning_rate": 3.6834693877551025e-05, "loss": 3.0831, "step": 6458 }, { "epoch": 26.36326530612245, "grad_norm": 31.833236694335938, "learning_rate": 3.683265306122449e-05, "loss": 1.2038, "step": 6459 }, { "epoch": 26.367346938775512, "grad_norm": 13.929766654968262, "learning_rate": 3.683061224489796e-05, "loss": 2.6622, "step": 6460 }, { "epoch": 26.37142857142857, "grad_norm": 14.892179489135742, "learning_rate": 3.6828571428571426e-05, "loss": 2.4481, "step": 6461 }, { "epoch": 26.375510204081632, "grad_norm": 7.67315673828125, "learning_rate": 3.68265306122449e-05, "loss": 3.0731, "step": 6462 }, { "epoch": 26.379591836734694, "grad_norm": 10.218554496765137, "learning_rate": 3.6824489795918374e-05, "loss": 3.0726, "step": 6463 }, { "epoch": 26.383673469387755, "grad_norm": 8.392640113830566, "learning_rate": 3.682244897959184e-05, "loss": 3.1798, "step": 6464 }, { "epoch": 26.387755102040817, "grad_norm": 16.0135555267334, "learning_rate": 3.682040816326531e-05, "loss": 2.4541, "step": 6465 }, { "epoch": 26.39183673469388, "grad_norm": 21.702747344970703, "learning_rate": 3.6818367346938776e-05, "loss": 0.4243, "step": 6466 }, { "epoch": 26.39591836734694, "grad_norm": 9.958497047424316, "learning_rate": 3.681632653061224e-05, "loss": 2.9555, "step": 6467 }, { "epoch": 26.4, "grad_norm": 17.314912796020508, "learning_rate": 3.6814285714285717e-05, "loss": 2.49, "step": 6468 }, { "epoch": 26.40408163265306, "grad_norm": 15.22411060333252, "learning_rate": 3.6812244897959184e-05, "loss": 2.9664, "step": 6469 }, { "epoch": 26.408163265306122, "grad_norm": 13.24061107635498, "learning_rate": 3.681020408163266e-05, "loss": 3.0291, "step": 6470 }, { "epoch": 26.412244897959184, "grad_norm": 22.81415367126465, "learning_rate": 3.6808163265306125e-05, "loss": 2.2114, "step": 6471 }, { "epoch": 26.416326530612245, "grad_norm": 16.88934898376465, "learning_rate": 3.680612244897959e-05, "loss": 2.7135, "step": 6472 }, { "epoch": 26.420408163265307, "grad_norm": 7.035835266113281, "learning_rate": 3.6804081632653066e-05, "loss": 3.7131, "step": 6473 }, { "epoch": 26.42448979591837, "grad_norm": 11.99008560180664, "learning_rate": 3.680204081632653e-05, "loss": 2.9768, "step": 6474 }, { "epoch": 26.428571428571427, "grad_norm": 16.780506134033203, "learning_rate": 3.68e-05, "loss": 2.5835, "step": 6475 }, { "epoch": 26.43265306122449, "grad_norm": 9.061308860778809, "learning_rate": 3.679795918367347e-05, "loss": 3.1821, "step": 6476 }, { "epoch": 26.43673469387755, "grad_norm": 21.599546432495117, "learning_rate": 3.679591836734694e-05, "loss": 2.3941, "step": 6477 }, { "epoch": 26.440816326530612, "grad_norm": 14.315099716186523, "learning_rate": 3.679387755102041e-05, "loss": 2.6447, "step": 6478 }, { "epoch": 26.444897959183674, "grad_norm": 31.568876266479492, "learning_rate": 3.679183673469388e-05, "loss": 1.3188, "step": 6479 }, { "epoch": 26.448979591836736, "grad_norm": 19.63844108581543, "learning_rate": 3.678979591836735e-05, "loss": 2.2878, "step": 6480 }, { "epoch": 26.453061224489797, "grad_norm": 15.85689926147461, "learning_rate": 3.6787755102040816e-05, "loss": 2.8813, "step": 6481 }, { "epoch": 26.457142857142856, "grad_norm": 25.348072052001953, "learning_rate": 3.678571428571429e-05, "loss": 1.6156, "step": 6482 }, { "epoch": 26.461224489795917, "grad_norm": 10.813532829284668, "learning_rate": 3.678367346938776e-05, "loss": 2.9831, "step": 6483 }, { "epoch": 26.46530612244898, "grad_norm": 13.68209171295166, "learning_rate": 3.6781632653061224e-05, "loss": 2.8491, "step": 6484 }, { "epoch": 26.46938775510204, "grad_norm": 12.36092758178711, "learning_rate": 3.677959183673469e-05, "loss": 3.0206, "step": 6485 }, { "epoch": 26.473469387755102, "grad_norm": 19.084680557250977, "learning_rate": 3.6777551020408166e-05, "loss": 2.4331, "step": 6486 }, { "epoch": 26.477551020408164, "grad_norm": 13.158234596252441, "learning_rate": 3.677551020408164e-05, "loss": 2.9798, "step": 6487 }, { "epoch": 26.481632653061226, "grad_norm": 13.353387832641602, "learning_rate": 3.6773469387755107e-05, "loss": 2.8702, "step": 6488 }, { "epoch": 26.485714285714284, "grad_norm": 11.071451187133789, "learning_rate": 3.6771428571428574e-05, "loss": 3.1433, "step": 6489 }, { "epoch": 26.489795918367346, "grad_norm": 9.392638206481934, "learning_rate": 3.676938775510204e-05, "loss": 3.2864, "step": 6490 }, { "epoch": 26.493877551020407, "grad_norm": 10.501461029052734, "learning_rate": 3.676734693877551e-05, "loss": 3.1218, "step": 6491 }, { "epoch": 26.49795918367347, "grad_norm": 19.13886070251465, "learning_rate": 3.676530612244898e-05, "loss": 2.8073, "step": 6492 }, { "epoch": 26.50204081632653, "grad_norm": 12.90926456451416, "learning_rate": 3.676326530612245e-05, "loss": 2.9449, "step": 6493 }, { "epoch": 26.506122448979593, "grad_norm": 7.9603986740112305, "learning_rate": 3.676122448979592e-05, "loss": 3.2777, "step": 6494 }, { "epoch": 26.510204081632654, "grad_norm": 11.970065116882324, "learning_rate": 3.675918367346939e-05, "loss": 3.0725, "step": 6495 }, { "epoch": 26.514285714285712, "grad_norm": 8.933393478393555, "learning_rate": 3.675714285714286e-05, "loss": 3.1594, "step": 6496 }, { "epoch": 26.518367346938774, "grad_norm": 15.305338859558105, "learning_rate": 3.675510204081633e-05, "loss": 2.9004, "step": 6497 }, { "epoch": 26.522448979591836, "grad_norm": 15.620207786560059, "learning_rate": 3.67530612244898e-05, "loss": 2.7183, "step": 6498 }, { "epoch": 26.526530612244898, "grad_norm": 15.74936580657959, "learning_rate": 3.6751020408163265e-05, "loss": 2.9846, "step": 6499 }, { "epoch": 26.53061224489796, "grad_norm": 21.513874053955078, "learning_rate": 3.674897959183673e-05, "loss": 2.0784, "step": 6500 }, { "epoch": 26.53469387755102, "grad_norm": 29.179651260375977, "learning_rate": 3.6746938775510206e-05, "loss": 1.9728, "step": 6501 }, { "epoch": 26.538775510204083, "grad_norm": 7.882608890533447, "learning_rate": 3.674489795918368e-05, "loss": 3.6021, "step": 6502 }, { "epoch": 26.542857142857144, "grad_norm": 23.544174194335938, "learning_rate": 3.674285714285715e-05, "loss": 2.2271, "step": 6503 }, { "epoch": 26.546938775510203, "grad_norm": 21.750669479370117, "learning_rate": 3.6740816326530614e-05, "loss": 2.8303, "step": 6504 }, { "epoch": 26.551020408163264, "grad_norm": 21.2008113861084, "learning_rate": 3.673877551020408e-05, "loss": 1.6403, "step": 6505 }, { "epoch": 26.555102040816326, "grad_norm": 22.72040367126465, "learning_rate": 3.673673469387755e-05, "loss": 2.9132, "step": 6506 }, { "epoch": 26.559183673469388, "grad_norm": 15.159843444824219, "learning_rate": 3.673469387755102e-05, "loss": 2.6074, "step": 6507 }, { "epoch": 26.56326530612245, "grad_norm": 5.8695549964904785, "learning_rate": 3.673265306122449e-05, "loss": 3.5193, "step": 6508 }, { "epoch": 26.56734693877551, "grad_norm": 11.35338306427002, "learning_rate": 3.6730612244897964e-05, "loss": 3.0876, "step": 6509 }, { "epoch": 26.571428571428573, "grad_norm": 14.733514785766602, "learning_rate": 3.672857142857143e-05, "loss": 2.7364, "step": 6510 }, { "epoch": 26.57551020408163, "grad_norm": 10.407370567321777, "learning_rate": 3.67265306122449e-05, "loss": 3.3323, "step": 6511 }, { "epoch": 26.579591836734693, "grad_norm": 25.328182220458984, "learning_rate": 3.672448979591837e-05, "loss": 2.6182, "step": 6512 }, { "epoch": 26.583673469387755, "grad_norm": 10.703948020935059, "learning_rate": 3.672244897959184e-05, "loss": 3.2506, "step": 6513 }, { "epoch": 26.587755102040816, "grad_norm": 10.91263198852539, "learning_rate": 3.6720408163265306e-05, "loss": 3.0825, "step": 6514 }, { "epoch": 26.591836734693878, "grad_norm": 6.049426078796387, "learning_rate": 3.671836734693877e-05, "loss": 3.8392, "step": 6515 }, { "epoch": 26.59591836734694, "grad_norm": 19.591733932495117, "learning_rate": 3.671632653061225e-05, "loss": 2.113, "step": 6516 }, { "epoch": 26.6, "grad_norm": 24.98989486694336, "learning_rate": 3.671428571428572e-05, "loss": 2.4863, "step": 6517 }, { "epoch": 26.60408163265306, "grad_norm": 9.93703556060791, "learning_rate": 3.671224489795919e-05, "loss": 3.1237, "step": 6518 }, { "epoch": 26.60816326530612, "grad_norm": 12.29230785369873, "learning_rate": 3.6710204081632655e-05, "loss": 2.5176, "step": 6519 }, { "epoch": 26.612244897959183, "grad_norm": 12.909829139709473, "learning_rate": 3.670816326530612e-05, "loss": 2.9051, "step": 6520 }, { "epoch": 26.616326530612245, "grad_norm": 9.958157539367676, "learning_rate": 3.6706122448979596e-05, "loss": 3.2592, "step": 6521 }, { "epoch": 26.620408163265306, "grad_norm": 22.194477081298828, "learning_rate": 3.670408163265306e-05, "loss": 2.2428, "step": 6522 }, { "epoch": 26.624489795918368, "grad_norm": 30.723848342895508, "learning_rate": 3.670204081632653e-05, "loss": 1.6967, "step": 6523 }, { "epoch": 26.62857142857143, "grad_norm": 10.074454307556152, "learning_rate": 3.6700000000000004e-05, "loss": 3.3635, "step": 6524 }, { "epoch": 26.632653061224488, "grad_norm": 20.25838279724121, "learning_rate": 3.669795918367347e-05, "loss": 2.6213, "step": 6525 }, { "epoch": 26.63673469387755, "grad_norm": 9.625964164733887, "learning_rate": 3.6695918367346945e-05, "loss": 3.1825, "step": 6526 }, { "epoch": 26.64081632653061, "grad_norm": 14.151863098144531, "learning_rate": 3.669387755102041e-05, "loss": 2.6916, "step": 6527 }, { "epoch": 26.644897959183673, "grad_norm": 13.046282768249512, "learning_rate": 3.669183673469388e-05, "loss": 2.8604, "step": 6528 }, { "epoch": 26.648979591836735, "grad_norm": 12.130163192749023, "learning_rate": 3.668979591836735e-05, "loss": 3.0093, "step": 6529 }, { "epoch": 26.653061224489797, "grad_norm": 12.261472702026367, "learning_rate": 3.6687755102040814e-05, "loss": 3.202, "step": 6530 }, { "epoch": 26.65714285714286, "grad_norm": 20.870548248291016, "learning_rate": 3.668571428571429e-05, "loss": 2.1136, "step": 6531 }, { "epoch": 26.66122448979592, "grad_norm": 14.251008033752441, "learning_rate": 3.6683673469387755e-05, "loss": 2.7379, "step": 6532 }, { "epoch": 26.66530612244898, "grad_norm": 9.323070526123047, "learning_rate": 3.668163265306123e-05, "loss": 3.3397, "step": 6533 }, { "epoch": 26.66938775510204, "grad_norm": 12.847905158996582, "learning_rate": 3.6679591836734696e-05, "loss": 2.9605, "step": 6534 }, { "epoch": 26.6734693877551, "grad_norm": 10.899120330810547, "learning_rate": 3.667755102040816e-05, "loss": 3.1961, "step": 6535 }, { "epoch": 26.677551020408163, "grad_norm": 7.301661014556885, "learning_rate": 3.667551020408164e-05, "loss": 3.5391, "step": 6536 }, { "epoch": 26.681632653061225, "grad_norm": 13.515508651733398, "learning_rate": 3.6673469387755104e-05, "loss": 3.07, "step": 6537 }, { "epoch": 26.685714285714287, "grad_norm": 11.84900188446045, "learning_rate": 3.667142857142857e-05, "loss": 3.1979, "step": 6538 }, { "epoch": 26.68979591836735, "grad_norm": 19.448623657226562, "learning_rate": 3.666938775510204e-05, "loss": 2.313, "step": 6539 }, { "epoch": 26.693877551020407, "grad_norm": 12.520598411560059, "learning_rate": 3.666734693877551e-05, "loss": 2.9829, "step": 6540 }, { "epoch": 26.69795918367347, "grad_norm": 14.648909568786621, "learning_rate": 3.6665306122448986e-05, "loss": 2.904, "step": 6541 }, { "epoch": 26.70204081632653, "grad_norm": 18.626922607421875, "learning_rate": 3.666326530612245e-05, "loss": 2.4222, "step": 6542 }, { "epoch": 26.706122448979592, "grad_norm": 8.470755577087402, "learning_rate": 3.666122448979592e-05, "loss": 3.1705, "step": 6543 }, { "epoch": 26.710204081632654, "grad_norm": 6.952312469482422, "learning_rate": 3.665918367346939e-05, "loss": 3.4819, "step": 6544 }, { "epoch": 26.714285714285715, "grad_norm": 9.805940628051758, "learning_rate": 3.6657142857142855e-05, "loss": 3.2138, "step": 6545 }, { "epoch": 26.718367346938777, "grad_norm": 14.974654197692871, "learning_rate": 3.665510204081633e-05, "loss": 2.5724, "step": 6546 }, { "epoch": 26.722448979591835, "grad_norm": 15.333359718322754, "learning_rate": 3.6653061224489796e-05, "loss": 2.6651, "step": 6547 }, { "epoch": 26.726530612244897, "grad_norm": 13.226519584655762, "learning_rate": 3.665102040816327e-05, "loss": 2.809, "step": 6548 }, { "epoch": 26.73061224489796, "grad_norm": 8.43688678741455, "learning_rate": 3.664897959183674e-05, "loss": 3.2719, "step": 6549 }, { "epoch": 26.73469387755102, "grad_norm": 19.053213119506836, "learning_rate": 3.6646938775510204e-05, "loss": 2.5879, "step": 6550 }, { "epoch": 26.738775510204082, "grad_norm": 15.175678253173828, "learning_rate": 3.664489795918368e-05, "loss": 2.9698, "step": 6551 }, { "epoch": 26.742857142857144, "grad_norm": 8.328503608703613, "learning_rate": 3.6642857142857145e-05, "loss": 3.182, "step": 6552 }, { "epoch": 26.746938775510205, "grad_norm": 22.14813232421875, "learning_rate": 3.664081632653061e-05, "loss": 2.8926, "step": 6553 }, { "epoch": 26.751020408163264, "grad_norm": 16.006380081176758, "learning_rate": 3.663877551020408e-05, "loss": 2.7076, "step": 6554 }, { "epoch": 26.755102040816325, "grad_norm": 13.763225555419922, "learning_rate": 3.663673469387755e-05, "loss": 3.0322, "step": 6555 }, { "epoch": 26.759183673469387, "grad_norm": 9.310750961303711, "learning_rate": 3.663469387755103e-05, "loss": 3.1816, "step": 6556 }, { "epoch": 26.76326530612245, "grad_norm": 13.822303771972656, "learning_rate": 3.6632653061224494e-05, "loss": 3.0155, "step": 6557 }, { "epoch": 26.76734693877551, "grad_norm": 16.36149787902832, "learning_rate": 3.663061224489796e-05, "loss": 2.5713, "step": 6558 }, { "epoch": 26.771428571428572, "grad_norm": 14.110292434692383, "learning_rate": 3.662857142857143e-05, "loss": 2.8701, "step": 6559 }, { "epoch": 26.775510204081634, "grad_norm": 13.639229774475098, "learning_rate": 3.6626530612244895e-05, "loss": 2.9796, "step": 6560 }, { "epoch": 26.779591836734696, "grad_norm": 10.422100067138672, "learning_rate": 3.662448979591837e-05, "loss": 3.075, "step": 6561 }, { "epoch": 26.783673469387754, "grad_norm": 6.161971569061279, "learning_rate": 3.6622448979591836e-05, "loss": 3.769, "step": 6562 }, { "epoch": 26.787755102040816, "grad_norm": 13.956043243408203, "learning_rate": 3.662040816326531e-05, "loss": 3.3434, "step": 6563 }, { "epoch": 26.791836734693877, "grad_norm": 12.147483825683594, "learning_rate": 3.661836734693878e-05, "loss": 2.8731, "step": 6564 }, { "epoch": 26.79591836734694, "grad_norm": 17.167461395263672, "learning_rate": 3.661632653061225e-05, "loss": 2.139, "step": 6565 }, { "epoch": 26.8, "grad_norm": 11.379389762878418, "learning_rate": 3.661428571428572e-05, "loss": 3.3011, "step": 6566 }, { "epoch": 26.804081632653062, "grad_norm": 8.766999244689941, "learning_rate": 3.6612244897959186e-05, "loss": 3.2852, "step": 6567 }, { "epoch": 26.808163265306124, "grad_norm": 15.345902442932129, "learning_rate": 3.661020408163265e-05, "loss": 2.209, "step": 6568 }, { "epoch": 26.812244897959182, "grad_norm": 5.458672523498535, "learning_rate": 3.660816326530612e-05, "loss": 3.741, "step": 6569 }, { "epoch": 26.816326530612244, "grad_norm": 9.797812461853027, "learning_rate": 3.6606122448979594e-05, "loss": 3.2819, "step": 6570 }, { "epoch": 26.820408163265306, "grad_norm": 12.26540756225586, "learning_rate": 3.660408163265307e-05, "loss": 3.2521, "step": 6571 }, { "epoch": 26.824489795918367, "grad_norm": 8.811970710754395, "learning_rate": 3.6602040816326535e-05, "loss": 3.0318, "step": 6572 }, { "epoch": 26.82857142857143, "grad_norm": 14.818373680114746, "learning_rate": 3.66e-05, "loss": 2.2056, "step": 6573 }, { "epoch": 26.83265306122449, "grad_norm": 22.571800231933594, "learning_rate": 3.659795918367347e-05, "loss": 2.6183, "step": 6574 }, { "epoch": 26.836734693877553, "grad_norm": 8.05821418762207, "learning_rate": 3.659591836734694e-05, "loss": 3.3635, "step": 6575 }, { "epoch": 26.84081632653061, "grad_norm": 23.279582977294922, "learning_rate": 3.659387755102041e-05, "loss": 2.037, "step": 6576 }, { "epoch": 26.844897959183672, "grad_norm": 8.803240776062012, "learning_rate": 3.659183673469388e-05, "loss": 3.2217, "step": 6577 }, { "epoch": 26.848979591836734, "grad_norm": 11.985635757446289, "learning_rate": 3.658979591836735e-05, "loss": 2.8471, "step": 6578 }, { "epoch": 26.853061224489796, "grad_norm": 21.345279693603516, "learning_rate": 3.658775510204082e-05, "loss": 2.3768, "step": 6579 }, { "epoch": 26.857142857142858, "grad_norm": 10.071785926818848, "learning_rate": 3.658571428571429e-05, "loss": 3.0165, "step": 6580 }, { "epoch": 26.86122448979592, "grad_norm": 20.206512451171875, "learning_rate": 3.658367346938776e-05, "loss": 2.3057, "step": 6581 }, { "epoch": 26.86530612244898, "grad_norm": 13.240350723266602, "learning_rate": 3.6581632653061226e-05, "loss": 2.9299, "step": 6582 }, { "epoch": 26.86938775510204, "grad_norm": 9.038599967956543, "learning_rate": 3.6579591836734694e-05, "loss": 3.4412, "step": 6583 }, { "epoch": 26.8734693877551, "grad_norm": 7.245314121246338, "learning_rate": 3.657755102040816e-05, "loss": 3.5442, "step": 6584 }, { "epoch": 26.877551020408163, "grad_norm": 11.779600143432617, "learning_rate": 3.6575510204081635e-05, "loss": 3.1874, "step": 6585 }, { "epoch": 26.881632653061224, "grad_norm": 9.882845878601074, "learning_rate": 3.65734693877551e-05, "loss": 3.288, "step": 6586 }, { "epoch": 26.885714285714286, "grad_norm": 9.223430633544922, "learning_rate": 3.6571428571428576e-05, "loss": 3.3707, "step": 6587 }, { "epoch": 26.889795918367348, "grad_norm": 23.625839233398438, "learning_rate": 3.656938775510204e-05, "loss": 1.6717, "step": 6588 }, { "epoch": 26.89387755102041, "grad_norm": 29.182435989379883, "learning_rate": 3.656734693877551e-05, "loss": 1.8211, "step": 6589 }, { "epoch": 26.897959183673468, "grad_norm": 8.199968338012695, "learning_rate": 3.6565306122448984e-05, "loss": 3.4573, "step": 6590 }, { "epoch": 26.90204081632653, "grad_norm": 29.53972625732422, "learning_rate": 3.656326530612245e-05, "loss": 1.9412, "step": 6591 }, { "epoch": 26.90612244897959, "grad_norm": 13.595215797424316, "learning_rate": 3.656122448979592e-05, "loss": 2.923, "step": 6592 }, { "epoch": 26.910204081632653, "grad_norm": 17.477439880371094, "learning_rate": 3.6559183673469385e-05, "loss": 2.9821, "step": 6593 }, { "epoch": 26.914285714285715, "grad_norm": 10.424115180969238, "learning_rate": 3.655714285714286e-05, "loss": 3.2685, "step": 6594 }, { "epoch": 26.918367346938776, "grad_norm": 18.0715274810791, "learning_rate": 3.655510204081633e-05, "loss": 2.6458, "step": 6595 }, { "epoch": 26.922448979591838, "grad_norm": 13.727460861206055, "learning_rate": 3.65530612244898e-05, "loss": 2.386, "step": 6596 }, { "epoch": 26.926530612244896, "grad_norm": 11.339649200439453, "learning_rate": 3.655102040816327e-05, "loss": 2.7965, "step": 6597 }, { "epoch": 26.930612244897958, "grad_norm": 11.030318260192871, "learning_rate": 3.6548979591836734e-05, "loss": 3.1317, "step": 6598 }, { "epoch": 26.93469387755102, "grad_norm": 18.12757682800293, "learning_rate": 3.65469387755102e-05, "loss": 2.4387, "step": 6599 }, { "epoch": 26.93877551020408, "grad_norm": 12.974059104919434, "learning_rate": 3.6544897959183675e-05, "loss": 2.7676, "step": 6600 }, { "epoch": 26.942857142857143, "grad_norm": 16.438371658325195, "learning_rate": 3.654285714285714e-05, "loss": 2.3765, "step": 6601 }, { "epoch": 26.946938775510205, "grad_norm": 8.748517036437988, "learning_rate": 3.6540816326530616e-05, "loss": 3.3732, "step": 6602 }, { "epoch": 26.951020408163266, "grad_norm": 12.658669471740723, "learning_rate": 3.6538775510204083e-05, "loss": 2.9356, "step": 6603 }, { "epoch": 26.955102040816328, "grad_norm": 15.288919448852539, "learning_rate": 3.653673469387755e-05, "loss": 2.3615, "step": 6604 }, { "epoch": 26.959183673469386, "grad_norm": 10.715810775756836, "learning_rate": 3.6534693877551024e-05, "loss": 3.5053, "step": 6605 }, { "epoch": 26.963265306122448, "grad_norm": 25.840312957763672, "learning_rate": 3.653265306122449e-05, "loss": 1.5867, "step": 6606 }, { "epoch": 26.96734693877551, "grad_norm": 28.293624877929688, "learning_rate": 3.653061224489796e-05, "loss": 2.2234, "step": 6607 }, { "epoch": 26.97142857142857, "grad_norm": 15.708965301513672, "learning_rate": 3.6528571428571426e-05, "loss": 2.8979, "step": 6608 }, { "epoch": 26.975510204081633, "grad_norm": 14.735909461975098, "learning_rate": 3.65265306122449e-05, "loss": 2.44, "step": 6609 }, { "epoch": 26.979591836734695, "grad_norm": 27.618629455566406, "learning_rate": 3.6524489795918374e-05, "loss": 1.3669, "step": 6610 }, { "epoch": 26.983673469387757, "grad_norm": 23.400663375854492, "learning_rate": 3.652244897959184e-05, "loss": 1.9743, "step": 6611 }, { "epoch": 26.987755102040815, "grad_norm": 29.08207130432129, "learning_rate": 3.652040816326531e-05, "loss": 1.2136, "step": 6612 }, { "epoch": 26.991836734693877, "grad_norm": 9.589981079101562, "learning_rate": 3.6518367346938775e-05, "loss": 3.2582, "step": 6613 }, { "epoch": 26.99591836734694, "grad_norm": 13.204124450683594, "learning_rate": 3.651632653061225e-05, "loss": 2.6622, "step": 6614 }, { "epoch": 27.0, "grad_norm": 18.271217346191406, "learning_rate": 3.6514285714285716e-05, "loss": 2.0718, "step": 6615 }, { "epoch": 27.00408163265306, "grad_norm": 11.319297790527344, "learning_rate": 3.651224489795918e-05, "loss": 2.9221, "step": 6616 }, { "epoch": 27.008163265306123, "grad_norm": 25.536922454833984, "learning_rate": 3.651020408163266e-05, "loss": 1.2529, "step": 6617 }, { "epoch": 27.012244897959185, "grad_norm": 21.868526458740234, "learning_rate": 3.6508163265306124e-05, "loss": 2.3845, "step": 6618 }, { "epoch": 27.016326530612243, "grad_norm": 23.018142700195312, "learning_rate": 3.65061224489796e-05, "loss": 2.3477, "step": 6619 }, { "epoch": 27.020408163265305, "grad_norm": 13.806354522705078, "learning_rate": 3.6504081632653065e-05, "loss": 2.8196, "step": 6620 }, { "epoch": 27.024489795918367, "grad_norm": 16.724811553955078, "learning_rate": 3.650204081632653e-05, "loss": 2.5469, "step": 6621 }, { "epoch": 27.02857142857143, "grad_norm": 23.137920379638672, "learning_rate": 3.65e-05, "loss": 2.174, "step": 6622 }, { "epoch": 27.03265306122449, "grad_norm": 27.993911743164062, "learning_rate": 3.649795918367347e-05, "loss": 1.4571, "step": 6623 }, { "epoch": 27.036734693877552, "grad_norm": 7.395310401916504, "learning_rate": 3.649591836734694e-05, "loss": 3.4256, "step": 6624 }, { "epoch": 27.040816326530614, "grad_norm": 15.558562278747559, "learning_rate": 3.6493877551020414e-05, "loss": 2.9539, "step": 6625 }, { "epoch": 27.044897959183672, "grad_norm": 11.943392753601074, "learning_rate": 3.649183673469388e-05, "loss": 2.9926, "step": 6626 }, { "epoch": 27.048979591836734, "grad_norm": 7.028404235839844, "learning_rate": 3.648979591836735e-05, "loss": 3.7007, "step": 6627 }, { "epoch": 27.053061224489795, "grad_norm": 7.9905171394348145, "learning_rate": 3.6487755102040816e-05, "loss": 3.3963, "step": 6628 }, { "epoch": 27.057142857142857, "grad_norm": 6.973695278167725, "learning_rate": 3.648571428571429e-05, "loss": 3.4302, "step": 6629 }, { "epoch": 27.06122448979592, "grad_norm": 23.486568450927734, "learning_rate": 3.648367346938776e-05, "loss": 1.7762, "step": 6630 }, { "epoch": 27.06530612244898, "grad_norm": 12.364858627319336, "learning_rate": 3.6481632653061224e-05, "loss": 3.0664, "step": 6631 }, { "epoch": 27.069387755102042, "grad_norm": 10.355178833007812, "learning_rate": 3.64795918367347e-05, "loss": 3.1346, "step": 6632 }, { "epoch": 27.073469387755104, "grad_norm": 7.714032173156738, "learning_rate": 3.6477551020408165e-05, "loss": 3.0118, "step": 6633 }, { "epoch": 27.077551020408162, "grad_norm": 17.543487548828125, "learning_rate": 3.647551020408164e-05, "loss": 2.4096, "step": 6634 }, { "epoch": 27.081632653061224, "grad_norm": 19.93368148803711, "learning_rate": 3.6473469387755106e-05, "loss": 2.5759, "step": 6635 }, { "epoch": 27.085714285714285, "grad_norm": 10.142556190490723, "learning_rate": 3.647142857142857e-05, "loss": 3.1484, "step": 6636 }, { "epoch": 27.089795918367347, "grad_norm": 20.97888946533203, "learning_rate": 3.646938775510204e-05, "loss": 2.7397, "step": 6637 }, { "epoch": 27.09387755102041, "grad_norm": 12.612300872802734, "learning_rate": 3.646734693877551e-05, "loss": 2.7818, "step": 6638 }, { "epoch": 27.09795918367347, "grad_norm": 17.77667236328125, "learning_rate": 3.646530612244898e-05, "loss": 1.9521, "step": 6639 }, { "epoch": 27.102040816326532, "grad_norm": 15.144702911376953, "learning_rate": 3.646326530612245e-05, "loss": 2.6704, "step": 6640 }, { "epoch": 27.10612244897959, "grad_norm": 15.920584678649902, "learning_rate": 3.646122448979592e-05, "loss": 2.5709, "step": 6641 }, { "epoch": 27.110204081632652, "grad_norm": 9.449906349182129, "learning_rate": 3.645918367346939e-05, "loss": 2.9693, "step": 6642 }, { "epoch": 27.114285714285714, "grad_norm": 13.884599685668945, "learning_rate": 3.6457142857142857e-05, "loss": 2.6155, "step": 6643 }, { "epoch": 27.118367346938776, "grad_norm": 11.881203651428223, "learning_rate": 3.645510204081633e-05, "loss": 3.0332, "step": 6644 }, { "epoch": 27.122448979591837, "grad_norm": 12.557541847229004, "learning_rate": 3.64530612244898e-05, "loss": 2.7672, "step": 6645 }, { "epoch": 27.1265306122449, "grad_norm": 13.953871726989746, "learning_rate": 3.6451020408163265e-05, "loss": 2.8385, "step": 6646 }, { "epoch": 27.13061224489796, "grad_norm": 15.467265129089355, "learning_rate": 3.644897959183673e-05, "loss": 2.7573, "step": 6647 }, { "epoch": 27.13469387755102, "grad_norm": 20.498388290405273, "learning_rate": 3.6446938775510206e-05, "loss": 2.1154, "step": 6648 }, { "epoch": 27.13877551020408, "grad_norm": 13.346481323242188, "learning_rate": 3.644489795918368e-05, "loss": 2.5221, "step": 6649 }, { "epoch": 27.142857142857142, "grad_norm": 12.472480773925781, "learning_rate": 3.644285714285715e-05, "loss": 2.7245, "step": 6650 }, { "epoch": 27.146938775510204, "grad_norm": 28.221433639526367, "learning_rate": 3.6440816326530614e-05, "loss": 1.6405, "step": 6651 }, { "epoch": 27.151020408163266, "grad_norm": 28.218154907226562, "learning_rate": 3.643877551020408e-05, "loss": 2.113, "step": 6652 }, { "epoch": 27.155102040816328, "grad_norm": 11.26356315612793, "learning_rate": 3.643673469387755e-05, "loss": 3.0895, "step": 6653 }, { "epoch": 27.15918367346939, "grad_norm": 14.711939811706543, "learning_rate": 3.643469387755102e-05, "loss": 2.2116, "step": 6654 }, { "epoch": 27.163265306122447, "grad_norm": 14.108845710754395, "learning_rate": 3.643265306122449e-05, "loss": 2.9737, "step": 6655 }, { "epoch": 27.16734693877551, "grad_norm": 13.472235679626465, "learning_rate": 3.643061224489796e-05, "loss": 2.6334, "step": 6656 }, { "epoch": 27.17142857142857, "grad_norm": 10.368226051330566, "learning_rate": 3.642857142857143e-05, "loss": 3.0544, "step": 6657 }, { "epoch": 27.175510204081633, "grad_norm": 24.06848907470703, "learning_rate": 3.6426530612244904e-05, "loss": 2.0402, "step": 6658 }, { "epoch": 27.179591836734694, "grad_norm": 10.73825454711914, "learning_rate": 3.642448979591837e-05, "loss": 3.1484, "step": 6659 }, { "epoch": 27.183673469387756, "grad_norm": 18.093673706054688, "learning_rate": 3.642244897959184e-05, "loss": 2.6571, "step": 6660 }, { "epoch": 27.187755102040818, "grad_norm": 15.841750144958496, "learning_rate": 3.6420408163265305e-05, "loss": 2.6188, "step": 6661 }, { "epoch": 27.191836734693876, "grad_norm": 10.772754669189453, "learning_rate": 3.641836734693877e-05, "loss": 3.3165, "step": 6662 }, { "epoch": 27.195918367346938, "grad_norm": 11.166561126708984, "learning_rate": 3.6416326530612247e-05, "loss": 3.0058, "step": 6663 }, { "epoch": 27.2, "grad_norm": 13.77489185333252, "learning_rate": 3.641428571428572e-05, "loss": 3.1071, "step": 6664 }, { "epoch": 27.20408163265306, "grad_norm": 11.86767578125, "learning_rate": 3.641224489795919e-05, "loss": 3.1326, "step": 6665 }, { "epoch": 27.208163265306123, "grad_norm": 9.267443656921387, "learning_rate": 3.6410204081632655e-05, "loss": 3.1511, "step": 6666 }, { "epoch": 27.212244897959184, "grad_norm": 15.685461044311523, "learning_rate": 3.640816326530612e-05, "loss": 2.5126, "step": 6667 }, { "epoch": 27.216326530612246, "grad_norm": 8.095633506774902, "learning_rate": 3.6406122448979596e-05, "loss": 3.3576, "step": 6668 }, { "epoch": 27.220408163265308, "grad_norm": 29.74899673461914, "learning_rate": 3.640408163265306e-05, "loss": 1.9641, "step": 6669 }, { "epoch": 27.224489795918366, "grad_norm": 19.250240325927734, "learning_rate": 3.640204081632653e-05, "loss": 1.9984, "step": 6670 }, { "epoch": 27.228571428571428, "grad_norm": 24.779037475585938, "learning_rate": 3.6400000000000004e-05, "loss": 2.1141, "step": 6671 }, { "epoch": 27.23265306122449, "grad_norm": 9.25534725189209, "learning_rate": 3.639795918367347e-05, "loss": 3.2435, "step": 6672 }, { "epoch": 27.23673469387755, "grad_norm": 20.00059700012207, "learning_rate": 3.6395918367346945e-05, "loss": 2.1824, "step": 6673 }, { "epoch": 27.240816326530613, "grad_norm": 13.505146026611328, "learning_rate": 3.639387755102041e-05, "loss": 2.736, "step": 6674 }, { "epoch": 27.244897959183675, "grad_norm": 11.787599563598633, "learning_rate": 3.639183673469388e-05, "loss": 3.0207, "step": 6675 }, { "epoch": 27.248979591836736, "grad_norm": 16.6728515625, "learning_rate": 3.6389795918367346e-05, "loss": 2.8755, "step": 6676 }, { "epoch": 27.253061224489795, "grad_norm": 10.881921768188477, "learning_rate": 3.638775510204081e-05, "loss": 3.2651, "step": 6677 }, { "epoch": 27.257142857142856, "grad_norm": 26.229154586791992, "learning_rate": 3.638571428571429e-05, "loss": 2.9345, "step": 6678 }, { "epoch": 27.261224489795918, "grad_norm": 15.73399543762207, "learning_rate": 3.638367346938776e-05, "loss": 2.6336, "step": 6679 }, { "epoch": 27.26530612244898, "grad_norm": 11.853854179382324, "learning_rate": 3.638163265306123e-05, "loss": 2.9906, "step": 6680 }, { "epoch": 27.26938775510204, "grad_norm": 22.545040130615234, "learning_rate": 3.6379591836734695e-05, "loss": 1.3675, "step": 6681 }, { "epoch": 27.273469387755103, "grad_norm": 20.711225509643555, "learning_rate": 3.637755102040816e-05, "loss": 2.1904, "step": 6682 }, { "epoch": 27.277551020408165, "grad_norm": 28.68077850341797, "learning_rate": 3.6375510204081636e-05, "loss": 1.9697, "step": 6683 }, { "epoch": 27.281632653061223, "grad_norm": 16.147239685058594, "learning_rate": 3.6373469387755104e-05, "loss": 2.6795, "step": 6684 }, { "epoch": 27.285714285714285, "grad_norm": 16.453811645507812, "learning_rate": 3.637142857142857e-05, "loss": 2.9668, "step": 6685 }, { "epoch": 27.289795918367346, "grad_norm": 10.845789909362793, "learning_rate": 3.636938775510204e-05, "loss": 2.971, "step": 6686 }, { "epoch": 27.293877551020408, "grad_norm": 17.792049407958984, "learning_rate": 3.636734693877551e-05, "loss": 2.5015, "step": 6687 }, { "epoch": 27.29795918367347, "grad_norm": 21.621318817138672, "learning_rate": 3.6365306122448986e-05, "loss": 2.4566, "step": 6688 }, { "epoch": 27.30204081632653, "grad_norm": 10.008808135986328, "learning_rate": 3.636326530612245e-05, "loss": 3.2142, "step": 6689 }, { "epoch": 27.306122448979593, "grad_norm": 29.225265502929688, "learning_rate": 3.636122448979592e-05, "loss": 2.1521, "step": 6690 }, { "epoch": 27.31020408163265, "grad_norm": 9.21265983581543, "learning_rate": 3.635918367346939e-05, "loss": 3.3744, "step": 6691 }, { "epoch": 27.314285714285713, "grad_norm": 17.693817138671875, "learning_rate": 3.6357142857142854e-05, "loss": 2.5696, "step": 6692 }, { "epoch": 27.318367346938775, "grad_norm": 6.758193492889404, "learning_rate": 3.635510204081633e-05, "loss": 3.6513, "step": 6693 }, { "epoch": 27.322448979591837, "grad_norm": 10.633825302124023, "learning_rate": 3.6353061224489795e-05, "loss": 3.1522, "step": 6694 }, { "epoch": 27.3265306122449, "grad_norm": 16.44826316833496, "learning_rate": 3.635102040816327e-05, "loss": 2.8925, "step": 6695 }, { "epoch": 27.33061224489796, "grad_norm": 17.494306564331055, "learning_rate": 3.6348979591836736e-05, "loss": 2.4044, "step": 6696 }, { "epoch": 27.33469387755102, "grad_norm": 13.181976318359375, "learning_rate": 3.63469387755102e-05, "loss": 2.7573, "step": 6697 }, { "epoch": 27.33877551020408, "grad_norm": 12.000838279724121, "learning_rate": 3.634489795918368e-05, "loss": 3.2338, "step": 6698 }, { "epoch": 27.34285714285714, "grad_norm": 12.124822616577148, "learning_rate": 3.6342857142857144e-05, "loss": 3.1252, "step": 6699 }, { "epoch": 27.346938775510203, "grad_norm": 20.67298698425293, "learning_rate": 3.634081632653061e-05, "loss": 1.8537, "step": 6700 }, { "epoch": 27.351020408163265, "grad_norm": 18.114282608032227, "learning_rate": 3.633877551020408e-05, "loss": 2.6294, "step": 6701 }, { "epoch": 27.355102040816327, "grad_norm": 14.921513557434082, "learning_rate": 3.633673469387755e-05, "loss": 2.3923, "step": 6702 }, { "epoch": 27.35918367346939, "grad_norm": 17.540245056152344, "learning_rate": 3.6334693877551026e-05, "loss": 2.9552, "step": 6703 }, { "epoch": 27.36326530612245, "grad_norm": 11.575998306274414, "learning_rate": 3.6332653061224494e-05, "loss": 2.6806, "step": 6704 }, { "epoch": 27.367346938775512, "grad_norm": 9.09296703338623, "learning_rate": 3.633061224489796e-05, "loss": 3.1261, "step": 6705 }, { "epoch": 27.37142857142857, "grad_norm": 12.511026382446289, "learning_rate": 3.632857142857143e-05, "loss": 3.048, "step": 6706 }, { "epoch": 27.375510204081632, "grad_norm": 16.002342224121094, "learning_rate": 3.63265306122449e-05, "loss": 2.6574, "step": 6707 }, { "epoch": 27.379591836734694, "grad_norm": 9.879679679870605, "learning_rate": 3.632448979591837e-05, "loss": 3.3278, "step": 6708 }, { "epoch": 27.383673469387755, "grad_norm": 17.12925910949707, "learning_rate": 3.6322448979591836e-05, "loss": 2.5115, "step": 6709 }, { "epoch": 27.387755102040817, "grad_norm": 26.570484161376953, "learning_rate": 3.632040816326531e-05, "loss": 2.1749, "step": 6710 }, { "epoch": 27.39183673469388, "grad_norm": 10.172038078308105, "learning_rate": 3.631836734693878e-05, "loss": 3.176, "step": 6711 }, { "epoch": 27.39591836734694, "grad_norm": 9.794950485229492, "learning_rate": 3.631632653061225e-05, "loss": 3.1946, "step": 6712 }, { "epoch": 27.4, "grad_norm": 6.935622692108154, "learning_rate": 3.631428571428572e-05, "loss": 3.5603, "step": 6713 }, { "epoch": 27.40408163265306, "grad_norm": 15.825125694274902, "learning_rate": 3.6312244897959185e-05, "loss": 2.8031, "step": 6714 }, { "epoch": 27.408163265306122, "grad_norm": 7.54478645324707, "learning_rate": 3.631020408163265e-05, "loss": 3.4089, "step": 6715 }, { "epoch": 27.412244897959184, "grad_norm": 11.779816627502441, "learning_rate": 3.630816326530612e-05, "loss": 2.8417, "step": 6716 }, { "epoch": 27.416326530612245, "grad_norm": 13.305187225341797, "learning_rate": 3.630612244897959e-05, "loss": 3.1547, "step": 6717 }, { "epoch": 27.420408163265307, "grad_norm": 21.962955474853516, "learning_rate": 3.630408163265307e-05, "loss": 2.0003, "step": 6718 }, { "epoch": 27.42448979591837, "grad_norm": 25.381736755371094, "learning_rate": 3.6302040816326534e-05, "loss": 2.5207, "step": 6719 }, { "epoch": 27.428571428571427, "grad_norm": 14.708215713500977, "learning_rate": 3.63e-05, "loss": 2.7712, "step": 6720 }, { "epoch": 27.43265306122449, "grad_norm": 14.691654205322266, "learning_rate": 3.629795918367347e-05, "loss": 2.8915, "step": 6721 }, { "epoch": 27.43673469387755, "grad_norm": 10.05392837524414, "learning_rate": 3.629591836734694e-05, "loss": 2.8887, "step": 6722 }, { "epoch": 27.440816326530612, "grad_norm": 22.079599380493164, "learning_rate": 3.629387755102041e-05, "loss": 2.2617, "step": 6723 }, { "epoch": 27.444897959183674, "grad_norm": 21.865449905395508, "learning_rate": 3.629183673469388e-05, "loss": 2.3427, "step": 6724 }, { "epoch": 27.448979591836736, "grad_norm": 19.240571975708008, "learning_rate": 3.628979591836735e-05, "loss": 2.2363, "step": 6725 }, { "epoch": 27.453061224489797, "grad_norm": 9.720376014709473, "learning_rate": 3.628775510204082e-05, "loss": 3.3049, "step": 6726 }, { "epoch": 27.457142857142856, "grad_norm": 16.183387756347656, "learning_rate": 3.628571428571429e-05, "loss": 2.7634, "step": 6727 }, { "epoch": 27.461224489795917, "grad_norm": 22.17436981201172, "learning_rate": 3.628367346938776e-05, "loss": 2.5022, "step": 6728 }, { "epoch": 27.46530612244898, "grad_norm": 8.738126754760742, "learning_rate": 3.6281632653061226e-05, "loss": 3.2948, "step": 6729 }, { "epoch": 27.46938775510204, "grad_norm": 16.191612243652344, "learning_rate": 3.627959183673469e-05, "loss": 2.906, "step": 6730 }, { "epoch": 27.473469387755102, "grad_norm": 13.669597625732422, "learning_rate": 3.627755102040816e-05, "loss": 3.1046, "step": 6731 }, { "epoch": 27.477551020408164, "grad_norm": 35.41500473022461, "learning_rate": 3.6275510204081634e-05, "loss": 1.6047, "step": 6732 }, { "epoch": 27.481632653061226, "grad_norm": 14.286721229553223, "learning_rate": 3.627346938775511e-05, "loss": 2.77, "step": 6733 }, { "epoch": 27.485714285714284, "grad_norm": 11.467170715332031, "learning_rate": 3.6271428571428575e-05, "loss": 3.4223, "step": 6734 }, { "epoch": 27.489795918367346, "grad_norm": 19.411022186279297, "learning_rate": 3.626938775510204e-05, "loss": 2.631, "step": 6735 }, { "epoch": 27.493877551020407, "grad_norm": 12.165342330932617, "learning_rate": 3.626734693877551e-05, "loss": 3.1919, "step": 6736 }, { "epoch": 27.49795918367347, "grad_norm": 19.012451171875, "learning_rate": 3.626530612244898e-05, "loss": 2.7474, "step": 6737 }, { "epoch": 27.50204081632653, "grad_norm": 11.925329208374023, "learning_rate": 3.626326530612245e-05, "loss": 3.1589, "step": 6738 }, { "epoch": 27.506122448979593, "grad_norm": 6.31057596206665, "learning_rate": 3.626122448979592e-05, "loss": 3.4958, "step": 6739 }, { "epoch": 27.510204081632654, "grad_norm": 23.1285343170166, "learning_rate": 3.6259183673469385e-05, "loss": 2.5508, "step": 6740 }, { "epoch": 27.514285714285712, "grad_norm": 8.803691864013672, "learning_rate": 3.625714285714286e-05, "loss": 3.2027, "step": 6741 }, { "epoch": 27.518367346938774, "grad_norm": 12.556771278381348, "learning_rate": 3.625510204081633e-05, "loss": 3.0913, "step": 6742 }, { "epoch": 27.522448979591836, "grad_norm": 8.510496139526367, "learning_rate": 3.62530612244898e-05, "loss": 3.2404, "step": 6743 }, { "epoch": 27.526530612244898, "grad_norm": 14.227799415588379, "learning_rate": 3.625102040816327e-05, "loss": 2.8473, "step": 6744 }, { "epoch": 27.53061224489796, "grad_norm": 13.40361213684082, "learning_rate": 3.6248979591836734e-05, "loss": 2.3878, "step": 6745 }, { "epoch": 27.53469387755102, "grad_norm": 15.204680442810059, "learning_rate": 3.624693877551021e-05, "loss": 2.649, "step": 6746 }, { "epoch": 27.538775510204083, "grad_norm": 13.899070739746094, "learning_rate": 3.6244897959183675e-05, "loss": 2.7761, "step": 6747 }, { "epoch": 27.542857142857144, "grad_norm": 22.72048568725586, "learning_rate": 3.624285714285714e-05, "loss": 2.101, "step": 6748 }, { "epoch": 27.546938775510203, "grad_norm": 13.096957206726074, "learning_rate": 3.6240816326530616e-05, "loss": 3.0175, "step": 6749 }, { "epoch": 27.551020408163264, "grad_norm": 25.686649322509766, "learning_rate": 3.623877551020408e-05, "loss": 2.6571, "step": 6750 }, { "epoch": 27.555102040816326, "grad_norm": 7.541016578674316, "learning_rate": 3.623673469387756e-05, "loss": 3.4882, "step": 6751 }, { "epoch": 27.559183673469388, "grad_norm": 14.439800262451172, "learning_rate": 3.6234693877551024e-05, "loss": 2.9254, "step": 6752 }, { "epoch": 27.56326530612245, "grad_norm": 12.205012321472168, "learning_rate": 3.623265306122449e-05, "loss": 3.1119, "step": 6753 }, { "epoch": 27.56734693877551, "grad_norm": 6.409095764160156, "learning_rate": 3.623061224489796e-05, "loss": 3.7356, "step": 6754 }, { "epoch": 27.571428571428573, "grad_norm": 15.895907402038574, "learning_rate": 3.6228571428571425e-05, "loss": 2.0541, "step": 6755 }, { "epoch": 27.57551020408163, "grad_norm": 11.10976505279541, "learning_rate": 3.62265306122449e-05, "loss": 2.9637, "step": 6756 }, { "epoch": 27.579591836734693, "grad_norm": 28.948707580566406, "learning_rate": 3.622448979591837e-05, "loss": 1.5842, "step": 6757 }, { "epoch": 27.583673469387755, "grad_norm": 17.161375045776367, "learning_rate": 3.622244897959184e-05, "loss": 2.1245, "step": 6758 }, { "epoch": 27.587755102040816, "grad_norm": 7.7601518630981445, "learning_rate": 3.622040816326531e-05, "loss": 3.4233, "step": 6759 }, { "epoch": 27.591836734693878, "grad_norm": 16.10648536682129, "learning_rate": 3.6218367346938775e-05, "loss": 2.2597, "step": 6760 }, { "epoch": 27.59591836734694, "grad_norm": 10.838483810424805, "learning_rate": 3.621632653061225e-05, "loss": 3.162, "step": 6761 }, { "epoch": 27.6, "grad_norm": 9.91096305847168, "learning_rate": 3.6214285714285716e-05, "loss": 2.9779, "step": 6762 }, { "epoch": 27.60408163265306, "grad_norm": 20.608186721801758, "learning_rate": 3.621224489795918e-05, "loss": 2.317, "step": 6763 }, { "epoch": 27.60816326530612, "grad_norm": 26.399383544921875, "learning_rate": 3.6210204081632657e-05, "loss": 1.5382, "step": 6764 }, { "epoch": 27.612244897959183, "grad_norm": 11.153356552124023, "learning_rate": 3.6208163265306124e-05, "loss": 3.2461, "step": 6765 }, { "epoch": 27.616326530612245, "grad_norm": 8.366776466369629, "learning_rate": 3.62061224489796e-05, "loss": 3.6014, "step": 6766 }, { "epoch": 27.620408163265306, "grad_norm": 9.244100570678711, "learning_rate": 3.6204081632653065e-05, "loss": 3.3058, "step": 6767 }, { "epoch": 27.624489795918368, "grad_norm": 19.9239559173584, "learning_rate": 3.620204081632653e-05, "loss": 2.5027, "step": 6768 }, { "epoch": 27.62857142857143, "grad_norm": 12.471962928771973, "learning_rate": 3.62e-05, "loss": 2.9223, "step": 6769 }, { "epoch": 27.632653061224488, "grad_norm": 12.743624687194824, "learning_rate": 3.6197959183673466e-05, "loss": 2.9867, "step": 6770 }, { "epoch": 27.63673469387755, "grad_norm": 37.620906829833984, "learning_rate": 3.619591836734694e-05, "loss": 1.172, "step": 6771 }, { "epoch": 27.64081632653061, "grad_norm": 17.79973030090332, "learning_rate": 3.6193877551020414e-05, "loss": 2.7953, "step": 6772 }, { "epoch": 27.644897959183673, "grad_norm": 33.157466888427734, "learning_rate": 3.619183673469388e-05, "loss": 1.3148, "step": 6773 }, { "epoch": 27.648979591836735, "grad_norm": 11.922975540161133, "learning_rate": 3.618979591836735e-05, "loss": 3.1055, "step": 6774 }, { "epoch": 27.653061224489797, "grad_norm": 13.310813903808594, "learning_rate": 3.6187755102040815e-05, "loss": 3.1816, "step": 6775 }, { "epoch": 27.65714285714286, "grad_norm": 19.126197814941406, "learning_rate": 3.618571428571429e-05, "loss": 2.0561, "step": 6776 }, { "epoch": 27.66122448979592, "grad_norm": 16.07061004638672, "learning_rate": 3.6183673469387756e-05, "loss": 2.7173, "step": 6777 }, { "epoch": 27.66530612244898, "grad_norm": 13.973984718322754, "learning_rate": 3.6181632653061223e-05, "loss": 3.2593, "step": 6778 }, { "epoch": 27.66938775510204, "grad_norm": 16.891704559326172, "learning_rate": 3.61795918367347e-05, "loss": 2.9681, "step": 6779 }, { "epoch": 27.6734693877551, "grad_norm": 18.445980072021484, "learning_rate": 3.6177551020408164e-05, "loss": 2.2346, "step": 6780 }, { "epoch": 27.677551020408163, "grad_norm": 18.526283264160156, "learning_rate": 3.617551020408164e-05, "loss": 2.8904, "step": 6781 }, { "epoch": 27.681632653061225, "grad_norm": 9.782716751098633, "learning_rate": 3.6173469387755106e-05, "loss": 3.2386, "step": 6782 }, { "epoch": 27.685714285714287, "grad_norm": 23.334457397460938, "learning_rate": 3.617142857142857e-05, "loss": 1.6531, "step": 6783 }, { "epoch": 27.68979591836735, "grad_norm": 21.594579696655273, "learning_rate": 3.616938775510204e-05, "loss": 2.8932, "step": 6784 }, { "epoch": 27.693877551020407, "grad_norm": 12.756325721740723, "learning_rate": 3.616734693877551e-05, "loss": 2.8563, "step": 6785 }, { "epoch": 27.69795918367347, "grad_norm": 16.07202911376953, "learning_rate": 3.616530612244898e-05, "loss": 2.3499, "step": 6786 }, { "epoch": 27.70204081632653, "grad_norm": 12.423789978027344, "learning_rate": 3.6163265306122455e-05, "loss": 3.3606, "step": 6787 }, { "epoch": 27.706122448979592, "grad_norm": 8.803083419799805, "learning_rate": 3.616122448979592e-05, "loss": 3.5695, "step": 6788 }, { "epoch": 27.710204081632654, "grad_norm": 15.714926719665527, "learning_rate": 3.615918367346939e-05, "loss": 2.2672, "step": 6789 }, { "epoch": 27.714285714285715, "grad_norm": 13.484478950500488, "learning_rate": 3.615714285714286e-05, "loss": 2.8156, "step": 6790 }, { "epoch": 27.718367346938777, "grad_norm": 9.150823593139648, "learning_rate": 3.615510204081633e-05, "loss": 3.1526, "step": 6791 }, { "epoch": 27.722448979591835, "grad_norm": 25.35237693786621, "learning_rate": 3.61530612244898e-05, "loss": 2.2746, "step": 6792 }, { "epoch": 27.726530612244897, "grad_norm": 20.745058059692383, "learning_rate": 3.6151020408163264e-05, "loss": 2.1523, "step": 6793 }, { "epoch": 27.73061224489796, "grad_norm": 24.47043800354004, "learning_rate": 3.614897959183673e-05, "loss": 1.0561, "step": 6794 }, { "epoch": 27.73469387755102, "grad_norm": 7.166553974151611, "learning_rate": 3.6146938775510205e-05, "loss": 3.5003, "step": 6795 }, { "epoch": 27.738775510204082, "grad_norm": 8.42857551574707, "learning_rate": 3.614489795918368e-05, "loss": 3.4088, "step": 6796 }, { "epoch": 27.742857142857144, "grad_norm": 17.665069580078125, "learning_rate": 3.6142857142857146e-05, "loss": 2.1771, "step": 6797 }, { "epoch": 27.746938775510205, "grad_norm": 11.811633110046387, "learning_rate": 3.6140816326530613e-05, "loss": 2.9287, "step": 6798 }, { "epoch": 27.751020408163264, "grad_norm": 12.686395645141602, "learning_rate": 3.613877551020408e-05, "loss": 3.049, "step": 6799 }, { "epoch": 27.755102040816325, "grad_norm": 29.684385299682617, "learning_rate": 3.6136734693877554e-05, "loss": 1.9243, "step": 6800 }, { "epoch": 27.759183673469387, "grad_norm": 21.41620445251465, "learning_rate": 3.613469387755102e-05, "loss": 2.3143, "step": 6801 }, { "epoch": 27.76326530612245, "grad_norm": 22.14042091369629, "learning_rate": 3.613265306122449e-05, "loss": 2.2495, "step": 6802 }, { "epoch": 27.76734693877551, "grad_norm": 16.110431671142578, "learning_rate": 3.613061224489796e-05, "loss": 2.8971, "step": 6803 }, { "epoch": 27.771428571428572, "grad_norm": 9.949997901916504, "learning_rate": 3.612857142857143e-05, "loss": 3.1532, "step": 6804 }, { "epoch": 27.775510204081634, "grad_norm": 8.31371021270752, "learning_rate": 3.6126530612244904e-05, "loss": 3.391, "step": 6805 }, { "epoch": 27.779591836734696, "grad_norm": 23.476842880249023, "learning_rate": 3.612448979591837e-05, "loss": 1.6796, "step": 6806 }, { "epoch": 27.783673469387754, "grad_norm": 9.856648445129395, "learning_rate": 3.612244897959184e-05, "loss": 3.2273, "step": 6807 }, { "epoch": 27.787755102040816, "grad_norm": 28.72325897216797, "learning_rate": 3.6120408163265305e-05, "loss": 0.4513, "step": 6808 }, { "epoch": 27.791836734693877, "grad_norm": 12.513016700744629, "learning_rate": 3.611836734693877e-05, "loss": 3.0749, "step": 6809 }, { "epoch": 27.79591836734694, "grad_norm": 16.166259765625, "learning_rate": 3.6116326530612246e-05, "loss": 2.817, "step": 6810 }, { "epoch": 27.8, "grad_norm": 9.29129409790039, "learning_rate": 3.611428571428572e-05, "loss": 3.1127, "step": 6811 }, { "epoch": 27.804081632653062, "grad_norm": 14.081340789794922, "learning_rate": 3.611224489795919e-05, "loss": 2.8675, "step": 6812 }, { "epoch": 27.808163265306124, "grad_norm": 14.629594802856445, "learning_rate": 3.6110204081632654e-05, "loss": 3.0694, "step": 6813 }, { "epoch": 27.812244897959182, "grad_norm": 10.238236427307129, "learning_rate": 3.610816326530612e-05, "loss": 3.1752, "step": 6814 }, { "epoch": 27.816326530612244, "grad_norm": 18.258651733398438, "learning_rate": 3.6106122448979595e-05, "loss": 2.3539, "step": 6815 }, { "epoch": 27.820408163265306, "grad_norm": 10.625875473022461, "learning_rate": 3.610408163265306e-05, "loss": 3.3744, "step": 6816 }, { "epoch": 27.824489795918367, "grad_norm": 9.40074634552002, "learning_rate": 3.610204081632653e-05, "loss": 3.2272, "step": 6817 }, { "epoch": 27.82857142857143, "grad_norm": 20.34807586669922, "learning_rate": 3.61e-05, "loss": 2.5236, "step": 6818 }, { "epoch": 27.83265306122449, "grad_norm": 17.725217819213867, "learning_rate": 3.609795918367347e-05, "loss": 2.5603, "step": 6819 }, { "epoch": 27.836734693877553, "grad_norm": 22.636852264404297, "learning_rate": 3.6095918367346944e-05, "loss": 1.3593, "step": 6820 }, { "epoch": 27.84081632653061, "grad_norm": 10.400575637817383, "learning_rate": 3.609387755102041e-05, "loss": 3.3111, "step": 6821 }, { "epoch": 27.844897959183672, "grad_norm": 10.797917366027832, "learning_rate": 3.609183673469388e-05, "loss": 3.1244, "step": 6822 }, { "epoch": 27.848979591836734, "grad_norm": 18.278810501098633, "learning_rate": 3.6089795918367346e-05, "loss": 2.8478, "step": 6823 }, { "epoch": 27.853061224489796, "grad_norm": 14.303760528564453, "learning_rate": 3.608775510204081e-05, "loss": 2.498, "step": 6824 }, { "epoch": 27.857142857142858, "grad_norm": 9.086692810058594, "learning_rate": 3.608571428571429e-05, "loss": 3.1, "step": 6825 }, { "epoch": 27.86122448979592, "grad_norm": 16.445226669311523, "learning_rate": 3.608367346938776e-05, "loss": 2.6279, "step": 6826 }, { "epoch": 27.86530612244898, "grad_norm": 8.25795841217041, "learning_rate": 3.608163265306123e-05, "loss": 3.5338, "step": 6827 }, { "epoch": 27.86938775510204, "grad_norm": 8.174903869628906, "learning_rate": 3.6079591836734695e-05, "loss": 3.6068, "step": 6828 }, { "epoch": 27.8734693877551, "grad_norm": 20.711572647094727, "learning_rate": 3.607755102040816e-05, "loss": 2.7588, "step": 6829 }, { "epoch": 27.877551020408163, "grad_norm": 9.198554992675781, "learning_rate": 3.6075510204081636e-05, "loss": 3.1667, "step": 6830 }, { "epoch": 27.881632653061224, "grad_norm": 14.790363311767578, "learning_rate": 3.60734693877551e-05, "loss": 2.6713, "step": 6831 }, { "epoch": 27.885714285714286, "grad_norm": 17.95290756225586, "learning_rate": 3.607142857142857e-05, "loss": 2.0337, "step": 6832 }, { "epoch": 27.889795918367348, "grad_norm": 11.929414749145508, "learning_rate": 3.6069387755102044e-05, "loss": 3.3164, "step": 6833 }, { "epoch": 27.89387755102041, "grad_norm": 13.492620468139648, "learning_rate": 3.606734693877551e-05, "loss": 2.9205, "step": 6834 }, { "epoch": 27.897959183673468, "grad_norm": 20.162601470947266, "learning_rate": 3.6065306122448985e-05, "loss": 2.0558, "step": 6835 }, { "epoch": 27.90204081632653, "grad_norm": 11.550039291381836, "learning_rate": 3.606326530612245e-05, "loss": 2.9106, "step": 6836 }, { "epoch": 27.90612244897959, "grad_norm": 40.725284576416016, "learning_rate": 3.606122448979592e-05, "loss": 1.5673, "step": 6837 }, { "epoch": 27.910204081632653, "grad_norm": 16.61079216003418, "learning_rate": 3.6059183673469387e-05, "loss": 2.3069, "step": 6838 }, { "epoch": 27.914285714285715, "grad_norm": 14.359526634216309, "learning_rate": 3.605714285714286e-05, "loss": 2.7964, "step": 6839 }, { "epoch": 27.918367346938776, "grad_norm": 6.3252482414245605, "learning_rate": 3.605510204081633e-05, "loss": 3.8086, "step": 6840 }, { "epoch": 27.922448979591838, "grad_norm": 22.952823638916016, "learning_rate": 3.60530612244898e-05, "loss": 1.5026, "step": 6841 }, { "epoch": 27.926530612244896, "grad_norm": 12.416229248046875, "learning_rate": 3.605102040816327e-05, "loss": 3.0919, "step": 6842 }, { "epoch": 27.930612244897958, "grad_norm": 19.2477970123291, "learning_rate": 3.6048979591836736e-05, "loss": 2.3948, "step": 6843 }, { "epoch": 27.93469387755102, "grad_norm": 24.493579864501953, "learning_rate": 3.604693877551021e-05, "loss": 2.2214, "step": 6844 }, { "epoch": 27.93877551020408, "grad_norm": 15.207415580749512, "learning_rate": 3.604489795918368e-05, "loss": 2.5995, "step": 6845 }, { "epoch": 27.942857142857143, "grad_norm": 8.848692893981934, "learning_rate": 3.6042857142857144e-05, "loss": 3.178, "step": 6846 }, { "epoch": 27.946938775510205, "grad_norm": 24.081279754638672, "learning_rate": 3.604081632653061e-05, "loss": 2.1881, "step": 6847 }, { "epoch": 27.951020408163266, "grad_norm": 13.067093849182129, "learning_rate": 3.603877551020408e-05, "loss": 2.8632, "step": 6848 }, { "epoch": 27.955102040816328, "grad_norm": 13.710891723632812, "learning_rate": 3.603673469387755e-05, "loss": 2.9505, "step": 6849 }, { "epoch": 27.959183673469386, "grad_norm": 8.629323959350586, "learning_rate": 3.6034693877551026e-05, "loss": 3.4136, "step": 6850 }, { "epoch": 27.963265306122448, "grad_norm": 23.879615783691406, "learning_rate": 3.603265306122449e-05, "loss": 2.0693, "step": 6851 }, { "epoch": 27.96734693877551, "grad_norm": 23.81406021118164, "learning_rate": 3.603061224489796e-05, "loss": 2.0106, "step": 6852 }, { "epoch": 27.97142857142857, "grad_norm": 11.011804580688477, "learning_rate": 3.602857142857143e-05, "loss": 3.1281, "step": 6853 }, { "epoch": 27.975510204081633, "grad_norm": 13.900775909423828, "learning_rate": 3.60265306122449e-05, "loss": 2.7095, "step": 6854 }, { "epoch": 27.979591836734695, "grad_norm": 13.972675323486328, "learning_rate": 3.602448979591837e-05, "loss": 3.1663, "step": 6855 }, { "epoch": 27.983673469387757, "grad_norm": 11.666305541992188, "learning_rate": 3.6022448979591835e-05, "loss": 2.9492, "step": 6856 }, { "epoch": 27.987755102040815, "grad_norm": 39.096290588378906, "learning_rate": 3.602040816326531e-05, "loss": 1.3653, "step": 6857 }, { "epoch": 27.991836734693877, "grad_norm": 14.179606437683105, "learning_rate": 3.6018367346938776e-05, "loss": 2.8813, "step": 6858 }, { "epoch": 27.99591836734694, "grad_norm": 11.965124130249023, "learning_rate": 3.601632653061225e-05, "loss": 3.0919, "step": 6859 }, { "epoch": 28.0, "grad_norm": 15.176473617553711, "learning_rate": 3.601428571428572e-05, "loss": 2.5806, "step": 6860 }, { "epoch": 28.00408163265306, "grad_norm": 14.244414329528809, "learning_rate": 3.6012244897959185e-05, "loss": 2.5567, "step": 6861 }, { "epoch": 28.008163265306123, "grad_norm": 16.410512924194336, "learning_rate": 3.601020408163265e-05, "loss": 2.3128, "step": 6862 }, { "epoch": 28.012244897959185, "grad_norm": 12.265552520751953, "learning_rate": 3.600816326530612e-05, "loss": 2.8112, "step": 6863 }, { "epoch": 28.016326530612243, "grad_norm": 29.89031410217285, "learning_rate": 3.600612244897959e-05, "loss": 1.7875, "step": 6864 }, { "epoch": 28.020408163265305, "grad_norm": 15.462007522583008, "learning_rate": 3.600408163265307e-05, "loss": 2.6177, "step": 6865 }, { "epoch": 28.024489795918367, "grad_norm": 7.875706195831299, "learning_rate": 3.6002040816326534e-05, "loss": 3.4689, "step": 6866 }, { "epoch": 28.02857142857143, "grad_norm": 11.337075233459473, "learning_rate": 3.6e-05, "loss": 2.767, "step": 6867 }, { "epoch": 28.03265306122449, "grad_norm": 23.319568634033203, "learning_rate": 3.599795918367347e-05, "loss": 1.9002, "step": 6868 }, { "epoch": 28.036734693877552, "grad_norm": 14.349754333496094, "learning_rate": 3.599591836734694e-05, "loss": 2.4542, "step": 6869 }, { "epoch": 28.040816326530614, "grad_norm": 14.027327537536621, "learning_rate": 3.599387755102041e-05, "loss": 2.8715, "step": 6870 }, { "epoch": 28.044897959183672, "grad_norm": 8.580642700195312, "learning_rate": 3.5991836734693876e-05, "loss": 3.3981, "step": 6871 }, { "epoch": 28.048979591836734, "grad_norm": 24.4190673828125, "learning_rate": 3.598979591836735e-05, "loss": 1.4348, "step": 6872 }, { "epoch": 28.053061224489795, "grad_norm": 19.30338478088379, "learning_rate": 3.598775510204082e-05, "loss": 2.8419, "step": 6873 }, { "epoch": 28.057142857142857, "grad_norm": 9.822635650634766, "learning_rate": 3.598571428571429e-05, "loss": 3.1342, "step": 6874 }, { "epoch": 28.06122448979592, "grad_norm": 20.176170349121094, "learning_rate": 3.598367346938776e-05, "loss": 1.4635, "step": 6875 }, { "epoch": 28.06530612244898, "grad_norm": 17.382892608642578, "learning_rate": 3.5981632653061225e-05, "loss": 2.0857, "step": 6876 }, { "epoch": 28.069387755102042, "grad_norm": 48.32831573486328, "learning_rate": 3.597959183673469e-05, "loss": 1.4144, "step": 6877 }, { "epoch": 28.073469387755104, "grad_norm": 19.174951553344727, "learning_rate": 3.597755102040816e-05, "loss": 1.9893, "step": 6878 }, { "epoch": 28.077551020408162, "grad_norm": 18.113874435424805, "learning_rate": 3.5975510204081634e-05, "loss": 2.5794, "step": 6879 }, { "epoch": 28.081632653061224, "grad_norm": 24.069684982299805, "learning_rate": 3.597346938775511e-05, "loss": 2.1634, "step": 6880 }, { "epoch": 28.085714285714285, "grad_norm": 18.953901290893555, "learning_rate": 3.5971428571428575e-05, "loss": 2.7434, "step": 6881 }, { "epoch": 28.089795918367347, "grad_norm": 10.995854377746582, "learning_rate": 3.596938775510204e-05, "loss": 3.0553, "step": 6882 }, { "epoch": 28.09387755102041, "grad_norm": 14.950895309448242, "learning_rate": 3.5967346938775516e-05, "loss": 3.1434, "step": 6883 }, { "epoch": 28.09795918367347, "grad_norm": 15.829899787902832, "learning_rate": 3.596530612244898e-05, "loss": 2.6796, "step": 6884 }, { "epoch": 28.102040816326532, "grad_norm": 29.53498077392578, "learning_rate": 3.596326530612245e-05, "loss": 2.0035, "step": 6885 }, { "epoch": 28.10612244897959, "grad_norm": 6.369088649749756, "learning_rate": 3.596122448979592e-05, "loss": 3.6868, "step": 6886 }, { "epoch": 28.110204081632652, "grad_norm": 15.57094955444336, "learning_rate": 3.595918367346939e-05, "loss": 2.436, "step": 6887 }, { "epoch": 28.114285714285714, "grad_norm": 20.122770309448242, "learning_rate": 3.5957142857142865e-05, "loss": 2.2108, "step": 6888 }, { "epoch": 28.118367346938776, "grad_norm": 12.727188110351562, "learning_rate": 3.595510204081633e-05, "loss": 2.7296, "step": 6889 }, { "epoch": 28.122448979591837, "grad_norm": 8.804471969604492, "learning_rate": 3.59530612244898e-05, "loss": 3.0378, "step": 6890 }, { "epoch": 28.1265306122449, "grad_norm": 13.862204551696777, "learning_rate": 3.5951020408163266e-05, "loss": 2.5467, "step": 6891 }, { "epoch": 28.13061224489796, "grad_norm": 13.768071174621582, "learning_rate": 3.594897959183673e-05, "loss": 2.7021, "step": 6892 }, { "epoch": 28.13469387755102, "grad_norm": 11.295001029968262, "learning_rate": 3.594693877551021e-05, "loss": 3.1656, "step": 6893 }, { "epoch": 28.13877551020408, "grad_norm": 15.16443920135498, "learning_rate": 3.5944897959183674e-05, "loss": 2.5799, "step": 6894 }, { "epoch": 28.142857142857142, "grad_norm": 26.192270278930664, "learning_rate": 3.594285714285714e-05, "loss": 2.783, "step": 6895 }, { "epoch": 28.146938775510204, "grad_norm": 16.087051391601562, "learning_rate": 3.5940816326530615e-05, "loss": 2.8364, "step": 6896 }, { "epoch": 28.151020408163266, "grad_norm": 12.743819236755371, "learning_rate": 3.593877551020408e-05, "loss": 2.9992, "step": 6897 }, { "epoch": 28.155102040816328, "grad_norm": 12.873421669006348, "learning_rate": 3.5936734693877556e-05, "loss": 3.2639, "step": 6898 }, { "epoch": 28.15918367346939, "grad_norm": 11.534894943237305, "learning_rate": 3.5934693877551023e-05, "loss": 3.0301, "step": 6899 }, { "epoch": 28.163265306122447, "grad_norm": 15.537175178527832, "learning_rate": 3.593265306122449e-05, "loss": 2.708, "step": 6900 }, { "epoch": 28.16734693877551, "grad_norm": 17.637529373168945, "learning_rate": 3.593061224489796e-05, "loss": 2.3106, "step": 6901 }, { "epoch": 28.17142857142857, "grad_norm": 15.392980575561523, "learning_rate": 3.5928571428571425e-05, "loss": 2.8737, "step": 6902 }, { "epoch": 28.175510204081633, "grad_norm": 23.32959747314453, "learning_rate": 3.59265306122449e-05, "loss": 1.8742, "step": 6903 }, { "epoch": 28.179591836734694, "grad_norm": 17.504596710205078, "learning_rate": 3.592448979591837e-05, "loss": 2.034, "step": 6904 }, { "epoch": 28.183673469387756, "grad_norm": 9.36839485168457, "learning_rate": 3.592244897959184e-05, "loss": 3.3042, "step": 6905 }, { "epoch": 28.187755102040818, "grad_norm": 31.95294189453125, "learning_rate": 3.592040816326531e-05, "loss": 1.1646, "step": 6906 }, { "epoch": 28.191836734693876, "grad_norm": 14.91655158996582, "learning_rate": 3.5918367346938774e-05, "loss": 2.7767, "step": 6907 }, { "epoch": 28.195918367346938, "grad_norm": 11.666252136230469, "learning_rate": 3.591632653061225e-05, "loss": 3.0473, "step": 6908 }, { "epoch": 28.2, "grad_norm": 16.428686141967773, "learning_rate": 3.5914285714285715e-05, "loss": 2.7032, "step": 6909 }, { "epoch": 28.20408163265306, "grad_norm": 8.601991653442383, "learning_rate": 3.591224489795918e-05, "loss": 3.2341, "step": 6910 }, { "epoch": 28.208163265306123, "grad_norm": 22.960676193237305, "learning_rate": 3.5910204081632656e-05, "loss": 2.1345, "step": 6911 }, { "epoch": 28.212244897959184, "grad_norm": 17.587230682373047, "learning_rate": 3.590816326530612e-05, "loss": 2.1292, "step": 6912 }, { "epoch": 28.216326530612246, "grad_norm": 9.281326293945312, "learning_rate": 3.59061224489796e-05, "loss": 3.3356, "step": 6913 }, { "epoch": 28.220408163265308, "grad_norm": 32.39789962768555, "learning_rate": 3.5904081632653064e-05, "loss": 1.9125, "step": 6914 }, { "epoch": 28.224489795918366, "grad_norm": 18.971981048583984, "learning_rate": 3.590204081632653e-05, "loss": 2.2383, "step": 6915 }, { "epoch": 28.228571428571428, "grad_norm": 17.70920753479004, "learning_rate": 3.59e-05, "loss": 2.5085, "step": 6916 }, { "epoch": 28.23265306122449, "grad_norm": 8.359566688537598, "learning_rate": 3.5897959183673466e-05, "loss": 3.3701, "step": 6917 }, { "epoch": 28.23673469387755, "grad_norm": 21.429834365844727, "learning_rate": 3.589591836734694e-05, "loss": 2.3309, "step": 6918 }, { "epoch": 28.240816326530613, "grad_norm": 9.722735404968262, "learning_rate": 3.5893877551020413e-05, "loss": 3.3432, "step": 6919 }, { "epoch": 28.244897959183675, "grad_norm": 12.579320907592773, "learning_rate": 3.589183673469388e-05, "loss": 2.9885, "step": 6920 }, { "epoch": 28.248979591836736, "grad_norm": 13.162979125976562, "learning_rate": 3.588979591836735e-05, "loss": 3.0612, "step": 6921 }, { "epoch": 28.253061224489795, "grad_norm": 14.73714828491211, "learning_rate": 3.5887755102040815e-05, "loss": 2.6801, "step": 6922 }, { "epoch": 28.257142857142856, "grad_norm": 12.174487113952637, "learning_rate": 3.588571428571429e-05, "loss": 2.9372, "step": 6923 }, { "epoch": 28.261224489795918, "grad_norm": 12.716789245605469, "learning_rate": 3.5883673469387756e-05, "loss": 3.0242, "step": 6924 }, { "epoch": 28.26530612244898, "grad_norm": 29.581144332885742, "learning_rate": 3.588163265306122e-05, "loss": 1.4849, "step": 6925 }, { "epoch": 28.26938775510204, "grad_norm": 25.546552658081055, "learning_rate": 3.58795918367347e-05, "loss": 2.4482, "step": 6926 }, { "epoch": 28.273469387755103, "grad_norm": 15.221335411071777, "learning_rate": 3.587755102040817e-05, "loss": 3.19, "step": 6927 }, { "epoch": 28.277551020408165, "grad_norm": 23.057680130004883, "learning_rate": 3.587551020408164e-05, "loss": 1.7539, "step": 6928 }, { "epoch": 28.281632653061223, "grad_norm": 15.868719100952148, "learning_rate": 3.5873469387755105e-05, "loss": 3.0278, "step": 6929 }, { "epoch": 28.285714285714285, "grad_norm": 48.82826614379883, "learning_rate": 3.587142857142857e-05, "loss": 0.4863, "step": 6930 }, { "epoch": 28.289795918367346, "grad_norm": 11.706534385681152, "learning_rate": 3.586938775510204e-05, "loss": 3.2392, "step": 6931 }, { "epoch": 28.293877551020408, "grad_norm": 25.36849594116211, "learning_rate": 3.586734693877551e-05, "loss": 2.1251, "step": 6932 }, { "epoch": 28.29795918367347, "grad_norm": 11.041215896606445, "learning_rate": 3.586530612244898e-05, "loss": 2.9947, "step": 6933 }, { "epoch": 28.30204081632653, "grad_norm": 12.30406379699707, "learning_rate": 3.5863265306122454e-05, "loss": 3.2865, "step": 6934 }, { "epoch": 28.306122448979593, "grad_norm": 13.823579788208008, "learning_rate": 3.586122448979592e-05, "loss": 3.022, "step": 6935 }, { "epoch": 28.31020408163265, "grad_norm": 8.86953067779541, "learning_rate": 3.585918367346939e-05, "loss": 3.5345, "step": 6936 }, { "epoch": 28.314285714285713, "grad_norm": 13.533345222473145, "learning_rate": 3.585714285714286e-05, "loss": 2.9193, "step": 6937 }, { "epoch": 28.318367346938775, "grad_norm": 33.80491256713867, "learning_rate": 3.585510204081633e-05, "loss": 1.2657, "step": 6938 }, { "epoch": 28.322448979591837, "grad_norm": 10.741185188293457, "learning_rate": 3.5853061224489797e-05, "loss": 2.9491, "step": 6939 }, { "epoch": 28.3265306122449, "grad_norm": 29.95094871520996, "learning_rate": 3.5851020408163264e-05, "loss": 2.8687, "step": 6940 }, { "epoch": 28.33061224489796, "grad_norm": 9.251710891723633, "learning_rate": 3.584897959183674e-05, "loss": 3.0221, "step": 6941 }, { "epoch": 28.33469387755102, "grad_norm": 9.429303169250488, "learning_rate": 3.584693877551021e-05, "loss": 3.1385, "step": 6942 }, { "epoch": 28.33877551020408, "grad_norm": 12.406635284423828, "learning_rate": 3.584489795918368e-05, "loss": 2.6933, "step": 6943 }, { "epoch": 28.34285714285714, "grad_norm": 20.341129302978516, "learning_rate": 3.5842857142857146e-05, "loss": 2.597, "step": 6944 }, { "epoch": 28.346938775510203, "grad_norm": 19.18962860107422, "learning_rate": 3.584081632653061e-05, "loss": 1.9908, "step": 6945 }, { "epoch": 28.351020408163265, "grad_norm": 21.113086700439453, "learning_rate": 3.583877551020408e-05, "loss": 2.6966, "step": 6946 }, { "epoch": 28.355102040816327, "grad_norm": 16.022613525390625, "learning_rate": 3.5836734693877554e-05, "loss": 2.9442, "step": 6947 }, { "epoch": 28.35918367346939, "grad_norm": 28.002429962158203, "learning_rate": 3.583469387755102e-05, "loss": 1.966, "step": 6948 }, { "epoch": 28.36326530612245, "grad_norm": 9.493881225585938, "learning_rate": 3.583265306122449e-05, "loss": 3.0785, "step": 6949 }, { "epoch": 28.367346938775512, "grad_norm": 14.853446006774902, "learning_rate": 3.583061224489796e-05, "loss": 2.9042, "step": 6950 }, { "epoch": 28.37142857142857, "grad_norm": 9.890556335449219, "learning_rate": 3.582857142857143e-05, "loss": 3.0199, "step": 6951 }, { "epoch": 28.375510204081632, "grad_norm": 11.164584159851074, "learning_rate": 3.58265306122449e-05, "loss": 2.9541, "step": 6952 }, { "epoch": 28.379591836734694, "grad_norm": 13.302169799804688, "learning_rate": 3.582448979591837e-05, "loss": 3.0049, "step": 6953 }, { "epoch": 28.383673469387755, "grad_norm": 20.247783660888672, "learning_rate": 3.582244897959184e-05, "loss": 2.4168, "step": 6954 }, { "epoch": 28.387755102040817, "grad_norm": 12.79714298248291, "learning_rate": 3.5820408163265304e-05, "loss": 2.9345, "step": 6955 }, { "epoch": 28.39183673469388, "grad_norm": 32.8559455871582, "learning_rate": 3.581836734693877e-05, "loss": 1.0169, "step": 6956 }, { "epoch": 28.39591836734694, "grad_norm": 9.376317024230957, "learning_rate": 3.5816326530612245e-05, "loss": 3.5021, "step": 6957 }, { "epoch": 28.4, "grad_norm": 12.139780044555664, "learning_rate": 3.581428571428572e-05, "loss": 3.0266, "step": 6958 }, { "epoch": 28.40408163265306, "grad_norm": 21.27724838256836, "learning_rate": 3.5812244897959187e-05, "loss": 2.3757, "step": 6959 }, { "epoch": 28.408163265306122, "grad_norm": 15.401588439941406, "learning_rate": 3.5810204081632654e-05, "loss": 2.6967, "step": 6960 }, { "epoch": 28.412244897959184, "grad_norm": 16.617063522338867, "learning_rate": 3.580816326530612e-05, "loss": 2.9683, "step": 6961 }, { "epoch": 28.416326530612245, "grad_norm": 9.957005500793457, "learning_rate": 3.5806122448979595e-05, "loss": 3.2606, "step": 6962 }, { "epoch": 28.420408163265307, "grad_norm": 19.926389694213867, "learning_rate": 3.580408163265306e-05, "loss": 2.2563, "step": 6963 }, { "epoch": 28.42448979591837, "grad_norm": 29.349220275878906, "learning_rate": 3.580204081632653e-05, "loss": 2.2001, "step": 6964 }, { "epoch": 28.428571428571427, "grad_norm": 19.05047607421875, "learning_rate": 3.58e-05, "loss": 2.0724, "step": 6965 }, { "epoch": 28.43265306122449, "grad_norm": 17.186443328857422, "learning_rate": 3.579795918367347e-05, "loss": 2.7722, "step": 6966 }, { "epoch": 28.43673469387755, "grad_norm": 20.314800262451172, "learning_rate": 3.5795918367346944e-05, "loss": 2.1884, "step": 6967 }, { "epoch": 28.440816326530612, "grad_norm": 13.999526977539062, "learning_rate": 3.579387755102041e-05, "loss": 2.9974, "step": 6968 }, { "epoch": 28.444897959183674, "grad_norm": 15.491443634033203, "learning_rate": 3.579183673469388e-05, "loss": 2.7597, "step": 6969 }, { "epoch": 28.448979591836736, "grad_norm": 14.596220016479492, "learning_rate": 3.5789795918367345e-05, "loss": 2.6751, "step": 6970 }, { "epoch": 28.453061224489797, "grad_norm": 18.173492431640625, "learning_rate": 3.578775510204081e-05, "loss": 2.5146, "step": 6971 }, { "epoch": 28.457142857142856, "grad_norm": 27.64484214782715, "learning_rate": 3.5785714285714286e-05, "loss": 2.0971, "step": 6972 }, { "epoch": 28.461224489795917, "grad_norm": 15.560739517211914, "learning_rate": 3.578367346938776e-05, "loss": 2.7987, "step": 6973 }, { "epoch": 28.46530612244898, "grad_norm": 20.892662048339844, "learning_rate": 3.578163265306123e-05, "loss": 2.0522, "step": 6974 }, { "epoch": 28.46938775510204, "grad_norm": 15.082688331604004, "learning_rate": 3.5779591836734694e-05, "loss": 2.2601, "step": 6975 }, { "epoch": 28.473469387755102, "grad_norm": 16.26776123046875, "learning_rate": 3.577755102040817e-05, "loss": 2.4532, "step": 6976 }, { "epoch": 28.477551020408164, "grad_norm": 9.891846656799316, "learning_rate": 3.5775510204081635e-05, "loss": 3.2569, "step": 6977 }, { "epoch": 28.481632653061226, "grad_norm": 10.284820556640625, "learning_rate": 3.57734693877551e-05, "loss": 3.1872, "step": 6978 }, { "epoch": 28.485714285714284, "grad_norm": 29.892969131469727, "learning_rate": 3.577142857142857e-05, "loss": 1.5506, "step": 6979 }, { "epoch": 28.489795918367346, "grad_norm": 9.426667213439941, "learning_rate": 3.5769387755102044e-05, "loss": 3.1577, "step": 6980 }, { "epoch": 28.493877551020407, "grad_norm": 8.02852725982666, "learning_rate": 3.576734693877552e-05, "loss": 3.444, "step": 6981 }, { "epoch": 28.49795918367347, "grad_norm": 12.294534683227539, "learning_rate": 3.5765306122448985e-05, "loss": 2.6645, "step": 6982 }, { "epoch": 28.50204081632653, "grad_norm": 12.915312767028809, "learning_rate": 3.576326530612245e-05, "loss": 2.7762, "step": 6983 }, { "epoch": 28.506122448979593, "grad_norm": 19.199892044067383, "learning_rate": 3.576122448979592e-05, "loss": 2.2054, "step": 6984 }, { "epoch": 28.510204081632654, "grad_norm": 19.94198226928711, "learning_rate": 3.5759183673469386e-05, "loss": 2.8764, "step": 6985 }, { "epoch": 28.514285714285712, "grad_norm": 10.37708854675293, "learning_rate": 3.575714285714286e-05, "loss": 3.0785, "step": 6986 }, { "epoch": 28.518367346938774, "grad_norm": 24.916229248046875, "learning_rate": 3.575510204081633e-05, "loss": 1.8804, "step": 6987 }, { "epoch": 28.522448979591836, "grad_norm": 8.474668502807617, "learning_rate": 3.57530612244898e-05, "loss": 3.4776, "step": 6988 }, { "epoch": 28.526530612244898, "grad_norm": 24.463848114013672, "learning_rate": 3.575102040816327e-05, "loss": 1.3053, "step": 6989 }, { "epoch": 28.53061224489796, "grad_norm": 21.17654800415039, "learning_rate": 3.5748979591836735e-05, "loss": 2.5492, "step": 6990 }, { "epoch": 28.53469387755102, "grad_norm": 13.471123695373535, "learning_rate": 3.574693877551021e-05, "loss": 2.8593, "step": 6991 }, { "epoch": 28.538775510204083, "grad_norm": 22.66640281677246, "learning_rate": 3.5744897959183676e-05, "loss": 2.45, "step": 6992 }, { "epoch": 28.542857142857144, "grad_norm": 25.73676300048828, "learning_rate": 3.574285714285714e-05, "loss": 1.9961, "step": 6993 }, { "epoch": 28.546938775510203, "grad_norm": 10.936002731323242, "learning_rate": 3.574081632653061e-05, "loss": 3.1342, "step": 6994 }, { "epoch": 28.551020408163264, "grad_norm": 12.849732398986816, "learning_rate": 3.5738775510204084e-05, "loss": 3.0156, "step": 6995 }, { "epoch": 28.555102040816326, "grad_norm": 15.556280136108398, "learning_rate": 3.573673469387756e-05, "loss": 2.8175, "step": 6996 }, { "epoch": 28.559183673469388, "grad_norm": 20.95358657836914, "learning_rate": 3.5734693877551025e-05, "loss": 1.3549, "step": 6997 }, { "epoch": 28.56326530612245, "grad_norm": 23.299331665039062, "learning_rate": 3.573265306122449e-05, "loss": 2.0057, "step": 6998 }, { "epoch": 28.56734693877551, "grad_norm": 11.619946479797363, "learning_rate": 3.573061224489796e-05, "loss": 3.1113, "step": 6999 }, { "epoch": 28.571428571428573, "grad_norm": 11.805465698242188, "learning_rate": 3.572857142857143e-05, "loss": 3.0531, "step": 7000 }, { "epoch": 28.57551020408163, "grad_norm": 14.191543579101562, "learning_rate": 3.57265306122449e-05, "loss": 2.9702, "step": 7001 }, { "epoch": 28.579591836734693, "grad_norm": 7.693013668060303, "learning_rate": 3.572448979591837e-05, "loss": 3.3514, "step": 7002 }, { "epoch": 28.583673469387755, "grad_norm": 7.234233856201172, "learning_rate": 3.5722448979591835e-05, "loss": 3.5373, "step": 7003 }, { "epoch": 28.587755102040816, "grad_norm": 7.696149826049805, "learning_rate": 3.572040816326531e-05, "loss": 3.3689, "step": 7004 }, { "epoch": 28.591836734693878, "grad_norm": 9.07200813293457, "learning_rate": 3.5718367346938776e-05, "loss": 3.1362, "step": 7005 }, { "epoch": 28.59591836734694, "grad_norm": 22.672590255737305, "learning_rate": 3.571632653061225e-05, "loss": 2.0808, "step": 7006 }, { "epoch": 28.6, "grad_norm": 22.735565185546875, "learning_rate": 3.571428571428572e-05, "loss": 2.4155, "step": 7007 }, { "epoch": 28.60408163265306, "grad_norm": 26.2374267578125, "learning_rate": 3.5712244897959184e-05, "loss": 2.0958, "step": 7008 }, { "epoch": 28.60816326530612, "grad_norm": 9.191357612609863, "learning_rate": 3.571020408163265e-05, "loss": 3.2038, "step": 7009 }, { "epoch": 28.612244897959183, "grad_norm": 14.643318176269531, "learning_rate": 3.570816326530612e-05, "loss": 2.8399, "step": 7010 }, { "epoch": 28.616326530612245, "grad_norm": 15.402362823486328, "learning_rate": 3.570612244897959e-05, "loss": 2.8739, "step": 7011 }, { "epoch": 28.620408163265306, "grad_norm": 8.126522064208984, "learning_rate": 3.5704081632653066e-05, "loss": 3.017, "step": 7012 }, { "epoch": 28.624489795918368, "grad_norm": 18.767610549926758, "learning_rate": 3.570204081632653e-05, "loss": 1.8853, "step": 7013 }, { "epoch": 28.62857142857143, "grad_norm": 25.28367042541504, "learning_rate": 3.57e-05, "loss": 2.548, "step": 7014 }, { "epoch": 28.632653061224488, "grad_norm": 34.431053161621094, "learning_rate": 3.569795918367347e-05, "loss": 2.1389, "step": 7015 }, { "epoch": 28.63673469387755, "grad_norm": 18.90338134765625, "learning_rate": 3.569591836734694e-05, "loss": 2.8613, "step": 7016 }, { "epoch": 28.64081632653061, "grad_norm": 16.87870216369629, "learning_rate": 3.569387755102041e-05, "loss": 2.6529, "step": 7017 }, { "epoch": 28.644897959183673, "grad_norm": 19.55954360961914, "learning_rate": 3.5691836734693876e-05, "loss": 2.3599, "step": 7018 }, { "epoch": 28.648979591836735, "grad_norm": 9.128524780273438, "learning_rate": 3.568979591836735e-05, "loss": 3.1938, "step": 7019 }, { "epoch": 28.653061224489797, "grad_norm": 10.794805526733398, "learning_rate": 3.5687755102040823e-05, "loss": 3.2649, "step": 7020 }, { "epoch": 28.65714285714286, "grad_norm": 19.268476486206055, "learning_rate": 3.568571428571429e-05, "loss": 2.5711, "step": 7021 }, { "epoch": 28.66122448979592, "grad_norm": 10.55186939239502, "learning_rate": 3.568367346938776e-05, "loss": 2.9431, "step": 7022 }, { "epoch": 28.66530612244898, "grad_norm": 14.975886344909668, "learning_rate": 3.5681632653061225e-05, "loss": 2.8327, "step": 7023 }, { "epoch": 28.66938775510204, "grad_norm": 16.55685043334961, "learning_rate": 3.567959183673469e-05, "loss": 2.1912, "step": 7024 }, { "epoch": 28.6734693877551, "grad_norm": 12.933247566223145, "learning_rate": 3.5677551020408166e-05, "loss": 3.097, "step": 7025 }, { "epoch": 28.677551020408163, "grad_norm": 23.86578941345215, "learning_rate": 3.567551020408163e-05, "loss": 1.3152, "step": 7026 }, { "epoch": 28.681632653061225, "grad_norm": 10.509674072265625, "learning_rate": 3.567346938775511e-05, "loss": 3.1519, "step": 7027 }, { "epoch": 28.685714285714287, "grad_norm": 26.38502311706543, "learning_rate": 3.5671428571428574e-05, "loss": 2.2475, "step": 7028 }, { "epoch": 28.68979591836735, "grad_norm": 12.170774459838867, "learning_rate": 3.566938775510204e-05, "loss": 3.4265, "step": 7029 }, { "epoch": 28.693877551020407, "grad_norm": 11.413511276245117, "learning_rate": 3.5667346938775515e-05, "loss": 3.1302, "step": 7030 }, { "epoch": 28.69795918367347, "grad_norm": 22.2534236907959, "learning_rate": 3.566530612244898e-05, "loss": 2.2851, "step": 7031 }, { "epoch": 28.70204081632653, "grad_norm": 21.01336669921875, "learning_rate": 3.566326530612245e-05, "loss": 2.6878, "step": 7032 }, { "epoch": 28.706122448979592, "grad_norm": 16.08587074279785, "learning_rate": 3.5661224489795916e-05, "loss": 2.454, "step": 7033 }, { "epoch": 28.710204081632654, "grad_norm": 17.997264862060547, "learning_rate": 3.565918367346939e-05, "loss": 2.7842, "step": 7034 }, { "epoch": 28.714285714285715, "grad_norm": 12.156133651733398, "learning_rate": 3.5657142857142864e-05, "loss": 2.8748, "step": 7035 }, { "epoch": 28.718367346938777, "grad_norm": 8.219942092895508, "learning_rate": 3.565510204081633e-05, "loss": 3.6623, "step": 7036 }, { "epoch": 28.722448979591835, "grad_norm": 21.098417282104492, "learning_rate": 3.56530612244898e-05, "loss": 2.4871, "step": 7037 }, { "epoch": 28.726530612244897, "grad_norm": 14.988370895385742, "learning_rate": 3.5651020408163266e-05, "loss": 2.9152, "step": 7038 }, { "epoch": 28.73061224489796, "grad_norm": 17.780359268188477, "learning_rate": 3.564897959183673e-05, "loss": 2.7163, "step": 7039 }, { "epoch": 28.73469387755102, "grad_norm": 21.62946128845215, "learning_rate": 3.564693877551021e-05, "loss": 2.1903, "step": 7040 }, { "epoch": 28.738775510204082, "grad_norm": 10.620255470275879, "learning_rate": 3.5644897959183674e-05, "loss": 3.1093, "step": 7041 }, { "epoch": 28.742857142857144, "grad_norm": 7.360499382019043, "learning_rate": 3.564285714285715e-05, "loss": 3.4697, "step": 7042 }, { "epoch": 28.746938775510205, "grad_norm": 15.451969146728516, "learning_rate": 3.5640816326530615e-05, "loss": 2.5684, "step": 7043 }, { "epoch": 28.751020408163264, "grad_norm": 16.6920223236084, "learning_rate": 3.563877551020408e-05, "loss": 2.6461, "step": 7044 }, { "epoch": 28.755102040816325, "grad_norm": 11.229132652282715, "learning_rate": 3.5636734693877556e-05, "loss": 3.1896, "step": 7045 }, { "epoch": 28.759183673469387, "grad_norm": 14.288091659545898, "learning_rate": 3.563469387755102e-05, "loss": 2.7402, "step": 7046 }, { "epoch": 28.76326530612245, "grad_norm": 19.014244079589844, "learning_rate": 3.563265306122449e-05, "loss": 2.3032, "step": 7047 }, { "epoch": 28.76734693877551, "grad_norm": 16.528017044067383, "learning_rate": 3.563061224489796e-05, "loss": 2.6235, "step": 7048 }, { "epoch": 28.771428571428572, "grad_norm": 12.535595893859863, "learning_rate": 3.562857142857143e-05, "loss": 2.9729, "step": 7049 }, { "epoch": 28.775510204081634, "grad_norm": 15.056992530822754, "learning_rate": 3.5626530612244905e-05, "loss": 2.5209, "step": 7050 }, { "epoch": 28.779591836734696, "grad_norm": 6.235021591186523, "learning_rate": 3.562448979591837e-05, "loss": 3.4485, "step": 7051 }, { "epoch": 28.783673469387754, "grad_norm": 12.497944831848145, "learning_rate": 3.562244897959184e-05, "loss": 3.1892, "step": 7052 }, { "epoch": 28.787755102040816, "grad_norm": 14.900517463684082, "learning_rate": 3.5620408163265306e-05, "loss": 2.7668, "step": 7053 }, { "epoch": 28.791836734693877, "grad_norm": 24.798847198486328, "learning_rate": 3.5618367346938774e-05, "loss": 1.5338, "step": 7054 }, { "epoch": 28.79591836734694, "grad_norm": 20.952238082885742, "learning_rate": 3.561632653061225e-05, "loss": 1.9738, "step": 7055 }, { "epoch": 28.8, "grad_norm": 25.262977600097656, "learning_rate": 3.5614285714285715e-05, "loss": 1.2952, "step": 7056 }, { "epoch": 28.804081632653062, "grad_norm": 13.38943099975586, "learning_rate": 3.561224489795918e-05, "loss": 3.2029, "step": 7057 }, { "epoch": 28.808163265306124, "grad_norm": 17.815536499023438, "learning_rate": 3.5610204081632656e-05, "loss": 2.5543, "step": 7058 }, { "epoch": 28.812244897959182, "grad_norm": 10.610191345214844, "learning_rate": 3.560816326530612e-05, "loss": 3.1359, "step": 7059 }, { "epoch": 28.816326530612244, "grad_norm": 19.69878387451172, "learning_rate": 3.5606122448979597e-05, "loss": 2.8978, "step": 7060 }, { "epoch": 28.820408163265306, "grad_norm": 8.9140625, "learning_rate": 3.5604081632653064e-05, "loss": 3.4203, "step": 7061 }, { "epoch": 28.824489795918367, "grad_norm": 19.73749542236328, "learning_rate": 3.560204081632653e-05, "loss": 2.1905, "step": 7062 }, { "epoch": 28.82857142857143, "grad_norm": 11.735090255737305, "learning_rate": 3.56e-05, "loss": 3.365, "step": 7063 }, { "epoch": 28.83265306122449, "grad_norm": 9.665326118469238, "learning_rate": 3.559795918367347e-05, "loss": 3.1148, "step": 7064 }, { "epoch": 28.836734693877553, "grad_norm": 14.208961486816406, "learning_rate": 3.559591836734694e-05, "loss": 2.8012, "step": 7065 }, { "epoch": 28.84081632653061, "grad_norm": 14.217594146728516, "learning_rate": 3.559387755102041e-05, "loss": 2.8014, "step": 7066 }, { "epoch": 28.844897959183672, "grad_norm": 13.890885353088379, "learning_rate": 3.559183673469388e-05, "loss": 2.4054, "step": 7067 }, { "epoch": 28.848979591836734, "grad_norm": 20.44586753845215, "learning_rate": 3.558979591836735e-05, "loss": 2.7388, "step": 7068 }, { "epoch": 28.853061224489796, "grad_norm": 11.00368881225586, "learning_rate": 3.558775510204082e-05, "loss": 3.0339, "step": 7069 }, { "epoch": 28.857142857142858, "grad_norm": 6.771301746368408, "learning_rate": 3.558571428571429e-05, "loss": 3.7626, "step": 7070 }, { "epoch": 28.86122448979592, "grad_norm": 14.863645553588867, "learning_rate": 3.5583673469387755e-05, "loss": 2.5459, "step": 7071 }, { "epoch": 28.86530612244898, "grad_norm": 17.04236602783203, "learning_rate": 3.558163265306122e-05, "loss": 3.1712, "step": 7072 }, { "epoch": 28.86938775510204, "grad_norm": 9.38072395324707, "learning_rate": 3.5579591836734696e-05, "loss": 3.112, "step": 7073 }, { "epoch": 28.8734693877551, "grad_norm": 6.579622268676758, "learning_rate": 3.557755102040817e-05, "loss": 3.7228, "step": 7074 }, { "epoch": 28.877551020408163, "grad_norm": 22.880887985229492, "learning_rate": 3.557551020408164e-05, "loss": 2.2224, "step": 7075 }, { "epoch": 28.881632653061224, "grad_norm": 28.447935104370117, "learning_rate": 3.5573469387755104e-05, "loss": 2.5363, "step": 7076 }, { "epoch": 28.885714285714286, "grad_norm": 12.435007095336914, "learning_rate": 3.557142857142857e-05, "loss": 3.0537, "step": 7077 }, { "epoch": 28.889795918367348, "grad_norm": 30.299118041992188, "learning_rate": 3.556938775510204e-05, "loss": 1.2275, "step": 7078 }, { "epoch": 28.89387755102041, "grad_norm": 18.118167877197266, "learning_rate": 3.556734693877551e-05, "loss": 2.6428, "step": 7079 }, { "epoch": 28.897959183673468, "grad_norm": 27.14510154724121, "learning_rate": 3.556530612244898e-05, "loss": 1.8755, "step": 7080 }, { "epoch": 28.90204081632653, "grad_norm": 16.779052734375, "learning_rate": 3.5563265306122454e-05, "loss": 2.6635, "step": 7081 }, { "epoch": 28.90612244897959, "grad_norm": 20.553802490234375, "learning_rate": 3.556122448979592e-05, "loss": 2.1618, "step": 7082 }, { "epoch": 28.910204081632653, "grad_norm": 10.653539657592773, "learning_rate": 3.555918367346939e-05, "loss": 3.3204, "step": 7083 }, { "epoch": 28.914285714285715, "grad_norm": 7.012850284576416, "learning_rate": 3.555714285714286e-05, "loss": 3.4385, "step": 7084 }, { "epoch": 28.918367346938776, "grad_norm": 11.436483383178711, "learning_rate": 3.555510204081633e-05, "loss": 3.1649, "step": 7085 }, { "epoch": 28.922448979591838, "grad_norm": 13.36429500579834, "learning_rate": 3.5553061224489796e-05, "loss": 2.7109, "step": 7086 }, { "epoch": 28.926530612244896, "grad_norm": 32.230316162109375, "learning_rate": 3.555102040816326e-05, "loss": 1.0717, "step": 7087 }, { "epoch": 28.930612244897958, "grad_norm": 8.490642547607422, "learning_rate": 3.554897959183674e-05, "loss": 3.3434, "step": 7088 }, { "epoch": 28.93469387755102, "grad_norm": 14.41336441040039, "learning_rate": 3.554693877551021e-05, "loss": 3.126, "step": 7089 }, { "epoch": 28.93877551020408, "grad_norm": 28.299739837646484, "learning_rate": 3.554489795918368e-05, "loss": 1.9196, "step": 7090 }, { "epoch": 28.942857142857143, "grad_norm": 16.69915008544922, "learning_rate": 3.5542857142857145e-05, "loss": 2.7433, "step": 7091 }, { "epoch": 28.946938775510205, "grad_norm": 15.062102317810059, "learning_rate": 3.554081632653061e-05, "loss": 3.0251, "step": 7092 }, { "epoch": 28.951020408163266, "grad_norm": 13.896608352661133, "learning_rate": 3.553877551020408e-05, "loss": 2.8654, "step": 7093 }, { "epoch": 28.955102040816328, "grad_norm": 22.7467041015625, "learning_rate": 3.5536734693877553e-05, "loss": 1.991, "step": 7094 }, { "epoch": 28.959183673469386, "grad_norm": 11.654461860656738, "learning_rate": 3.553469387755102e-05, "loss": 3.138, "step": 7095 }, { "epoch": 28.963265306122448, "grad_norm": 17.67146110534668, "learning_rate": 3.5532653061224494e-05, "loss": 2.5027, "step": 7096 }, { "epoch": 28.96734693877551, "grad_norm": 14.673888206481934, "learning_rate": 3.553061224489796e-05, "loss": 2.7701, "step": 7097 }, { "epoch": 28.97142857142857, "grad_norm": 9.843182563781738, "learning_rate": 3.552857142857143e-05, "loss": 3.2711, "step": 7098 }, { "epoch": 28.975510204081633, "grad_norm": 18.506370544433594, "learning_rate": 3.55265306122449e-05, "loss": 2.0341, "step": 7099 }, { "epoch": 28.979591836734695, "grad_norm": 12.180303573608398, "learning_rate": 3.552448979591837e-05, "loss": 2.9617, "step": 7100 }, { "epoch": 28.983673469387757, "grad_norm": 10.076242446899414, "learning_rate": 3.552244897959184e-05, "loss": 3.1296, "step": 7101 }, { "epoch": 28.987755102040815, "grad_norm": 12.401586532592773, "learning_rate": 3.5520408163265304e-05, "loss": 2.8933, "step": 7102 }, { "epoch": 28.991836734693877, "grad_norm": 10.41903305053711, "learning_rate": 3.551836734693878e-05, "loss": 3.3489, "step": 7103 }, { "epoch": 28.99591836734694, "grad_norm": 22.324663162231445, "learning_rate": 3.5516326530612245e-05, "loss": 1.4287, "step": 7104 }, { "epoch": 29.0, "grad_norm": 12.670069694519043, "learning_rate": 3.551428571428572e-05, "loss": 3.0354, "step": 7105 }, { "epoch": 29.00408163265306, "grad_norm": 12.424421310424805, "learning_rate": 3.5512244897959186e-05, "loss": 2.7555, "step": 7106 }, { "epoch": 29.008163265306123, "grad_norm": 21.85478973388672, "learning_rate": 3.551020408163265e-05, "loss": 2.5197, "step": 7107 }, { "epoch": 29.012244897959185, "grad_norm": 10.3956298828125, "learning_rate": 3.550816326530613e-05, "loss": 3.2037, "step": 7108 }, { "epoch": 29.016326530612243, "grad_norm": 17.575551986694336, "learning_rate": 3.5506122448979594e-05, "loss": 2.296, "step": 7109 }, { "epoch": 29.020408163265305, "grad_norm": 13.002191543579102, "learning_rate": 3.550408163265306e-05, "loss": 2.9342, "step": 7110 }, { "epoch": 29.024489795918367, "grad_norm": 12.381507873535156, "learning_rate": 3.550204081632653e-05, "loss": 2.9759, "step": 7111 }, { "epoch": 29.02857142857143, "grad_norm": 18.49835777282715, "learning_rate": 3.55e-05, "loss": 1.7621, "step": 7112 }, { "epoch": 29.03265306122449, "grad_norm": 28.9414005279541, "learning_rate": 3.5497959183673476e-05, "loss": 1.2233, "step": 7113 }, { "epoch": 29.036734693877552, "grad_norm": 10.482522010803223, "learning_rate": 3.549591836734694e-05, "loss": 3.1219, "step": 7114 }, { "epoch": 29.040816326530614, "grad_norm": 20.65890121459961, "learning_rate": 3.549387755102041e-05, "loss": 2.5637, "step": 7115 }, { "epoch": 29.044897959183672, "grad_norm": 26.41530418395996, "learning_rate": 3.549183673469388e-05, "loss": 1.8682, "step": 7116 }, { "epoch": 29.048979591836734, "grad_norm": 18.417442321777344, "learning_rate": 3.5489795918367345e-05, "loss": 2.2198, "step": 7117 }, { "epoch": 29.053061224489795, "grad_norm": 17.206356048583984, "learning_rate": 3.548775510204082e-05, "loss": 2.7569, "step": 7118 }, { "epoch": 29.057142857142857, "grad_norm": 11.194414138793945, "learning_rate": 3.5485714285714286e-05, "loss": 2.9714, "step": 7119 }, { "epoch": 29.06122448979592, "grad_norm": 10.97851848602295, "learning_rate": 3.548367346938776e-05, "loss": 3.0965, "step": 7120 }, { "epoch": 29.06530612244898, "grad_norm": 30.14231300354004, "learning_rate": 3.548163265306123e-05, "loss": 2.0051, "step": 7121 }, { "epoch": 29.069387755102042, "grad_norm": 16.229511260986328, "learning_rate": 3.5479591836734694e-05, "loss": 2.3787, "step": 7122 }, { "epoch": 29.073469387755104, "grad_norm": 19.322729110717773, "learning_rate": 3.547755102040817e-05, "loss": 2.1088, "step": 7123 }, { "epoch": 29.077551020408162, "grad_norm": 19.24711036682129, "learning_rate": 3.5475510204081635e-05, "loss": 2.5483, "step": 7124 }, { "epoch": 29.081632653061224, "grad_norm": 9.655980110168457, "learning_rate": 3.54734693877551e-05, "loss": 3.4335, "step": 7125 }, { "epoch": 29.085714285714285, "grad_norm": 9.94908332824707, "learning_rate": 3.547142857142857e-05, "loss": 3.0416, "step": 7126 }, { "epoch": 29.089795918367347, "grad_norm": 10.057979583740234, "learning_rate": 3.546938775510204e-05, "loss": 3.0738, "step": 7127 }, { "epoch": 29.09387755102041, "grad_norm": 16.1978816986084, "learning_rate": 3.546734693877552e-05, "loss": 2.7312, "step": 7128 }, { "epoch": 29.09795918367347, "grad_norm": 17.88935089111328, "learning_rate": 3.5465306122448984e-05, "loss": 2.4648, "step": 7129 }, { "epoch": 29.102040816326532, "grad_norm": 17.225339889526367, "learning_rate": 3.546326530612245e-05, "loss": 2.1821, "step": 7130 }, { "epoch": 29.10612244897959, "grad_norm": 12.73598575592041, "learning_rate": 3.546122448979592e-05, "loss": 3.0317, "step": 7131 }, { "epoch": 29.110204081632652, "grad_norm": 18.09552001953125, "learning_rate": 3.5459183673469385e-05, "loss": 2.4019, "step": 7132 }, { "epoch": 29.114285714285714, "grad_norm": 19.621070861816406, "learning_rate": 3.545714285714286e-05, "loss": 1.9875, "step": 7133 }, { "epoch": 29.118367346938776, "grad_norm": 13.374165534973145, "learning_rate": 3.5455102040816327e-05, "loss": 2.9733, "step": 7134 }, { "epoch": 29.122448979591837, "grad_norm": 27.271554946899414, "learning_rate": 3.54530612244898e-05, "loss": 2.0826, "step": 7135 }, { "epoch": 29.1265306122449, "grad_norm": 31.556636810302734, "learning_rate": 3.545102040816327e-05, "loss": 1.7593, "step": 7136 }, { "epoch": 29.13061224489796, "grad_norm": 13.961320877075195, "learning_rate": 3.5448979591836735e-05, "loss": 2.9326, "step": 7137 }, { "epoch": 29.13469387755102, "grad_norm": 14.108684539794922, "learning_rate": 3.544693877551021e-05, "loss": 2.9591, "step": 7138 }, { "epoch": 29.13877551020408, "grad_norm": 17.49044418334961, "learning_rate": 3.5444897959183676e-05, "loss": 2.0897, "step": 7139 }, { "epoch": 29.142857142857142, "grad_norm": 13.45602798461914, "learning_rate": 3.544285714285714e-05, "loss": 3.1868, "step": 7140 }, { "epoch": 29.146938775510204, "grad_norm": 14.916139602661133, "learning_rate": 3.544081632653061e-05, "loss": 2.7367, "step": 7141 }, { "epoch": 29.151020408163266, "grad_norm": 20.865922927856445, "learning_rate": 3.5438775510204084e-05, "loss": 2.5447, "step": 7142 }, { "epoch": 29.155102040816328, "grad_norm": 15.652730941772461, "learning_rate": 3.543673469387756e-05, "loss": 2.4804, "step": 7143 }, { "epoch": 29.15918367346939, "grad_norm": 11.504417419433594, "learning_rate": 3.5434693877551025e-05, "loss": 2.9129, "step": 7144 }, { "epoch": 29.163265306122447, "grad_norm": 14.648550987243652, "learning_rate": 3.543265306122449e-05, "loss": 3.0087, "step": 7145 }, { "epoch": 29.16734693877551, "grad_norm": 10.236610412597656, "learning_rate": 3.543061224489796e-05, "loss": 3.0274, "step": 7146 }, { "epoch": 29.17142857142857, "grad_norm": 11.583834648132324, "learning_rate": 3.5428571428571426e-05, "loss": 2.953, "step": 7147 }, { "epoch": 29.175510204081633, "grad_norm": 6.646231651306152, "learning_rate": 3.54265306122449e-05, "loss": 3.6846, "step": 7148 }, { "epoch": 29.179591836734694, "grad_norm": 13.334656715393066, "learning_rate": 3.542448979591837e-05, "loss": 3.247, "step": 7149 }, { "epoch": 29.183673469387756, "grad_norm": 10.616888046264648, "learning_rate": 3.542244897959184e-05, "loss": 3.0688, "step": 7150 }, { "epoch": 29.187755102040818, "grad_norm": 17.037158966064453, "learning_rate": 3.542040816326531e-05, "loss": 2.6539, "step": 7151 }, { "epoch": 29.191836734693876, "grad_norm": 12.401909828186035, "learning_rate": 3.5418367346938775e-05, "loss": 2.8441, "step": 7152 }, { "epoch": 29.195918367346938, "grad_norm": 9.695143699645996, "learning_rate": 3.541632653061225e-05, "loss": 3.1173, "step": 7153 }, { "epoch": 29.2, "grad_norm": 10.094513893127441, "learning_rate": 3.5414285714285716e-05, "loss": 3.5272, "step": 7154 }, { "epoch": 29.20408163265306, "grad_norm": 12.801824569702148, "learning_rate": 3.5412244897959184e-05, "loss": 3.2954, "step": 7155 }, { "epoch": 29.208163265306123, "grad_norm": 32.71733093261719, "learning_rate": 3.541020408163265e-05, "loss": 1.884, "step": 7156 }, { "epoch": 29.212244897959184, "grad_norm": 25.700334548950195, "learning_rate": 3.5408163265306125e-05, "loss": 2.4, "step": 7157 }, { "epoch": 29.216326530612246, "grad_norm": 18.289827346801758, "learning_rate": 3.540612244897959e-05, "loss": 2.4086, "step": 7158 }, { "epoch": 29.220408163265308, "grad_norm": 21.441089630126953, "learning_rate": 3.5404081632653066e-05, "loss": 2.3362, "step": 7159 }, { "epoch": 29.224489795918366, "grad_norm": 13.84390640258789, "learning_rate": 3.540204081632653e-05, "loss": 2.8704, "step": 7160 }, { "epoch": 29.228571428571428, "grad_norm": 8.86191177368164, "learning_rate": 3.54e-05, "loss": 3.4291, "step": 7161 }, { "epoch": 29.23265306122449, "grad_norm": 27.27244758605957, "learning_rate": 3.5397959183673474e-05, "loss": 1.5604, "step": 7162 }, { "epoch": 29.23673469387755, "grad_norm": 17.944061279296875, "learning_rate": 3.539591836734694e-05, "loss": 2.1636, "step": 7163 }, { "epoch": 29.240816326530613, "grad_norm": 11.367623329162598, "learning_rate": 3.539387755102041e-05, "loss": 3.0818, "step": 7164 }, { "epoch": 29.244897959183675, "grad_norm": 11.561798095703125, "learning_rate": 3.5391836734693875e-05, "loss": 2.9925, "step": 7165 }, { "epoch": 29.248979591836736, "grad_norm": 22.891555786132812, "learning_rate": 3.538979591836735e-05, "loss": 1.9679, "step": 7166 }, { "epoch": 29.253061224489795, "grad_norm": 9.15778923034668, "learning_rate": 3.538775510204082e-05, "loss": 3.0779, "step": 7167 }, { "epoch": 29.257142857142856, "grad_norm": 22.595849990844727, "learning_rate": 3.538571428571429e-05, "loss": 1.9919, "step": 7168 }, { "epoch": 29.261224489795918, "grad_norm": 13.34701919555664, "learning_rate": 3.538367346938776e-05, "loss": 2.9663, "step": 7169 }, { "epoch": 29.26530612244898, "grad_norm": 18.262895584106445, "learning_rate": 3.5381632653061224e-05, "loss": 2.7367, "step": 7170 }, { "epoch": 29.26938775510204, "grad_norm": 18.49338722229004, "learning_rate": 3.537959183673469e-05, "loss": 2.636, "step": 7171 }, { "epoch": 29.273469387755103, "grad_norm": 11.900930404663086, "learning_rate": 3.5377551020408165e-05, "loss": 3.2054, "step": 7172 }, { "epoch": 29.277551020408165, "grad_norm": 12.437328338623047, "learning_rate": 3.537551020408163e-05, "loss": 2.9086, "step": 7173 }, { "epoch": 29.281632653061223, "grad_norm": 14.675176620483398, "learning_rate": 3.5373469387755106e-05, "loss": 2.6907, "step": 7174 }, { "epoch": 29.285714285714285, "grad_norm": 25.49799919128418, "learning_rate": 3.5371428571428574e-05, "loss": 2.6212, "step": 7175 }, { "epoch": 29.289795918367346, "grad_norm": 19.39385414123535, "learning_rate": 3.536938775510204e-05, "loss": 2.0715, "step": 7176 }, { "epoch": 29.293877551020408, "grad_norm": 23.857593536376953, "learning_rate": 3.5367346938775515e-05, "loss": 2.0142, "step": 7177 }, { "epoch": 29.29795918367347, "grad_norm": 29.188289642333984, "learning_rate": 3.536530612244898e-05, "loss": 1.965, "step": 7178 }, { "epoch": 29.30204081632653, "grad_norm": 16.227264404296875, "learning_rate": 3.536326530612245e-05, "loss": 2.4262, "step": 7179 }, { "epoch": 29.306122448979593, "grad_norm": 16.183013916015625, "learning_rate": 3.5361224489795916e-05, "loss": 2.6739, "step": 7180 }, { "epoch": 29.31020408163265, "grad_norm": 15.959851264953613, "learning_rate": 3.535918367346939e-05, "loss": 2.7174, "step": 7181 }, { "epoch": 29.314285714285713, "grad_norm": 16.266523361206055, "learning_rate": 3.5357142857142864e-05, "loss": 3.0202, "step": 7182 }, { "epoch": 29.318367346938775, "grad_norm": 9.786382675170898, "learning_rate": 3.535510204081633e-05, "loss": 3.0861, "step": 7183 }, { "epoch": 29.322448979591837, "grad_norm": 12.562065124511719, "learning_rate": 3.53530612244898e-05, "loss": 2.8179, "step": 7184 }, { "epoch": 29.3265306122449, "grad_norm": 22.67146110534668, "learning_rate": 3.5351020408163265e-05, "loss": 2.7883, "step": 7185 }, { "epoch": 29.33061224489796, "grad_norm": 31.43265151977539, "learning_rate": 3.534897959183673e-05, "loss": 0.9789, "step": 7186 }, { "epoch": 29.33469387755102, "grad_norm": 43.89213943481445, "learning_rate": 3.5346938775510206e-05, "loss": 1.4866, "step": 7187 }, { "epoch": 29.33877551020408, "grad_norm": 17.302989959716797, "learning_rate": 3.534489795918367e-05, "loss": 2.5393, "step": 7188 }, { "epoch": 29.34285714285714, "grad_norm": 18.806371688842773, "learning_rate": 3.534285714285715e-05, "loss": 2.5309, "step": 7189 }, { "epoch": 29.346938775510203, "grad_norm": 32.465179443359375, "learning_rate": 3.5340816326530614e-05, "loss": 1.6971, "step": 7190 }, { "epoch": 29.351020408163265, "grad_norm": 22.650129318237305, "learning_rate": 3.533877551020408e-05, "loss": 2.187, "step": 7191 }, { "epoch": 29.355102040816327, "grad_norm": 13.779861450195312, "learning_rate": 3.5336734693877555e-05, "loss": 3.0163, "step": 7192 }, { "epoch": 29.35918367346939, "grad_norm": 15.996742248535156, "learning_rate": 3.533469387755102e-05, "loss": 2.6773, "step": 7193 }, { "epoch": 29.36326530612245, "grad_norm": 19.328393936157227, "learning_rate": 3.533265306122449e-05, "loss": 2.3488, "step": 7194 }, { "epoch": 29.367346938775512, "grad_norm": 19.646696090698242, "learning_rate": 3.533061224489796e-05, "loss": 2.9037, "step": 7195 }, { "epoch": 29.37142857142857, "grad_norm": 10.482221603393555, "learning_rate": 3.532857142857143e-05, "loss": 2.9948, "step": 7196 }, { "epoch": 29.375510204081632, "grad_norm": 10.687599182128906, "learning_rate": 3.5326530612244904e-05, "loss": 3.0465, "step": 7197 }, { "epoch": 29.379591836734694, "grad_norm": 39.94601821899414, "learning_rate": 3.532448979591837e-05, "loss": 2.8855, "step": 7198 }, { "epoch": 29.383673469387755, "grad_norm": 13.677982330322266, "learning_rate": 3.532244897959184e-05, "loss": 2.9735, "step": 7199 }, { "epoch": 29.387755102040817, "grad_norm": 19.053691864013672, "learning_rate": 3.5320408163265306e-05, "loss": 2.6963, "step": 7200 }, { "epoch": 29.39183673469388, "grad_norm": 14.084721565246582, "learning_rate": 3.531836734693878e-05, "loss": 2.7081, "step": 7201 }, { "epoch": 29.39591836734694, "grad_norm": 10.416022300720215, "learning_rate": 3.531632653061225e-05, "loss": 3.2331, "step": 7202 }, { "epoch": 29.4, "grad_norm": 31.941829681396484, "learning_rate": 3.5314285714285714e-05, "loss": 1.2233, "step": 7203 }, { "epoch": 29.40408163265306, "grad_norm": 13.606999397277832, "learning_rate": 3.531224489795919e-05, "loss": 2.9772, "step": 7204 }, { "epoch": 29.408163265306122, "grad_norm": 16.4722843170166, "learning_rate": 3.5310204081632655e-05, "loss": 2.805, "step": 7205 }, { "epoch": 29.412244897959184, "grad_norm": 16.185861587524414, "learning_rate": 3.530816326530613e-05, "loss": 2.5892, "step": 7206 }, { "epoch": 29.416326530612245, "grad_norm": 24.13245391845703, "learning_rate": 3.5306122448979596e-05, "loss": 2.5313, "step": 7207 }, { "epoch": 29.420408163265307, "grad_norm": 7.933183670043945, "learning_rate": 3.530408163265306e-05, "loss": 3.3782, "step": 7208 }, { "epoch": 29.42448979591837, "grad_norm": 8.271244049072266, "learning_rate": 3.530204081632653e-05, "loss": 2.9358, "step": 7209 }, { "epoch": 29.428571428571427, "grad_norm": 17.14413070678711, "learning_rate": 3.53e-05, "loss": 2.9342, "step": 7210 }, { "epoch": 29.43265306122449, "grad_norm": 13.02499771118164, "learning_rate": 3.529795918367347e-05, "loss": 3.3662, "step": 7211 }, { "epoch": 29.43673469387755, "grad_norm": 21.6083984375, "learning_rate": 3.529591836734694e-05, "loss": 2.2258, "step": 7212 }, { "epoch": 29.440816326530612, "grad_norm": 13.612963676452637, "learning_rate": 3.529387755102041e-05, "loss": 2.8758, "step": 7213 }, { "epoch": 29.444897959183674, "grad_norm": 15.836146354675293, "learning_rate": 3.529183673469388e-05, "loss": 2.7361, "step": 7214 }, { "epoch": 29.448979591836736, "grad_norm": 21.197021484375, "learning_rate": 3.528979591836735e-05, "loss": 1.9538, "step": 7215 }, { "epoch": 29.453061224489797, "grad_norm": 10.887661933898926, "learning_rate": 3.528775510204082e-05, "loss": 3.063, "step": 7216 }, { "epoch": 29.457142857142856, "grad_norm": 12.758623123168945, "learning_rate": 3.528571428571429e-05, "loss": 3.2026, "step": 7217 }, { "epoch": 29.461224489795917, "grad_norm": 16.82378578186035, "learning_rate": 3.5283673469387755e-05, "loss": 2.6715, "step": 7218 }, { "epoch": 29.46530612244898, "grad_norm": 13.565177917480469, "learning_rate": 3.528163265306122e-05, "loss": 3.0167, "step": 7219 }, { "epoch": 29.46938775510204, "grad_norm": 23.725664138793945, "learning_rate": 3.5279591836734696e-05, "loss": 0.3594, "step": 7220 }, { "epoch": 29.473469387755102, "grad_norm": 16.091278076171875, "learning_rate": 3.527755102040817e-05, "loss": 2.6442, "step": 7221 }, { "epoch": 29.477551020408164, "grad_norm": 23.066410064697266, "learning_rate": 3.527551020408164e-05, "loss": 2.8103, "step": 7222 }, { "epoch": 29.481632653061226, "grad_norm": 15.208794593811035, "learning_rate": 3.5273469387755104e-05, "loss": 2.7162, "step": 7223 }, { "epoch": 29.485714285714284, "grad_norm": 18.796152114868164, "learning_rate": 3.527142857142857e-05, "loss": 2.1978, "step": 7224 }, { "epoch": 29.489795918367346, "grad_norm": 14.741925239562988, "learning_rate": 3.526938775510204e-05, "loss": 3.1459, "step": 7225 }, { "epoch": 29.493877551020407, "grad_norm": 29.751188278198242, "learning_rate": 3.526734693877551e-05, "loss": 2.0681, "step": 7226 }, { "epoch": 29.49795918367347, "grad_norm": 12.013311386108398, "learning_rate": 3.526530612244898e-05, "loss": 2.8096, "step": 7227 }, { "epoch": 29.50204081632653, "grad_norm": 11.344034194946289, "learning_rate": 3.526326530612245e-05, "loss": 3.3537, "step": 7228 }, { "epoch": 29.506122448979593, "grad_norm": 9.798651695251465, "learning_rate": 3.526122448979592e-05, "loss": 3.4565, "step": 7229 }, { "epoch": 29.510204081632654, "grad_norm": 24.49930191040039, "learning_rate": 3.525918367346939e-05, "loss": 1.735, "step": 7230 }, { "epoch": 29.514285714285712, "grad_norm": 22.67599868774414, "learning_rate": 3.525714285714286e-05, "loss": 1.4109, "step": 7231 }, { "epoch": 29.518367346938774, "grad_norm": 11.975869178771973, "learning_rate": 3.525510204081633e-05, "loss": 2.9674, "step": 7232 }, { "epoch": 29.522448979591836, "grad_norm": 9.110952377319336, "learning_rate": 3.5253061224489796e-05, "loss": 3.355, "step": 7233 }, { "epoch": 29.526530612244898, "grad_norm": 11.366157531738281, "learning_rate": 3.525102040816326e-05, "loss": 3.0327, "step": 7234 }, { "epoch": 29.53061224489796, "grad_norm": 13.89006233215332, "learning_rate": 3.5248979591836737e-05, "loss": 2.7096, "step": 7235 }, { "epoch": 29.53469387755102, "grad_norm": 15.5947265625, "learning_rate": 3.524693877551021e-05, "loss": 2.6928, "step": 7236 }, { "epoch": 29.538775510204083, "grad_norm": 15.176753044128418, "learning_rate": 3.524489795918368e-05, "loss": 2.6081, "step": 7237 }, { "epoch": 29.542857142857144, "grad_norm": 9.820352554321289, "learning_rate": 3.5242857142857145e-05, "loss": 3.0601, "step": 7238 }, { "epoch": 29.546938775510203, "grad_norm": 10.594793319702148, "learning_rate": 3.524081632653061e-05, "loss": 3.1846, "step": 7239 }, { "epoch": 29.551020408163264, "grad_norm": 21.22689437866211, "learning_rate": 3.523877551020408e-05, "loss": 2.0168, "step": 7240 }, { "epoch": 29.555102040816326, "grad_norm": 12.887094497680664, "learning_rate": 3.523673469387755e-05, "loss": 2.8453, "step": 7241 }, { "epoch": 29.559183673469388, "grad_norm": 15.560819625854492, "learning_rate": 3.523469387755102e-05, "loss": 2.9328, "step": 7242 }, { "epoch": 29.56326530612245, "grad_norm": 22.783771514892578, "learning_rate": 3.5232653061224494e-05, "loss": 1.3124, "step": 7243 }, { "epoch": 29.56734693877551, "grad_norm": 8.529834747314453, "learning_rate": 3.523061224489796e-05, "loss": 3.2479, "step": 7244 }, { "epoch": 29.571428571428573, "grad_norm": 17.075634002685547, "learning_rate": 3.5228571428571435e-05, "loss": 2.5352, "step": 7245 }, { "epoch": 29.57551020408163, "grad_norm": 7.912164688110352, "learning_rate": 3.52265306122449e-05, "loss": 3.4227, "step": 7246 }, { "epoch": 29.579591836734693, "grad_norm": 8.601970672607422, "learning_rate": 3.522448979591837e-05, "loss": 3.3161, "step": 7247 }, { "epoch": 29.583673469387755, "grad_norm": 11.581608772277832, "learning_rate": 3.5222448979591836e-05, "loss": 2.9673, "step": 7248 }, { "epoch": 29.587755102040816, "grad_norm": 16.71293830871582, "learning_rate": 3.5220408163265303e-05, "loss": 2.7332, "step": 7249 }, { "epoch": 29.591836734693878, "grad_norm": 13.470582962036133, "learning_rate": 3.521836734693878e-05, "loss": 2.6641, "step": 7250 }, { "epoch": 29.59591836734694, "grad_norm": 15.918903350830078, "learning_rate": 3.521632653061225e-05, "loss": 2.9271, "step": 7251 }, { "epoch": 29.6, "grad_norm": 9.866695404052734, "learning_rate": 3.521428571428572e-05, "loss": 3.0097, "step": 7252 }, { "epoch": 29.60408163265306, "grad_norm": 17.92062759399414, "learning_rate": 3.5212244897959185e-05, "loss": 2.8675, "step": 7253 }, { "epoch": 29.60816326530612, "grad_norm": 23.054092407226562, "learning_rate": 3.521020408163265e-05, "loss": 2.5506, "step": 7254 }, { "epoch": 29.612244897959183, "grad_norm": 28.4310302734375, "learning_rate": 3.5208163265306127e-05, "loss": 2.3441, "step": 7255 }, { "epoch": 29.616326530612245, "grad_norm": 27.883941650390625, "learning_rate": 3.5206122448979594e-05, "loss": 1.4995, "step": 7256 }, { "epoch": 29.620408163265306, "grad_norm": 19.15985870361328, "learning_rate": 3.520408163265306e-05, "loss": 2.1332, "step": 7257 }, { "epoch": 29.624489795918368, "grad_norm": 11.095528602600098, "learning_rate": 3.5202040816326535e-05, "loss": 2.9014, "step": 7258 }, { "epoch": 29.62857142857143, "grad_norm": 27.33850860595703, "learning_rate": 3.52e-05, "loss": 1.4212, "step": 7259 }, { "epoch": 29.632653061224488, "grad_norm": 25.325057983398438, "learning_rate": 3.5197959183673476e-05, "loss": 2.1468, "step": 7260 }, { "epoch": 29.63673469387755, "grad_norm": 33.136962890625, "learning_rate": 3.519591836734694e-05, "loss": 1.5231, "step": 7261 }, { "epoch": 29.64081632653061, "grad_norm": 10.11917781829834, "learning_rate": 3.519387755102041e-05, "loss": 3.3282, "step": 7262 }, { "epoch": 29.644897959183673, "grad_norm": 14.74810791015625, "learning_rate": 3.519183673469388e-05, "loss": 2.7939, "step": 7263 }, { "epoch": 29.648979591836735, "grad_norm": 19.748254776000977, "learning_rate": 3.5189795918367344e-05, "loss": 2.4869, "step": 7264 }, { "epoch": 29.653061224489797, "grad_norm": 33.566165924072266, "learning_rate": 3.518775510204082e-05, "loss": 2.0766, "step": 7265 }, { "epoch": 29.65714285714286, "grad_norm": 16.93669319152832, "learning_rate": 3.5185714285714285e-05, "loss": 2.7786, "step": 7266 }, { "epoch": 29.66122448979592, "grad_norm": 13.067426681518555, "learning_rate": 3.518367346938776e-05, "loss": 2.8383, "step": 7267 }, { "epoch": 29.66530612244898, "grad_norm": 22.69822120666504, "learning_rate": 3.5181632653061226e-05, "loss": 2.2142, "step": 7268 }, { "epoch": 29.66938775510204, "grad_norm": 14.568136215209961, "learning_rate": 3.517959183673469e-05, "loss": 2.4404, "step": 7269 }, { "epoch": 29.6734693877551, "grad_norm": 12.06290054321289, "learning_rate": 3.517755102040817e-05, "loss": 3.0894, "step": 7270 }, { "epoch": 29.677551020408163, "grad_norm": 19.544397354125977, "learning_rate": 3.5175510204081634e-05, "loss": 2.0315, "step": 7271 }, { "epoch": 29.681632653061225, "grad_norm": 21.520751953125, "learning_rate": 3.51734693877551e-05, "loss": 2.089, "step": 7272 }, { "epoch": 29.685714285714287, "grad_norm": 9.122228622436523, "learning_rate": 3.517142857142857e-05, "loss": 3.1036, "step": 7273 }, { "epoch": 29.68979591836735, "grad_norm": 22.61907958984375, "learning_rate": 3.516938775510204e-05, "loss": 2.1731, "step": 7274 }, { "epoch": 29.693877551020407, "grad_norm": 19.812149047851562, "learning_rate": 3.5167346938775516e-05, "loss": 2.7886, "step": 7275 }, { "epoch": 29.69795918367347, "grad_norm": 14.710420608520508, "learning_rate": 3.5165306122448984e-05, "loss": 2.572, "step": 7276 }, { "epoch": 29.70204081632653, "grad_norm": 8.274962425231934, "learning_rate": 3.516326530612245e-05, "loss": 3.5239, "step": 7277 }, { "epoch": 29.706122448979592, "grad_norm": 9.00491714477539, "learning_rate": 3.516122448979592e-05, "loss": 3.5252, "step": 7278 }, { "epoch": 29.710204081632654, "grad_norm": 12.695775032043457, "learning_rate": 3.5159183673469385e-05, "loss": 2.9254, "step": 7279 }, { "epoch": 29.714285714285715, "grad_norm": 28.12902069091797, "learning_rate": 3.515714285714286e-05, "loss": 1.9146, "step": 7280 }, { "epoch": 29.718367346938777, "grad_norm": 15.637295722961426, "learning_rate": 3.5155102040816326e-05, "loss": 2.5594, "step": 7281 }, { "epoch": 29.722448979591835, "grad_norm": 16.7275333404541, "learning_rate": 3.51530612244898e-05, "loss": 2.9224, "step": 7282 }, { "epoch": 29.726530612244897, "grad_norm": 26.09300994873047, "learning_rate": 3.515102040816327e-05, "loss": 2.1937, "step": 7283 }, { "epoch": 29.73061224489796, "grad_norm": 15.654712677001953, "learning_rate": 3.5148979591836734e-05, "loss": 2.718, "step": 7284 }, { "epoch": 29.73469387755102, "grad_norm": 26.13130760192871, "learning_rate": 3.514693877551021e-05, "loss": 1.1965, "step": 7285 }, { "epoch": 29.738775510204082, "grad_norm": 14.63689136505127, "learning_rate": 3.5144897959183675e-05, "loss": 2.3438, "step": 7286 }, { "epoch": 29.742857142857144, "grad_norm": 13.19955825805664, "learning_rate": 3.514285714285714e-05, "loss": 2.6155, "step": 7287 }, { "epoch": 29.746938775510205, "grad_norm": 16.697181701660156, "learning_rate": 3.514081632653061e-05, "loss": 3.1279, "step": 7288 }, { "epoch": 29.751020408163264, "grad_norm": 23.264568328857422, "learning_rate": 3.513877551020408e-05, "loss": 1.964, "step": 7289 }, { "epoch": 29.755102040816325, "grad_norm": 20.177093505859375, "learning_rate": 3.513673469387756e-05, "loss": 2.2842, "step": 7290 }, { "epoch": 29.759183673469387, "grad_norm": 13.487319946289062, "learning_rate": 3.5134693877551024e-05, "loss": 2.6291, "step": 7291 }, { "epoch": 29.76326530612245, "grad_norm": 33.383506774902344, "learning_rate": 3.513265306122449e-05, "loss": 1.1042, "step": 7292 }, { "epoch": 29.76734693877551, "grad_norm": 33.20398712158203, "learning_rate": 3.513061224489796e-05, "loss": 0.8721, "step": 7293 }, { "epoch": 29.771428571428572, "grad_norm": 12.921542167663574, "learning_rate": 3.512857142857143e-05, "loss": 2.8926, "step": 7294 }, { "epoch": 29.775510204081634, "grad_norm": 10.875670433044434, "learning_rate": 3.51265306122449e-05, "loss": 3.2897, "step": 7295 }, { "epoch": 29.779591836734696, "grad_norm": 32.24281311035156, "learning_rate": 3.512448979591837e-05, "loss": 2.2095, "step": 7296 }, { "epoch": 29.783673469387754, "grad_norm": 18.232744216918945, "learning_rate": 3.512244897959184e-05, "loss": 2.733, "step": 7297 }, { "epoch": 29.787755102040816, "grad_norm": 18.99656105041504, "learning_rate": 3.512040816326531e-05, "loss": 2.423, "step": 7298 }, { "epoch": 29.791836734693877, "grad_norm": 19.918874740600586, "learning_rate": 3.511836734693878e-05, "loss": 2.7227, "step": 7299 }, { "epoch": 29.79591836734694, "grad_norm": 13.151052474975586, "learning_rate": 3.511632653061225e-05, "loss": 3.0183, "step": 7300 }, { "epoch": 29.8, "grad_norm": 8.00633716583252, "learning_rate": 3.5114285714285716e-05, "loss": 3.3065, "step": 7301 }, { "epoch": 29.804081632653062, "grad_norm": 10.766801834106445, "learning_rate": 3.511224489795918e-05, "loss": 3.0851, "step": 7302 }, { "epoch": 29.808163265306124, "grad_norm": 10.979369163513184, "learning_rate": 3.511020408163265e-05, "loss": 3.2401, "step": 7303 }, { "epoch": 29.812244897959182, "grad_norm": 15.432062149047852, "learning_rate": 3.5108163265306124e-05, "loss": 2.5673, "step": 7304 }, { "epoch": 29.816326530612244, "grad_norm": 10.724019050598145, "learning_rate": 3.51061224489796e-05, "loss": 3.1636, "step": 7305 }, { "epoch": 29.820408163265306, "grad_norm": 11.053539276123047, "learning_rate": 3.5104081632653065e-05, "loss": 3.0796, "step": 7306 }, { "epoch": 29.824489795918367, "grad_norm": 13.245216369628906, "learning_rate": 3.510204081632653e-05, "loss": 3.081, "step": 7307 }, { "epoch": 29.82857142857143, "grad_norm": 7.728338241577148, "learning_rate": 3.51e-05, "loss": 3.6003, "step": 7308 }, { "epoch": 29.83265306122449, "grad_norm": 8.51490306854248, "learning_rate": 3.509795918367347e-05, "loss": 3.3747, "step": 7309 }, { "epoch": 29.836734693877553, "grad_norm": 27.186498641967773, "learning_rate": 3.509591836734694e-05, "loss": 2.065, "step": 7310 }, { "epoch": 29.84081632653061, "grad_norm": 9.964412689208984, "learning_rate": 3.509387755102041e-05, "loss": 3.167, "step": 7311 }, { "epoch": 29.844897959183672, "grad_norm": 11.864638328552246, "learning_rate": 3.509183673469388e-05, "loss": 3.0845, "step": 7312 }, { "epoch": 29.848979591836734, "grad_norm": 15.958192825317383, "learning_rate": 3.508979591836735e-05, "loss": 2.4151, "step": 7313 }, { "epoch": 29.853061224489796, "grad_norm": 16.503875732421875, "learning_rate": 3.508775510204082e-05, "loss": 2.58, "step": 7314 }, { "epoch": 29.857142857142858, "grad_norm": 25.611661911010742, "learning_rate": 3.508571428571429e-05, "loss": 1.8469, "step": 7315 }, { "epoch": 29.86122448979592, "grad_norm": 6.560273170471191, "learning_rate": 3.508367346938776e-05, "loss": 3.4322, "step": 7316 }, { "epoch": 29.86530612244898, "grad_norm": 25.28714942932129, "learning_rate": 3.5081632653061224e-05, "loss": 2.2485, "step": 7317 }, { "epoch": 29.86938775510204, "grad_norm": 23.558313369750977, "learning_rate": 3.507959183673469e-05, "loss": 1.8263, "step": 7318 }, { "epoch": 29.8734693877551, "grad_norm": 11.522388458251953, "learning_rate": 3.5077551020408165e-05, "loss": 3.1274, "step": 7319 }, { "epoch": 29.877551020408163, "grad_norm": 18.580976486206055, "learning_rate": 3.507551020408163e-05, "loss": 2.892, "step": 7320 }, { "epoch": 29.881632653061224, "grad_norm": 9.467452049255371, "learning_rate": 3.5073469387755106e-05, "loss": 3.2521, "step": 7321 }, { "epoch": 29.885714285714286, "grad_norm": 20.02756690979004, "learning_rate": 3.507142857142857e-05, "loss": 1.2053, "step": 7322 }, { "epoch": 29.889795918367348, "grad_norm": 15.73986530303955, "learning_rate": 3.506938775510204e-05, "loss": 2.8658, "step": 7323 }, { "epoch": 29.89387755102041, "grad_norm": 29.717485427856445, "learning_rate": 3.5067346938775514e-05, "loss": 2.4375, "step": 7324 }, { "epoch": 29.897959183673468, "grad_norm": 20.245433807373047, "learning_rate": 3.506530612244898e-05, "loss": 2.0117, "step": 7325 }, { "epoch": 29.90204081632653, "grad_norm": 23.865846633911133, "learning_rate": 3.506326530612245e-05, "loss": 1.9529, "step": 7326 }, { "epoch": 29.90612244897959, "grad_norm": 28.4797420501709, "learning_rate": 3.5061224489795915e-05, "loss": 1.9595, "step": 7327 }, { "epoch": 29.910204081632653, "grad_norm": 9.140860557556152, "learning_rate": 3.505918367346939e-05, "loss": 3.4475, "step": 7328 }, { "epoch": 29.914285714285715, "grad_norm": 26.634727478027344, "learning_rate": 3.505714285714286e-05, "loss": 1.3781, "step": 7329 }, { "epoch": 29.918367346938776, "grad_norm": 25.779407501220703, "learning_rate": 3.505510204081633e-05, "loss": 2.0429, "step": 7330 }, { "epoch": 29.922448979591838, "grad_norm": 15.14596939086914, "learning_rate": 3.50530612244898e-05, "loss": 2.884, "step": 7331 }, { "epoch": 29.926530612244896, "grad_norm": 39.7346305847168, "learning_rate": 3.5051020408163265e-05, "loss": 1.2167, "step": 7332 }, { "epoch": 29.930612244897958, "grad_norm": 12.202668190002441, "learning_rate": 3.504897959183673e-05, "loss": 2.9659, "step": 7333 }, { "epoch": 29.93469387755102, "grad_norm": 28.191213607788086, "learning_rate": 3.5046938775510206e-05, "loss": 2.4175, "step": 7334 }, { "epoch": 29.93877551020408, "grad_norm": 20.481348037719727, "learning_rate": 3.504489795918367e-05, "loss": 1.9755, "step": 7335 }, { "epoch": 29.942857142857143, "grad_norm": 18.405641555786133, "learning_rate": 3.504285714285715e-05, "loss": 2.351, "step": 7336 }, { "epoch": 29.946938775510205, "grad_norm": 14.117551803588867, "learning_rate": 3.5040816326530614e-05, "loss": 2.98, "step": 7337 }, { "epoch": 29.951020408163266, "grad_norm": 14.790526390075684, "learning_rate": 3.503877551020409e-05, "loss": 2.9354, "step": 7338 }, { "epoch": 29.955102040816328, "grad_norm": 14.815004348754883, "learning_rate": 3.5036734693877555e-05, "loss": 2.7713, "step": 7339 }, { "epoch": 29.959183673469386, "grad_norm": 17.983137130737305, "learning_rate": 3.503469387755102e-05, "loss": 2.4879, "step": 7340 }, { "epoch": 29.963265306122448, "grad_norm": 12.366456031799316, "learning_rate": 3.503265306122449e-05, "loss": 3.1776, "step": 7341 }, { "epoch": 29.96734693877551, "grad_norm": 21.872312545776367, "learning_rate": 3.5030612244897956e-05, "loss": 2.4976, "step": 7342 }, { "epoch": 29.97142857142857, "grad_norm": 15.664612770080566, "learning_rate": 3.502857142857143e-05, "loss": 2.6848, "step": 7343 }, { "epoch": 29.975510204081633, "grad_norm": 24.854751586914062, "learning_rate": 3.5026530612244904e-05, "loss": 1.9161, "step": 7344 }, { "epoch": 29.979591836734695, "grad_norm": 9.561796188354492, "learning_rate": 3.502448979591837e-05, "loss": 3.3726, "step": 7345 }, { "epoch": 29.983673469387757, "grad_norm": 6.910807132720947, "learning_rate": 3.502244897959184e-05, "loss": 3.7032, "step": 7346 }, { "epoch": 29.987755102040815, "grad_norm": 24.061317443847656, "learning_rate": 3.5020408163265305e-05, "loss": 2.5387, "step": 7347 }, { "epoch": 29.991836734693877, "grad_norm": 17.246238708496094, "learning_rate": 3.501836734693878e-05, "loss": 2.5295, "step": 7348 }, { "epoch": 29.99591836734694, "grad_norm": 24.55019760131836, "learning_rate": 3.5016326530612246e-05, "loss": 2.3998, "step": 7349 }, { "epoch": 30.0, "grad_norm": 6.949610233306885, "learning_rate": 3.5014285714285714e-05, "loss": 3.6927, "step": 7350 }, { "epoch": 30.00408163265306, "grad_norm": 27.049882888793945, "learning_rate": 3.501224489795919e-05, "loss": 1.6716, "step": 7351 }, { "epoch": 30.008163265306123, "grad_norm": 38.618316650390625, "learning_rate": 3.5010204081632655e-05, "loss": 2.0102, "step": 7352 }, { "epoch": 30.012244897959185, "grad_norm": 18.595848083496094, "learning_rate": 3.500816326530613e-05, "loss": 2.1435, "step": 7353 }, { "epoch": 30.016326530612243, "grad_norm": 11.218095779418945, "learning_rate": 3.5006122448979596e-05, "loss": 2.8009, "step": 7354 }, { "epoch": 30.020408163265305, "grad_norm": 12.208884239196777, "learning_rate": 3.500408163265306e-05, "loss": 2.4765, "step": 7355 }, { "epoch": 30.024489795918367, "grad_norm": 27.49500274658203, "learning_rate": 3.500204081632653e-05, "loss": 1.621, "step": 7356 }, { "epoch": 30.02857142857143, "grad_norm": 23.280351638793945, "learning_rate": 3.5e-05, "loss": 2.0669, "step": 7357 }, { "epoch": 30.03265306122449, "grad_norm": 19.719228744506836, "learning_rate": 3.499795918367347e-05, "loss": 2.5009, "step": 7358 }, { "epoch": 30.036734693877552, "grad_norm": 25.452102661132812, "learning_rate": 3.4995918367346945e-05, "loss": 1.8754, "step": 7359 }, { "epoch": 30.040816326530614, "grad_norm": 17.57947540283203, "learning_rate": 3.499387755102041e-05, "loss": 2.3643, "step": 7360 }, { "epoch": 30.044897959183672, "grad_norm": 12.566620826721191, "learning_rate": 3.499183673469388e-05, "loss": 3.2021, "step": 7361 }, { "epoch": 30.048979591836734, "grad_norm": 14.121399879455566, "learning_rate": 3.4989795918367346e-05, "loss": 2.8631, "step": 7362 }, { "epoch": 30.053061224489795, "grad_norm": 14.188594818115234, "learning_rate": 3.498775510204082e-05, "loss": 2.9532, "step": 7363 }, { "epoch": 30.057142857142857, "grad_norm": 27.693891525268555, "learning_rate": 3.498571428571429e-05, "loss": 2.0933, "step": 7364 }, { "epoch": 30.06122448979592, "grad_norm": 29.07714080810547, "learning_rate": 3.4983673469387754e-05, "loss": 2.2449, "step": 7365 }, { "epoch": 30.06530612244898, "grad_norm": 24.18341827392578, "learning_rate": 3.498163265306123e-05, "loss": 2.0217, "step": 7366 }, { "epoch": 30.069387755102042, "grad_norm": 13.134836196899414, "learning_rate": 3.4979591836734695e-05, "loss": 2.9088, "step": 7367 }, { "epoch": 30.073469387755104, "grad_norm": 10.569398880004883, "learning_rate": 3.497755102040817e-05, "loss": 3.0002, "step": 7368 }, { "epoch": 30.077551020408162, "grad_norm": 9.241698265075684, "learning_rate": 3.4975510204081636e-05, "loss": 2.8872, "step": 7369 }, { "epoch": 30.081632653061224, "grad_norm": 19.451236724853516, "learning_rate": 3.4973469387755103e-05, "loss": 2.6015, "step": 7370 }, { "epoch": 30.085714285714285, "grad_norm": 11.749784469604492, "learning_rate": 3.497142857142857e-05, "loss": 2.8008, "step": 7371 }, { "epoch": 30.089795918367347, "grad_norm": 25.900177001953125, "learning_rate": 3.496938775510204e-05, "loss": 2.1126, "step": 7372 }, { "epoch": 30.09387755102041, "grad_norm": 19.257999420166016, "learning_rate": 3.496734693877551e-05, "loss": 0.2841, "step": 7373 }, { "epoch": 30.09795918367347, "grad_norm": 21.14742660522461, "learning_rate": 3.496530612244898e-05, "loss": 2.2625, "step": 7374 }, { "epoch": 30.102040816326532, "grad_norm": 14.916583061218262, "learning_rate": 3.496326530612245e-05, "loss": 2.5789, "step": 7375 }, { "epoch": 30.10612244897959, "grad_norm": 24.93798065185547, "learning_rate": 3.496122448979592e-05, "loss": 2.5952, "step": 7376 }, { "epoch": 30.110204081632652, "grad_norm": 11.11512279510498, "learning_rate": 3.495918367346939e-05, "loss": 2.9805, "step": 7377 }, { "epoch": 30.114285714285714, "grad_norm": 10.88116455078125, "learning_rate": 3.495714285714286e-05, "loss": 3.0691, "step": 7378 }, { "epoch": 30.118367346938776, "grad_norm": 13.351486206054688, "learning_rate": 3.495510204081633e-05, "loss": 2.8087, "step": 7379 }, { "epoch": 30.122448979591837, "grad_norm": 15.799723625183105, "learning_rate": 3.4953061224489795e-05, "loss": 3.0011, "step": 7380 }, { "epoch": 30.1265306122449, "grad_norm": 18.24582862854004, "learning_rate": 3.495102040816326e-05, "loss": 2.5269, "step": 7381 }, { "epoch": 30.13061224489796, "grad_norm": 15.364097595214844, "learning_rate": 3.4948979591836736e-05, "loss": 2.8468, "step": 7382 }, { "epoch": 30.13469387755102, "grad_norm": 12.379136085510254, "learning_rate": 3.494693877551021e-05, "loss": 2.7331, "step": 7383 }, { "epoch": 30.13877551020408, "grad_norm": 17.032062530517578, "learning_rate": 3.494489795918368e-05, "loss": 2.5093, "step": 7384 }, { "epoch": 30.142857142857142, "grad_norm": 7.197747707366943, "learning_rate": 3.4942857142857144e-05, "loss": 3.598, "step": 7385 }, { "epoch": 30.146938775510204, "grad_norm": 9.296204566955566, "learning_rate": 3.494081632653061e-05, "loss": 3.3856, "step": 7386 }, { "epoch": 30.151020408163266, "grad_norm": 17.78386878967285, "learning_rate": 3.4938775510204085e-05, "loss": 2.6756, "step": 7387 }, { "epoch": 30.155102040816328, "grad_norm": 28.117788314819336, "learning_rate": 3.493673469387755e-05, "loss": 1.9387, "step": 7388 }, { "epoch": 30.15918367346939, "grad_norm": 16.72339630126953, "learning_rate": 3.493469387755102e-05, "loss": 2.6593, "step": 7389 }, { "epoch": 30.163265306122447, "grad_norm": 26.392047882080078, "learning_rate": 3.4932653061224493e-05, "loss": 2.7606, "step": 7390 }, { "epoch": 30.16734693877551, "grad_norm": 28.479576110839844, "learning_rate": 3.493061224489796e-05, "loss": 2.0024, "step": 7391 }, { "epoch": 30.17142857142857, "grad_norm": 16.785789489746094, "learning_rate": 3.4928571428571434e-05, "loss": 2.8229, "step": 7392 }, { "epoch": 30.175510204081633, "grad_norm": 16.723569869995117, "learning_rate": 3.49265306122449e-05, "loss": 2.6359, "step": 7393 }, { "epoch": 30.179591836734694, "grad_norm": 10.59644889831543, "learning_rate": 3.492448979591837e-05, "loss": 3.1709, "step": 7394 }, { "epoch": 30.183673469387756, "grad_norm": 9.725181579589844, "learning_rate": 3.4922448979591836e-05, "loss": 3.3903, "step": 7395 }, { "epoch": 30.187755102040818, "grad_norm": 23.248491287231445, "learning_rate": 3.49204081632653e-05, "loss": 2.2261, "step": 7396 }, { "epoch": 30.191836734693876, "grad_norm": 30.176340103149414, "learning_rate": 3.491836734693878e-05, "loss": 2.2641, "step": 7397 }, { "epoch": 30.195918367346938, "grad_norm": 21.44454002380371, "learning_rate": 3.491632653061225e-05, "loss": 2.1382, "step": 7398 }, { "epoch": 30.2, "grad_norm": 10.640564918518066, "learning_rate": 3.491428571428572e-05, "loss": 3.0301, "step": 7399 }, { "epoch": 30.20408163265306, "grad_norm": 15.439946174621582, "learning_rate": 3.4912244897959185e-05, "loss": 2.8275, "step": 7400 }, { "epoch": 30.208163265306123, "grad_norm": 19.73288917541504, "learning_rate": 3.491020408163265e-05, "loss": 2.1081, "step": 7401 }, { "epoch": 30.212244897959184, "grad_norm": 11.454550743103027, "learning_rate": 3.4908163265306126e-05, "loss": 3.0437, "step": 7402 }, { "epoch": 30.216326530612246, "grad_norm": 15.39138412475586, "learning_rate": 3.490612244897959e-05, "loss": 2.6078, "step": 7403 }, { "epoch": 30.220408163265308, "grad_norm": 27.909711837768555, "learning_rate": 3.490408163265306e-05, "loss": 2.8519, "step": 7404 }, { "epoch": 30.224489795918366, "grad_norm": 11.68873119354248, "learning_rate": 3.4902040816326534e-05, "loss": 3.134, "step": 7405 }, { "epoch": 30.228571428571428, "grad_norm": 19.50697898864746, "learning_rate": 3.49e-05, "loss": 2.6957, "step": 7406 }, { "epoch": 30.23265306122449, "grad_norm": 20.592884063720703, "learning_rate": 3.4897959183673475e-05, "loss": 2.3483, "step": 7407 }, { "epoch": 30.23673469387755, "grad_norm": 19.119783401489258, "learning_rate": 3.489591836734694e-05, "loss": 2.3269, "step": 7408 }, { "epoch": 30.240816326530613, "grad_norm": 18.813766479492188, "learning_rate": 3.489387755102041e-05, "loss": 2.4778, "step": 7409 }, { "epoch": 30.244897959183675, "grad_norm": 15.22734546661377, "learning_rate": 3.4891836734693877e-05, "loss": 2.6186, "step": 7410 }, { "epoch": 30.248979591836736, "grad_norm": 15.208473205566406, "learning_rate": 3.4889795918367344e-05, "loss": 2.9169, "step": 7411 }, { "epoch": 30.253061224489795, "grad_norm": 16.598617553710938, "learning_rate": 3.488775510204082e-05, "loss": 2.7016, "step": 7412 }, { "epoch": 30.257142857142856, "grad_norm": 14.21483325958252, "learning_rate": 3.488571428571429e-05, "loss": 2.9565, "step": 7413 }, { "epoch": 30.261224489795918, "grad_norm": 25.305744171142578, "learning_rate": 3.488367346938776e-05, "loss": 1.1998, "step": 7414 }, { "epoch": 30.26530612244898, "grad_norm": 23.468502044677734, "learning_rate": 3.4881632653061226e-05, "loss": 2.5005, "step": 7415 }, { "epoch": 30.26938775510204, "grad_norm": 38.64453125, "learning_rate": 3.487959183673469e-05, "loss": 0.9311, "step": 7416 }, { "epoch": 30.273469387755103, "grad_norm": 11.937335014343262, "learning_rate": 3.487755102040817e-05, "loss": 3.1843, "step": 7417 }, { "epoch": 30.277551020408165, "grad_norm": 17.402158737182617, "learning_rate": 3.4875510204081634e-05, "loss": 2.2673, "step": 7418 }, { "epoch": 30.281632653061223, "grad_norm": 13.710145950317383, "learning_rate": 3.48734693877551e-05, "loss": 3.0958, "step": 7419 }, { "epoch": 30.285714285714285, "grad_norm": 11.308030128479004, "learning_rate": 3.4871428571428575e-05, "loss": 3.1766, "step": 7420 }, { "epoch": 30.289795918367346, "grad_norm": 13.258832931518555, "learning_rate": 3.486938775510204e-05, "loss": 2.9634, "step": 7421 }, { "epoch": 30.293877551020408, "grad_norm": 21.78525161743164, "learning_rate": 3.4867346938775516e-05, "loss": 2.7231, "step": 7422 }, { "epoch": 30.29795918367347, "grad_norm": 31.358238220214844, "learning_rate": 3.486530612244898e-05, "loss": 1.025, "step": 7423 }, { "epoch": 30.30204081632653, "grad_norm": 23.82522964477539, "learning_rate": 3.486326530612245e-05, "loss": 2.3757, "step": 7424 }, { "epoch": 30.306122448979593, "grad_norm": 15.192930221557617, "learning_rate": 3.486122448979592e-05, "loss": 2.2464, "step": 7425 }, { "epoch": 30.31020408163265, "grad_norm": 33.49525833129883, "learning_rate": 3.485918367346939e-05, "loss": 1.241, "step": 7426 }, { "epoch": 30.314285714285713, "grad_norm": 14.515827178955078, "learning_rate": 3.485714285714286e-05, "loss": 2.8926, "step": 7427 }, { "epoch": 30.318367346938775, "grad_norm": 25.175159454345703, "learning_rate": 3.4855102040816325e-05, "loss": 1.7603, "step": 7428 }, { "epoch": 30.322448979591837, "grad_norm": 22.298553466796875, "learning_rate": 3.48530612244898e-05, "loss": 2.4556, "step": 7429 }, { "epoch": 30.3265306122449, "grad_norm": 16.530729293823242, "learning_rate": 3.4851020408163267e-05, "loss": 2.7714, "step": 7430 }, { "epoch": 30.33061224489796, "grad_norm": 13.423223495483398, "learning_rate": 3.484897959183674e-05, "loss": 3.0933, "step": 7431 }, { "epoch": 30.33469387755102, "grad_norm": 16.405517578125, "learning_rate": 3.484693877551021e-05, "loss": 2.9486, "step": 7432 }, { "epoch": 30.33877551020408, "grad_norm": 10.236186027526855, "learning_rate": 3.4844897959183675e-05, "loss": 3.0191, "step": 7433 }, { "epoch": 30.34285714285714, "grad_norm": 16.223779678344727, "learning_rate": 3.484285714285714e-05, "loss": 2.9533, "step": 7434 }, { "epoch": 30.346938775510203, "grad_norm": 12.111163139343262, "learning_rate": 3.484081632653061e-05, "loss": 3.0024, "step": 7435 }, { "epoch": 30.351020408163265, "grad_norm": 13.49427318572998, "learning_rate": 3.483877551020408e-05, "loss": 2.8735, "step": 7436 }, { "epoch": 30.355102040816327, "grad_norm": 13.829347610473633, "learning_rate": 3.483673469387756e-05, "loss": 2.9845, "step": 7437 }, { "epoch": 30.35918367346939, "grad_norm": 13.686388969421387, "learning_rate": 3.4834693877551024e-05, "loss": 2.6311, "step": 7438 }, { "epoch": 30.36326530612245, "grad_norm": 12.735491752624512, "learning_rate": 3.483265306122449e-05, "loss": 2.7701, "step": 7439 }, { "epoch": 30.367346938775512, "grad_norm": 8.6400785446167, "learning_rate": 3.483061224489796e-05, "loss": 3.2366, "step": 7440 }, { "epoch": 30.37142857142857, "grad_norm": 29.11422348022461, "learning_rate": 3.482857142857143e-05, "loss": 2.2689, "step": 7441 }, { "epoch": 30.375510204081632, "grad_norm": 8.797886848449707, "learning_rate": 3.48265306122449e-05, "loss": 3.3852, "step": 7442 }, { "epoch": 30.379591836734694, "grad_norm": 8.203292846679688, "learning_rate": 3.4824489795918366e-05, "loss": 3.4653, "step": 7443 }, { "epoch": 30.383673469387755, "grad_norm": 26.702396392822266, "learning_rate": 3.482244897959184e-05, "loss": 2.0454, "step": 7444 }, { "epoch": 30.387755102040817, "grad_norm": 31.84136962890625, "learning_rate": 3.482040816326531e-05, "loss": 1.9687, "step": 7445 }, { "epoch": 30.39183673469388, "grad_norm": 28.682056427001953, "learning_rate": 3.481836734693878e-05, "loss": 1.1926, "step": 7446 }, { "epoch": 30.39591836734694, "grad_norm": 8.50414752960205, "learning_rate": 3.481632653061225e-05, "loss": 3.3569, "step": 7447 }, { "epoch": 30.4, "grad_norm": 24.133554458618164, "learning_rate": 3.4814285714285715e-05, "loss": 1.8394, "step": 7448 }, { "epoch": 30.40408163265306, "grad_norm": 9.928654670715332, "learning_rate": 3.481224489795918e-05, "loss": 3.0628, "step": 7449 }, { "epoch": 30.408163265306122, "grad_norm": 15.60526180267334, "learning_rate": 3.481020408163265e-05, "loss": 2.9269, "step": 7450 }, { "epoch": 30.412244897959184, "grad_norm": 20.049150466918945, "learning_rate": 3.4808163265306124e-05, "loss": 2.77, "step": 7451 }, { "epoch": 30.416326530612245, "grad_norm": 14.650371551513672, "learning_rate": 3.48061224489796e-05, "loss": 2.9495, "step": 7452 }, { "epoch": 30.420408163265307, "grad_norm": 28.242015838623047, "learning_rate": 3.4804081632653065e-05, "loss": 1.7219, "step": 7453 }, { "epoch": 30.42448979591837, "grad_norm": 18.368366241455078, "learning_rate": 3.480204081632653e-05, "loss": 2.7857, "step": 7454 }, { "epoch": 30.428571428571427, "grad_norm": 8.034869194030762, "learning_rate": 3.48e-05, "loss": 3.3667, "step": 7455 }, { "epoch": 30.43265306122449, "grad_norm": 20.9775447845459, "learning_rate": 3.479795918367347e-05, "loss": 1.8723, "step": 7456 }, { "epoch": 30.43673469387755, "grad_norm": 16.159704208374023, "learning_rate": 3.479591836734694e-05, "loss": 2.669, "step": 7457 }, { "epoch": 30.440816326530612, "grad_norm": 18.594444274902344, "learning_rate": 3.479387755102041e-05, "loss": 2.4998, "step": 7458 }, { "epoch": 30.444897959183674, "grad_norm": 16.787813186645508, "learning_rate": 3.479183673469388e-05, "loss": 2.8027, "step": 7459 }, { "epoch": 30.448979591836736, "grad_norm": 19.90606689453125, "learning_rate": 3.478979591836735e-05, "loss": 2.6236, "step": 7460 }, { "epoch": 30.453061224489797, "grad_norm": 9.939407348632812, "learning_rate": 3.478775510204082e-05, "loss": 3.0077, "step": 7461 }, { "epoch": 30.457142857142856, "grad_norm": 37.0501708984375, "learning_rate": 3.478571428571429e-05, "loss": 1.1352, "step": 7462 }, { "epoch": 30.461224489795917, "grad_norm": 26.75811767578125, "learning_rate": 3.4783673469387756e-05, "loss": 1.3331, "step": 7463 }, { "epoch": 30.46530612244898, "grad_norm": 6.76250696182251, "learning_rate": 3.478163265306122e-05, "loss": 3.6602, "step": 7464 }, { "epoch": 30.46938775510204, "grad_norm": 21.350082397460938, "learning_rate": 3.477959183673469e-05, "loss": 2.0118, "step": 7465 }, { "epoch": 30.473469387755102, "grad_norm": 12.360111236572266, "learning_rate": 3.4777551020408164e-05, "loss": 2.9384, "step": 7466 }, { "epoch": 30.477551020408164, "grad_norm": 34.52409744262695, "learning_rate": 3.477551020408164e-05, "loss": 2.1268, "step": 7467 }, { "epoch": 30.481632653061226, "grad_norm": 25.65933609008789, "learning_rate": 3.4773469387755105e-05, "loss": 2.2286, "step": 7468 }, { "epoch": 30.485714285714284, "grad_norm": 14.221864700317383, "learning_rate": 3.477142857142857e-05, "loss": 2.7138, "step": 7469 }, { "epoch": 30.489795918367346, "grad_norm": 19.008386611938477, "learning_rate": 3.476938775510204e-05, "loss": 2.5133, "step": 7470 }, { "epoch": 30.493877551020407, "grad_norm": 20.87289047241211, "learning_rate": 3.4767346938775514e-05, "loss": 2.2737, "step": 7471 }, { "epoch": 30.49795918367347, "grad_norm": 11.87682056427002, "learning_rate": 3.476530612244898e-05, "loss": 3.3018, "step": 7472 }, { "epoch": 30.50204081632653, "grad_norm": 21.094194412231445, "learning_rate": 3.476326530612245e-05, "loss": 1.9894, "step": 7473 }, { "epoch": 30.506122448979593, "grad_norm": 16.50042724609375, "learning_rate": 3.476122448979592e-05, "loss": 3.1041, "step": 7474 }, { "epoch": 30.510204081632654, "grad_norm": 14.412752151489258, "learning_rate": 3.475918367346939e-05, "loss": 2.9764, "step": 7475 }, { "epoch": 30.514285714285712, "grad_norm": 15.290192604064941, "learning_rate": 3.475714285714286e-05, "loss": 2.9911, "step": 7476 }, { "epoch": 30.518367346938774, "grad_norm": 20.162689208984375, "learning_rate": 3.475510204081633e-05, "loss": 2.3559, "step": 7477 }, { "epoch": 30.522448979591836, "grad_norm": 12.949235916137695, "learning_rate": 3.47530612244898e-05, "loss": 3.0113, "step": 7478 }, { "epoch": 30.526530612244898, "grad_norm": 26.294525146484375, "learning_rate": 3.4751020408163264e-05, "loss": 1.3196, "step": 7479 }, { "epoch": 30.53061224489796, "grad_norm": 10.76133918762207, "learning_rate": 3.474897959183674e-05, "loss": 2.879, "step": 7480 }, { "epoch": 30.53469387755102, "grad_norm": 17.403522491455078, "learning_rate": 3.4746938775510205e-05, "loss": 2.5262, "step": 7481 }, { "epoch": 30.538775510204083, "grad_norm": 16.34668731689453, "learning_rate": 3.474489795918367e-05, "loss": 2.7123, "step": 7482 }, { "epoch": 30.542857142857144, "grad_norm": 11.853461265563965, "learning_rate": 3.4742857142857146e-05, "loss": 2.7988, "step": 7483 }, { "epoch": 30.546938775510203, "grad_norm": 23.604162216186523, "learning_rate": 3.474081632653061e-05, "loss": 2.7759, "step": 7484 }, { "epoch": 30.551020408163264, "grad_norm": 13.976022720336914, "learning_rate": 3.473877551020409e-05, "loss": 2.9, "step": 7485 }, { "epoch": 30.555102040816326, "grad_norm": 10.568768501281738, "learning_rate": 3.4736734693877554e-05, "loss": 3.2742, "step": 7486 }, { "epoch": 30.559183673469388, "grad_norm": 18.009334564208984, "learning_rate": 3.473469387755102e-05, "loss": 2.0966, "step": 7487 }, { "epoch": 30.56326530612245, "grad_norm": 19.446807861328125, "learning_rate": 3.473265306122449e-05, "loss": 2.2596, "step": 7488 }, { "epoch": 30.56734693877551, "grad_norm": 19.347810745239258, "learning_rate": 3.4730612244897956e-05, "loss": 2.7724, "step": 7489 }, { "epoch": 30.571428571428573, "grad_norm": 24.67753028869629, "learning_rate": 3.472857142857143e-05, "loss": 2.4336, "step": 7490 }, { "epoch": 30.57551020408163, "grad_norm": 13.537286758422852, "learning_rate": 3.4726530612244903e-05, "loss": 2.9807, "step": 7491 }, { "epoch": 30.579591836734693, "grad_norm": 10.434187889099121, "learning_rate": 3.472448979591837e-05, "loss": 2.96, "step": 7492 }, { "epoch": 30.583673469387755, "grad_norm": 26.272268295288086, "learning_rate": 3.472244897959184e-05, "loss": 1.4804, "step": 7493 }, { "epoch": 30.587755102040816, "grad_norm": 13.694485664367676, "learning_rate": 3.4720408163265305e-05, "loss": 2.7697, "step": 7494 }, { "epoch": 30.591836734693878, "grad_norm": 32.48751449584961, "learning_rate": 3.471836734693878e-05, "loss": 2.0562, "step": 7495 }, { "epoch": 30.59591836734694, "grad_norm": 27.346332550048828, "learning_rate": 3.4716326530612246e-05, "loss": 1.8154, "step": 7496 }, { "epoch": 30.6, "grad_norm": 19.92500114440918, "learning_rate": 3.471428571428571e-05, "loss": 2.4769, "step": 7497 }, { "epoch": 30.60408163265306, "grad_norm": 17.172670364379883, "learning_rate": 3.471224489795919e-05, "loss": 2.4377, "step": 7498 }, { "epoch": 30.60816326530612, "grad_norm": 15.457582473754883, "learning_rate": 3.4710204081632654e-05, "loss": 2.8586, "step": 7499 }, { "epoch": 30.612244897959183, "grad_norm": 15.286867141723633, "learning_rate": 3.470816326530613e-05, "loss": 2.3834, "step": 7500 }, { "epoch": 30.616326530612245, "grad_norm": 30.60108184814453, "learning_rate": 3.4706122448979595e-05, "loss": 1.0981, "step": 7501 }, { "epoch": 30.620408163265306, "grad_norm": 15.593948364257812, "learning_rate": 3.470408163265306e-05, "loss": 2.7196, "step": 7502 }, { "epoch": 30.624489795918368, "grad_norm": 21.401992797851562, "learning_rate": 3.470204081632653e-05, "loss": 1.9549, "step": 7503 }, { "epoch": 30.62857142857143, "grad_norm": 11.8500394821167, "learning_rate": 3.4699999999999996e-05, "loss": 3.011, "step": 7504 }, { "epoch": 30.632653061224488, "grad_norm": 14.366751670837402, "learning_rate": 3.469795918367347e-05, "loss": 2.8266, "step": 7505 }, { "epoch": 30.63673469387755, "grad_norm": 10.750656127929688, "learning_rate": 3.4695918367346944e-05, "loss": 3.0619, "step": 7506 }, { "epoch": 30.64081632653061, "grad_norm": 18.723243713378906, "learning_rate": 3.469387755102041e-05, "loss": 2.4035, "step": 7507 }, { "epoch": 30.644897959183673, "grad_norm": 26.208284378051758, "learning_rate": 3.469183673469388e-05, "loss": 1.797, "step": 7508 }, { "epoch": 30.648979591836735, "grad_norm": 11.088577270507812, "learning_rate": 3.4689795918367346e-05, "loss": 3.0602, "step": 7509 }, { "epoch": 30.653061224489797, "grad_norm": 7.4955010414123535, "learning_rate": 3.468775510204082e-05, "loss": 3.6987, "step": 7510 }, { "epoch": 30.65714285714286, "grad_norm": 13.189536094665527, "learning_rate": 3.468571428571429e-05, "loss": 3.0021, "step": 7511 }, { "epoch": 30.66122448979592, "grad_norm": 32.98078155517578, "learning_rate": 3.4683673469387754e-05, "loss": 1.444, "step": 7512 }, { "epoch": 30.66530612244898, "grad_norm": 14.982906341552734, "learning_rate": 3.468163265306123e-05, "loss": 2.3757, "step": 7513 }, { "epoch": 30.66938775510204, "grad_norm": 36.78602600097656, "learning_rate": 3.4679591836734695e-05, "loss": 1.3096, "step": 7514 }, { "epoch": 30.6734693877551, "grad_norm": 13.168632507324219, "learning_rate": 3.467755102040817e-05, "loss": 3.0117, "step": 7515 }, { "epoch": 30.677551020408163, "grad_norm": 9.637045860290527, "learning_rate": 3.4675510204081636e-05, "loss": 3.3011, "step": 7516 }, { "epoch": 30.681632653061225, "grad_norm": 19.962366104125977, "learning_rate": 3.46734693877551e-05, "loss": 1.9159, "step": 7517 }, { "epoch": 30.685714285714287, "grad_norm": 18.278331756591797, "learning_rate": 3.467142857142857e-05, "loss": 2.4544, "step": 7518 }, { "epoch": 30.68979591836735, "grad_norm": 13.401832580566406, "learning_rate": 3.4669387755102044e-05, "loss": 2.9351, "step": 7519 }, { "epoch": 30.693877551020407, "grad_norm": 14.02892017364502, "learning_rate": 3.466734693877551e-05, "loss": 2.6034, "step": 7520 }, { "epoch": 30.69795918367347, "grad_norm": 12.645072937011719, "learning_rate": 3.4665306122448985e-05, "loss": 2.5364, "step": 7521 }, { "epoch": 30.70204081632653, "grad_norm": 28.78864288330078, "learning_rate": 3.466326530612245e-05, "loss": 2.1883, "step": 7522 }, { "epoch": 30.706122448979592, "grad_norm": 25.164371490478516, "learning_rate": 3.466122448979592e-05, "loss": 1.9762, "step": 7523 }, { "epoch": 30.710204081632654, "grad_norm": 15.806513786315918, "learning_rate": 3.465918367346939e-05, "loss": 2.6757, "step": 7524 }, { "epoch": 30.714285714285715, "grad_norm": 27.700403213500977, "learning_rate": 3.465714285714286e-05, "loss": 1.3973, "step": 7525 }, { "epoch": 30.718367346938777, "grad_norm": 7.454907417297363, "learning_rate": 3.465510204081633e-05, "loss": 3.3912, "step": 7526 }, { "epoch": 30.722448979591835, "grad_norm": 21.376575469970703, "learning_rate": 3.4653061224489795e-05, "loss": 2.1855, "step": 7527 }, { "epoch": 30.726530612244897, "grad_norm": 12.529339790344238, "learning_rate": 3.465102040816326e-05, "loss": 2.9092, "step": 7528 }, { "epoch": 30.73061224489796, "grad_norm": 10.874977111816406, "learning_rate": 3.4648979591836736e-05, "loss": 3.4602, "step": 7529 }, { "epoch": 30.73469387755102, "grad_norm": 15.890228271484375, "learning_rate": 3.464693877551021e-05, "loss": 2.7379, "step": 7530 }, { "epoch": 30.738775510204082, "grad_norm": 17.313093185424805, "learning_rate": 3.4644897959183677e-05, "loss": 2.5426, "step": 7531 }, { "epoch": 30.742857142857144, "grad_norm": 18.5251522064209, "learning_rate": 3.4642857142857144e-05, "loss": 2.6631, "step": 7532 }, { "epoch": 30.746938775510205, "grad_norm": 23.936620712280273, "learning_rate": 3.464081632653061e-05, "loss": 1.1211, "step": 7533 }, { "epoch": 30.751020408163264, "grad_norm": 10.165387153625488, "learning_rate": 3.4638775510204085e-05, "loss": 3.1905, "step": 7534 }, { "epoch": 30.755102040816325, "grad_norm": 12.957620620727539, "learning_rate": 3.463673469387755e-05, "loss": 3.0157, "step": 7535 }, { "epoch": 30.759183673469387, "grad_norm": 15.641772270202637, "learning_rate": 3.463469387755102e-05, "loss": 2.7907, "step": 7536 }, { "epoch": 30.76326530612245, "grad_norm": 9.87114429473877, "learning_rate": 3.463265306122449e-05, "loss": 3.1432, "step": 7537 }, { "epoch": 30.76734693877551, "grad_norm": 33.80904769897461, "learning_rate": 3.463061224489796e-05, "loss": 1.8245, "step": 7538 }, { "epoch": 30.771428571428572, "grad_norm": 11.712279319763184, "learning_rate": 3.4628571428571434e-05, "loss": 3.1775, "step": 7539 }, { "epoch": 30.775510204081634, "grad_norm": 31.40801239013672, "learning_rate": 3.46265306122449e-05, "loss": 0.8959, "step": 7540 }, { "epoch": 30.779591836734696, "grad_norm": 23.247859954833984, "learning_rate": 3.462448979591837e-05, "loss": 2.3009, "step": 7541 }, { "epoch": 30.783673469387754, "grad_norm": 13.656906127929688, "learning_rate": 3.4622448979591835e-05, "loss": 2.5517, "step": 7542 }, { "epoch": 30.787755102040816, "grad_norm": 14.586549758911133, "learning_rate": 3.46204081632653e-05, "loss": 2.6783, "step": 7543 }, { "epoch": 30.791836734693877, "grad_norm": 8.505349159240723, "learning_rate": 3.4618367346938776e-05, "loss": 3.306, "step": 7544 }, { "epoch": 30.79591836734694, "grad_norm": 20.7886962890625, "learning_rate": 3.461632653061225e-05, "loss": 1.9756, "step": 7545 }, { "epoch": 30.8, "grad_norm": 18.931459426879883, "learning_rate": 3.461428571428572e-05, "loss": 2.0705, "step": 7546 }, { "epoch": 30.804081632653062, "grad_norm": 30.248456954956055, "learning_rate": 3.4612244897959184e-05, "loss": 1.7322, "step": 7547 }, { "epoch": 30.808163265306124, "grad_norm": 11.242277145385742, "learning_rate": 3.461020408163265e-05, "loss": 3.2078, "step": 7548 }, { "epoch": 30.812244897959182, "grad_norm": 10.419809341430664, "learning_rate": 3.4608163265306125e-05, "loss": 3.1167, "step": 7549 }, { "epoch": 30.816326530612244, "grad_norm": 11.995039939880371, "learning_rate": 3.460612244897959e-05, "loss": 3.0984, "step": 7550 }, { "epoch": 30.820408163265306, "grad_norm": 19.188077926635742, "learning_rate": 3.460408163265306e-05, "loss": 2.0222, "step": 7551 }, { "epoch": 30.824489795918367, "grad_norm": 10.032392501831055, "learning_rate": 3.4602040816326534e-05, "loss": 3.5514, "step": 7552 }, { "epoch": 30.82857142857143, "grad_norm": 9.872049331665039, "learning_rate": 3.46e-05, "loss": 3.2012, "step": 7553 }, { "epoch": 30.83265306122449, "grad_norm": 19.009445190429688, "learning_rate": 3.4597959183673475e-05, "loss": 2.5095, "step": 7554 }, { "epoch": 30.836734693877553, "grad_norm": 12.076786041259766, "learning_rate": 3.459591836734694e-05, "loss": 3.082, "step": 7555 }, { "epoch": 30.84081632653061, "grad_norm": 14.598994255065918, "learning_rate": 3.459387755102041e-05, "loss": 2.5033, "step": 7556 }, { "epoch": 30.844897959183672, "grad_norm": 26.18412208557129, "learning_rate": 3.4591836734693876e-05, "loss": 1.8845, "step": 7557 }, { "epoch": 30.848979591836734, "grad_norm": 22.988887786865234, "learning_rate": 3.458979591836734e-05, "loss": 1.7309, "step": 7558 }, { "epoch": 30.853061224489796, "grad_norm": 13.945730209350586, "learning_rate": 3.458775510204082e-05, "loss": 2.7905, "step": 7559 }, { "epoch": 30.857142857142858, "grad_norm": 14.225029945373535, "learning_rate": 3.458571428571429e-05, "loss": 2.9777, "step": 7560 }, { "epoch": 30.86122448979592, "grad_norm": 10.886368751525879, "learning_rate": 3.458367346938776e-05, "loss": 3.1009, "step": 7561 }, { "epoch": 30.86530612244898, "grad_norm": 25.537912368774414, "learning_rate": 3.4581632653061225e-05, "loss": 1.8606, "step": 7562 }, { "epoch": 30.86938775510204, "grad_norm": 25.439268112182617, "learning_rate": 3.45795918367347e-05, "loss": 2.0902, "step": 7563 }, { "epoch": 30.8734693877551, "grad_norm": 16.653112411499023, "learning_rate": 3.4577551020408166e-05, "loss": 2.8355, "step": 7564 }, { "epoch": 30.877551020408163, "grad_norm": 20.48154067993164, "learning_rate": 3.457551020408163e-05, "loss": 1.8507, "step": 7565 }, { "epoch": 30.881632653061224, "grad_norm": 8.716703414916992, "learning_rate": 3.45734693877551e-05, "loss": 3.3355, "step": 7566 }, { "epoch": 30.885714285714286, "grad_norm": 9.518182754516602, "learning_rate": 3.4571428571428574e-05, "loss": 3.245, "step": 7567 }, { "epoch": 30.889795918367348, "grad_norm": 11.25218391418457, "learning_rate": 3.456938775510205e-05, "loss": 3.2997, "step": 7568 }, { "epoch": 30.89387755102041, "grad_norm": 9.243191719055176, "learning_rate": 3.4567346938775515e-05, "loss": 3.0516, "step": 7569 }, { "epoch": 30.897959183673468, "grad_norm": 15.441628456115723, "learning_rate": 3.456530612244898e-05, "loss": 2.6637, "step": 7570 }, { "epoch": 30.90204081632653, "grad_norm": 20.823558807373047, "learning_rate": 3.456326530612245e-05, "loss": 1.7785, "step": 7571 }, { "epoch": 30.90612244897959, "grad_norm": 14.223628044128418, "learning_rate": 3.456122448979592e-05, "loss": 2.8546, "step": 7572 }, { "epoch": 30.910204081632653, "grad_norm": 26.3062686920166, "learning_rate": 3.455918367346939e-05, "loss": 2.6059, "step": 7573 }, { "epoch": 30.914285714285715, "grad_norm": 12.113519668579102, "learning_rate": 3.455714285714286e-05, "loss": 3.3684, "step": 7574 }, { "epoch": 30.918367346938776, "grad_norm": 16.711639404296875, "learning_rate": 3.455510204081633e-05, "loss": 2.5555, "step": 7575 }, { "epoch": 30.922448979591838, "grad_norm": 20.682941436767578, "learning_rate": 3.45530612244898e-05, "loss": 2.6983, "step": 7576 }, { "epoch": 30.926530612244896, "grad_norm": 12.527320861816406, "learning_rate": 3.4551020408163266e-05, "loss": 3.0505, "step": 7577 }, { "epoch": 30.930612244897958, "grad_norm": 8.642112731933594, "learning_rate": 3.454897959183674e-05, "loss": 3.3497, "step": 7578 }, { "epoch": 30.93469387755102, "grad_norm": 18.552913665771484, "learning_rate": 3.454693877551021e-05, "loss": 2.6573, "step": 7579 }, { "epoch": 30.93877551020408, "grad_norm": 11.77277946472168, "learning_rate": 3.4544897959183674e-05, "loss": 3.1831, "step": 7580 }, { "epoch": 30.942857142857143, "grad_norm": 17.868465423583984, "learning_rate": 3.454285714285714e-05, "loss": 2.7511, "step": 7581 }, { "epoch": 30.946938775510205, "grad_norm": 25.425182342529297, "learning_rate": 3.454081632653061e-05, "loss": 1.1324, "step": 7582 }, { "epoch": 30.951020408163266, "grad_norm": 17.961458206176758, "learning_rate": 3.453877551020408e-05, "loss": 2.8286, "step": 7583 }, { "epoch": 30.955102040816328, "grad_norm": 9.27149486541748, "learning_rate": 3.4536734693877556e-05, "loss": 3.484, "step": 7584 }, { "epoch": 30.959183673469386, "grad_norm": 22.321081161499023, "learning_rate": 3.453469387755102e-05, "loss": 2.3843, "step": 7585 }, { "epoch": 30.963265306122448, "grad_norm": 43.778072357177734, "learning_rate": 3.453265306122449e-05, "loss": 2.0354, "step": 7586 }, { "epoch": 30.96734693877551, "grad_norm": 10.532785415649414, "learning_rate": 3.453061224489796e-05, "loss": 3.1874, "step": 7587 }, { "epoch": 30.97142857142857, "grad_norm": 18.037633895874023, "learning_rate": 3.452857142857143e-05, "loss": 2.6515, "step": 7588 }, { "epoch": 30.975510204081633, "grad_norm": 29.952341079711914, "learning_rate": 3.45265306122449e-05, "loss": 1.9293, "step": 7589 }, { "epoch": 30.979591836734695, "grad_norm": 8.149201393127441, "learning_rate": 3.4524489795918366e-05, "loss": 3.5763, "step": 7590 }, { "epoch": 30.983673469387757, "grad_norm": 13.200241088867188, "learning_rate": 3.452244897959184e-05, "loss": 3.1212, "step": 7591 }, { "epoch": 30.987755102040815, "grad_norm": 23.076021194458008, "learning_rate": 3.452040816326531e-05, "loss": 2.5079, "step": 7592 }, { "epoch": 30.991836734693877, "grad_norm": 21.306806564331055, "learning_rate": 3.451836734693878e-05, "loss": 2.2464, "step": 7593 }, { "epoch": 30.99591836734694, "grad_norm": 10.464432716369629, "learning_rate": 3.451632653061225e-05, "loss": 3.0032, "step": 7594 }, { "epoch": 31.0, "grad_norm": 19.16067886352539, "learning_rate": 3.4514285714285715e-05, "loss": 2.1335, "step": 7595 }, { "epoch": 31.00408163265306, "grad_norm": 10.746975898742676, "learning_rate": 3.451224489795918e-05, "loss": 3.0624, "step": 7596 }, { "epoch": 31.008163265306123, "grad_norm": 11.317785263061523, "learning_rate": 3.451020408163265e-05, "loss": 3.1169, "step": 7597 }, { "epoch": 31.012244897959185, "grad_norm": 8.587532043457031, "learning_rate": 3.450816326530612e-05, "loss": 3.3496, "step": 7598 }, { "epoch": 31.016326530612243, "grad_norm": 16.478870391845703, "learning_rate": 3.45061224489796e-05, "loss": 2.5614, "step": 7599 }, { "epoch": 31.020408163265305, "grad_norm": 12.535517692565918, "learning_rate": 3.4504081632653064e-05, "loss": 2.4746, "step": 7600 }, { "epoch": 31.024489795918367, "grad_norm": 18.485977172851562, "learning_rate": 3.450204081632653e-05, "loss": 2.4407, "step": 7601 }, { "epoch": 31.02857142857143, "grad_norm": 17.114864349365234, "learning_rate": 3.45e-05, "loss": 2.6131, "step": 7602 }, { "epoch": 31.03265306122449, "grad_norm": 15.737911224365234, "learning_rate": 3.449795918367347e-05, "loss": 2.6212, "step": 7603 }, { "epoch": 31.036734693877552, "grad_norm": 20.778005599975586, "learning_rate": 3.449591836734694e-05, "loss": 2.5861, "step": 7604 }, { "epoch": 31.040816326530614, "grad_norm": 34.50579833984375, "learning_rate": 3.4493877551020406e-05, "loss": 2.1236, "step": 7605 }, { "epoch": 31.044897959183672, "grad_norm": 18.487146377563477, "learning_rate": 3.449183673469388e-05, "loss": 2.9577, "step": 7606 }, { "epoch": 31.048979591836734, "grad_norm": 31.103275299072266, "learning_rate": 3.4489795918367354e-05, "loss": 1.3844, "step": 7607 }, { "epoch": 31.053061224489795, "grad_norm": 28.007593154907227, "learning_rate": 3.448775510204082e-05, "loss": 1.5464, "step": 7608 }, { "epoch": 31.057142857142857, "grad_norm": 16.027273178100586, "learning_rate": 3.448571428571429e-05, "loss": 2.7706, "step": 7609 }, { "epoch": 31.06122448979592, "grad_norm": 20.490114212036133, "learning_rate": 3.4483673469387756e-05, "loss": 2.1964, "step": 7610 }, { "epoch": 31.06530612244898, "grad_norm": 25.5344295501709, "learning_rate": 3.448163265306122e-05, "loss": 2.5865, "step": 7611 }, { "epoch": 31.069387755102042, "grad_norm": 22.703655242919922, "learning_rate": 3.44795918367347e-05, "loss": 2.0821, "step": 7612 }, { "epoch": 31.073469387755104, "grad_norm": 29.821413040161133, "learning_rate": 3.4477551020408164e-05, "loss": 1.6098, "step": 7613 }, { "epoch": 31.077551020408162, "grad_norm": 12.130106925964355, "learning_rate": 3.447551020408164e-05, "loss": 2.9207, "step": 7614 }, { "epoch": 31.081632653061224, "grad_norm": 15.486513137817383, "learning_rate": 3.4473469387755105e-05, "loss": 2.8546, "step": 7615 }, { "epoch": 31.085714285714285, "grad_norm": 20.62505531311035, "learning_rate": 3.447142857142857e-05, "loss": 2.4634, "step": 7616 }, { "epoch": 31.089795918367347, "grad_norm": 18.338706970214844, "learning_rate": 3.4469387755102046e-05, "loss": 2.8944, "step": 7617 }, { "epoch": 31.09387755102041, "grad_norm": 16.869525909423828, "learning_rate": 3.446734693877551e-05, "loss": 2.2986, "step": 7618 }, { "epoch": 31.09795918367347, "grad_norm": 15.061881065368652, "learning_rate": 3.446530612244898e-05, "loss": 2.5119, "step": 7619 }, { "epoch": 31.102040816326532, "grad_norm": 20.139585494995117, "learning_rate": 3.446326530612245e-05, "loss": 2.1118, "step": 7620 }, { "epoch": 31.10612244897959, "grad_norm": 34.999855041503906, "learning_rate": 3.446122448979592e-05, "loss": 1.2025, "step": 7621 }, { "epoch": 31.110204081632652, "grad_norm": 12.799439430236816, "learning_rate": 3.4459183673469395e-05, "loss": 2.9717, "step": 7622 }, { "epoch": 31.114285714285714, "grad_norm": 16.250049591064453, "learning_rate": 3.445714285714286e-05, "loss": 2.8723, "step": 7623 }, { "epoch": 31.118367346938776, "grad_norm": 20.365238189697266, "learning_rate": 3.445510204081633e-05, "loss": 1.9371, "step": 7624 }, { "epoch": 31.122448979591837, "grad_norm": 15.967719078063965, "learning_rate": 3.4453061224489796e-05, "loss": 2.398, "step": 7625 }, { "epoch": 31.1265306122449, "grad_norm": 23.61285972595215, "learning_rate": 3.4451020408163264e-05, "loss": 2.4142, "step": 7626 }, { "epoch": 31.13061224489796, "grad_norm": 15.85710620880127, "learning_rate": 3.444897959183674e-05, "loss": 2.5134, "step": 7627 }, { "epoch": 31.13469387755102, "grad_norm": 17.59627914428711, "learning_rate": 3.4446938775510205e-05, "loss": 2.7424, "step": 7628 }, { "epoch": 31.13877551020408, "grad_norm": 46.74300003051758, "learning_rate": 3.444489795918368e-05, "loss": 1.0297, "step": 7629 }, { "epoch": 31.142857142857142, "grad_norm": 23.672840118408203, "learning_rate": 3.4442857142857146e-05, "loss": 2.0251, "step": 7630 }, { "epoch": 31.146938775510204, "grad_norm": 10.250277519226074, "learning_rate": 3.444081632653061e-05, "loss": 3.0932, "step": 7631 }, { "epoch": 31.151020408163266, "grad_norm": 23.51953125, "learning_rate": 3.443877551020409e-05, "loss": 1.6707, "step": 7632 }, { "epoch": 31.155102040816328, "grad_norm": 21.016807556152344, "learning_rate": 3.4436734693877554e-05, "loss": 0.2578, "step": 7633 }, { "epoch": 31.15918367346939, "grad_norm": 9.595036506652832, "learning_rate": 3.443469387755102e-05, "loss": 3.1057, "step": 7634 }, { "epoch": 31.163265306122447, "grad_norm": 12.975627899169922, "learning_rate": 3.443265306122449e-05, "loss": 3.0459, "step": 7635 }, { "epoch": 31.16734693877551, "grad_norm": 22.321773529052734, "learning_rate": 3.4430612244897955e-05, "loss": 2.7283, "step": 7636 }, { "epoch": 31.17142857142857, "grad_norm": 63.056941986083984, "learning_rate": 3.442857142857143e-05, "loss": 1.1025, "step": 7637 }, { "epoch": 31.175510204081633, "grad_norm": 31.715923309326172, "learning_rate": 3.44265306122449e-05, "loss": 1.9655, "step": 7638 }, { "epoch": 31.179591836734694, "grad_norm": 26.225711822509766, "learning_rate": 3.442448979591837e-05, "loss": 2.6052, "step": 7639 }, { "epoch": 31.183673469387756, "grad_norm": 9.279577255249023, "learning_rate": 3.442244897959184e-05, "loss": 3.225, "step": 7640 }, { "epoch": 31.187755102040818, "grad_norm": 24.05272102355957, "learning_rate": 3.4420408163265304e-05, "loss": 1.7031, "step": 7641 }, { "epoch": 31.191836734693876, "grad_norm": 12.196471214294434, "learning_rate": 3.441836734693878e-05, "loss": 3.0109, "step": 7642 }, { "epoch": 31.195918367346938, "grad_norm": 18.6279354095459, "learning_rate": 3.4416326530612245e-05, "loss": 2.3737, "step": 7643 }, { "epoch": 31.2, "grad_norm": 24.02916145324707, "learning_rate": 3.441428571428571e-05, "loss": 1.6426, "step": 7644 }, { "epoch": 31.20408163265306, "grad_norm": 16.06822395324707, "learning_rate": 3.4412244897959186e-05, "loss": 2.3452, "step": 7645 }, { "epoch": 31.208163265306123, "grad_norm": 28.88471031188965, "learning_rate": 3.4410204081632653e-05, "loss": 2.2438, "step": 7646 }, { "epoch": 31.212244897959184, "grad_norm": 20.314130783081055, "learning_rate": 3.440816326530613e-05, "loss": 2.3259, "step": 7647 }, { "epoch": 31.216326530612246, "grad_norm": 14.768084526062012, "learning_rate": 3.4406122448979595e-05, "loss": 2.6864, "step": 7648 }, { "epoch": 31.220408163265308, "grad_norm": 11.18152141571045, "learning_rate": 3.440408163265306e-05, "loss": 2.9465, "step": 7649 }, { "epoch": 31.224489795918366, "grad_norm": 14.676597595214844, "learning_rate": 3.440204081632653e-05, "loss": 2.9176, "step": 7650 }, { "epoch": 31.228571428571428, "grad_norm": 11.654465675354004, "learning_rate": 3.4399999999999996e-05, "loss": 2.7861, "step": 7651 }, { "epoch": 31.23265306122449, "grad_norm": 7.967672824859619, "learning_rate": 3.439795918367347e-05, "loss": 3.6285, "step": 7652 }, { "epoch": 31.23673469387755, "grad_norm": 11.107316970825195, "learning_rate": 3.4395918367346944e-05, "loss": 3.2399, "step": 7653 }, { "epoch": 31.240816326530613, "grad_norm": 15.537543296813965, "learning_rate": 3.439387755102041e-05, "loss": 2.5484, "step": 7654 }, { "epoch": 31.244897959183675, "grad_norm": 28.771671295166016, "learning_rate": 3.439183673469388e-05, "loss": 2.7242, "step": 7655 }, { "epoch": 31.248979591836736, "grad_norm": 9.865107536315918, "learning_rate": 3.438979591836735e-05, "loss": 3.0123, "step": 7656 }, { "epoch": 31.253061224489795, "grad_norm": 17.24803924560547, "learning_rate": 3.438775510204082e-05, "loss": 2.2222, "step": 7657 }, { "epoch": 31.257142857142856, "grad_norm": 29.559247970581055, "learning_rate": 3.4385714285714286e-05, "loss": 2.1935, "step": 7658 }, { "epoch": 31.261224489795918, "grad_norm": 17.21048355102539, "learning_rate": 3.438367346938775e-05, "loss": 2.4009, "step": 7659 }, { "epoch": 31.26530612244898, "grad_norm": 38.69514846801758, "learning_rate": 3.438163265306123e-05, "loss": 0.9945, "step": 7660 }, { "epoch": 31.26938775510204, "grad_norm": 18.20001220703125, "learning_rate": 3.43795918367347e-05, "loss": 2.6887, "step": 7661 }, { "epoch": 31.273469387755103, "grad_norm": 24.977455139160156, "learning_rate": 3.437755102040817e-05, "loss": 1.9892, "step": 7662 }, { "epoch": 31.277551020408165, "grad_norm": 15.713215827941895, "learning_rate": 3.4375510204081635e-05, "loss": 2.3727, "step": 7663 }, { "epoch": 31.281632653061223, "grad_norm": 21.7626895904541, "learning_rate": 3.43734693877551e-05, "loss": 2.1269, "step": 7664 }, { "epoch": 31.285714285714285, "grad_norm": 15.312686920166016, "learning_rate": 3.437142857142857e-05, "loss": 2.887, "step": 7665 }, { "epoch": 31.289795918367346, "grad_norm": 33.3695068359375, "learning_rate": 3.4369387755102043e-05, "loss": 2.0105, "step": 7666 }, { "epoch": 31.293877551020408, "grad_norm": 14.653512001037598, "learning_rate": 3.436734693877551e-05, "loss": 2.8159, "step": 7667 }, { "epoch": 31.29795918367347, "grad_norm": 13.963134765625, "learning_rate": 3.4365306122448984e-05, "loss": 2.9592, "step": 7668 }, { "epoch": 31.30204081632653, "grad_norm": 21.83640480041504, "learning_rate": 3.436326530612245e-05, "loss": 2.3334, "step": 7669 }, { "epoch": 31.306122448979593, "grad_norm": 14.985886573791504, "learning_rate": 3.436122448979592e-05, "loss": 2.8581, "step": 7670 }, { "epoch": 31.31020408163265, "grad_norm": 32.57678985595703, "learning_rate": 3.435918367346939e-05, "loss": 2.5514, "step": 7671 }, { "epoch": 31.314285714285713, "grad_norm": 44.99002456665039, "learning_rate": 3.435714285714286e-05, "loss": 1.1617, "step": 7672 }, { "epoch": 31.318367346938775, "grad_norm": 24.151752471923828, "learning_rate": 3.435510204081633e-05, "loss": 2.1333, "step": 7673 }, { "epoch": 31.322448979591837, "grad_norm": 13.101911544799805, "learning_rate": 3.4353061224489794e-05, "loss": 2.7731, "step": 7674 }, { "epoch": 31.3265306122449, "grad_norm": 15.12996768951416, "learning_rate": 3.435102040816327e-05, "loss": 2.8967, "step": 7675 }, { "epoch": 31.33061224489796, "grad_norm": 27.417448043823242, "learning_rate": 3.434897959183674e-05, "loss": 1.8468, "step": 7676 }, { "epoch": 31.33469387755102, "grad_norm": 8.097335815429688, "learning_rate": 3.434693877551021e-05, "loss": 3.4149, "step": 7677 }, { "epoch": 31.33877551020408, "grad_norm": 18.573974609375, "learning_rate": 3.4344897959183676e-05, "loss": 2.1143, "step": 7678 }, { "epoch": 31.34285714285714, "grad_norm": 19.305246353149414, "learning_rate": 3.434285714285714e-05, "loss": 2.2368, "step": 7679 }, { "epoch": 31.346938775510203, "grad_norm": 15.574773788452148, "learning_rate": 3.434081632653061e-05, "loss": 2.3628, "step": 7680 }, { "epoch": 31.351020408163265, "grad_norm": 10.855818748474121, "learning_rate": 3.4338775510204084e-05, "loss": 3.1128, "step": 7681 }, { "epoch": 31.355102040816327, "grad_norm": 26.182048797607422, "learning_rate": 3.433673469387755e-05, "loss": 2.2425, "step": 7682 }, { "epoch": 31.35918367346939, "grad_norm": 24.874692916870117, "learning_rate": 3.4334693877551025e-05, "loss": 2.0802, "step": 7683 }, { "epoch": 31.36326530612245, "grad_norm": 13.484421730041504, "learning_rate": 3.433265306122449e-05, "loss": 2.9991, "step": 7684 }, { "epoch": 31.367346938775512, "grad_norm": 41.56697082519531, "learning_rate": 3.433061224489796e-05, "loss": 2.064, "step": 7685 }, { "epoch": 31.37142857142857, "grad_norm": 35.0934944152832, "learning_rate": 3.432857142857143e-05, "loss": 1.9907, "step": 7686 }, { "epoch": 31.375510204081632, "grad_norm": 21.83010482788086, "learning_rate": 3.43265306122449e-05, "loss": 1.8739, "step": 7687 }, { "epoch": 31.379591836734694, "grad_norm": 8.857489585876465, "learning_rate": 3.432448979591837e-05, "loss": 3.3499, "step": 7688 }, { "epoch": 31.383673469387755, "grad_norm": 31.092897415161133, "learning_rate": 3.4322448979591835e-05, "loss": 1.2862, "step": 7689 }, { "epoch": 31.387755102040817, "grad_norm": 28.01395034790039, "learning_rate": 3.43204081632653e-05, "loss": 1.7069, "step": 7690 }, { "epoch": 31.39183673469388, "grad_norm": 21.022140502929688, "learning_rate": 3.4318367346938776e-05, "loss": 1.9737, "step": 7691 }, { "epoch": 31.39591836734694, "grad_norm": 22.47629737854004, "learning_rate": 3.431632653061225e-05, "loss": 2.19, "step": 7692 }, { "epoch": 31.4, "grad_norm": 12.541924476623535, "learning_rate": 3.431428571428572e-05, "loss": 3.131, "step": 7693 }, { "epoch": 31.40408163265306, "grad_norm": 8.73076057434082, "learning_rate": 3.4312244897959184e-05, "loss": 3.4912, "step": 7694 }, { "epoch": 31.408163265306122, "grad_norm": 18.339248657226562, "learning_rate": 3.431020408163265e-05, "loss": 2.6779, "step": 7695 }, { "epoch": 31.412244897959184, "grad_norm": 12.960396766662598, "learning_rate": 3.4308163265306125e-05, "loss": 2.9457, "step": 7696 }, { "epoch": 31.416326530612245, "grad_norm": 28.910364151000977, "learning_rate": 3.430612244897959e-05, "loss": 1.0261, "step": 7697 }, { "epoch": 31.420408163265307, "grad_norm": 26.815513610839844, "learning_rate": 3.430408163265306e-05, "loss": 2.0065, "step": 7698 }, { "epoch": 31.42448979591837, "grad_norm": 20.100051879882812, "learning_rate": 3.430204081632653e-05, "loss": 2.6964, "step": 7699 }, { "epoch": 31.428571428571427, "grad_norm": 30.10014533996582, "learning_rate": 3.430000000000001e-05, "loss": 1.8874, "step": 7700 }, { "epoch": 31.43265306122449, "grad_norm": 25.913280487060547, "learning_rate": 3.4297959183673474e-05, "loss": 2.6082, "step": 7701 }, { "epoch": 31.43673469387755, "grad_norm": 19.736186981201172, "learning_rate": 3.429591836734694e-05, "loss": 2.4059, "step": 7702 }, { "epoch": 31.440816326530612, "grad_norm": 13.090712547302246, "learning_rate": 3.429387755102041e-05, "loss": 2.991, "step": 7703 }, { "epoch": 31.444897959183674, "grad_norm": 18.060672760009766, "learning_rate": 3.4291836734693876e-05, "loss": 2.5988, "step": 7704 }, { "epoch": 31.448979591836736, "grad_norm": 12.439470291137695, "learning_rate": 3.428979591836735e-05, "loss": 3.107, "step": 7705 }, { "epoch": 31.453061224489797, "grad_norm": 13.018596649169922, "learning_rate": 3.4287755102040817e-05, "loss": 2.8849, "step": 7706 }, { "epoch": 31.457142857142856, "grad_norm": 22.306798934936523, "learning_rate": 3.428571428571429e-05, "loss": 1.8678, "step": 7707 }, { "epoch": 31.461224489795917, "grad_norm": 10.619730949401855, "learning_rate": 3.428367346938776e-05, "loss": 3.3713, "step": 7708 }, { "epoch": 31.46530612244898, "grad_norm": 20.049291610717773, "learning_rate": 3.4281632653061225e-05, "loss": 2.4354, "step": 7709 }, { "epoch": 31.46938775510204, "grad_norm": 13.031858444213867, "learning_rate": 3.42795918367347e-05, "loss": 2.758, "step": 7710 }, { "epoch": 31.473469387755102, "grad_norm": 11.094979286193848, "learning_rate": 3.4277551020408166e-05, "loss": 2.9451, "step": 7711 }, { "epoch": 31.477551020408164, "grad_norm": 23.113365173339844, "learning_rate": 3.427551020408163e-05, "loss": 1.0823, "step": 7712 }, { "epoch": 31.481632653061226, "grad_norm": 12.09887981414795, "learning_rate": 3.42734693877551e-05, "loss": 2.8312, "step": 7713 }, { "epoch": 31.485714285714284, "grad_norm": 17.133943557739258, "learning_rate": 3.4271428571428574e-05, "loss": 2.5959, "step": 7714 }, { "epoch": 31.489795918367346, "grad_norm": 11.630691528320312, "learning_rate": 3.426938775510205e-05, "loss": 3.0067, "step": 7715 }, { "epoch": 31.493877551020407, "grad_norm": 20.447906494140625, "learning_rate": 3.4267346938775515e-05, "loss": 1.9395, "step": 7716 }, { "epoch": 31.49795918367347, "grad_norm": 10.349793434143066, "learning_rate": 3.426530612244898e-05, "loss": 3.0658, "step": 7717 }, { "epoch": 31.50204081632653, "grad_norm": 12.212488174438477, "learning_rate": 3.426326530612245e-05, "loss": 3.153, "step": 7718 }, { "epoch": 31.506122448979593, "grad_norm": 11.118268966674805, "learning_rate": 3.4261224489795916e-05, "loss": 3.2044, "step": 7719 }, { "epoch": 31.510204081632654, "grad_norm": 14.168693542480469, "learning_rate": 3.425918367346939e-05, "loss": 3.0336, "step": 7720 }, { "epoch": 31.514285714285712, "grad_norm": 12.029051780700684, "learning_rate": 3.425714285714286e-05, "loss": 3.273, "step": 7721 }, { "epoch": 31.518367346938774, "grad_norm": 8.773683547973633, "learning_rate": 3.425510204081633e-05, "loss": 3.2234, "step": 7722 }, { "epoch": 31.522448979591836, "grad_norm": 11.800020217895508, "learning_rate": 3.42530612244898e-05, "loss": 3.07, "step": 7723 }, { "epoch": 31.526530612244898, "grad_norm": 13.958852767944336, "learning_rate": 3.4251020408163265e-05, "loss": 2.9246, "step": 7724 }, { "epoch": 31.53061224489796, "grad_norm": 15.920875549316406, "learning_rate": 3.424897959183674e-05, "loss": 2.6613, "step": 7725 }, { "epoch": 31.53469387755102, "grad_norm": 12.630060195922852, "learning_rate": 3.4246938775510206e-05, "loss": 2.7417, "step": 7726 }, { "epoch": 31.538775510204083, "grad_norm": 27.878738403320312, "learning_rate": 3.4244897959183674e-05, "loss": 1.6375, "step": 7727 }, { "epoch": 31.542857142857144, "grad_norm": 16.60161018371582, "learning_rate": 3.424285714285714e-05, "loss": 2.9416, "step": 7728 }, { "epoch": 31.546938775510203, "grad_norm": 13.213852882385254, "learning_rate": 3.4240816326530615e-05, "loss": 2.9436, "step": 7729 }, { "epoch": 31.551020408163264, "grad_norm": 20.775489807128906, "learning_rate": 3.423877551020409e-05, "loss": 2.7572, "step": 7730 }, { "epoch": 31.555102040816326, "grad_norm": 15.28341293334961, "learning_rate": 3.4236734693877556e-05, "loss": 2.764, "step": 7731 }, { "epoch": 31.559183673469388, "grad_norm": 17.777063369750977, "learning_rate": 3.423469387755102e-05, "loss": 2.5712, "step": 7732 }, { "epoch": 31.56326530612245, "grad_norm": 9.554546356201172, "learning_rate": 3.423265306122449e-05, "loss": 3.2835, "step": 7733 }, { "epoch": 31.56734693877551, "grad_norm": 6.531254291534424, "learning_rate": 3.423061224489796e-05, "loss": 3.6507, "step": 7734 }, { "epoch": 31.571428571428573, "grad_norm": 22.612287521362305, "learning_rate": 3.422857142857143e-05, "loss": 1.7333, "step": 7735 }, { "epoch": 31.57551020408163, "grad_norm": 19.175033569335938, "learning_rate": 3.42265306122449e-05, "loss": 2.2056, "step": 7736 }, { "epoch": 31.579591836734693, "grad_norm": 11.556212425231934, "learning_rate": 3.4224489795918365e-05, "loss": 2.8159, "step": 7737 }, { "epoch": 31.583673469387755, "grad_norm": 28.462339401245117, "learning_rate": 3.422244897959184e-05, "loss": 1.1915, "step": 7738 }, { "epoch": 31.587755102040816, "grad_norm": 16.918941497802734, "learning_rate": 3.4220408163265306e-05, "loss": 3.0416, "step": 7739 }, { "epoch": 31.591836734693878, "grad_norm": 24.73459815979004, "learning_rate": 3.421836734693878e-05, "loss": 1.8154, "step": 7740 }, { "epoch": 31.59591836734694, "grad_norm": 16.87835693359375, "learning_rate": 3.421632653061225e-05, "loss": 2.694, "step": 7741 }, { "epoch": 31.6, "grad_norm": 9.10283374786377, "learning_rate": 3.4214285714285714e-05, "loss": 3.2859, "step": 7742 }, { "epoch": 31.60408163265306, "grad_norm": 9.725250244140625, "learning_rate": 3.421224489795918e-05, "loss": 3.4503, "step": 7743 }, { "epoch": 31.60816326530612, "grad_norm": 16.497209548950195, "learning_rate": 3.4210204081632655e-05, "loss": 2.5802, "step": 7744 }, { "epoch": 31.612244897959183, "grad_norm": 13.38985824584961, "learning_rate": 3.420816326530612e-05, "loss": 2.8557, "step": 7745 }, { "epoch": 31.616326530612245, "grad_norm": 11.15544319152832, "learning_rate": 3.4206122448979596e-05, "loss": 3.0818, "step": 7746 }, { "epoch": 31.620408163265306, "grad_norm": 19.7243709564209, "learning_rate": 3.4204081632653064e-05, "loss": 2.71, "step": 7747 }, { "epoch": 31.624489795918368, "grad_norm": 11.999835014343262, "learning_rate": 3.420204081632653e-05, "loss": 2.9946, "step": 7748 }, { "epoch": 31.62857142857143, "grad_norm": 20.306594848632812, "learning_rate": 3.4200000000000005e-05, "loss": 2.4714, "step": 7749 }, { "epoch": 31.632653061224488, "grad_norm": 16.022737503051758, "learning_rate": 3.419795918367347e-05, "loss": 2.9346, "step": 7750 }, { "epoch": 31.63673469387755, "grad_norm": 34.780433654785156, "learning_rate": 3.419591836734694e-05, "loss": 1.8589, "step": 7751 }, { "epoch": 31.64081632653061, "grad_norm": 10.293840408325195, "learning_rate": 3.4193877551020406e-05, "loss": 3.3809, "step": 7752 }, { "epoch": 31.644897959183673, "grad_norm": 20.575300216674805, "learning_rate": 3.419183673469388e-05, "loss": 2.5248, "step": 7753 }, { "epoch": 31.648979591836735, "grad_norm": 19.96422004699707, "learning_rate": 3.4189795918367354e-05, "loss": 2.5128, "step": 7754 }, { "epoch": 31.653061224489797, "grad_norm": 13.100007057189941, "learning_rate": 3.418775510204082e-05, "loss": 3.1996, "step": 7755 }, { "epoch": 31.65714285714286, "grad_norm": 27.477218627929688, "learning_rate": 3.418571428571429e-05, "loss": 1.8972, "step": 7756 }, { "epoch": 31.66122448979592, "grad_norm": 14.445070266723633, "learning_rate": 3.4183673469387755e-05, "loss": 2.9418, "step": 7757 }, { "epoch": 31.66530612244898, "grad_norm": 23.805810928344727, "learning_rate": 3.418163265306122e-05, "loss": 2.3689, "step": 7758 }, { "epoch": 31.66938775510204, "grad_norm": 28.27191734313965, "learning_rate": 3.4179591836734696e-05, "loss": 1.9494, "step": 7759 }, { "epoch": 31.6734693877551, "grad_norm": 20.004812240600586, "learning_rate": 3.417755102040816e-05, "loss": 2.4552, "step": 7760 }, { "epoch": 31.677551020408163, "grad_norm": 13.032587051391602, "learning_rate": 3.417551020408164e-05, "loss": 2.9896, "step": 7761 }, { "epoch": 31.681632653061225, "grad_norm": 17.20568084716797, "learning_rate": 3.4173469387755104e-05, "loss": 2.3283, "step": 7762 }, { "epoch": 31.685714285714287, "grad_norm": 11.31820011138916, "learning_rate": 3.417142857142857e-05, "loss": 2.9412, "step": 7763 }, { "epoch": 31.68979591836735, "grad_norm": 23.60626792907715, "learning_rate": 3.4169387755102045e-05, "loss": 2.6261, "step": 7764 }, { "epoch": 31.693877551020407, "grad_norm": 27.34309196472168, "learning_rate": 3.416734693877551e-05, "loss": 1.9024, "step": 7765 }, { "epoch": 31.69795918367347, "grad_norm": 7.6837849617004395, "learning_rate": 3.416530612244898e-05, "loss": 3.6655, "step": 7766 }, { "epoch": 31.70204081632653, "grad_norm": 10.941728591918945, "learning_rate": 3.416326530612245e-05, "loss": 2.9466, "step": 7767 }, { "epoch": 31.706122448979592, "grad_norm": 11.58503246307373, "learning_rate": 3.416122448979592e-05, "loss": 3.0186, "step": 7768 }, { "epoch": 31.710204081632654, "grad_norm": 15.691161155700684, "learning_rate": 3.4159183673469395e-05, "loss": 2.6992, "step": 7769 }, { "epoch": 31.714285714285715, "grad_norm": 14.742530822753906, "learning_rate": 3.415714285714286e-05, "loss": 2.7638, "step": 7770 }, { "epoch": 31.718367346938777, "grad_norm": 26.24575424194336, "learning_rate": 3.415510204081633e-05, "loss": 2.3371, "step": 7771 }, { "epoch": 31.722448979591835, "grad_norm": 21.058252334594727, "learning_rate": 3.4153061224489796e-05, "loss": 2.2258, "step": 7772 }, { "epoch": 31.726530612244897, "grad_norm": 14.793285369873047, "learning_rate": 3.415102040816326e-05, "loss": 2.6027, "step": 7773 }, { "epoch": 31.73061224489796, "grad_norm": 33.67033767700195, "learning_rate": 3.414897959183674e-05, "loss": 1.3186, "step": 7774 }, { "epoch": 31.73469387755102, "grad_norm": 10.209571838378906, "learning_rate": 3.4146938775510204e-05, "loss": 2.8939, "step": 7775 }, { "epoch": 31.738775510204082, "grad_norm": 15.620491981506348, "learning_rate": 3.414489795918368e-05, "loss": 2.6162, "step": 7776 }, { "epoch": 31.742857142857144, "grad_norm": 11.133772850036621, "learning_rate": 3.4142857142857145e-05, "loss": 2.9272, "step": 7777 }, { "epoch": 31.746938775510205, "grad_norm": 15.913702011108398, "learning_rate": 3.414081632653061e-05, "loss": 2.8277, "step": 7778 }, { "epoch": 31.751020408163264, "grad_norm": 31.256404876708984, "learning_rate": 3.4138775510204086e-05, "loss": 1.1424, "step": 7779 }, { "epoch": 31.755102040816325, "grad_norm": 41.5513801574707, "learning_rate": 3.413673469387755e-05, "loss": 1.8451, "step": 7780 }, { "epoch": 31.759183673469387, "grad_norm": 25.934391021728516, "learning_rate": 3.413469387755102e-05, "loss": 2.1027, "step": 7781 }, { "epoch": 31.76326530612245, "grad_norm": 23.86419677734375, "learning_rate": 3.413265306122449e-05, "loss": 2.3507, "step": 7782 }, { "epoch": 31.76734693877551, "grad_norm": 10.71545696258545, "learning_rate": 3.413061224489796e-05, "loss": 2.9809, "step": 7783 }, { "epoch": 31.771428571428572, "grad_norm": 22.49974250793457, "learning_rate": 3.4128571428571435e-05, "loss": 1.8722, "step": 7784 }, { "epoch": 31.775510204081634, "grad_norm": 31.690210342407227, "learning_rate": 3.41265306122449e-05, "loss": 2.746, "step": 7785 }, { "epoch": 31.779591836734696, "grad_norm": 10.786982536315918, "learning_rate": 3.412448979591837e-05, "loss": 3.0394, "step": 7786 }, { "epoch": 31.783673469387754, "grad_norm": 18.44454574584961, "learning_rate": 3.412244897959184e-05, "loss": 2.322, "step": 7787 }, { "epoch": 31.787755102040816, "grad_norm": 10.87215805053711, "learning_rate": 3.4120408163265304e-05, "loss": 3.0157, "step": 7788 }, { "epoch": 31.791836734693877, "grad_norm": 31.56850242614746, "learning_rate": 3.411836734693878e-05, "loss": 1.3464, "step": 7789 }, { "epoch": 31.79591836734694, "grad_norm": 14.799697875976562, "learning_rate": 3.4116326530612245e-05, "loss": 2.6589, "step": 7790 }, { "epoch": 31.8, "grad_norm": 38.132591247558594, "learning_rate": 3.411428571428571e-05, "loss": 2.3724, "step": 7791 }, { "epoch": 31.804081632653062, "grad_norm": 13.908093452453613, "learning_rate": 3.4112244897959186e-05, "loss": 3.1421, "step": 7792 }, { "epoch": 31.808163265306124, "grad_norm": 20.288358688354492, "learning_rate": 3.411020408163266e-05, "loss": 2.364, "step": 7793 }, { "epoch": 31.812244897959182, "grad_norm": 16.74187469482422, "learning_rate": 3.410816326530613e-05, "loss": 2.841, "step": 7794 }, { "epoch": 31.816326530612244, "grad_norm": 35.39567947387695, "learning_rate": 3.4106122448979594e-05, "loss": 1.394, "step": 7795 }, { "epoch": 31.820408163265306, "grad_norm": 15.221031188964844, "learning_rate": 3.410408163265306e-05, "loss": 2.9507, "step": 7796 }, { "epoch": 31.824489795918367, "grad_norm": 20.567808151245117, "learning_rate": 3.410204081632653e-05, "loss": 2.3359, "step": 7797 }, { "epoch": 31.82857142857143, "grad_norm": 26.981834411621094, "learning_rate": 3.41e-05, "loss": 1.6971, "step": 7798 }, { "epoch": 31.83265306122449, "grad_norm": 34.013771057128906, "learning_rate": 3.409795918367347e-05, "loss": 1.7777, "step": 7799 }, { "epoch": 31.836734693877553, "grad_norm": 31.097890853881836, "learning_rate": 3.409591836734694e-05, "loss": 1.2952, "step": 7800 }, { "epoch": 31.84081632653061, "grad_norm": 19.871967315673828, "learning_rate": 3.409387755102041e-05, "loss": 2.5224, "step": 7801 }, { "epoch": 31.844897959183672, "grad_norm": 16.133604049682617, "learning_rate": 3.409183673469388e-05, "loss": 2.7478, "step": 7802 }, { "epoch": 31.848979591836734, "grad_norm": 19.863195419311523, "learning_rate": 3.408979591836735e-05, "loss": 2.7881, "step": 7803 }, { "epoch": 31.853061224489796, "grad_norm": 32.04188919067383, "learning_rate": 3.408775510204082e-05, "loss": 1.8048, "step": 7804 }, { "epoch": 31.857142857142858, "grad_norm": 29.405729293823242, "learning_rate": 3.4085714285714286e-05, "loss": 2.4572, "step": 7805 }, { "epoch": 31.86122448979592, "grad_norm": 13.11235523223877, "learning_rate": 3.408367346938775e-05, "loss": 2.9112, "step": 7806 }, { "epoch": 31.86530612244898, "grad_norm": 12.357328414916992, "learning_rate": 3.408163265306123e-05, "loss": 2.4732, "step": 7807 }, { "epoch": 31.86938775510204, "grad_norm": 18.99102020263672, "learning_rate": 3.40795918367347e-05, "loss": 2.5484, "step": 7808 }, { "epoch": 31.8734693877551, "grad_norm": 17.066329956054688, "learning_rate": 3.407755102040817e-05, "loss": 2.6632, "step": 7809 }, { "epoch": 31.877551020408163, "grad_norm": 13.458902359008789, "learning_rate": 3.4075510204081635e-05, "loss": 2.9351, "step": 7810 }, { "epoch": 31.881632653061224, "grad_norm": 30.52532958984375, "learning_rate": 3.40734693877551e-05, "loss": 0.8137, "step": 7811 }, { "epoch": 31.885714285714286, "grad_norm": 8.934247016906738, "learning_rate": 3.407142857142857e-05, "loss": 3.3671, "step": 7812 }, { "epoch": 31.889795918367348, "grad_norm": 13.578363418579102, "learning_rate": 3.406938775510204e-05, "loss": 2.8897, "step": 7813 }, { "epoch": 31.89387755102041, "grad_norm": 18.0568904876709, "learning_rate": 3.406734693877551e-05, "loss": 2.6334, "step": 7814 }, { "epoch": 31.897959183673468, "grad_norm": 20.05987548828125, "learning_rate": 3.4065306122448984e-05, "loss": 1.9581, "step": 7815 }, { "epoch": 31.90204081632653, "grad_norm": 17.573341369628906, "learning_rate": 3.406326530612245e-05, "loss": 2.4285, "step": 7816 }, { "epoch": 31.90612244897959, "grad_norm": 10.008957862854004, "learning_rate": 3.406122448979592e-05, "loss": 3.2775, "step": 7817 }, { "epoch": 31.910204081632653, "grad_norm": 19.127262115478516, "learning_rate": 3.405918367346939e-05, "loss": 2.7591, "step": 7818 }, { "epoch": 31.914285714285715, "grad_norm": 20.68985939025879, "learning_rate": 3.405714285714286e-05, "loss": 2.326, "step": 7819 }, { "epoch": 31.918367346938776, "grad_norm": 13.401607513427734, "learning_rate": 3.4055102040816326e-05, "loss": 3.0601, "step": 7820 }, { "epoch": 31.922448979591838, "grad_norm": 19.52018165588379, "learning_rate": 3.4053061224489793e-05, "loss": 2.7544, "step": 7821 }, { "epoch": 31.926530612244896, "grad_norm": 22.168363571166992, "learning_rate": 3.405102040816327e-05, "loss": 1.9148, "step": 7822 }, { "epoch": 31.930612244897958, "grad_norm": 20.757354736328125, "learning_rate": 3.404897959183674e-05, "loss": 2.0078, "step": 7823 }, { "epoch": 31.93469387755102, "grad_norm": 12.022222518920898, "learning_rate": 3.404693877551021e-05, "loss": 3.0473, "step": 7824 }, { "epoch": 31.93877551020408, "grad_norm": 26.333194732666016, "learning_rate": 3.4044897959183676e-05, "loss": 1.6811, "step": 7825 }, { "epoch": 31.942857142857143, "grad_norm": 8.07841968536377, "learning_rate": 3.404285714285714e-05, "loss": 3.3785, "step": 7826 }, { "epoch": 31.946938775510205, "grad_norm": 14.737503051757812, "learning_rate": 3.404081632653061e-05, "loss": 2.7599, "step": 7827 }, { "epoch": 31.951020408163266, "grad_norm": 24.55837059020996, "learning_rate": 3.4038775510204084e-05, "loss": 2.5391, "step": 7828 }, { "epoch": 31.955102040816328, "grad_norm": 23.833614349365234, "learning_rate": 3.403673469387755e-05, "loss": 2.0307, "step": 7829 }, { "epoch": 31.959183673469386, "grad_norm": 13.471550941467285, "learning_rate": 3.4034693877551025e-05, "loss": 3.2455, "step": 7830 }, { "epoch": 31.963265306122448, "grad_norm": 10.953044891357422, "learning_rate": 3.403265306122449e-05, "loss": 3.2911, "step": 7831 }, { "epoch": 31.96734693877551, "grad_norm": 8.510176658630371, "learning_rate": 3.403061224489796e-05, "loss": 3.3539, "step": 7832 }, { "epoch": 31.97142857142857, "grad_norm": 15.32232666015625, "learning_rate": 3.402857142857143e-05, "loss": 2.7733, "step": 7833 }, { "epoch": 31.975510204081633, "grad_norm": 9.762984275817871, "learning_rate": 3.40265306122449e-05, "loss": 3.5141, "step": 7834 }, { "epoch": 31.979591836734695, "grad_norm": 15.518745422363281, "learning_rate": 3.402448979591837e-05, "loss": 2.6301, "step": 7835 }, { "epoch": 31.983673469387757, "grad_norm": 14.542551040649414, "learning_rate": 3.4022448979591834e-05, "loss": 2.889, "step": 7836 }, { "epoch": 31.987755102040815, "grad_norm": 9.583148002624512, "learning_rate": 3.402040816326531e-05, "loss": 3.0205, "step": 7837 }, { "epoch": 31.991836734693877, "grad_norm": 27.225440979003906, "learning_rate": 3.401836734693878e-05, "loss": 2.1332, "step": 7838 }, { "epoch": 31.99591836734694, "grad_norm": 10.911288261413574, "learning_rate": 3.401632653061225e-05, "loss": 3.0415, "step": 7839 }, { "epoch": 32.0, "grad_norm": 13.101760864257812, "learning_rate": 3.4014285714285716e-05, "loss": 2.7924, "step": 7840 }, { "epoch": 32.00408163265306, "grad_norm": 8.469244956970215, "learning_rate": 3.4012244897959183e-05, "loss": 3.4705, "step": 7841 }, { "epoch": 32.00816326530612, "grad_norm": 9.32753849029541, "learning_rate": 3.401020408163266e-05, "loss": 3.0037, "step": 7842 }, { "epoch": 32.012244897959185, "grad_norm": 13.068449020385742, "learning_rate": 3.4008163265306124e-05, "loss": 2.8182, "step": 7843 }, { "epoch": 32.01632653061225, "grad_norm": 25.514354705810547, "learning_rate": 3.400612244897959e-05, "loss": 2.0275, "step": 7844 }, { "epoch": 32.02040816326531, "grad_norm": 9.298077583312988, "learning_rate": 3.400408163265306e-05, "loss": 3.1988, "step": 7845 }, { "epoch": 32.02448979591837, "grad_norm": 25.807287216186523, "learning_rate": 3.400204081632653e-05, "loss": 1.6781, "step": 7846 }, { "epoch": 32.02857142857143, "grad_norm": 11.854947090148926, "learning_rate": 3.4000000000000007e-05, "loss": 3.0555, "step": 7847 }, { "epoch": 32.03265306122449, "grad_norm": 14.083656311035156, "learning_rate": 3.3997959183673474e-05, "loss": 2.8855, "step": 7848 }, { "epoch": 32.03673469387755, "grad_norm": 36.09602355957031, "learning_rate": 3.399591836734694e-05, "loss": 1.1737, "step": 7849 }, { "epoch": 32.04081632653061, "grad_norm": 25.21131134033203, "learning_rate": 3.399387755102041e-05, "loss": 1.9955, "step": 7850 }, { "epoch": 32.04489795918367, "grad_norm": 26.34317398071289, "learning_rate": 3.3991836734693875e-05, "loss": 1.869, "step": 7851 }, { "epoch": 32.04897959183673, "grad_norm": 35.95726013183594, "learning_rate": 3.398979591836735e-05, "loss": 1.4164, "step": 7852 }, { "epoch": 32.053061224489795, "grad_norm": 33.003910064697266, "learning_rate": 3.3987755102040816e-05, "loss": 1.1776, "step": 7853 }, { "epoch": 32.05714285714286, "grad_norm": 33.088783264160156, "learning_rate": 3.398571428571429e-05, "loss": 1.7615, "step": 7854 }, { "epoch": 32.06122448979592, "grad_norm": 32.68407440185547, "learning_rate": 3.398367346938776e-05, "loss": 1.7941, "step": 7855 }, { "epoch": 32.06530612244898, "grad_norm": 30.236223220825195, "learning_rate": 3.3981632653061224e-05, "loss": 2.2194, "step": 7856 }, { "epoch": 32.06938775510204, "grad_norm": 16.860397338867188, "learning_rate": 3.39795918367347e-05, "loss": 2.6847, "step": 7857 }, { "epoch": 32.073469387755104, "grad_norm": 17.357744216918945, "learning_rate": 3.3977551020408165e-05, "loss": 2.6747, "step": 7858 }, { "epoch": 32.077551020408166, "grad_norm": 22.33395004272461, "learning_rate": 3.397551020408163e-05, "loss": 2.3558, "step": 7859 }, { "epoch": 32.08163265306123, "grad_norm": 32.673187255859375, "learning_rate": 3.39734693877551e-05, "loss": 1.2064, "step": 7860 }, { "epoch": 32.08571428571429, "grad_norm": 17.31221580505371, "learning_rate": 3.397142857142857e-05, "loss": 2.589, "step": 7861 }, { "epoch": 32.089795918367344, "grad_norm": 15.568792343139648, "learning_rate": 3.396938775510205e-05, "loss": 2.8454, "step": 7862 }, { "epoch": 32.093877551020405, "grad_norm": 21.23862648010254, "learning_rate": 3.3967346938775514e-05, "loss": 1.7867, "step": 7863 }, { "epoch": 32.09795918367347, "grad_norm": 27.294254302978516, "learning_rate": 3.396530612244898e-05, "loss": 2.2886, "step": 7864 }, { "epoch": 32.10204081632653, "grad_norm": 14.43566608428955, "learning_rate": 3.396326530612245e-05, "loss": 2.8026, "step": 7865 }, { "epoch": 32.10612244897959, "grad_norm": 19.423852920532227, "learning_rate": 3.3961224489795916e-05, "loss": 2.2862, "step": 7866 }, { "epoch": 32.11020408163265, "grad_norm": 23.724742889404297, "learning_rate": 3.395918367346939e-05, "loss": 2.1584, "step": 7867 }, { "epoch": 32.114285714285714, "grad_norm": 18.336408615112305, "learning_rate": 3.395714285714286e-05, "loss": 2.6065, "step": 7868 }, { "epoch": 32.118367346938776, "grad_norm": 22.38627815246582, "learning_rate": 3.395510204081633e-05, "loss": 2.1547, "step": 7869 }, { "epoch": 32.12244897959184, "grad_norm": 20.577104568481445, "learning_rate": 3.39530612244898e-05, "loss": 2.1932, "step": 7870 }, { "epoch": 32.1265306122449, "grad_norm": 16.308753967285156, "learning_rate": 3.3951020408163265e-05, "loss": 2.6386, "step": 7871 }, { "epoch": 32.13061224489796, "grad_norm": 36.27302932739258, "learning_rate": 3.394897959183674e-05, "loss": 2.7469, "step": 7872 }, { "epoch": 32.13469387755102, "grad_norm": 15.540992736816406, "learning_rate": 3.3946938775510206e-05, "loss": 3.0589, "step": 7873 }, { "epoch": 32.138775510204084, "grad_norm": 16.530561447143555, "learning_rate": 3.394489795918367e-05, "loss": 2.8135, "step": 7874 }, { "epoch": 32.142857142857146, "grad_norm": 21.95552635192871, "learning_rate": 3.394285714285714e-05, "loss": 1.6152, "step": 7875 }, { "epoch": 32.14693877551021, "grad_norm": 14.655920028686523, "learning_rate": 3.3940816326530614e-05, "loss": 2.838, "step": 7876 }, { "epoch": 32.15102040816326, "grad_norm": 11.746498107910156, "learning_rate": 3.393877551020409e-05, "loss": 2.9343, "step": 7877 }, { "epoch": 32.155102040816324, "grad_norm": 25.641691207885742, "learning_rate": 3.3936734693877555e-05, "loss": 2.3634, "step": 7878 }, { "epoch": 32.159183673469386, "grad_norm": 9.464117050170898, "learning_rate": 3.393469387755102e-05, "loss": 3.3853, "step": 7879 }, { "epoch": 32.16326530612245, "grad_norm": 18.13282585144043, "learning_rate": 3.393265306122449e-05, "loss": 2.6015, "step": 7880 }, { "epoch": 32.16734693877551, "grad_norm": 19.894832611083984, "learning_rate": 3.393061224489796e-05, "loss": 2.6232, "step": 7881 }, { "epoch": 32.17142857142857, "grad_norm": 11.722594261169434, "learning_rate": 3.392857142857143e-05, "loss": 3.0543, "step": 7882 }, { "epoch": 32.17551020408163, "grad_norm": 14.590972900390625, "learning_rate": 3.39265306122449e-05, "loss": 2.7907, "step": 7883 }, { "epoch": 32.179591836734694, "grad_norm": 23.460865020751953, "learning_rate": 3.392448979591837e-05, "loss": 2.0963, "step": 7884 }, { "epoch": 32.183673469387756, "grad_norm": 21.139652252197266, "learning_rate": 3.392244897959184e-05, "loss": 1.8078, "step": 7885 }, { "epoch": 32.18775510204082, "grad_norm": 38.60369873046875, "learning_rate": 3.392040816326531e-05, "loss": 1.697, "step": 7886 }, { "epoch": 32.19183673469388, "grad_norm": 11.328204154968262, "learning_rate": 3.391836734693878e-05, "loss": 2.9736, "step": 7887 }, { "epoch": 32.19591836734694, "grad_norm": 9.838872909545898, "learning_rate": 3.391632653061225e-05, "loss": 3.1189, "step": 7888 }, { "epoch": 32.2, "grad_norm": 15.829684257507324, "learning_rate": 3.3914285714285714e-05, "loss": 2.8036, "step": 7889 }, { "epoch": 32.204081632653065, "grad_norm": 13.085862159729004, "learning_rate": 3.391224489795918e-05, "loss": 3.1021, "step": 7890 }, { "epoch": 32.20816326530612, "grad_norm": 17.26824188232422, "learning_rate": 3.3910204081632655e-05, "loss": 2.5301, "step": 7891 }, { "epoch": 32.21224489795918, "grad_norm": 22.22077751159668, "learning_rate": 3.390816326530613e-05, "loss": 1.8772, "step": 7892 }, { "epoch": 32.21632653061224, "grad_norm": 15.818589210510254, "learning_rate": 3.3906122448979596e-05, "loss": 2.1315, "step": 7893 }, { "epoch": 32.220408163265304, "grad_norm": 11.903413772583008, "learning_rate": 3.390408163265306e-05, "loss": 2.9402, "step": 7894 }, { "epoch": 32.224489795918366, "grad_norm": 15.575374603271484, "learning_rate": 3.390204081632653e-05, "loss": 2.5007, "step": 7895 }, { "epoch": 32.22857142857143, "grad_norm": 34.02323913574219, "learning_rate": 3.3900000000000004e-05, "loss": 2.0188, "step": 7896 }, { "epoch": 32.23265306122449, "grad_norm": 16.854101181030273, "learning_rate": 3.389795918367347e-05, "loss": 2.3674, "step": 7897 }, { "epoch": 32.23673469387755, "grad_norm": 26.066354751586914, "learning_rate": 3.389591836734694e-05, "loss": 2.4258, "step": 7898 }, { "epoch": 32.24081632653061, "grad_norm": 10.638282775878906, "learning_rate": 3.3893877551020405e-05, "loss": 3.3522, "step": 7899 }, { "epoch": 32.244897959183675, "grad_norm": 34.06488037109375, "learning_rate": 3.389183673469388e-05, "loss": 1.8143, "step": 7900 }, { "epoch": 32.248979591836736, "grad_norm": 11.225264549255371, "learning_rate": 3.388979591836735e-05, "loss": 3.2161, "step": 7901 }, { "epoch": 32.2530612244898, "grad_norm": 29.68376350402832, "learning_rate": 3.388775510204082e-05, "loss": 2.1595, "step": 7902 }, { "epoch": 32.25714285714286, "grad_norm": 8.8146333694458, "learning_rate": 3.388571428571429e-05, "loss": 3.3918, "step": 7903 }, { "epoch": 32.26122448979592, "grad_norm": 15.128990173339844, "learning_rate": 3.3883673469387755e-05, "loss": 2.8006, "step": 7904 }, { "epoch": 32.265306122448976, "grad_norm": 36.7532844543457, "learning_rate": 3.388163265306122e-05, "loss": 1.0685, "step": 7905 }, { "epoch": 32.26938775510204, "grad_norm": 13.081744194030762, "learning_rate": 3.3879591836734696e-05, "loss": 3.1276, "step": 7906 }, { "epoch": 32.2734693877551, "grad_norm": 11.153566360473633, "learning_rate": 3.387755102040816e-05, "loss": 2.9041, "step": 7907 }, { "epoch": 32.27755102040816, "grad_norm": 9.174474716186523, "learning_rate": 3.387551020408164e-05, "loss": 3.285, "step": 7908 }, { "epoch": 32.28163265306122, "grad_norm": 11.885093688964844, "learning_rate": 3.3873469387755104e-05, "loss": 3.3381, "step": 7909 }, { "epoch": 32.285714285714285, "grad_norm": 12.36900806427002, "learning_rate": 3.387142857142857e-05, "loss": 2.6818, "step": 7910 }, { "epoch": 32.289795918367346, "grad_norm": 12.456095695495605, "learning_rate": 3.3869387755102045e-05, "loss": 2.9639, "step": 7911 }, { "epoch": 32.29387755102041, "grad_norm": 16.139545440673828, "learning_rate": 3.386734693877551e-05, "loss": 2.5155, "step": 7912 }, { "epoch": 32.29795918367347, "grad_norm": 15.686694145202637, "learning_rate": 3.386530612244898e-05, "loss": 3.0155, "step": 7913 }, { "epoch": 32.30204081632653, "grad_norm": 12.360705375671387, "learning_rate": 3.3863265306122446e-05, "loss": 2.9481, "step": 7914 }, { "epoch": 32.30612244897959, "grad_norm": 18.106645584106445, "learning_rate": 3.386122448979592e-05, "loss": 2.5307, "step": 7915 }, { "epoch": 32.310204081632655, "grad_norm": 9.324602127075195, "learning_rate": 3.3859183673469394e-05, "loss": 3.2284, "step": 7916 }, { "epoch": 32.31428571428572, "grad_norm": 15.917169570922852, "learning_rate": 3.385714285714286e-05, "loss": 2.2157, "step": 7917 }, { "epoch": 32.31836734693878, "grad_norm": 23.116426467895508, "learning_rate": 3.385510204081633e-05, "loss": 1.6218, "step": 7918 }, { "epoch": 32.32244897959184, "grad_norm": 20.22661781311035, "learning_rate": 3.3853061224489795e-05, "loss": 2.6385, "step": 7919 }, { "epoch": 32.326530612244895, "grad_norm": 26.020978927612305, "learning_rate": 3.385102040816326e-05, "loss": 1.5569, "step": 7920 }, { "epoch": 32.33061224489796, "grad_norm": 16.190326690673828, "learning_rate": 3.3848979591836736e-05, "loss": 2.5775, "step": 7921 }, { "epoch": 32.33469387755102, "grad_norm": 8.621857643127441, "learning_rate": 3.3846938775510204e-05, "loss": 3.2758, "step": 7922 }, { "epoch": 32.33877551020408, "grad_norm": 20.561485290527344, "learning_rate": 3.384489795918368e-05, "loss": 2.3245, "step": 7923 }, { "epoch": 32.34285714285714, "grad_norm": 30.865388870239258, "learning_rate": 3.3842857142857145e-05, "loss": 1.8406, "step": 7924 }, { "epoch": 32.3469387755102, "grad_norm": 29.68283462524414, "learning_rate": 3.384081632653062e-05, "loss": 2.0889, "step": 7925 }, { "epoch": 32.351020408163265, "grad_norm": 16.8483943939209, "learning_rate": 3.3838775510204086e-05, "loss": 2.434, "step": 7926 }, { "epoch": 32.35510204081633, "grad_norm": 30.299266815185547, "learning_rate": 3.383673469387755e-05, "loss": 1.6868, "step": 7927 }, { "epoch": 32.35918367346939, "grad_norm": 14.419978141784668, "learning_rate": 3.383469387755102e-05, "loss": 2.5458, "step": 7928 }, { "epoch": 32.36326530612245, "grad_norm": 15.008890151977539, "learning_rate": 3.383265306122449e-05, "loss": 2.8979, "step": 7929 }, { "epoch": 32.36734693877551, "grad_norm": 11.179667472839355, "learning_rate": 3.383061224489796e-05, "loss": 2.9208, "step": 7930 }, { "epoch": 32.371428571428574, "grad_norm": 30.787343978881836, "learning_rate": 3.3828571428571435e-05, "loss": 1.8951, "step": 7931 }, { "epoch": 32.375510204081635, "grad_norm": 10.059853553771973, "learning_rate": 3.38265306122449e-05, "loss": 3.3766, "step": 7932 }, { "epoch": 32.3795918367347, "grad_norm": 26.227231979370117, "learning_rate": 3.382448979591837e-05, "loss": 1.4997, "step": 7933 }, { "epoch": 32.38367346938775, "grad_norm": 9.985421180725098, "learning_rate": 3.3822448979591836e-05, "loss": 3.2347, "step": 7934 }, { "epoch": 32.38775510204081, "grad_norm": 15.702921867370605, "learning_rate": 3.382040816326531e-05, "loss": 2.7253, "step": 7935 }, { "epoch": 32.391836734693875, "grad_norm": 18.843677520751953, "learning_rate": 3.381836734693878e-05, "loss": 2.35, "step": 7936 }, { "epoch": 32.39591836734694, "grad_norm": 28.88945960998535, "learning_rate": 3.3816326530612244e-05, "loss": 2.2164, "step": 7937 }, { "epoch": 32.4, "grad_norm": 15.999478340148926, "learning_rate": 3.381428571428572e-05, "loss": 2.6337, "step": 7938 }, { "epoch": 32.40408163265306, "grad_norm": 13.48015022277832, "learning_rate": 3.3812244897959185e-05, "loss": 3.0179, "step": 7939 }, { "epoch": 32.40816326530612, "grad_norm": 23.87801170349121, "learning_rate": 3.381020408163266e-05, "loss": 2.1198, "step": 7940 }, { "epoch": 32.412244897959184, "grad_norm": 23.39457130432129, "learning_rate": 3.3808163265306126e-05, "loss": 2.2809, "step": 7941 }, { "epoch": 32.416326530612245, "grad_norm": 26.701662063598633, "learning_rate": 3.3806122448979593e-05, "loss": 2.6927, "step": 7942 }, { "epoch": 32.42040816326531, "grad_norm": 12.116499900817871, "learning_rate": 3.380408163265306e-05, "loss": 2.9326, "step": 7943 }, { "epoch": 32.42448979591837, "grad_norm": 26.364639282226562, "learning_rate": 3.380204081632653e-05, "loss": 2.3193, "step": 7944 }, { "epoch": 32.42857142857143, "grad_norm": 12.544071197509766, "learning_rate": 3.38e-05, "loss": 3.0813, "step": 7945 }, { "epoch": 32.43265306122449, "grad_norm": 19.54819679260254, "learning_rate": 3.379795918367347e-05, "loss": 2.6592, "step": 7946 }, { "epoch": 32.436734693877554, "grad_norm": 17.558691024780273, "learning_rate": 3.379591836734694e-05, "loss": 2.761, "step": 7947 }, { "epoch": 32.440816326530616, "grad_norm": 21.51334571838379, "learning_rate": 3.379387755102041e-05, "loss": 0.2396, "step": 7948 }, { "epoch": 32.44489795918367, "grad_norm": 24.315898895263672, "learning_rate": 3.379183673469388e-05, "loss": 1.9299, "step": 7949 }, { "epoch": 32.44897959183673, "grad_norm": 42.883609771728516, "learning_rate": 3.378979591836735e-05, "loss": 0.9348, "step": 7950 }, { "epoch": 32.453061224489794, "grad_norm": 10.425662994384766, "learning_rate": 3.378775510204082e-05, "loss": 3.1247, "step": 7951 }, { "epoch": 32.457142857142856, "grad_norm": 33.978553771972656, "learning_rate": 3.3785714285714285e-05, "loss": 2.0291, "step": 7952 }, { "epoch": 32.46122448979592, "grad_norm": 29.353500366210938, "learning_rate": 3.378367346938775e-05, "loss": 1.9114, "step": 7953 }, { "epoch": 32.46530612244898, "grad_norm": 20.4705753326416, "learning_rate": 3.3781632653061226e-05, "loss": 2.6118, "step": 7954 }, { "epoch": 32.46938775510204, "grad_norm": 17.599626541137695, "learning_rate": 3.37795918367347e-05, "loss": 2.524, "step": 7955 }, { "epoch": 32.4734693877551, "grad_norm": 23.414880752563477, "learning_rate": 3.377755102040817e-05, "loss": 2.1024, "step": 7956 }, { "epoch": 32.477551020408164, "grad_norm": 30.47659683227539, "learning_rate": 3.3775510204081634e-05, "loss": 2.2098, "step": 7957 }, { "epoch": 32.481632653061226, "grad_norm": 11.852950096130371, "learning_rate": 3.37734693877551e-05, "loss": 2.9102, "step": 7958 }, { "epoch": 32.48571428571429, "grad_norm": 31.160810470581055, "learning_rate": 3.377142857142857e-05, "loss": 2.7213, "step": 7959 }, { "epoch": 32.48979591836735, "grad_norm": 11.755589485168457, "learning_rate": 3.376938775510204e-05, "loss": 3.0512, "step": 7960 }, { "epoch": 32.49387755102041, "grad_norm": 31.59195327758789, "learning_rate": 3.376734693877551e-05, "loss": 1.6108, "step": 7961 }, { "epoch": 32.49795918367347, "grad_norm": 22.82252311706543, "learning_rate": 3.3765306122448983e-05, "loss": 2.1804, "step": 7962 }, { "epoch": 32.50204081632653, "grad_norm": 10.954950332641602, "learning_rate": 3.376326530612245e-05, "loss": 3.101, "step": 7963 }, { "epoch": 32.50612244897959, "grad_norm": 26.444252014160156, "learning_rate": 3.376122448979592e-05, "loss": 2.0794, "step": 7964 }, { "epoch": 32.51020408163265, "grad_norm": 16.466035842895508, "learning_rate": 3.375918367346939e-05, "loss": 2.7754, "step": 7965 }, { "epoch": 32.51428571428571, "grad_norm": 16.624347686767578, "learning_rate": 3.375714285714286e-05, "loss": 2.6429, "step": 7966 }, { "epoch": 32.518367346938774, "grad_norm": 14.093188285827637, "learning_rate": 3.3755102040816326e-05, "loss": 2.9203, "step": 7967 }, { "epoch": 32.522448979591836, "grad_norm": 17.36079216003418, "learning_rate": 3.375306122448979e-05, "loss": 1.9394, "step": 7968 }, { "epoch": 32.5265306122449, "grad_norm": 13.275962829589844, "learning_rate": 3.375102040816327e-05, "loss": 3.2794, "step": 7969 }, { "epoch": 32.53061224489796, "grad_norm": 14.058477401733398, "learning_rate": 3.374897959183674e-05, "loss": 2.4077, "step": 7970 }, { "epoch": 32.53469387755102, "grad_norm": 20.676939010620117, "learning_rate": 3.374693877551021e-05, "loss": 2.5458, "step": 7971 }, { "epoch": 32.53877551020408, "grad_norm": 17.345687866210938, "learning_rate": 3.3744897959183675e-05, "loss": 2.5662, "step": 7972 }, { "epoch": 32.542857142857144, "grad_norm": 21.688940048217773, "learning_rate": 3.374285714285714e-05, "loss": 2.3779, "step": 7973 }, { "epoch": 32.546938775510206, "grad_norm": 22.385778427124023, "learning_rate": 3.3740816326530616e-05, "loss": 1.7744, "step": 7974 }, { "epoch": 32.55102040816327, "grad_norm": 26.327869415283203, "learning_rate": 3.373877551020408e-05, "loss": 1.9808, "step": 7975 }, { "epoch": 32.55510204081633, "grad_norm": 19.88191795349121, "learning_rate": 3.373673469387755e-05, "loss": 2.3818, "step": 7976 }, { "epoch": 32.55918367346939, "grad_norm": 7.685800075531006, "learning_rate": 3.3734693877551024e-05, "loss": 3.6124, "step": 7977 }, { "epoch": 32.563265306122446, "grad_norm": 26.097131729125977, "learning_rate": 3.373265306122449e-05, "loss": 2.0435, "step": 7978 }, { "epoch": 32.56734693877551, "grad_norm": 14.494077682495117, "learning_rate": 3.3730612244897965e-05, "loss": 2.9117, "step": 7979 }, { "epoch": 32.57142857142857, "grad_norm": 13.495274543762207, "learning_rate": 3.372857142857143e-05, "loss": 3.17, "step": 7980 }, { "epoch": 32.57551020408163, "grad_norm": 15.141353607177734, "learning_rate": 3.37265306122449e-05, "loss": 2.5155, "step": 7981 }, { "epoch": 32.57959183673469, "grad_norm": 11.390070915222168, "learning_rate": 3.3724489795918367e-05, "loss": 2.954, "step": 7982 }, { "epoch": 32.583673469387755, "grad_norm": 11.909421920776367, "learning_rate": 3.3722448979591834e-05, "loss": 3.0198, "step": 7983 }, { "epoch": 32.587755102040816, "grad_norm": 10.861814498901367, "learning_rate": 3.372040816326531e-05, "loss": 2.9561, "step": 7984 }, { "epoch": 32.59183673469388, "grad_norm": 27.705610275268555, "learning_rate": 3.371836734693878e-05, "loss": 1.1773, "step": 7985 }, { "epoch": 32.59591836734694, "grad_norm": 12.540350914001465, "learning_rate": 3.371632653061225e-05, "loss": 2.8648, "step": 7986 }, { "epoch": 32.6, "grad_norm": 40.415802001953125, "learning_rate": 3.3714285714285716e-05, "loss": 1.9222, "step": 7987 }, { "epoch": 32.60408163265306, "grad_norm": 11.931374549865723, "learning_rate": 3.371224489795918e-05, "loss": 2.8775, "step": 7988 }, { "epoch": 32.608163265306125, "grad_norm": 13.246390342712402, "learning_rate": 3.371020408163266e-05, "loss": 2.8395, "step": 7989 }, { "epoch": 32.61224489795919, "grad_norm": 18.011253356933594, "learning_rate": 3.3708163265306124e-05, "loss": 2.4906, "step": 7990 }, { "epoch": 32.61632653061225, "grad_norm": 21.430612564086914, "learning_rate": 3.370612244897959e-05, "loss": 2.5982, "step": 7991 }, { "epoch": 32.6204081632653, "grad_norm": 18.783103942871094, "learning_rate": 3.3704081632653065e-05, "loss": 2.6285, "step": 7992 }, { "epoch": 32.624489795918365, "grad_norm": 18.21701431274414, "learning_rate": 3.370204081632653e-05, "loss": 2.7976, "step": 7993 }, { "epoch": 32.628571428571426, "grad_norm": 14.211181640625, "learning_rate": 3.3700000000000006e-05, "loss": 2.9877, "step": 7994 }, { "epoch": 32.63265306122449, "grad_norm": 12.652579307556152, "learning_rate": 3.369795918367347e-05, "loss": 2.9191, "step": 7995 }, { "epoch": 32.63673469387755, "grad_norm": 23.82673454284668, "learning_rate": 3.369591836734694e-05, "loss": 1.8091, "step": 7996 }, { "epoch": 32.64081632653061, "grad_norm": 15.857054710388184, "learning_rate": 3.369387755102041e-05, "loss": 2.7134, "step": 7997 }, { "epoch": 32.64489795918367, "grad_norm": 21.93404769897461, "learning_rate": 3.3691836734693874e-05, "loss": 1.8641, "step": 7998 }, { "epoch": 32.648979591836735, "grad_norm": 38.63385772705078, "learning_rate": 3.368979591836735e-05, "loss": 0.9859, "step": 7999 }, { "epoch": 32.6530612244898, "grad_norm": 22.67137908935547, "learning_rate": 3.3687755102040816e-05, "loss": 2.5307, "step": 8000 }, { "epoch": 32.65714285714286, "grad_norm": 27.539642333984375, "learning_rate": 3.368571428571429e-05, "loss": 1.2171, "step": 8001 }, { "epoch": 32.66122448979592, "grad_norm": 9.367973327636719, "learning_rate": 3.3683673469387757e-05, "loss": 3.2077, "step": 8002 }, { "epoch": 32.66530612244898, "grad_norm": 19.824796676635742, "learning_rate": 3.3681632653061224e-05, "loss": 1.6565, "step": 8003 }, { "epoch": 32.66938775510204, "grad_norm": 18.22032356262207, "learning_rate": 3.36795918367347e-05, "loss": 2.4026, "step": 8004 }, { "epoch": 32.673469387755105, "grad_norm": 20.256269454956055, "learning_rate": 3.3677551020408165e-05, "loss": 2.4457, "step": 8005 }, { "epoch": 32.67755102040816, "grad_norm": 18.042083740234375, "learning_rate": 3.367551020408163e-05, "loss": 2.5031, "step": 8006 }, { "epoch": 32.68163265306122, "grad_norm": 12.696553230285645, "learning_rate": 3.36734693877551e-05, "loss": 2.9714, "step": 8007 }, { "epoch": 32.68571428571428, "grad_norm": 12.735856056213379, "learning_rate": 3.367142857142857e-05, "loss": 3.2082, "step": 8008 }, { "epoch": 32.689795918367345, "grad_norm": 13.903680801391602, "learning_rate": 3.366938775510205e-05, "loss": 3.0935, "step": 8009 }, { "epoch": 32.69387755102041, "grad_norm": 16.915746688842773, "learning_rate": 3.3667346938775514e-05, "loss": 2.8815, "step": 8010 }, { "epoch": 32.69795918367347, "grad_norm": 18.504858016967773, "learning_rate": 3.366530612244898e-05, "loss": 2.383, "step": 8011 }, { "epoch": 32.70204081632653, "grad_norm": 14.041413307189941, "learning_rate": 3.366326530612245e-05, "loss": 2.7603, "step": 8012 }, { "epoch": 32.70612244897959, "grad_norm": 17.234037399291992, "learning_rate": 3.3661224489795915e-05, "loss": 2.5613, "step": 8013 }, { "epoch": 32.710204081632654, "grad_norm": 11.16473388671875, "learning_rate": 3.365918367346939e-05, "loss": 3.2373, "step": 8014 }, { "epoch": 32.714285714285715, "grad_norm": 16.74928092956543, "learning_rate": 3.3657142857142856e-05, "loss": 2.3546, "step": 8015 }, { "epoch": 32.71836734693878, "grad_norm": 16.074098587036133, "learning_rate": 3.365510204081633e-05, "loss": 2.8537, "step": 8016 }, { "epoch": 32.72244897959184, "grad_norm": 21.154855728149414, "learning_rate": 3.36530612244898e-05, "loss": 1.9749, "step": 8017 }, { "epoch": 32.7265306122449, "grad_norm": 20.645235061645508, "learning_rate": 3.365102040816327e-05, "loss": 2.5638, "step": 8018 }, { "epoch": 32.73061224489796, "grad_norm": 25.93383026123047, "learning_rate": 3.364897959183674e-05, "loss": 1.7506, "step": 8019 }, { "epoch": 32.734693877551024, "grad_norm": 11.88583755493164, "learning_rate": 3.3646938775510205e-05, "loss": 3.0214, "step": 8020 }, { "epoch": 32.73877551020408, "grad_norm": 15.124131202697754, "learning_rate": 3.364489795918367e-05, "loss": 2.4806, "step": 8021 }, { "epoch": 32.74285714285714, "grad_norm": 25.553112030029297, "learning_rate": 3.364285714285714e-05, "loss": 1.1882, "step": 8022 }, { "epoch": 32.7469387755102, "grad_norm": 26.91239356994629, "learning_rate": 3.3640816326530614e-05, "loss": 1.834, "step": 8023 }, { "epoch": 32.751020408163264, "grad_norm": 30.055896759033203, "learning_rate": 3.363877551020409e-05, "loss": 0.7516, "step": 8024 }, { "epoch": 32.755102040816325, "grad_norm": 12.902405738830566, "learning_rate": 3.3636734693877555e-05, "loss": 3.0045, "step": 8025 }, { "epoch": 32.75918367346939, "grad_norm": 23.89080047607422, "learning_rate": 3.363469387755102e-05, "loss": 2.5776, "step": 8026 }, { "epoch": 32.76326530612245, "grad_norm": 12.285019874572754, "learning_rate": 3.363265306122449e-05, "loss": 2.8003, "step": 8027 }, { "epoch": 32.76734693877551, "grad_norm": 13.14271068572998, "learning_rate": 3.363061224489796e-05, "loss": 2.9604, "step": 8028 }, { "epoch": 32.77142857142857, "grad_norm": 16.770465850830078, "learning_rate": 3.362857142857143e-05, "loss": 2.8749, "step": 8029 }, { "epoch": 32.775510204081634, "grad_norm": 56.046852111816406, "learning_rate": 3.36265306122449e-05, "loss": 1.1471, "step": 8030 }, { "epoch": 32.779591836734696, "grad_norm": 31.317005157470703, "learning_rate": 3.362448979591837e-05, "loss": 1.9959, "step": 8031 }, { "epoch": 32.78367346938776, "grad_norm": 19.049785614013672, "learning_rate": 3.362244897959184e-05, "loss": 2.6445, "step": 8032 }, { "epoch": 32.78775510204082, "grad_norm": 20.3148193359375, "learning_rate": 3.362040816326531e-05, "loss": 2.0723, "step": 8033 }, { "epoch": 32.79183673469388, "grad_norm": 22.65831184387207, "learning_rate": 3.361836734693878e-05, "loss": 1.8442, "step": 8034 }, { "epoch": 32.795918367346935, "grad_norm": 24.892885208129883, "learning_rate": 3.3616326530612246e-05, "loss": 2.3366, "step": 8035 }, { "epoch": 32.8, "grad_norm": 34.883907318115234, "learning_rate": 3.361428571428571e-05, "loss": 1.6276, "step": 8036 }, { "epoch": 32.80408163265306, "grad_norm": 20.274263381958008, "learning_rate": 3.361224489795918e-05, "loss": 2.6483, "step": 8037 }, { "epoch": 32.80816326530612, "grad_norm": 12.57522201538086, "learning_rate": 3.3610204081632654e-05, "loss": 3.098, "step": 8038 }, { "epoch": 32.81224489795918, "grad_norm": 14.36034870147705, "learning_rate": 3.360816326530613e-05, "loss": 2.8007, "step": 8039 }, { "epoch": 32.816326530612244, "grad_norm": 9.396206855773926, "learning_rate": 3.3606122448979595e-05, "loss": 3.3558, "step": 8040 }, { "epoch": 32.820408163265306, "grad_norm": 13.604372024536133, "learning_rate": 3.360408163265306e-05, "loss": 2.9813, "step": 8041 }, { "epoch": 32.82448979591837, "grad_norm": 21.50217628479004, "learning_rate": 3.360204081632653e-05, "loss": 1.8108, "step": 8042 }, { "epoch": 32.82857142857143, "grad_norm": 20.681428909301758, "learning_rate": 3.3600000000000004e-05, "loss": 2.9693, "step": 8043 }, { "epoch": 32.83265306122449, "grad_norm": 21.22345733642578, "learning_rate": 3.359795918367347e-05, "loss": 2.3979, "step": 8044 }, { "epoch": 32.83673469387755, "grad_norm": 17.061983108520508, "learning_rate": 3.359591836734694e-05, "loss": 2.858, "step": 8045 }, { "epoch": 32.840816326530614, "grad_norm": 16.67576789855957, "learning_rate": 3.359387755102041e-05, "loss": 2.7372, "step": 8046 }, { "epoch": 32.844897959183676, "grad_norm": 7.121243000030518, "learning_rate": 3.359183673469388e-05, "loss": 3.6401, "step": 8047 }, { "epoch": 32.84897959183674, "grad_norm": 17.17960548400879, "learning_rate": 3.358979591836735e-05, "loss": 2.2788, "step": 8048 }, { "epoch": 32.85306122448979, "grad_norm": 17.06718635559082, "learning_rate": 3.358775510204082e-05, "loss": 2.9046, "step": 8049 }, { "epoch": 32.857142857142854, "grad_norm": 19.59503173828125, "learning_rate": 3.358571428571429e-05, "loss": 2.6106, "step": 8050 }, { "epoch": 32.861224489795916, "grad_norm": 12.423542022705078, "learning_rate": 3.3583673469387754e-05, "loss": 2.8673, "step": 8051 }, { "epoch": 32.86530612244898, "grad_norm": 19.432682037353516, "learning_rate": 3.358163265306122e-05, "loss": 2.6489, "step": 8052 }, { "epoch": 32.86938775510204, "grad_norm": 21.344444274902344, "learning_rate": 3.3579591836734695e-05, "loss": 2.3616, "step": 8053 }, { "epoch": 32.8734693877551, "grad_norm": 9.831491470336914, "learning_rate": 3.357755102040816e-05, "loss": 2.8434, "step": 8054 }, { "epoch": 32.87755102040816, "grad_norm": 7.304631233215332, "learning_rate": 3.3575510204081636e-05, "loss": 3.348, "step": 8055 }, { "epoch": 32.881632653061224, "grad_norm": 12.211851119995117, "learning_rate": 3.35734693877551e-05, "loss": 2.885, "step": 8056 }, { "epoch": 32.885714285714286, "grad_norm": 25.971906661987305, "learning_rate": 3.357142857142857e-05, "loss": 0.9778, "step": 8057 }, { "epoch": 32.88979591836735, "grad_norm": 15.829633712768555, "learning_rate": 3.3569387755102044e-05, "loss": 2.7711, "step": 8058 }, { "epoch": 32.89387755102041, "grad_norm": 24.05912208557129, "learning_rate": 3.356734693877551e-05, "loss": 2.2803, "step": 8059 }, { "epoch": 32.89795918367347, "grad_norm": 16.793132781982422, "learning_rate": 3.356530612244898e-05, "loss": 2.8889, "step": 8060 }, { "epoch": 32.90204081632653, "grad_norm": 16.51384925842285, "learning_rate": 3.3563265306122446e-05, "loss": 2.548, "step": 8061 }, { "epoch": 32.906122448979595, "grad_norm": 35.19988250732422, "learning_rate": 3.356122448979592e-05, "loss": 1.15, "step": 8062 }, { "epoch": 32.910204081632656, "grad_norm": 16.455289840698242, "learning_rate": 3.3559183673469394e-05, "loss": 2.6025, "step": 8063 }, { "epoch": 32.91428571428571, "grad_norm": 20.992860794067383, "learning_rate": 3.355714285714286e-05, "loss": 2.2703, "step": 8064 }, { "epoch": 32.91836734693877, "grad_norm": 14.439105033874512, "learning_rate": 3.355510204081633e-05, "loss": 2.4358, "step": 8065 }, { "epoch": 32.922448979591834, "grad_norm": 15.17219066619873, "learning_rate": 3.3553061224489795e-05, "loss": 2.8178, "step": 8066 }, { "epoch": 32.926530612244896, "grad_norm": 30.31907844543457, "learning_rate": 3.355102040816327e-05, "loss": 1.2026, "step": 8067 }, { "epoch": 32.93061224489796, "grad_norm": 24.82712173461914, "learning_rate": 3.3548979591836736e-05, "loss": 0.9716, "step": 8068 }, { "epoch": 32.93469387755102, "grad_norm": 20.797693252563477, "learning_rate": 3.35469387755102e-05, "loss": 2.3766, "step": 8069 }, { "epoch": 32.93877551020408, "grad_norm": 35.603267669677734, "learning_rate": 3.354489795918368e-05, "loss": 1.7849, "step": 8070 }, { "epoch": 32.94285714285714, "grad_norm": 30.78575325012207, "learning_rate": 3.3542857142857144e-05, "loss": 2.5583, "step": 8071 }, { "epoch": 32.946938775510205, "grad_norm": 11.92208194732666, "learning_rate": 3.354081632653062e-05, "loss": 3.3857, "step": 8072 }, { "epoch": 32.95102040816327, "grad_norm": 22.297815322875977, "learning_rate": 3.3538775510204085e-05, "loss": 2.0456, "step": 8073 }, { "epoch": 32.95510204081633, "grad_norm": 21.390024185180664, "learning_rate": 3.353673469387755e-05, "loss": 2.3059, "step": 8074 }, { "epoch": 32.95918367346939, "grad_norm": 21.472434997558594, "learning_rate": 3.353469387755102e-05, "loss": 2.6836, "step": 8075 }, { "epoch": 32.96326530612245, "grad_norm": 13.397744178771973, "learning_rate": 3.3532653061224486e-05, "loss": 2.8024, "step": 8076 }, { "epoch": 32.96734693877551, "grad_norm": 8.193024635314941, "learning_rate": 3.353061224489796e-05, "loss": 3.6366, "step": 8077 }, { "epoch": 32.97142857142857, "grad_norm": 30.28462028503418, "learning_rate": 3.3528571428571434e-05, "loss": 1.6841, "step": 8078 }, { "epoch": 32.97551020408163, "grad_norm": 27.452899932861328, "learning_rate": 3.35265306122449e-05, "loss": 2.533, "step": 8079 }, { "epoch": 32.97959183673469, "grad_norm": 11.784382820129395, "learning_rate": 3.352448979591837e-05, "loss": 3.2502, "step": 8080 }, { "epoch": 32.98367346938775, "grad_norm": 19.870906829833984, "learning_rate": 3.3522448979591836e-05, "loss": 3.0241, "step": 8081 }, { "epoch": 32.987755102040815, "grad_norm": 16.477169036865234, "learning_rate": 3.352040816326531e-05, "loss": 3.0247, "step": 8082 }, { "epoch": 32.99183673469388, "grad_norm": 16.662771224975586, "learning_rate": 3.351836734693878e-05, "loss": 2.7525, "step": 8083 }, { "epoch": 32.99591836734694, "grad_norm": 29.001325607299805, "learning_rate": 3.3516326530612244e-05, "loss": 2.0721, "step": 8084 }, { "epoch": 33.0, "grad_norm": 28.280597686767578, "learning_rate": 3.351428571428572e-05, "loss": 2.6133, "step": 8085 }, { "epoch": 33.00408163265306, "grad_norm": 27.011104583740234, "learning_rate": 3.3512244897959185e-05, "loss": 0.2659, "step": 8086 }, { "epoch": 33.00816326530612, "grad_norm": 12.802959442138672, "learning_rate": 3.351020408163266e-05, "loss": 2.8509, "step": 8087 }, { "epoch": 33.012244897959185, "grad_norm": 12.392351150512695, "learning_rate": 3.3508163265306126e-05, "loss": 2.631, "step": 8088 }, { "epoch": 33.01632653061225, "grad_norm": 13.1118745803833, "learning_rate": 3.350612244897959e-05, "loss": 2.6621, "step": 8089 }, { "epoch": 33.02040816326531, "grad_norm": 24.203197479248047, "learning_rate": 3.350408163265306e-05, "loss": 1.615, "step": 8090 }, { "epoch": 33.02448979591837, "grad_norm": 13.454306602478027, "learning_rate": 3.350204081632653e-05, "loss": 2.5796, "step": 8091 }, { "epoch": 33.02857142857143, "grad_norm": 8.411219596862793, "learning_rate": 3.35e-05, "loss": 3.5532, "step": 8092 }, { "epoch": 33.03265306122449, "grad_norm": 31.326576232910156, "learning_rate": 3.3497959183673475e-05, "loss": 2.1221, "step": 8093 }, { "epoch": 33.03673469387755, "grad_norm": 15.673168182373047, "learning_rate": 3.349591836734694e-05, "loss": 2.844, "step": 8094 }, { "epoch": 33.04081632653061, "grad_norm": 22.87706756591797, "learning_rate": 3.349387755102041e-05, "loss": 2.1115, "step": 8095 }, { "epoch": 33.04489795918367, "grad_norm": 16.356826782226562, "learning_rate": 3.3491836734693876e-05, "loss": 2.8323, "step": 8096 }, { "epoch": 33.04897959183673, "grad_norm": 26.787195205688477, "learning_rate": 3.348979591836735e-05, "loss": 2.3617, "step": 8097 }, { "epoch": 33.053061224489795, "grad_norm": 19.3568172454834, "learning_rate": 3.348775510204082e-05, "loss": 1.8339, "step": 8098 }, { "epoch": 33.05714285714286, "grad_norm": 13.99195671081543, "learning_rate": 3.3485714285714285e-05, "loss": 2.7691, "step": 8099 }, { "epoch": 33.06122448979592, "grad_norm": 26.385988235473633, "learning_rate": 3.348367346938776e-05, "loss": 2.3173, "step": 8100 }, { "epoch": 33.06530612244898, "grad_norm": 9.376944541931152, "learning_rate": 3.3481632653061226e-05, "loss": 3.4607, "step": 8101 }, { "epoch": 33.06938775510204, "grad_norm": 14.464348793029785, "learning_rate": 3.34795918367347e-05, "loss": 2.8063, "step": 8102 }, { "epoch": 33.073469387755104, "grad_norm": 35.09455871582031, "learning_rate": 3.347755102040817e-05, "loss": 1.8849, "step": 8103 }, { "epoch": 33.077551020408166, "grad_norm": 28.70766830444336, "learning_rate": 3.3475510204081634e-05, "loss": 1.5957, "step": 8104 }, { "epoch": 33.08163265306123, "grad_norm": 31.968788146972656, "learning_rate": 3.34734693877551e-05, "loss": 1.7863, "step": 8105 }, { "epoch": 33.08571428571429, "grad_norm": 13.371590614318848, "learning_rate": 3.3471428571428575e-05, "loss": 2.6956, "step": 8106 }, { "epoch": 33.089795918367344, "grad_norm": 10.660127639770508, "learning_rate": 3.346938775510204e-05, "loss": 3.0023, "step": 8107 }, { "epoch": 33.093877551020405, "grad_norm": 29.124210357666016, "learning_rate": 3.346734693877551e-05, "loss": 1.9404, "step": 8108 }, { "epoch": 33.09795918367347, "grad_norm": 25.96866798400879, "learning_rate": 3.346530612244898e-05, "loss": 2.1636, "step": 8109 }, { "epoch": 33.10204081632653, "grad_norm": 8.750179290771484, "learning_rate": 3.346326530612245e-05, "loss": 3.344, "step": 8110 }, { "epoch": 33.10612244897959, "grad_norm": 31.019168853759766, "learning_rate": 3.3461224489795924e-05, "loss": 1.6155, "step": 8111 }, { "epoch": 33.11020408163265, "grad_norm": 27.8685302734375, "learning_rate": 3.345918367346939e-05, "loss": 1.7313, "step": 8112 }, { "epoch": 33.114285714285714, "grad_norm": 36.8254280090332, "learning_rate": 3.345714285714286e-05, "loss": 1.0198, "step": 8113 }, { "epoch": 33.118367346938776, "grad_norm": 9.678533554077148, "learning_rate": 3.3455102040816325e-05, "loss": 3.3151, "step": 8114 }, { "epoch": 33.12244897959184, "grad_norm": 19.2943172454834, "learning_rate": 3.345306122448979e-05, "loss": 2.5177, "step": 8115 }, { "epoch": 33.1265306122449, "grad_norm": 19.556076049804688, "learning_rate": 3.3451020408163266e-05, "loss": 2.6593, "step": 8116 }, { "epoch": 33.13061224489796, "grad_norm": 13.44836139678955, "learning_rate": 3.344897959183674e-05, "loss": 3.0692, "step": 8117 }, { "epoch": 33.13469387755102, "grad_norm": 13.930143356323242, "learning_rate": 3.344693877551021e-05, "loss": 2.9561, "step": 8118 }, { "epoch": 33.138775510204084, "grad_norm": 19.031225204467773, "learning_rate": 3.3444897959183675e-05, "loss": 2.2886, "step": 8119 }, { "epoch": 33.142857142857146, "grad_norm": 18.66162872314453, "learning_rate": 3.344285714285714e-05, "loss": 2.4082, "step": 8120 }, { "epoch": 33.14693877551021, "grad_norm": 11.445101737976074, "learning_rate": 3.3440816326530616e-05, "loss": 2.9919, "step": 8121 }, { "epoch": 33.15102040816326, "grad_norm": 22.577110290527344, "learning_rate": 3.343877551020408e-05, "loss": 2.283, "step": 8122 }, { "epoch": 33.155102040816324, "grad_norm": 15.15609073638916, "learning_rate": 3.343673469387755e-05, "loss": 2.7839, "step": 8123 }, { "epoch": 33.159183673469386, "grad_norm": 13.283990859985352, "learning_rate": 3.3434693877551024e-05, "loss": 2.8684, "step": 8124 }, { "epoch": 33.16326530612245, "grad_norm": 15.875911712646484, "learning_rate": 3.343265306122449e-05, "loss": 2.8325, "step": 8125 }, { "epoch": 33.16734693877551, "grad_norm": 14.750438690185547, "learning_rate": 3.3430612244897965e-05, "loss": 2.8299, "step": 8126 }, { "epoch": 33.17142857142857, "grad_norm": 13.673657417297363, "learning_rate": 3.342857142857143e-05, "loss": 2.3297, "step": 8127 }, { "epoch": 33.17551020408163, "grad_norm": 15.392630577087402, "learning_rate": 3.34265306122449e-05, "loss": 2.8953, "step": 8128 }, { "epoch": 33.179591836734694, "grad_norm": 31.766815185546875, "learning_rate": 3.3424489795918366e-05, "loss": 1.8069, "step": 8129 }, { "epoch": 33.183673469387756, "grad_norm": 25.507919311523438, "learning_rate": 3.342244897959183e-05, "loss": 2.3828, "step": 8130 }, { "epoch": 33.18775510204082, "grad_norm": 19.97781753540039, "learning_rate": 3.342040816326531e-05, "loss": 2.5288, "step": 8131 }, { "epoch": 33.19183673469388, "grad_norm": 19.71562385559082, "learning_rate": 3.341836734693878e-05, "loss": 2.4971, "step": 8132 }, { "epoch": 33.19591836734694, "grad_norm": 21.057292938232422, "learning_rate": 3.341632653061225e-05, "loss": 2.221, "step": 8133 }, { "epoch": 33.2, "grad_norm": 19.128671646118164, "learning_rate": 3.3414285714285715e-05, "loss": 2.5175, "step": 8134 }, { "epoch": 33.204081632653065, "grad_norm": 11.50345230102539, "learning_rate": 3.341224489795918e-05, "loss": 3.1358, "step": 8135 }, { "epoch": 33.20816326530612, "grad_norm": 18.492626190185547, "learning_rate": 3.3410204081632656e-05, "loss": 2.2187, "step": 8136 }, { "epoch": 33.21224489795918, "grad_norm": 19.661470413208008, "learning_rate": 3.3408163265306123e-05, "loss": 2.3337, "step": 8137 }, { "epoch": 33.21632653061224, "grad_norm": 16.69127082824707, "learning_rate": 3.340612244897959e-05, "loss": 2.4021, "step": 8138 }, { "epoch": 33.220408163265304, "grad_norm": 27.965181350708008, "learning_rate": 3.3404081632653064e-05, "loss": 0.8863, "step": 8139 }, { "epoch": 33.224489795918366, "grad_norm": 11.489518165588379, "learning_rate": 3.340204081632653e-05, "loss": 3.0454, "step": 8140 }, { "epoch": 33.22857142857143, "grad_norm": 23.067176818847656, "learning_rate": 3.3400000000000005e-05, "loss": 2.1475, "step": 8141 }, { "epoch": 33.23265306122449, "grad_norm": 34.608360290527344, "learning_rate": 3.339795918367347e-05, "loss": 1.5069, "step": 8142 }, { "epoch": 33.23673469387755, "grad_norm": 26.78287696838379, "learning_rate": 3.339591836734694e-05, "loss": 1.6983, "step": 8143 }, { "epoch": 33.24081632653061, "grad_norm": 18.52655029296875, "learning_rate": 3.339387755102041e-05, "loss": 2.8867, "step": 8144 }, { "epoch": 33.244897959183675, "grad_norm": 20.217731475830078, "learning_rate": 3.3391836734693874e-05, "loss": 2.2786, "step": 8145 }, { "epoch": 33.248979591836736, "grad_norm": 31.296199798583984, "learning_rate": 3.338979591836735e-05, "loss": 1.8918, "step": 8146 }, { "epoch": 33.2530612244898, "grad_norm": 9.537978172302246, "learning_rate": 3.338775510204082e-05, "loss": 3.5617, "step": 8147 }, { "epoch": 33.25714285714286, "grad_norm": 41.250972747802734, "learning_rate": 3.338571428571429e-05, "loss": 1.1223, "step": 8148 }, { "epoch": 33.26122448979592, "grad_norm": 22.05338478088379, "learning_rate": 3.3383673469387756e-05, "loss": 2.0354, "step": 8149 }, { "epoch": 33.265306122448976, "grad_norm": 12.24942684173584, "learning_rate": 3.338163265306122e-05, "loss": 2.8898, "step": 8150 }, { "epoch": 33.26938775510204, "grad_norm": 10.70300579071045, "learning_rate": 3.33795918367347e-05, "loss": 3.1632, "step": 8151 }, { "epoch": 33.2734693877551, "grad_norm": 34.681663513183594, "learning_rate": 3.3377551020408164e-05, "loss": 2.1497, "step": 8152 }, { "epoch": 33.27755102040816, "grad_norm": 9.767955780029297, "learning_rate": 3.337551020408163e-05, "loss": 3.2238, "step": 8153 }, { "epoch": 33.28163265306122, "grad_norm": 18.71284294128418, "learning_rate": 3.3373469387755105e-05, "loss": 2.7173, "step": 8154 }, { "epoch": 33.285714285714285, "grad_norm": 13.554027557373047, "learning_rate": 3.337142857142857e-05, "loss": 2.6846, "step": 8155 }, { "epoch": 33.289795918367346, "grad_norm": 23.13677215576172, "learning_rate": 3.3369387755102046e-05, "loss": 2.2644, "step": 8156 }, { "epoch": 33.29387755102041, "grad_norm": 12.309287071228027, "learning_rate": 3.336734693877551e-05, "loss": 2.83, "step": 8157 }, { "epoch": 33.29795918367347, "grad_norm": 24.140995025634766, "learning_rate": 3.336530612244898e-05, "loss": 1.9931, "step": 8158 }, { "epoch": 33.30204081632653, "grad_norm": 14.297746658325195, "learning_rate": 3.336326530612245e-05, "loss": 2.9962, "step": 8159 }, { "epoch": 33.30612244897959, "grad_norm": 14.929182052612305, "learning_rate": 3.336122448979592e-05, "loss": 2.7678, "step": 8160 }, { "epoch": 33.310204081632655, "grad_norm": 26.57412338256836, "learning_rate": 3.335918367346939e-05, "loss": 1.7141, "step": 8161 }, { "epoch": 33.31428571428572, "grad_norm": 21.58136749267578, "learning_rate": 3.3357142857142856e-05, "loss": 2.9071, "step": 8162 }, { "epoch": 33.31836734693878, "grad_norm": 23.670392990112305, "learning_rate": 3.335510204081633e-05, "loss": 2.0493, "step": 8163 }, { "epoch": 33.32244897959184, "grad_norm": 18.452205657958984, "learning_rate": 3.33530612244898e-05, "loss": 2.2773, "step": 8164 }, { "epoch": 33.326530612244895, "grad_norm": 20.802486419677734, "learning_rate": 3.335102040816327e-05, "loss": 2.5602, "step": 8165 }, { "epoch": 33.33061224489796, "grad_norm": 16.25602149963379, "learning_rate": 3.334897959183674e-05, "loss": 2.999, "step": 8166 }, { "epoch": 33.33469387755102, "grad_norm": 22.61324691772461, "learning_rate": 3.3346938775510205e-05, "loss": 2.1559, "step": 8167 }, { "epoch": 33.33877551020408, "grad_norm": 24.994850158691406, "learning_rate": 3.334489795918367e-05, "loss": 2.5445, "step": 8168 }, { "epoch": 33.34285714285714, "grad_norm": 31.939348220825195, "learning_rate": 3.334285714285714e-05, "loss": 2.6997, "step": 8169 }, { "epoch": 33.3469387755102, "grad_norm": 17.813241958618164, "learning_rate": 3.334081632653061e-05, "loss": 2.1015, "step": 8170 }, { "epoch": 33.351020408163265, "grad_norm": 26.98061752319336, "learning_rate": 3.333877551020409e-05, "loss": 1.1216, "step": 8171 }, { "epoch": 33.35510204081633, "grad_norm": 22.478553771972656, "learning_rate": 3.3336734693877554e-05, "loss": 1.9691, "step": 8172 }, { "epoch": 33.35918367346939, "grad_norm": 7.3543572425842285, "learning_rate": 3.333469387755102e-05, "loss": 3.6056, "step": 8173 }, { "epoch": 33.36326530612245, "grad_norm": 15.634464263916016, "learning_rate": 3.333265306122449e-05, "loss": 2.5107, "step": 8174 }, { "epoch": 33.36734693877551, "grad_norm": 25.196094512939453, "learning_rate": 3.333061224489796e-05, "loss": 1.8594, "step": 8175 }, { "epoch": 33.371428571428574, "grad_norm": 28.21268081665039, "learning_rate": 3.332857142857143e-05, "loss": 1.7954, "step": 8176 }, { "epoch": 33.375510204081635, "grad_norm": 23.83778190612793, "learning_rate": 3.3326530612244897e-05, "loss": 1.6761, "step": 8177 }, { "epoch": 33.3795918367347, "grad_norm": 23.024158477783203, "learning_rate": 3.332448979591837e-05, "loss": 1.7783, "step": 8178 }, { "epoch": 33.38367346938775, "grad_norm": 18.427974700927734, "learning_rate": 3.332244897959184e-05, "loss": 2.5088, "step": 8179 }, { "epoch": 33.38775510204081, "grad_norm": 25.08498191833496, "learning_rate": 3.332040816326531e-05, "loss": 2.5033, "step": 8180 }, { "epoch": 33.391836734693875, "grad_norm": 25.503807067871094, "learning_rate": 3.331836734693878e-05, "loss": 1.6173, "step": 8181 }, { "epoch": 33.39591836734694, "grad_norm": 18.44489288330078, "learning_rate": 3.3316326530612246e-05, "loss": 1.8202, "step": 8182 }, { "epoch": 33.4, "grad_norm": 15.081554412841797, "learning_rate": 3.331428571428571e-05, "loss": 2.746, "step": 8183 }, { "epoch": 33.40408163265306, "grad_norm": 17.961320877075195, "learning_rate": 3.331224489795918e-05, "loss": 2.4201, "step": 8184 }, { "epoch": 33.40816326530612, "grad_norm": 16.190019607543945, "learning_rate": 3.3310204081632654e-05, "loss": 2.3859, "step": 8185 }, { "epoch": 33.412244897959184, "grad_norm": 11.856733322143555, "learning_rate": 3.330816326530613e-05, "loss": 2.9382, "step": 8186 }, { "epoch": 33.416326530612245, "grad_norm": 14.388327598571777, "learning_rate": 3.3306122448979595e-05, "loss": 3.0792, "step": 8187 }, { "epoch": 33.42040816326531, "grad_norm": 28.445110321044922, "learning_rate": 3.330408163265306e-05, "loss": 1.9212, "step": 8188 }, { "epoch": 33.42448979591837, "grad_norm": 25.494640350341797, "learning_rate": 3.330204081632653e-05, "loss": 2.2336, "step": 8189 }, { "epoch": 33.42857142857143, "grad_norm": 9.632705688476562, "learning_rate": 3.33e-05, "loss": 3.1618, "step": 8190 }, { "epoch": 33.43265306122449, "grad_norm": 10.362114906311035, "learning_rate": 3.329795918367347e-05, "loss": 3.2245, "step": 8191 }, { "epoch": 33.436734693877554, "grad_norm": 21.639034271240234, "learning_rate": 3.329591836734694e-05, "loss": 2.5856, "step": 8192 }, { "epoch": 33.440816326530616, "grad_norm": 18.962366104125977, "learning_rate": 3.329387755102041e-05, "loss": 2.4059, "step": 8193 }, { "epoch": 33.44489795918367, "grad_norm": 19.76604652404785, "learning_rate": 3.329183673469388e-05, "loss": 2.4671, "step": 8194 }, { "epoch": 33.44897959183673, "grad_norm": 13.697283744812012, "learning_rate": 3.328979591836735e-05, "loss": 3.1195, "step": 8195 }, { "epoch": 33.453061224489794, "grad_norm": 33.23484420776367, "learning_rate": 3.328775510204082e-05, "loss": 1.5063, "step": 8196 }, { "epoch": 33.457142857142856, "grad_norm": 11.84349536895752, "learning_rate": 3.3285714285714286e-05, "loss": 3.1748, "step": 8197 }, { "epoch": 33.46122448979592, "grad_norm": 15.76066780090332, "learning_rate": 3.3283673469387754e-05, "loss": 2.7647, "step": 8198 }, { "epoch": 33.46530612244898, "grad_norm": 23.109975814819336, "learning_rate": 3.328163265306123e-05, "loss": 1.8627, "step": 8199 }, { "epoch": 33.46938775510204, "grad_norm": 24.644901275634766, "learning_rate": 3.3279591836734695e-05, "loss": 1.3428, "step": 8200 }, { "epoch": 33.4734693877551, "grad_norm": 21.14790916442871, "learning_rate": 3.327755102040817e-05, "loss": 2.475, "step": 8201 }, { "epoch": 33.477551020408164, "grad_norm": 14.472193717956543, "learning_rate": 3.3275510204081636e-05, "loss": 2.5087, "step": 8202 }, { "epoch": 33.481632653061226, "grad_norm": 21.43715476989746, "learning_rate": 3.32734693877551e-05, "loss": 2.1539, "step": 8203 }, { "epoch": 33.48571428571429, "grad_norm": 13.20856761932373, "learning_rate": 3.327142857142858e-05, "loss": 2.8771, "step": 8204 }, { "epoch": 33.48979591836735, "grad_norm": 22.399343490600586, "learning_rate": 3.3269387755102044e-05, "loss": 2.5863, "step": 8205 }, { "epoch": 33.49387755102041, "grad_norm": 35.74860763549805, "learning_rate": 3.326734693877551e-05, "loss": 2.3433, "step": 8206 }, { "epoch": 33.49795918367347, "grad_norm": 22.091888427734375, "learning_rate": 3.326530612244898e-05, "loss": 2.6594, "step": 8207 }, { "epoch": 33.50204081632653, "grad_norm": 14.262661933898926, "learning_rate": 3.326326530612245e-05, "loss": 2.7659, "step": 8208 }, { "epoch": 33.50612244897959, "grad_norm": 28.49407386779785, "learning_rate": 3.326122448979592e-05, "loss": 2.0722, "step": 8209 }, { "epoch": 33.51020408163265, "grad_norm": 32.57735824584961, "learning_rate": 3.325918367346939e-05, "loss": 2.6672, "step": 8210 }, { "epoch": 33.51428571428571, "grad_norm": 26.660024642944336, "learning_rate": 3.325714285714286e-05, "loss": 1.9907, "step": 8211 }, { "epoch": 33.518367346938774, "grad_norm": 18.142059326171875, "learning_rate": 3.325510204081633e-05, "loss": 2.2295, "step": 8212 }, { "epoch": 33.522448979591836, "grad_norm": 16.968753814697266, "learning_rate": 3.3253061224489794e-05, "loss": 2.8419, "step": 8213 }, { "epoch": 33.5265306122449, "grad_norm": 11.77319622039795, "learning_rate": 3.325102040816327e-05, "loss": 3.1143, "step": 8214 }, { "epoch": 33.53061224489796, "grad_norm": 7.161776542663574, "learning_rate": 3.3248979591836735e-05, "loss": 3.2733, "step": 8215 }, { "epoch": 33.53469387755102, "grad_norm": 19.313064575195312, "learning_rate": 3.32469387755102e-05, "loss": 2.574, "step": 8216 }, { "epoch": 33.53877551020408, "grad_norm": 11.73214054107666, "learning_rate": 3.3244897959183676e-05, "loss": 3.0008, "step": 8217 }, { "epoch": 33.542857142857144, "grad_norm": 24.474300384521484, "learning_rate": 3.3242857142857144e-05, "loss": 1.0289, "step": 8218 }, { "epoch": 33.546938775510206, "grad_norm": 16.489213943481445, "learning_rate": 3.324081632653062e-05, "loss": 3.0051, "step": 8219 }, { "epoch": 33.55102040816327, "grad_norm": 13.159140586853027, "learning_rate": 3.3238775510204085e-05, "loss": 2.6698, "step": 8220 }, { "epoch": 33.55510204081633, "grad_norm": 19.728439331054688, "learning_rate": 3.323673469387755e-05, "loss": 2.7504, "step": 8221 }, { "epoch": 33.55918367346939, "grad_norm": 37.147613525390625, "learning_rate": 3.323469387755102e-05, "loss": 1.008, "step": 8222 }, { "epoch": 33.563265306122446, "grad_norm": 25.39222526550293, "learning_rate": 3.3232653061224486e-05, "loss": 1.0904, "step": 8223 }, { "epoch": 33.56734693877551, "grad_norm": 30.658470153808594, "learning_rate": 3.323061224489796e-05, "loss": 1.5755, "step": 8224 }, { "epoch": 33.57142857142857, "grad_norm": 14.35496997833252, "learning_rate": 3.3228571428571434e-05, "loss": 3.0844, "step": 8225 }, { "epoch": 33.57551020408163, "grad_norm": 15.27794075012207, "learning_rate": 3.32265306122449e-05, "loss": 2.7588, "step": 8226 }, { "epoch": 33.57959183673469, "grad_norm": 31.722732543945312, "learning_rate": 3.322448979591837e-05, "loss": 0.6667, "step": 8227 }, { "epoch": 33.583673469387755, "grad_norm": 39.28044509887695, "learning_rate": 3.3222448979591835e-05, "loss": 1.12, "step": 8228 }, { "epoch": 33.587755102040816, "grad_norm": 28.649431228637695, "learning_rate": 3.322040816326531e-05, "loss": 2.2874, "step": 8229 }, { "epoch": 33.59183673469388, "grad_norm": 12.751749992370605, "learning_rate": 3.3218367346938776e-05, "loss": 2.9896, "step": 8230 }, { "epoch": 33.59591836734694, "grad_norm": 17.175416946411133, "learning_rate": 3.321632653061224e-05, "loss": 2.88, "step": 8231 }, { "epoch": 33.6, "grad_norm": 18.423690795898438, "learning_rate": 3.321428571428572e-05, "loss": 2.8096, "step": 8232 }, { "epoch": 33.60408163265306, "grad_norm": 16.565433502197266, "learning_rate": 3.3212244897959184e-05, "loss": 2.7304, "step": 8233 }, { "epoch": 33.608163265306125, "grad_norm": 18.924882888793945, "learning_rate": 3.321020408163266e-05, "loss": 2.817, "step": 8234 }, { "epoch": 33.61224489795919, "grad_norm": 18.923023223876953, "learning_rate": 3.3208163265306125e-05, "loss": 1.7929, "step": 8235 }, { "epoch": 33.61632653061225, "grad_norm": 20.138429641723633, "learning_rate": 3.320612244897959e-05, "loss": 2.1442, "step": 8236 }, { "epoch": 33.6204081632653, "grad_norm": 17.347978591918945, "learning_rate": 3.320408163265306e-05, "loss": 2.6357, "step": 8237 }, { "epoch": 33.624489795918365, "grad_norm": 15.584450721740723, "learning_rate": 3.320204081632653e-05, "loss": 2.8298, "step": 8238 }, { "epoch": 33.628571428571426, "grad_norm": 19.775358200073242, "learning_rate": 3.32e-05, "loss": 2.3314, "step": 8239 }, { "epoch": 33.63265306122449, "grad_norm": 19.718843460083008, "learning_rate": 3.3197959183673475e-05, "loss": 2.5828, "step": 8240 }, { "epoch": 33.63673469387755, "grad_norm": 14.587671279907227, "learning_rate": 3.319591836734694e-05, "loss": 2.7963, "step": 8241 }, { "epoch": 33.64081632653061, "grad_norm": 12.073002815246582, "learning_rate": 3.319387755102041e-05, "loss": 2.9265, "step": 8242 }, { "epoch": 33.64489795918367, "grad_norm": 23.669891357421875, "learning_rate": 3.319183673469388e-05, "loss": 1.615, "step": 8243 }, { "epoch": 33.648979591836735, "grad_norm": 12.989846229553223, "learning_rate": 3.318979591836735e-05, "loss": 2.9224, "step": 8244 }, { "epoch": 33.6530612244898, "grad_norm": 8.60689640045166, "learning_rate": 3.318775510204082e-05, "loss": 3.2841, "step": 8245 }, { "epoch": 33.65714285714286, "grad_norm": 18.33628273010254, "learning_rate": 3.3185714285714284e-05, "loss": 2.983, "step": 8246 }, { "epoch": 33.66122448979592, "grad_norm": 8.964735984802246, "learning_rate": 3.318367346938776e-05, "loss": 3.2622, "step": 8247 }, { "epoch": 33.66530612244898, "grad_norm": 20.54548454284668, "learning_rate": 3.318163265306123e-05, "loss": 2.3274, "step": 8248 }, { "epoch": 33.66938775510204, "grad_norm": 36.34088134765625, "learning_rate": 3.31795918367347e-05, "loss": 0.9275, "step": 8249 }, { "epoch": 33.673469387755105, "grad_norm": 22.08277130126953, "learning_rate": 3.3177551020408166e-05, "loss": 2.6188, "step": 8250 }, { "epoch": 33.67755102040816, "grad_norm": 16.249866485595703, "learning_rate": 3.317551020408163e-05, "loss": 2.8019, "step": 8251 }, { "epoch": 33.68163265306122, "grad_norm": 22.223482131958008, "learning_rate": 3.31734693877551e-05, "loss": 2.3291, "step": 8252 }, { "epoch": 33.68571428571428, "grad_norm": 23.01645278930664, "learning_rate": 3.3171428571428574e-05, "loss": 1.8559, "step": 8253 }, { "epoch": 33.689795918367345, "grad_norm": 21.27170753479004, "learning_rate": 3.316938775510204e-05, "loss": 2.7362, "step": 8254 }, { "epoch": 33.69387755102041, "grad_norm": 34.718162536621094, "learning_rate": 3.3167346938775515e-05, "loss": 1.8778, "step": 8255 }, { "epoch": 33.69795918367347, "grad_norm": 11.574621200561523, "learning_rate": 3.316530612244898e-05, "loss": 2.9299, "step": 8256 }, { "epoch": 33.70204081632653, "grad_norm": 31.995908737182617, "learning_rate": 3.316326530612245e-05, "loss": 2.1049, "step": 8257 }, { "epoch": 33.70612244897959, "grad_norm": 27.835206985473633, "learning_rate": 3.3161224489795923e-05, "loss": 1.5773, "step": 8258 }, { "epoch": 33.710204081632654, "grad_norm": 18.299583435058594, "learning_rate": 3.315918367346939e-05, "loss": 2.6138, "step": 8259 }, { "epoch": 33.714285714285715, "grad_norm": 14.38803768157959, "learning_rate": 3.315714285714286e-05, "loss": 2.8161, "step": 8260 }, { "epoch": 33.71836734693878, "grad_norm": 10.400288581848145, "learning_rate": 3.3155102040816325e-05, "loss": 3.3172, "step": 8261 }, { "epoch": 33.72244897959184, "grad_norm": 17.366622924804688, "learning_rate": 3.31530612244898e-05, "loss": 2.6528, "step": 8262 }, { "epoch": 33.7265306122449, "grad_norm": 14.836278915405273, "learning_rate": 3.3151020408163266e-05, "loss": 2.3793, "step": 8263 }, { "epoch": 33.73061224489796, "grad_norm": 14.495479583740234, "learning_rate": 3.314897959183674e-05, "loss": 2.9403, "step": 8264 }, { "epoch": 33.734693877551024, "grad_norm": 23.88275718688965, "learning_rate": 3.314693877551021e-05, "loss": 2.0947, "step": 8265 }, { "epoch": 33.73877551020408, "grad_norm": 21.348411560058594, "learning_rate": 3.3144897959183674e-05, "loss": 2.2353, "step": 8266 }, { "epoch": 33.74285714285714, "grad_norm": 13.461901664733887, "learning_rate": 3.314285714285714e-05, "loss": 2.7948, "step": 8267 }, { "epoch": 33.7469387755102, "grad_norm": 26.602052688598633, "learning_rate": 3.3140816326530615e-05, "loss": 1.1013, "step": 8268 }, { "epoch": 33.751020408163264, "grad_norm": 16.529739379882812, "learning_rate": 3.313877551020408e-05, "loss": 2.6756, "step": 8269 }, { "epoch": 33.755102040816325, "grad_norm": 12.840838432312012, "learning_rate": 3.313673469387755e-05, "loss": 3.0538, "step": 8270 }, { "epoch": 33.75918367346939, "grad_norm": 40.08258819580078, "learning_rate": 3.313469387755102e-05, "loss": 1.2322, "step": 8271 }, { "epoch": 33.76326530612245, "grad_norm": 29.40024185180664, "learning_rate": 3.313265306122449e-05, "loss": 1.8574, "step": 8272 }, { "epoch": 33.76734693877551, "grad_norm": 13.625479698181152, "learning_rate": 3.3130612244897964e-05, "loss": 3.2456, "step": 8273 }, { "epoch": 33.77142857142857, "grad_norm": 23.270706176757812, "learning_rate": 3.312857142857143e-05, "loss": 1.8637, "step": 8274 }, { "epoch": 33.775510204081634, "grad_norm": 27.790199279785156, "learning_rate": 3.31265306122449e-05, "loss": 1.8107, "step": 8275 }, { "epoch": 33.779591836734696, "grad_norm": 20.04136848449707, "learning_rate": 3.3124489795918366e-05, "loss": 2.6431, "step": 8276 }, { "epoch": 33.78367346938776, "grad_norm": 10.340428352355957, "learning_rate": 3.312244897959183e-05, "loss": 2.7812, "step": 8277 }, { "epoch": 33.78775510204082, "grad_norm": 19.00847053527832, "learning_rate": 3.3120408163265307e-05, "loss": 2.3217, "step": 8278 }, { "epoch": 33.79183673469388, "grad_norm": 23.538593292236328, "learning_rate": 3.311836734693878e-05, "loss": 1.9894, "step": 8279 }, { "epoch": 33.795918367346935, "grad_norm": 13.030593872070312, "learning_rate": 3.311632653061225e-05, "loss": 2.8907, "step": 8280 }, { "epoch": 33.8, "grad_norm": 16.139585494995117, "learning_rate": 3.3114285714285715e-05, "loss": 2.956, "step": 8281 }, { "epoch": 33.80408163265306, "grad_norm": 25.87005615234375, "learning_rate": 3.311224489795918e-05, "loss": 2.4167, "step": 8282 }, { "epoch": 33.80816326530612, "grad_norm": 15.446025848388672, "learning_rate": 3.3110204081632656e-05, "loss": 2.6497, "step": 8283 }, { "epoch": 33.81224489795918, "grad_norm": 23.06848907470703, "learning_rate": 3.310816326530612e-05, "loss": 2.5464, "step": 8284 }, { "epoch": 33.816326530612244, "grad_norm": 36.138954162597656, "learning_rate": 3.310612244897959e-05, "loss": 1.022, "step": 8285 }, { "epoch": 33.820408163265306, "grad_norm": 21.320884704589844, "learning_rate": 3.3104081632653064e-05, "loss": 2.1448, "step": 8286 }, { "epoch": 33.82448979591837, "grad_norm": 16.62826919555664, "learning_rate": 3.310204081632653e-05, "loss": 2.2301, "step": 8287 }, { "epoch": 33.82857142857143, "grad_norm": 29.841781616210938, "learning_rate": 3.3100000000000005e-05, "loss": 1.6593, "step": 8288 }, { "epoch": 33.83265306122449, "grad_norm": 16.415325164794922, "learning_rate": 3.309795918367347e-05, "loss": 2.6247, "step": 8289 }, { "epoch": 33.83673469387755, "grad_norm": 14.177592277526855, "learning_rate": 3.309591836734694e-05, "loss": 2.7164, "step": 8290 }, { "epoch": 33.840816326530614, "grad_norm": 12.659385681152344, "learning_rate": 3.3093877551020406e-05, "loss": 2.9629, "step": 8291 }, { "epoch": 33.844897959183676, "grad_norm": 31.69219970703125, "learning_rate": 3.309183673469388e-05, "loss": 0.9792, "step": 8292 }, { "epoch": 33.84897959183674, "grad_norm": 10.593429565429688, "learning_rate": 3.308979591836735e-05, "loss": 3.2328, "step": 8293 }, { "epoch": 33.85306122448979, "grad_norm": 24.51593017578125, "learning_rate": 3.308775510204082e-05, "loss": 2.1375, "step": 8294 }, { "epoch": 33.857142857142854, "grad_norm": 33.52049255371094, "learning_rate": 3.308571428571429e-05, "loss": 1.7935, "step": 8295 }, { "epoch": 33.861224489795916, "grad_norm": 13.526883125305176, "learning_rate": 3.3083673469387756e-05, "loss": 3.0185, "step": 8296 }, { "epoch": 33.86530612244898, "grad_norm": 10.780017852783203, "learning_rate": 3.308163265306123e-05, "loss": 3.1157, "step": 8297 }, { "epoch": 33.86938775510204, "grad_norm": 11.928752899169922, "learning_rate": 3.3079591836734697e-05, "loss": 2.8929, "step": 8298 }, { "epoch": 33.8734693877551, "grad_norm": 12.669313430786133, "learning_rate": 3.3077551020408164e-05, "loss": 2.9935, "step": 8299 }, { "epoch": 33.87755102040816, "grad_norm": 27.30625343322754, "learning_rate": 3.307551020408163e-05, "loss": 2.1858, "step": 8300 }, { "epoch": 33.881632653061224, "grad_norm": 9.6483793258667, "learning_rate": 3.3073469387755105e-05, "loss": 3.304, "step": 8301 }, { "epoch": 33.885714285714286, "grad_norm": 17.315876007080078, "learning_rate": 3.307142857142858e-05, "loss": 2.5462, "step": 8302 }, { "epoch": 33.88979591836735, "grad_norm": 18.104143142700195, "learning_rate": 3.3069387755102046e-05, "loss": 2.1846, "step": 8303 }, { "epoch": 33.89387755102041, "grad_norm": 11.476370811462402, "learning_rate": 3.306734693877551e-05, "loss": 2.9538, "step": 8304 }, { "epoch": 33.89795918367347, "grad_norm": 16.749727249145508, "learning_rate": 3.306530612244898e-05, "loss": 2.4922, "step": 8305 }, { "epoch": 33.90204081632653, "grad_norm": 23.149890899658203, "learning_rate": 3.306326530612245e-05, "loss": 2.494, "step": 8306 }, { "epoch": 33.906122448979595, "grad_norm": 10.696621894836426, "learning_rate": 3.306122448979592e-05, "loss": 3.3168, "step": 8307 }, { "epoch": 33.910204081632656, "grad_norm": 11.32978343963623, "learning_rate": 3.305918367346939e-05, "loss": 3.0304, "step": 8308 }, { "epoch": 33.91428571428571, "grad_norm": 40.79157638549805, "learning_rate": 3.305714285714286e-05, "loss": 1.9498, "step": 8309 }, { "epoch": 33.91836734693877, "grad_norm": 40.78385543823242, "learning_rate": 3.305510204081633e-05, "loss": 1.7504, "step": 8310 }, { "epoch": 33.922448979591834, "grad_norm": 40.06719970703125, "learning_rate": 3.3053061224489796e-05, "loss": 0.8492, "step": 8311 }, { "epoch": 33.926530612244896, "grad_norm": 27.614213943481445, "learning_rate": 3.305102040816327e-05, "loss": 1.4485, "step": 8312 }, { "epoch": 33.93061224489796, "grad_norm": 25.376609802246094, "learning_rate": 3.304897959183674e-05, "loss": 2.4458, "step": 8313 }, { "epoch": 33.93469387755102, "grad_norm": 10.132667541503906, "learning_rate": 3.3046938775510204e-05, "loss": 3.4291, "step": 8314 }, { "epoch": 33.93877551020408, "grad_norm": 27.353166580200195, "learning_rate": 3.304489795918367e-05, "loss": 1.6641, "step": 8315 }, { "epoch": 33.94285714285714, "grad_norm": 11.702315330505371, "learning_rate": 3.304285714285714e-05, "loss": 3.3451, "step": 8316 }, { "epoch": 33.946938775510205, "grad_norm": 16.006893157958984, "learning_rate": 3.304081632653061e-05, "loss": 2.5541, "step": 8317 }, { "epoch": 33.95102040816327, "grad_norm": 16.434219360351562, "learning_rate": 3.3038775510204086e-05, "loss": 2.3933, "step": 8318 }, { "epoch": 33.95510204081633, "grad_norm": 19.96187400817871, "learning_rate": 3.3036734693877554e-05, "loss": 2.3864, "step": 8319 }, { "epoch": 33.95918367346939, "grad_norm": 11.55543327331543, "learning_rate": 3.303469387755102e-05, "loss": 3.0967, "step": 8320 }, { "epoch": 33.96326530612245, "grad_norm": 25.832792282104492, "learning_rate": 3.303265306122449e-05, "loss": 1.7741, "step": 8321 }, { "epoch": 33.96734693877551, "grad_norm": 17.241832733154297, "learning_rate": 3.303061224489796e-05, "loss": 2.8767, "step": 8322 }, { "epoch": 33.97142857142857, "grad_norm": 19.703401565551758, "learning_rate": 3.302857142857143e-05, "loss": 2.5945, "step": 8323 }, { "epoch": 33.97551020408163, "grad_norm": 21.903865814208984, "learning_rate": 3.3026530612244896e-05, "loss": 2.0003, "step": 8324 }, { "epoch": 33.97959183673469, "grad_norm": 21.03885269165039, "learning_rate": 3.302448979591837e-05, "loss": 2.6883, "step": 8325 }, { "epoch": 33.98367346938775, "grad_norm": 16.099409103393555, "learning_rate": 3.302244897959184e-05, "loss": 2.8193, "step": 8326 }, { "epoch": 33.987755102040815, "grad_norm": 16.24953842163086, "learning_rate": 3.302040816326531e-05, "loss": 2.6606, "step": 8327 }, { "epoch": 33.99183673469388, "grad_norm": 11.889866828918457, "learning_rate": 3.301836734693878e-05, "loss": 2.9798, "step": 8328 }, { "epoch": 33.99591836734694, "grad_norm": 21.592803955078125, "learning_rate": 3.3016326530612245e-05, "loss": 2.5248, "step": 8329 }, { "epoch": 34.0, "grad_norm": 17.19196319580078, "learning_rate": 3.301428571428571e-05, "loss": 2.8635, "step": 8330 }, { "epoch": 34.00408163265306, "grad_norm": 23.17346954345703, "learning_rate": 3.301224489795918e-05, "loss": 2.0472, "step": 8331 }, { "epoch": 34.00816326530612, "grad_norm": 20.714197158813477, "learning_rate": 3.301020408163265e-05, "loss": 1.7817, "step": 8332 }, { "epoch": 34.012244897959185, "grad_norm": 15.17874813079834, "learning_rate": 3.300816326530613e-05, "loss": 2.6165, "step": 8333 }, { "epoch": 34.01632653061225, "grad_norm": 27.26506805419922, "learning_rate": 3.3006122448979594e-05, "loss": 2.2801, "step": 8334 }, { "epoch": 34.02040816326531, "grad_norm": 29.680866241455078, "learning_rate": 3.300408163265306e-05, "loss": 1.6791, "step": 8335 }, { "epoch": 34.02448979591837, "grad_norm": 22.578392028808594, "learning_rate": 3.3002040816326535e-05, "loss": 2.5015, "step": 8336 }, { "epoch": 34.02857142857143, "grad_norm": 14.560768127441406, "learning_rate": 3.3e-05, "loss": 2.71, "step": 8337 }, { "epoch": 34.03265306122449, "grad_norm": 16.974342346191406, "learning_rate": 3.299795918367347e-05, "loss": 2.3307, "step": 8338 }, { "epoch": 34.03673469387755, "grad_norm": 14.974791526794434, "learning_rate": 3.299591836734694e-05, "loss": 2.6896, "step": 8339 }, { "epoch": 34.04081632653061, "grad_norm": 40.88699722290039, "learning_rate": 3.299387755102041e-05, "loss": 1.1952, "step": 8340 }, { "epoch": 34.04489795918367, "grad_norm": 13.500312805175781, "learning_rate": 3.2991836734693885e-05, "loss": 2.8543, "step": 8341 }, { "epoch": 34.04897959183673, "grad_norm": 18.494285583496094, "learning_rate": 3.298979591836735e-05, "loss": 2.8083, "step": 8342 }, { "epoch": 34.053061224489795, "grad_norm": 21.933547973632812, "learning_rate": 3.298775510204082e-05, "loss": 2.4775, "step": 8343 }, { "epoch": 34.05714285714286, "grad_norm": 24.286457061767578, "learning_rate": 3.2985714285714286e-05, "loss": 2.4393, "step": 8344 }, { "epoch": 34.06122448979592, "grad_norm": 25.681133270263672, "learning_rate": 3.298367346938775e-05, "loss": 2.5208, "step": 8345 }, { "epoch": 34.06530612244898, "grad_norm": 29.010032653808594, "learning_rate": 3.298163265306123e-05, "loss": 1.9845, "step": 8346 }, { "epoch": 34.06938775510204, "grad_norm": 24.659353256225586, "learning_rate": 3.2979591836734694e-05, "loss": 2.1174, "step": 8347 }, { "epoch": 34.073469387755104, "grad_norm": 12.529099464416504, "learning_rate": 3.297755102040817e-05, "loss": 2.7482, "step": 8348 }, { "epoch": 34.077551020408166, "grad_norm": 14.131296157836914, "learning_rate": 3.2975510204081635e-05, "loss": 3.0814, "step": 8349 }, { "epoch": 34.08163265306123, "grad_norm": 12.0037260055542, "learning_rate": 3.29734693877551e-05, "loss": 3.1289, "step": 8350 }, { "epoch": 34.08571428571429, "grad_norm": 22.10776138305664, "learning_rate": 3.2971428571428576e-05, "loss": 2.1173, "step": 8351 }, { "epoch": 34.089795918367344, "grad_norm": 18.918447494506836, "learning_rate": 3.296938775510204e-05, "loss": 2.5054, "step": 8352 }, { "epoch": 34.093877551020405, "grad_norm": 15.09061050415039, "learning_rate": 3.296734693877551e-05, "loss": 2.5676, "step": 8353 }, { "epoch": 34.09795918367347, "grad_norm": 18.59025764465332, "learning_rate": 3.296530612244898e-05, "loss": 2.5485, "step": 8354 }, { "epoch": 34.10204081632653, "grad_norm": 20.69272232055664, "learning_rate": 3.296326530612245e-05, "loss": 2.2452, "step": 8355 }, { "epoch": 34.10612244897959, "grad_norm": 11.146295547485352, "learning_rate": 3.2961224489795925e-05, "loss": 3.2278, "step": 8356 }, { "epoch": 34.11020408163265, "grad_norm": 13.911418914794922, "learning_rate": 3.295918367346939e-05, "loss": 2.6628, "step": 8357 }, { "epoch": 34.114285714285714, "grad_norm": 20.326196670532227, "learning_rate": 3.295714285714286e-05, "loss": 1.7911, "step": 8358 }, { "epoch": 34.118367346938776, "grad_norm": 25.604267120361328, "learning_rate": 3.295510204081633e-05, "loss": 2.1457, "step": 8359 }, { "epoch": 34.12244897959184, "grad_norm": 27.76422119140625, "learning_rate": 3.2953061224489794e-05, "loss": 1.895, "step": 8360 }, { "epoch": 34.1265306122449, "grad_norm": 14.133358001708984, "learning_rate": 3.295102040816327e-05, "loss": 2.8951, "step": 8361 }, { "epoch": 34.13061224489796, "grad_norm": 13.88060188293457, "learning_rate": 3.2948979591836735e-05, "loss": 2.8884, "step": 8362 }, { "epoch": 34.13469387755102, "grad_norm": 21.547794342041016, "learning_rate": 3.294693877551021e-05, "loss": 2.3467, "step": 8363 }, { "epoch": 34.138775510204084, "grad_norm": 17.957401275634766, "learning_rate": 3.2944897959183676e-05, "loss": 2.3685, "step": 8364 }, { "epoch": 34.142857142857146, "grad_norm": 33.36155319213867, "learning_rate": 3.294285714285714e-05, "loss": 1.5647, "step": 8365 }, { "epoch": 34.14693877551021, "grad_norm": 14.093692779541016, "learning_rate": 3.294081632653062e-05, "loss": 2.8047, "step": 8366 }, { "epoch": 34.15102040816326, "grad_norm": 18.26239585876465, "learning_rate": 3.2938775510204084e-05, "loss": 2.5473, "step": 8367 }, { "epoch": 34.155102040816324, "grad_norm": 27.798828125, "learning_rate": 3.293673469387755e-05, "loss": 1.7999, "step": 8368 }, { "epoch": 34.159183673469386, "grad_norm": 16.81810188293457, "learning_rate": 3.293469387755102e-05, "loss": 2.2494, "step": 8369 }, { "epoch": 34.16326530612245, "grad_norm": 28.448184967041016, "learning_rate": 3.2932653061224485e-05, "loss": 1.461, "step": 8370 }, { "epoch": 34.16734693877551, "grad_norm": 18.38902473449707, "learning_rate": 3.293061224489796e-05, "loss": 2.4471, "step": 8371 }, { "epoch": 34.17142857142857, "grad_norm": 28.20792579650879, "learning_rate": 3.292857142857143e-05, "loss": 2.1432, "step": 8372 }, { "epoch": 34.17551020408163, "grad_norm": 38.77720642089844, "learning_rate": 3.29265306122449e-05, "loss": 0.8792, "step": 8373 }, { "epoch": 34.179591836734694, "grad_norm": 27.22085189819336, "learning_rate": 3.292448979591837e-05, "loss": 2.5594, "step": 8374 }, { "epoch": 34.183673469387756, "grad_norm": 9.432269096374512, "learning_rate": 3.2922448979591835e-05, "loss": 3.3289, "step": 8375 }, { "epoch": 34.18775510204082, "grad_norm": 28.65688705444336, "learning_rate": 3.292040816326531e-05, "loss": 1.2785, "step": 8376 }, { "epoch": 34.19183673469388, "grad_norm": 37.824283599853516, "learning_rate": 3.2918367346938776e-05, "loss": 1.771, "step": 8377 }, { "epoch": 34.19591836734694, "grad_norm": 19.595661163330078, "learning_rate": 3.291632653061224e-05, "loss": 2.5699, "step": 8378 }, { "epoch": 34.2, "grad_norm": 12.778748512268066, "learning_rate": 3.291428571428572e-05, "loss": 2.853, "step": 8379 }, { "epoch": 34.204081632653065, "grad_norm": 23.997827529907227, "learning_rate": 3.291224489795919e-05, "loss": 1.6292, "step": 8380 }, { "epoch": 34.20816326530612, "grad_norm": 19.318754196166992, "learning_rate": 3.291020408163266e-05, "loss": 2.2251, "step": 8381 }, { "epoch": 34.21224489795918, "grad_norm": 20.3732852935791, "learning_rate": 3.2908163265306125e-05, "loss": 2.4436, "step": 8382 }, { "epoch": 34.21632653061224, "grad_norm": 18.123687744140625, "learning_rate": 3.290612244897959e-05, "loss": 2.1995, "step": 8383 }, { "epoch": 34.220408163265304, "grad_norm": 12.919646263122559, "learning_rate": 3.290408163265306e-05, "loss": 3.0074, "step": 8384 }, { "epoch": 34.224489795918366, "grad_norm": 17.833459854125977, "learning_rate": 3.290204081632653e-05, "loss": 2.5517, "step": 8385 }, { "epoch": 34.22857142857143, "grad_norm": 16.6523494720459, "learning_rate": 3.29e-05, "loss": 2.4357, "step": 8386 }, { "epoch": 34.23265306122449, "grad_norm": 25.71668243408203, "learning_rate": 3.2897959183673474e-05, "loss": 2.1576, "step": 8387 }, { "epoch": 34.23673469387755, "grad_norm": 14.724448204040527, "learning_rate": 3.289591836734694e-05, "loss": 2.7339, "step": 8388 }, { "epoch": 34.24081632653061, "grad_norm": 24.381940841674805, "learning_rate": 3.289387755102041e-05, "loss": 1.5639, "step": 8389 }, { "epoch": 34.244897959183675, "grad_norm": 24.873180389404297, "learning_rate": 3.289183673469388e-05, "loss": 0.6036, "step": 8390 }, { "epoch": 34.248979591836736, "grad_norm": 31.266517639160156, "learning_rate": 3.288979591836735e-05, "loss": 1.0168, "step": 8391 }, { "epoch": 34.2530612244898, "grad_norm": 36.20226287841797, "learning_rate": 3.2887755102040816e-05, "loss": 0.9022, "step": 8392 }, { "epoch": 34.25714285714286, "grad_norm": 42.14738845825195, "learning_rate": 3.2885714285714284e-05, "loss": 1.8538, "step": 8393 }, { "epoch": 34.26122448979592, "grad_norm": 26.5578670501709, "learning_rate": 3.288367346938776e-05, "loss": 1.7557, "step": 8394 }, { "epoch": 34.265306122448976, "grad_norm": 15.199592590332031, "learning_rate": 3.288163265306123e-05, "loss": 2.8412, "step": 8395 }, { "epoch": 34.26938775510204, "grad_norm": 28.501522064208984, "learning_rate": 3.28795918367347e-05, "loss": 2.2336, "step": 8396 }, { "epoch": 34.2734693877551, "grad_norm": 20.962669372558594, "learning_rate": 3.2877551020408166e-05, "loss": 1.9942, "step": 8397 }, { "epoch": 34.27755102040816, "grad_norm": 17.593891143798828, "learning_rate": 3.287551020408163e-05, "loss": 2.7863, "step": 8398 }, { "epoch": 34.28163265306122, "grad_norm": 17.112918853759766, "learning_rate": 3.28734693877551e-05, "loss": 2.5204, "step": 8399 }, { "epoch": 34.285714285714285, "grad_norm": 23.15278434753418, "learning_rate": 3.2871428571428574e-05, "loss": 2.3885, "step": 8400 }, { "epoch": 34.289795918367346, "grad_norm": 31.00674819946289, "learning_rate": 3.286938775510204e-05, "loss": 2.5855, "step": 8401 }, { "epoch": 34.29387755102041, "grad_norm": 31.551715850830078, "learning_rate": 3.2867346938775515e-05, "loss": 1.9537, "step": 8402 }, { "epoch": 34.29795918367347, "grad_norm": 15.436446189880371, "learning_rate": 3.286530612244898e-05, "loss": 3.2109, "step": 8403 }, { "epoch": 34.30204081632653, "grad_norm": 18.449512481689453, "learning_rate": 3.286326530612245e-05, "loss": 2.6786, "step": 8404 }, { "epoch": 34.30612244897959, "grad_norm": 11.011892318725586, "learning_rate": 3.286122448979592e-05, "loss": 2.9264, "step": 8405 }, { "epoch": 34.310204081632655, "grad_norm": 11.817037582397461, "learning_rate": 3.285918367346939e-05, "loss": 2.8026, "step": 8406 }, { "epoch": 34.31428571428572, "grad_norm": 14.603585243225098, "learning_rate": 3.285714285714286e-05, "loss": 2.9117, "step": 8407 }, { "epoch": 34.31836734693878, "grad_norm": 18.995275497436523, "learning_rate": 3.2855102040816324e-05, "loss": 2.5019, "step": 8408 }, { "epoch": 34.32244897959184, "grad_norm": 21.262954711914062, "learning_rate": 3.28530612244898e-05, "loss": 2.5742, "step": 8409 }, { "epoch": 34.326530612244895, "grad_norm": 12.185182571411133, "learning_rate": 3.285102040816327e-05, "loss": 3.0071, "step": 8410 }, { "epoch": 34.33061224489796, "grad_norm": 19.223451614379883, "learning_rate": 3.284897959183674e-05, "loss": 1.7131, "step": 8411 }, { "epoch": 34.33469387755102, "grad_norm": 30.004600524902344, "learning_rate": 3.2846938775510206e-05, "loss": 1.5482, "step": 8412 }, { "epoch": 34.33877551020408, "grad_norm": 14.484593391418457, "learning_rate": 3.2844897959183673e-05, "loss": 2.7027, "step": 8413 }, { "epoch": 34.34285714285714, "grad_norm": 14.583871841430664, "learning_rate": 3.284285714285714e-05, "loss": 2.6285, "step": 8414 }, { "epoch": 34.3469387755102, "grad_norm": 21.744441986083984, "learning_rate": 3.2840816326530615e-05, "loss": 2.4516, "step": 8415 }, { "epoch": 34.351020408163265, "grad_norm": 15.705300331115723, "learning_rate": 3.283877551020408e-05, "loss": 2.8775, "step": 8416 }, { "epoch": 34.35510204081633, "grad_norm": 39.48397445678711, "learning_rate": 3.2836734693877556e-05, "loss": 1.5, "step": 8417 }, { "epoch": 34.35918367346939, "grad_norm": 14.038717269897461, "learning_rate": 3.283469387755102e-05, "loss": 3.0385, "step": 8418 }, { "epoch": 34.36326530612245, "grad_norm": 32.110355377197266, "learning_rate": 3.283265306122449e-05, "loss": 1.0925, "step": 8419 }, { "epoch": 34.36734693877551, "grad_norm": 10.465141296386719, "learning_rate": 3.2830612244897964e-05, "loss": 3.2597, "step": 8420 }, { "epoch": 34.371428571428574, "grad_norm": 10.537412643432617, "learning_rate": 3.282857142857143e-05, "loss": 2.8376, "step": 8421 }, { "epoch": 34.375510204081635, "grad_norm": 18.110380172729492, "learning_rate": 3.28265306122449e-05, "loss": 2.8242, "step": 8422 }, { "epoch": 34.3795918367347, "grad_norm": 19.931047439575195, "learning_rate": 3.2824489795918365e-05, "loss": 2.4971, "step": 8423 }, { "epoch": 34.38367346938775, "grad_norm": 47.18436813354492, "learning_rate": 3.282244897959184e-05, "loss": 1.1006, "step": 8424 }, { "epoch": 34.38775510204081, "grad_norm": 17.700618743896484, "learning_rate": 3.2820408163265306e-05, "loss": 2.8053, "step": 8425 }, { "epoch": 34.391836734693875, "grad_norm": 28.912904739379883, "learning_rate": 3.281836734693878e-05, "loss": 1.8593, "step": 8426 }, { "epoch": 34.39591836734694, "grad_norm": 21.716049194335938, "learning_rate": 3.281632653061225e-05, "loss": 2.2526, "step": 8427 }, { "epoch": 34.4, "grad_norm": 22.42300796508789, "learning_rate": 3.2814285714285714e-05, "loss": 2.605, "step": 8428 }, { "epoch": 34.40408163265306, "grad_norm": 14.949228286743164, "learning_rate": 3.281224489795919e-05, "loss": 2.8633, "step": 8429 }, { "epoch": 34.40816326530612, "grad_norm": 22.173845291137695, "learning_rate": 3.2810204081632655e-05, "loss": 2.152, "step": 8430 }, { "epoch": 34.412244897959184, "grad_norm": 21.346281051635742, "learning_rate": 3.280816326530612e-05, "loss": 1.6834, "step": 8431 }, { "epoch": 34.416326530612245, "grad_norm": 35.54929733276367, "learning_rate": 3.280612244897959e-05, "loss": 1.5835, "step": 8432 }, { "epoch": 34.42040816326531, "grad_norm": 10.079787254333496, "learning_rate": 3.2804081632653063e-05, "loss": 3.1053, "step": 8433 }, { "epoch": 34.42448979591837, "grad_norm": 32.541221618652344, "learning_rate": 3.280204081632654e-05, "loss": 1.719, "step": 8434 }, { "epoch": 34.42857142857143, "grad_norm": 14.890401840209961, "learning_rate": 3.2800000000000004e-05, "loss": 2.621, "step": 8435 }, { "epoch": 34.43265306122449, "grad_norm": 15.107977867126465, "learning_rate": 3.279795918367347e-05, "loss": 3.0797, "step": 8436 }, { "epoch": 34.436734693877554, "grad_norm": 10.080735206604004, "learning_rate": 3.279591836734694e-05, "loss": 3.2299, "step": 8437 }, { "epoch": 34.440816326530616, "grad_norm": 19.429372787475586, "learning_rate": 3.2793877551020406e-05, "loss": 2.2068, "step": 8438 }, { "epoch": 34.44489795918367, "grad_norm": 22.658554077148438, "learning_rate": 3.279183673469388e-05, "loss": 0.2506, "step": 8439 }, { "epoch": 34.44897959183673, "grad_norm": 19.601350784301758, "learning_rate": 3.278979591836735e-05, "loss": 2.7727, "step": 8440 }, { "epoch": 34.453061224489794, "grad_norm": 12.228846549987793, "learning_rate": 3.278775510204082e-05, "loss": 3.259, "step": 8441 }, { "epoch": 34.457142857142856, "grad_norm": 22.786352157592773, "learning_rate": 3.278571428571429e-05, "loss": 1.7515, "step": 8442 }, { "epoch": 34.46122448979592, "grad_norm": 12.655269622802734, "learning_rate": 3.2783673469387755e-05, "loss": 2.8698, "step": 8443 }, { "epoch": 34.46530612244898, "grad_norm": 13.889034271240234, "learning_rate": 3.278163265306123e-05, "loss": 2.95, "step": 8444 }, { "epoch": 34.46938775510204, "grad_norm": 13.462217330932617, "learning_rate": 3.2779591836734696e-05, "loss": 2.9979, "step": 8445 }, { "epoch": 34.4734693877551, "grad_norm": 22.119709014892578, "learning_rate": 3.277755102040816e-05, "loss": 2.45, "step": 8446 }, { "epoch": 34.477551020408164, "grad_norm": 12.697782516479492, "learning_rate": 3.277551020408163e-05, "loss": 3.1437, "step": 8447 }, { "epoch": 34.481632653061226, "grad_norm": 25.099111557006836, "learning_rate": 3.2773469387755104e-05, "loss": 1.6009, "step": 8448 }, { "epoch": 34.48571428571429, "grad_norm": 11.768712043762207, "learning_rate": 3.277142857142858e-05, "loss": 3.0257, "step": 8449 }, { "epoch": 34.48979591836735, "grad_norm": 14.2904634475708, "learning_rate": 3.2769387755102045e-05, "loss": 2.7061, "step": 8450 }, { "epoch": 34.49387755102041, "grad_norm": 24.906404495239258, "learning_rate": 3.276734693877551e-05, "loss": 2.2763, "step": 8451 }, { "epoch": 34.49795918367347, "grad_norm": 16.358903884887695, "learning_rate": 3.276530612244898e-05, "loss": 2.7562, "step": 8452 }, { "epoch": 34.50204081632653, "grad_norm": 12.99262523651123, "learning_rate": 3.2763265306122447e-05, "loss": 2.8724, "step": 8453 }, { "epoch": 34.50612244897959, "grad_norm": 44.88444137573242, "learning_rate": 3.276122448979592e-05, "loss": 0.7584, "step": 8454 }, { "epoch": 34.51020408163265, "grad_norm": 23.581069946289062, "learning_rate": 3.275918367346939e-05, "loss": 2.3271, "step": 8455 }, { "epoch": 34.51428571428571, "grad_norm": 42.22799301147461, "learning_rate": 3.275714285714286e-05, "loss": 1.5346, "step": 8456 }, { "epoch": 34.518367346938774, "grad_norm": 13.037779808044434, "learning_rate": 3.275510204081633e-05, "loss": 2.9101, "step": 8457 }, { "epoch": 34.522448979591836, "grad_norm": 10.965228080749512, "learning_rate": 3.2753061224489796e-05, "loss": 3.3465, "step": 8458 }, { "epoch": 34.5265306122449, "grad_norm": 15.789831161499023, "learning_rate": 3.275102040816327e-05, "loss": 2.77, "step": 8459 }, { "epoch": 34.53061224489796, "grad_norm": 14.085875511169434, "learning_rate": 3.274897959183674e-05, "loss": 2.9941, "step": 8460 }, { "epoch": 34.53469387755102, "grad_norm": 12.216861724853516, "learning_rate": 3.2746938775510204e-05, "loss": 2.7908, "step": 8461 }, { "epoch": 34.53877551020408, "grad_norm": 19.040515899658203, "learning_rate": 3.274489795918367e-05, "loss": 2.432, "step": 8462 }, { "epoch": 34.542857142857144, "grad_norm": 23.285512924194336, "learning_rate": 3.2742857142857145e-05, "loss": 1.7798, "step": 8463 }, { "epoch": 34.546938775510206, "grad_norm": 13.437283515930176, "learning_rate": 3.274081632653062e-05, "loss": 2.8753, "step": 8464 }, { "epoch": 34.55102040816327, "grad_norm": 18.61046028137207, "learning_rate": 3.2738775510204086e-05, "loss": 2.396, "step": 8465 }, { "epoch": 34.55510204081633, "grad_norm": 19.325300216674805, "learning_rate": 3.273673469387755e-05, "loss": 2.1045, "step": 8466 }, { "epoch": 34.55918367346939, "grad_norm": 10.679936408996582, "learning_rate": 3.273469387755102e-05, "loss": 3.1764, "step": 8467 }, { "epoch": 34.563265306122446, "grad_norm": 29.120357513427734, "learning_rate": 3.273265306122449e-05, "loss": 1.5071, "step": 8468 }, { "epoch": 34.56734693877551, "grad_norm": 16.59218978881836, "learning_rate": 3.273061224489796e-05, "loss": 2.7404, "step": 8469 }, { "epoch": 34.57142857142857, "grad_norm": 16.920289993286133, "learning_rate": 3.272857142857143e-05, "loss": 2.2985, "step": 8470 }, { "epoch": 34.57551020408163, "grad_norm": 29.666011810302734, "learning_rate": 3.27265306122449e-05, "loss": 1.0015, "step": 8471 }, { "epoch": 34.57959183673469, "grad_norm": 31.837337493896484, "learning_rate": 3.272448979591837e-05, "loss": 1.7586, "step": 8472 }, { "epoch": 34.583673469387755, "grad_norm": 10.908772468566895, "learning_rate": 3.272244897959184e-05, "loss": 2.9866, "step": 8473 }, { "epoch": 34.587755102040816, "grad_norm": 12.260642051696777, "learning_rate": 3.272040816326531e-05, "loss": 3.0222, "step": 8474 }, { "epoch": 34.59183673469388, "grad_norm": 17.126131057739258, "learning_rate": 3.271836734693878e-05, "loss": 2.4416, "step": 8475 }, { "epoch": 34.59591836734694, "grad_norm": 20.592559814453125, "learning_rate": 3.2716326530612245e-05, "loss": 2.915, "step": 8476 }, { "epoch": 34.6, "grad_norm": 41.496910095214844, "learning_rate": 3.271428571428571e-05, "loss": 0.9386, "step": 8477 }, { "epoch": 34.60408163265306, "grad_norm": 13.564115524291992, "learning_rate": 3.2712244897959186e-05, "loss": 3.0514, "step": 8478 }, { "epoch": 34.608163265306125, "grad_norm": 17.599111557006836, "learning_rate": 3.271020408163265e-05, "loss": 2.5322, "step": 8479 }, { "epoch": 34.61224489795919, "grad_norm": 24.11789321899414, "learning_rate": 3.270816326530613e-05, "loss": 2.0875, "step": 8480 }, { "epoch": 34.61632653061225, "grad_norm": 13.289281845092773, "learning_rate": 3.2706122448979594e-05, "loss": 2.9764, "step": 8481 }, { "epoch": 34.6204081632653, "grad_norm": 16.141437530517578, "learning_rate": 3.270408163265306e-05, "loss": 2.6205, "step": 8482 }, { "epoch": 34.624489795918365, "grad_norm": 24.83843994140625, "learning_rate": 3.2702040816326535e-05, "loss": 1.5144, "step": 8483 }, { "epoch": 34.628571428571426, "grad_norm": 13.708064079284668, "learning_rate": 3.27e-05, "loss": 2.3153, "step": 8484 }, { "epoch": 34.63265306122449, "grad_norm": 33.96390914916992, "learning_rate": 3.269795918367347e-05, "loss": 1.1636, "step": 8485 }, { "epoch": 34.63673469387755, "grad_norm": 13.363662719726562, "learning_rate": 3.2695918367346936e-05, "loss": 2.7858, "step": 8486 }, { "epoch": 34.64081632653061, "grad_norm": 11.002381324768066, "learning_rate": 3.269387755102041e-05, "loss": 2.7566, "step": 8487 }, { "epoch": 34.64489795918367, "grad_norm": 21.6149845123291, "learning_rate": 3.2691836734693884e-05, "loss": 2.4343, "step": 8488 }, { "epoch": 34.648979591836735, "grad_norm": 25.5428524017334, "learning_rate": 3.268979591836735e-05, "loss": 1.9637, "step": 8489 }, { "epoch": 34.6530612244898, "grad_norm": 33.30556106567383, "learning_rate": 3.268775510204082e-05, "loss": 1.9068, "step": 8490 }, { "epoch": 34.65714285714286, "grad_norm": 40.86261749267578, "learning_rate": 3.2685714285714285e-05, "loss": 1.8993, "step": 8491 }, { "epoch": 34.66122448979592, "grad_norm": 14.271615028381348, "learning_rate": 3.268367346938775e-05, "loss": 2.9553, "step": 8492 }, { "epoch": 34.66530612244898, "grad_norm": 34.51704406738281, "learning_rate": 3.2681632653061226e-05, "loss": 1.7489, "step": 8493 }, { "epoch": 34.66938775510204, "grad_norm": 29.97757339477539, "learning_rate": 3.2679591836734694e-05, "loss": 0.8883, "step": 8494 }, { "epoch": 34.673469387755105, "grad_norm": 8.777730941772461, "learning_rate": 3.267755102040817e-05, "loss": 3.5994, "step": 8495 }, { "epoch": 34.67755102040816, "grad_norm": 27.01949119567871, "learning_rate": 3.2675510204081635e-05, "loss": 1.8657, "step": 8496 }, { "epoch": 34.68163265306122, "grad_norm": 29.36595916748047, "learning_rate": 3.26734693877551e-05, "loss": 2.6256, "step": 8497 }, { "epoch": 34.68571428571428, "grad_norm": 26.4501895904541, "learning_rate": 3.2671428571428576e-05, "loss": 1.9555, "step": 8498 }, { "epoch": 34.689795918367345, "grad_norm": 19.805557250976562, "learning_rate": 3.266938775510204e-05, "loss": 1.9025, "step": 8499 }, { "epoch": 34.69387755102041, "grad_norm": 17.5607852935791, "learning_rate": 3.266734693877551e-05, "loss": 2.2594, "step": 8500 }, { "epoch": 34.69795918367347, "grad_norm": 13.46008586883545, "learning_rate": 3.266530612244898e-05, "loss": 2.8698, "step": 8501 }, { "epoch": 34.70204081632653, "grad_norm": 17.285255432128906, "learning_rate": 3.266326530612245e-05, "loss": 2.8567, "step": 8502 }, { "epoch": 34.70612244897959, "grad_norm": 30.94552993774414, "learning_rate": 3.2661224489795925e-05, "loss": 1.8061, "step": 8503 }, { "epoch": 34.710204081632654, "grad_norm": 20.02382469177246, "learning_rate": 3.265918367346939e-05, "loss": 2.5383, "step": 8504 }, { "epoch": 34.714285714285715, "grad_norm": 13.22634220123291, "learning_rate": 3.265714285714286e-05, "loss": 3.1654, "step": 8505 }, { "epoch": 34.71836734693878, "grad_norm": 22.178668975830078, "learning_rate": 3.2655102040816326e-05, "loss": 2.588, "step": 8506 }, { "epoch": 34.72244897959184, "grad_norm": 43.02183151245117, "learning_rate": 3.265306122448979e-05, "loss": 0.9006, "step": 8507 }, { "epoch": 34.7265306122449, "grad_norm": 9.91458511352539, "learning_rate": 3.265102040816327e-05, "loss": 3.2283, "step": 8508 }, { "epoch": 34.73061224489796, "grad_norm": 20.305410385131836, "learning_rate": 3.2648979591836734e-05, "loss": 2.4507, "step": 8509 }, { "epoch": 34.734693877551024, "grad_norm": 11.714557647705078, "learning_rate": 3.264693877551021e-05, "loss": 2.9473, "step": 8510 }, { "epoch": 34.73877551020408, "grad_norm": 32.17837905883789, "learning_rate": 3.2644897959183675e-05, "loss": 2.0438, "step": 8511 }, { "epoch": 34.74285714285714, "grad_norm": 15.075512886047363, "learning_rate": 3.264285714285714e-05, "loss": 2.8257, "step": 8512 }, { "epoch": 34.7469387755102, "grad_norm": 18.437225341796875, "learning_rate": 3.2640816326530616e-05, "loss": 2.1012, "step": 8513 }, { "epoch": 34.751020408163264, "grad_norm": 14.432195663452148, "learning_rate": 3.2638775510204084e-05, "loss": 2.7971, "step": 8514 }, { "epoch": 34.755102040816325, "grad_norm": 24.366607666015625, "learning_rate": 3.263673469387755e-05, "loss": 2.2708, "step": 8515 }, { "epoch": 34.75918367346939, "grad_norm": 19.47488021850586, "learning_rate": 3.263469387755102e-05, "loss": 2.4799, "step": 8516 }, { "epoch": 34.76326530612245, "grad_norm": 15.02834415435791, "learning_rate": 3.263265306122449e-05, "loss": 2.7615, "step": 8517 }, { "epoch": 34.76734693877551, "grad_norm": 17.291318893432617, "learning_rate": 3.2630612244897966e-05, "loss": 2.7856, "step": 8518 }, { "epoch": 34.77142857142857, "grad_norm": 15.39118766784668, "learning_rate": 3.262857142857143e-05, "loss": 2.7352, "step": 8519 }, { "epoch": 34.775510204081634, "grad_norm": 20.500446319580078, "learning_rate": 3.26265306122449e-05, "loss": 2.5362, "step": 8520 }, { "epoch": 34.779591836734696, "grad_norm": 36.2117919921875, "learning_rate": 3.262448979591837e-05, "loss": 2.5109, "step": 8521 }, { "epoch": 34.78367346938776, "grad_norm": 43.77335739135742, "learning_rate": 3.262244897959184e-05, "loss": 1.5933, "step": 8522 }, { "epoch": 34.78775510204082, "grad_norm": 30.858287811279297, "learning_rate": 3.262040816326531e-05, "loss": 2.1136, "step": 8523 }, { "epoch": 34.79183673469388, "grad_norm": 13.390913009643555, "learning_rate": 3.2618367346938775e-05, "loss": 2.9331, "step": 8524 }, { "epoch": 34.795918367346935, "grad_norm": 24.391565322875977, "learning_rate": 3.261632653061224e-05, "loss": 2.0327, "step": 8525 }, { "epoch": 34.8, "grad_norm": 8.530810356140137, "learning_rate": 3.2614285714285716e-05, "loss": 3.5846, "step": 8526 }, { "epoch": 34.80408163265306, "grad_norm": 23.029829025268555, "learning_rate": 3.261224489795919e-05, "loss": 2.1896, "step": 8527 }, { "epoch": 34.80816326530612, "grad_norm": 13.721616744995117, "learning_rate": 3.261020408163266e-05, "loss": 2.6864, "step": 8528 }, { "epoch": 34.81224489795918, "grad_norm": 18.293855667114258, "learning_rate": 3.2608163265306124e-05, "loss": 2.5638, "step": 8529 }, { "epoch": 34.816326530612244, "grad_norm": 29.01122283935547, "learning_rate": 3.260612244897959e-05, "loss": 1.8908, "step": 8530 }, { "epoch": 34.820408163265306, "grad_norm": 17.320003509521484, "learning_rate": 3.260408163265306e-05, "loss": 2.1931, "step": 8531 }, { "epoch": 34.82448979591837, "grad_norm": 33.203208923339844, "learning_rate": 3.260204081632653e-05, "loss": 1.1875, "step": 8532 }, { "epoch": 34.82857142857143, "grad_norm": 23.397092819213867, "learning_rate": 3.26e-05, "loss": 1.5991, "step": 8533 }, { "epoch": 34.83265306122449, "grad_norm": 10.075894355773926, "learning_rate": 3.2597959183673473e-05, "loss": 3.2083, "step": 8534 }, { "epoch": 34.83673469387755, "grad_norm": 26.716394424438477, "learning_rate": 3.259591836734694e-05, "loss": 2.2089, "step": 8535 }, { "epoch": 34.840816326530614, "grad_norm": 7.96040678024292, "learning_rate": 3.259387755102041e-05, "loss": 3.2742, "step": 8536 }, { "epoch": 34.844897959183676, "grad_norm": 20.276819229125977, "learning_rate": 3.259183673469388e-05, "loss": 2.727, "step": 8537 }, { "epoch": 34.84897959183674, "grad_norm": 29.785871505737305, "learning_rate": 3.258979591836735e-05, "loss": 1.9461, "step": 8538 }, { "epoch": 34.85306122448979, "grad_norm": 22.379981994628906, "learning_rate": 3.2587755102040816e-05, "loss": 2.1383, "step": 8539 }, { "epoch": 34.857142857142854, "grad_norm": 13.184921264648438, "learning_rate": 3.258571428571428e-05, "loss": 2.9788, "step": 8540 }, { "epoch": 34.861224489795916, "grad_norm": 19.374713897705078, "learning_rate": 3.258367346938776e-05, "loss": 2.2894, "step": 8541 }, { "epoch": 34.86530612244898, "grad_norm": 17.2032413482666, "learning_rate": 3.258163265306123e-05, "loss": 2.7599, "step": 8542 }, { "epoch": 34.86938775510204, "grad_norm": 7.180168628692627, "learning_rate": 3.25795918367347e-05, "loss": 3.5989, "step": 8543 }, { "epoch": 34.8734693877551, "grad_norm": 34.039215087890625, "learning_rate": 3.2577551020408165e-05, "loss": 1.7897, "step": 8544 }, { "epoch": 34.87755102040816, "grad_norm": 24.788803100585938, "learning_rate": 3.257551020408163e-05, "loss": 1.8738, "step": 8545 }, { "epoch": 34.881632653061224, "grad_norm": 12.14743423461914, "learning_rate": 3.25734693877551e-05, "loss": 3.282, "step": 8546 }, { "epoch": 34.885714285714286, "grad_norm": 26.713993072509766, "learning_rate": 3.257142857142857e-05, "loss": 1.3206, "step": 8547 }, { "epoch": 34.88979591836735, "grad_norm": 21.879514694213867, "learning_rate": 3.256938775510204e-05, "loss": 2.0781, "step": 8548 }, { "epoch": 34.89387755102041, "grad_norm": 19.102983474731445, "learning_rate": 3.2567346938775514e-05, "loss": 2.2487, "step": 8549 }, { "epoch": 34.89795918367347, "grad_norm": 28.17045021057129, "learning_rate": 3.256530612244898e-05, "loss": 1.7216, "step": 8550 }, { "epoch": 34.90204081632653, "grad_norm": 10.452619552612305, "learning_rate": 3.256326530612245e-05, "loss": 3.3992, "step": 8551 }, { "epoch": 34.906122448979595, "grad_norm": 15.953866958618164, "learning_rate": 3.256122448979592e-05, "loss": 2.6069, "step": 8552 }, { "epoch": 34.910204081632656, "grad_norm": 17.264312744140625, "learning_rate": 3.255918367346939e-05, "loss": 2.4356, "step": 8553 }, { "epoch": 34.91428571428571, "grad_norm": 17.00946617126465, "learning_rate": 3.255714285714286e-05, "loss": 2.6826, "step": 8554 }, { "epoch": 34.91836734693877, "grad_norm": 30.94854736328125, "learning_rate": 3.2555102040816324e-05, "loss": 1.6845, "step": 8555 }, { "epoch": 34.922448979591834, "grad_norm": 19.050643920898438, "learning_rate": 3.25530612244898e-05, "loss": 2.9698, "step": 8556 }, { "epoch": 34.926530612244896, "grad_norm": 22.490798950195312, "learning_rate": 3.255102040816327e-05, "loss": 2.3362, "step": 8557 }, { "epoch": 34.93061224489796, "grad_norm": 15.396220207214355, "learning_rate": 3.254897959183674e-05, "loss": 2.4647, "step": 8558 }, { "epoch": 34.93469387755102, "grad_norm": 16.589500427246094, "learning_rate": 3.2546938775510206e-05, "loss": 2.7791, "step": 8559 }, { "epoch": 34.93877551020408, "grad_norm": 16.305574417114258, "learning_rate": 3.254489795918367e-05, "loss": 2.8583, "step": 8560 }, { "epoch": 34.94285714285714, "grad_norm": 11.255495071411133, "learning_rate": 3.254285714285715e-05, "loss": 2.8752, "step": 8561 }, { "epoch": 34.946938775510205, "grad_norm": 10.388415336608887, "learning_rate": 3.2540816326530614e-05, "loss": 3.0723, "step": 8562 }, { "epoch": 34.95102040816327, "grad_norm": 17.031095504760742, "learning_rate": 3.253877551020408e-05, "loss": 2.1754, "step": 8563 }, { "epoch": 34.95510204081633, "grad_norm": 23.477338790893555, "learning_rate": 3.2536734693877555e-05, "loss": 2.0489, "step": 8564 }, { "epoch": 34.95918367346939, "grad_norm": 20.041723251342773, "learning_rate": 3.253469387755102e-05, "loss": 2.2753, "step": 8565 }, { "epoch": 34.96326530612245, "grad_norm": 25.82427215576172, "learning_rate": 3.2532653061224496e-05, "loss": 1.58, "step": 8566 }, { "epoch": 34.96734693877551, "grad_norm": 8.986954689025879, "learning_rate": 3.253061224489796e-05, "loss": 3.4423, "step": 8567 }, { "epoch": 34.97142857142857, "grad_norm": 11.690427780151367, "learning_rate": 3.252857142857143e-05, "loss": 3.1734, "step": 8568 }, { "epoch": 34.97551020408163, "grad_norm": 17.790390014648438, "learning_rate": 3.25265306122449e-05, "loss": 2.7043, "step": 8569 }, { "epoch": 34.97959183673469, "grad_norm": 33.49898910522461, "learning_rate": 3.2524489795918365e-05, "loss": 1.0249, "step": 8570 }, { "epoch": 34.98367346938775, "grad_norm": 17.8455753326416, "learning_rate": 3.252244897959184e-05, "loss": 2.7559, "step": 8571 }, { "epoch": 34.987755102040815, "grad_norm": 20.535688400268555, "learning_rate": 3.252040816326531e-05, "loss": 2.2884, "step": 8572 }, { "epoch": 34.99183673469388, "grad_norm": 21.74603271484375, "learning_rate": 3.251836734693878e-05, "loss": 2.5529, "step": 8573 }, { "epoch": 34.99591836734694, "grad_norm": 12.658342361450195, "learning_rate": 3.2516326530612247e-05, "loss": 2.8923, "step": 8574 }, { "epoch": 35.0, "grad_norm": 16.80106544494629, "learning_rate": 3.2514285714285714e-05, "loss": 2.4258, "step": 8575 }, { "epoch": 35.00408163265306, "grad_norm": 23.701549530029297, "learning_rate": 3.251224489795919e-05, "loss": 0.2045, "step": 8576 }, { "epoch": 35.00816326530612, "grad_norm": 15.207380294799805, "learning_rate": 3.2510204081632655e-05, "loss": 2.3262, "step": 8577 }, { "epoch": 35.012244897959185, "grad_norm": 23.756567001342773, "learning_rate": 3.250816326530612e-05, "loss": 2.3754, "step": 8578 }, { "epoch": 35.01632653061225, "grad_norm": 14.663171768188477, "learning_rate": 3.250612244897959e-05, "loss": 2.8314, "step": 8579 }, { "epoch": 35.02040816326531, "grad_norm": 11.863405227661133, "learning_rate": 3.250408163265306e-05, "loss": 2.8986, "step": 8580 }, { "epoch": 35.02448979591837, "grad_norm": 35.25958251953125, "learning_rate": 3.250204081632654e-05, "loss": 1.4271, "step": 8581 }, { "epoch": 35.02857142857143, "grad_norm": 14.3568696975708, "learning_rate": 3.2500000000000004e-05, "loss": 2.658, "step": 8582 }, { "epoch": 35.03265306122449, "grad_norm": 16.71297836303711, "learning_rate": 3.249795918367347e-05, "loss": 2.6239, "step": 8583 }, { "epoch": 35.03673469387755, "grad_norm": 19.874086380004883, "learning_rate": 3.249591836734694e-05, "loss": 2.0427, "step": 8584 }, { "epoch": 35.04081632653061, "grad_norm": 20.77169418334961, "learning_rate": 3.2493877551020405e-05, "loss": 2.3987, "step": 8585 }, { "epoch": 35.04489795918367, "grad_norm": 22.530363082885742, "learning_rate": 3.249183673469388e-05, "loss": 2.0915, "step": 8586 }, { "epoch": 35.04897959183673, "grad_norm": 18.050371170043945, "learning_rate": 3.2489795918367346e-05, "loss": 2.559, "step": 8587 }, { "epoch": 35.053061224489795, "grad_norm": 20.420408248901367, "learning_rate": 3.248775510204082e-05, "loss": 2.3634, "step": 8588 }, { "epoch": 35.05714285714286, "grad_norm": 26.114599227905273, "learning_rate": 3.248571428571429e-05, "loss": 2.3644, "step": 8589 }, { "epoch": 35.06122448979592, "grad_norm": 18.65868377685547, "learning_rate": 3.2483673469387754e-05, "loss": 1.9362, "step": 8590 }, { "epoch": 35.06530612244898, "grad_norm": 30.389001846313477, "learning_rate": 3.248163265306123e-05, "loss": 1.5843, "step": 8591 }, { "epoch": 35.06938775510204, "grad_norm": 24.302518844604492, "learning_rate": 3.2479591836734696e-05, "loss": 1.704, "step": 8592 }, { "epoch": 35.073469387755104, "grad_norm": 29.401412963867188, "learning_rate": 3.247755102040816e-05, "loss": 2.0099, "step": 8593 }, { "epoch": 35.077551020408166, "grad_norm": 25.257234573364258, "learning_rate": 3.247551020408163e-05, "loss": 2.1619, "step": 8594 }, { "epoch": 35.08163265306123, "grad_norm": 19.792755126953125, "learning_rate": 3.2473469387755104e-05, "loss": 2.405, "step": 8595 }, { "epoch": 35.08571428571429, "grad_norm": 15.955522537231445, "learning_rate": 3.247142857142858e-05, "loss": 2.7526, "step": 8596 }, { "epoch": 35.089795918367344, "grad_norm": 30.827409744262695, "learning_rate": 3.2469387755102045e-05, "loss": 1.5817, "step": 8597 }, { "epoch": 35.093877551020405, "grad_norm": 11.032672882080078, "learning_rate": 3.246734693877551e-05, "loss": 2.6571, "step": 8598 }, { "epoch": 35.09795918367347, "grad_norm": 26.915508270263672, "learning_rate": 3.246530612244898e-05, "loss": 1.7266, "step": 8599 }, { "epoch": 35.10204081632653, "grad_norm": 33.873600006103516, "learning_rate": 3.2463265306122446e-05, "loss": 1.8548, "step": 8600 }, { "epoch": 35.10612244897959, "grad_norm": 30.284713745117188, "learning_rate": 3.246122448979592e-05, "loss": 1.6292, "step": 8601 }, { "epoch": 35.11020408163265, "grad_norm": 21.321109771728516, "learning_rate": 3.245918367346939e-05, "loss": 2.1578, "step": 8602 }, { "epoch": 35.114285714285714, "grad_norm": 21.701881408691406, "learning_rate": 3.245714285714286e-05, "loss": 2.4629, "step": 8603 }, { "epoch": 35.118367346938776, "grad_norm": 27.154102325439453, "learning_rate": 3.245510204081633e-05, "loss": 2.4329, "step": 8604 }, { "epoch": 35.12244897959184, "grad_norm": 16.513296127319336, "learning_rate": 3.24530612244898e-05, "loss": 2.6823, "step": 8605 }, { "epoch": 35.1265306122449, "grad_norm": 33.06282043457031, "learning_rate": 3.245102040816327e-05, "loss": 1.7872, "step": 8606 }, { "epoch": 35.13061224489796, "grad_norm": 36.78363037109375, "learning_rate": 3.2448979591836736e-05, "loss": 1.8229, "step": 8607 }, { "epoch": 35.13469387755102, "grad_norm": 15.001163482666016, "learning_rate": 3.2446938775510203e-05, "loss": 2.6923, "step": 8608 }, { "epoch": 35.138775510204084, "grad_norm": 36.0374641418457, "learning_rate": 3.244489795918367e-05, "loss": 1.9399, "step": 8609 }, { "epoch": 35.142857142857146, "grad_norm": 13.930551528930664, "learning_rate": 3.2442857142857144e-05, "loss": 2.8138, "step": 8610 }, { "epoch": 35.14693877551021, "grad_norm": 22.251380920410156, "learning_rate": 3.244081632653062e-05, "loss": 2.5514, "step": 8611 }, { "epoch": 35.15102040816326, "grad_norm": 23.875974655151367, "learning_rate": 3.2438775510204085e-05, "loss": 2.4517, "step": 8612 }, { "epoch": 35.155102040816324, "grad_norm": 31.198230743408203, "learning_rate": 3.243673469387755e-05, "loss": 2.5686, "step": 8613 }, { "epoch": 35.159183673469386, "grad_norm": 22.165727615356445, "learning_rate": 3.243469387755102e-05, "loss": 1.4323, "step": 8614 }, { "epoch": 35.16326530612245, "grad_norm": 29.064741134643555, "learning_rate": 3.2432653061224494e-05, "loss": 1.5057, "step": 8615 }, { "epoch": 35.16734693877551, "grad_norm": 19.343624114990234, "learning_rate": 3.243061224489796e-05, "loss": 2.0244, "step": 8616 }, { "epoch": 35.17142857142857, "grad_norm": 20.710779190063477, "learning_rate": 3.242857142857143e-05, "loss": 2.298, "step": 8617 }, { "epoch": 35.17551020408163, "grad_norm": 15.620438575744629, "learning_rate": 3.24265306122449e-05, "loss": 2.8625, "step": 8618 }, { "epoch": 35.179591836734694, "grad_norm": 24.42255401611328, "learning_rate": 3.242448979591837e-05, "loss": 2.8639, "step": 8619 }, { "epoch": 35.183673469387756, "grad_norm": 17.96176528930664, "learning_rate": 3.242244897959184e-05, "loss": 2.2644, "step": 8620 }, { "epoch": 35.18775510204082, "grad_norm": 13.772622108459473, "learning_rate": 3.242040816326531e-05, "loss": 2.9108, "step": 8621 }, { "epoch": 35.19183673469388, "grad_norm": 18.49674415588379, "learning_rate": 3.241836734693878e-05, "loss": 2.714, "step": 8622 }, { "epoch": 35.19591836734694, "grad_norm": 18.544036865234375, "learning_rate": 3.2416326530612244e-05, "loss": 2.1873, "step": 8623 }, { "epoch": 35.2, "grad_norm": 14.317252159118652, "learning_rate": 3.241428571428571e-05, "loss": 2.8664, "step": 8624 }, { "epoch": 35.204081632653065, "grad_norm": 19.020675659179688, "learning_rate": 3.2412244897959185e-05, "loss": 2.3993, "step": 8625 }, { "epoch": 35.20816326530612, "grad_norm": 13.639044761657715, "learning_rate": 3.241020408163266e-05, "loss": 2.5966, "step": 8626 }, { "epoch": 35.21224489795918, "grad_norm": 12.24155044555664, "learning_rate": 3.2408163265306126e-05, "loss": 2.778, "step": 8627 }, { "epoch": 35.21632653061224, "grad_norm": 8.440421104431152, "learning_rate": 3.240612244897959e-05, "loss": 3.5027, "step": 8628 }, { "epoch": 35.220408163265304, "grad_norm": 10.922039031982422, "learning_rate": 3.240408163265306e-05, "loss": 3.2635, "step": 8629 }, { "epoch": 35.224489795918366, "grad_norm": 18.986684799194336, "learning_rate": 3.2402040816326534e-05, "loss": 2.7414, "step": 8630 }, { "epoch": 35.22857142857143, "grad_norm": 24.630680084228516, "learning_rate": 3.24e-05, "loss": 2.0054, "step": 8631 }, { "epoch": 35.23265306122449, "grad_norm": 10.628144264221191, "learning_rate": 3.239795918367347e-05, "loss": 3.1342, "step": 8632 }, { "epoch": 35.23673469387755, "grad_norm": 13.559548377990723, "learning_rate": 3.2395918367346936e-05, "loss": 2.7787, "step": 8633 }, { "epoch": 35.24081632653061, "grad_norm": 13.592714309692383, "learning_rate": 3.239387755102041e-05, "loss": 3.1155, "step": 8634 }, { "epoch": 35.244897959183675, "grad_norm": 18.9766902923584, "learning_rate": 3.2391836734693884e-05, "loss": 2.5489, "step": 8635 }, { "epoch": 35.248979591836736, "grad_norm": 17.206724166870117, "learning_rate": 3.238979591836735e-05, "loss": 2.494, "step": 8636 }, { "epoch": 35.2530612244898, "grad_norm": 26.05010986328125, "learning_rate": 3.238775510204082e-05, "loss": 1.8379, "step": 8637 }, { "epoch": 35.25714285714286, "grad_norm": 20.22762107849121, "learning_rate": 3.2385714285714285e-05, "loss": 2.7606, "step": 8638 }, { "epoch": 35.26122448979592, "grad_norm": 9.592907905578613, "learning_rate": 3.238367346938775e-05, "loss": 3.3496, "step": 8639 }, { "epoch": 35.265306122448976, "grad_norm": 16.327186584472656, "learning_rate": 3.2381632653061226e-05, "loss": 2.5643, "step": 8640 }, { "epoch": 35.26938775510204, "grad_norm": 13.85539436340332, "learning_rate": 3.237959183673469e-05, "loss": 2.8211, "step": 8641 }, { "epoch": 35.2734693877551, "grad_norm": 19.68981170654297, "learning_rate": 3.237755102040817e-05, "loss": 2.5182, "step": 8642 }, { "epoch": 35.27755102040816, "grad_norm": 11.843658447265625, "learning_rate": 3.2375510204081634e-05, "loss": 2.8314, "step": 8643 }, { "epoch": 35.28163265306122, "grad_norm": 12.997007369995117, "learning_rate": 3.23734693877551e-05, "loss": 2.7446, "step": 8644 }, { "epoch": 35.285714285714285, "grad_norm": 24.21871566772461, "learning_rate": 3.2371428571428575e-05, "loss": 2.4861, "step": 8645 }, { "epoch": 35.289795918367346, "grad_norm": 11.573081970214844, "learning_rate": 3.236938775510204e-05, "loss": 3.1978, "step": 8646 }, { "epoch": 35.29387755102041, "grad_norm": 28.251445770263672, "learning_rate": 3.236734693877551e-05, "loss": 2.3113, "step": 8647 }, { "epoch": 35.29795918367347, "grad_norm": 17.075084686279297, "learning_rate": 3.2365306122448977e-05, "loss": 2.6609, "step": 8648 }, { "epoch": 35.30204081632653, "grad_norm": 16.28622055053711, "learning_rate": 3.236326530612245e-05, "loss": 2.5773, "step": 8649 }, { "epoch": 35.30612244897959, "grad_norm": 16.959497451782227, "learning_rate": 3.2361224489795924e-05, "loss": 2.6175, "step": 8650 }, { "epoch": 35.310204081632655, "grad_norm": 10.421104431152344, "learning_rate": 3.235918367346939e-05, "loss": 3.1313, "step": 8651 }, { "epoch": 35.31428571428572, "grad_norm": 20.87819480895996, "learning_rate": 3.235714285714286e-05, "loss": 2.4554, "step": 8652 }, { "epoch": 35.31836734693878, "grad_norm": 36.694908142089844, "learning_rate": 3.2355102040816326e-05, "loss": 0.8119, "step": 8653 }, { "epoch": 35.32244897959184, "grad_norm": 14.102578163146973, "learning_rate": 3.23530612244898e-05, "loss": 3.0876, "step": 8654 }, { "epoch": 35.326530612244895, "grad_norm": 9.032535552978516, "learning_rate": 3.235102040816327e-05, "loss": 3.5209, "step": 8655 }, { "epoch": 35.33061224489796, "grad_norm": 23.47716522216797, "learning_rate": 3.2348979591836734e-05, "loss": 2.2329, "step": 8656 }, { "epoch": 35.33469387755102, "grad_norm": 13.813155174255371, "learning_rate": 3.234693877551021e-05, "loss": 2.9104, "step": 8657 }, { "epoch": 35.33877551020408, "grad_norm": 22.336305618286133, "learning_rate": 3.2344897959183675e-05, "loss": 2.104, "step": 8658 }, { "epoch": 35.34285714285714, "grad_norm": 15.63586139678955, "learning_rate": 3.234285714285715e-05, "loss": 2.9111, "step": 8659 }, { "epoch": 35.3469387755102, "grad_norm": 19.120819091796875, "learning_rate": 3.2340816326530616e-05, "loss": 2.6178, "step": 8660 }, { "epoch": 35.351020408163265, "grad_norm": 20.807205200195312, "learning_rate": 3.233877551020408e-05, "loss": 2.4251, "step": 8661 }, { "epoch": 35.35510204081633, "grad_norm": 21.168298721313477, "learning_rate": 3.233673469387755e-05, "loss": 2.5577, "step": 8662 }, { "epoch": 35.35918367346939, "grad_norm": 11.710079193115234, "learning_rate": 3.233469387755102e-05, "loss": 3.3211, "step": 8663 }, { "epoch": 35.36326530612245, "grad_norm": 25.528133392333984, "learning_rate": 3.233265306122449e-05, "loss": 2.046, "step": 8664 }, { "epoch": 35.36734693877551, "grad_norm": 8.086487770080566, "learning_rate": 3.2330612244897965e-05, "loss": 3.2126, "step": 8665 }, { "epoch": 35.371428571428574, "grad_norm": 23.25257682800293, "learning_rate": 3.232857142857143e-05, "loss": 2.0608, "step": 8666 }, { "epoch": 35.375510204081635, "grad_norm": 18.810598373413086, "learning_rate": 3.23265306122449e-05, "loss": 2.455, "step": 8667 }, { "epoch": 35.3795918367347, "grad_norm": 29.973100662231445, "learning_rate": 3.2324489795918366e-05, "loss": 1.5491, "step": 8668 }, { "epoch": 35.38367346938775, "grad_norm": 23.645153045654297, "learning_rate": 3.232244897959184e-05, "loss": 2.0481, "step": 8669 }, { "epoch": 35.38775510204081, "grad_norm": 30.015920639038086, "learning_rate": 3.232040816326531e-05, "loss": 1.0405, "step": 8670 }, { "epoch": 35.391836734693875, "grad_norm": 14.541857719421387, "learning_rate": 3.2318367346938775e-05, "loss": 3.0322, "step": 8671 }, { "epoch": 35.39591836734694, "grad_norm": 35.61042022705078, "learning_rate": 3.231632653061225e-05, "loss": 0.6285, "step": 8672 }, { "epoch": 35.4, "grad_norm": 20.817588806152344, "learning_rate": 3.2314285714285716e-05, "loss": 2.7189, "step": 8673 }, { "epoch": 35.40408163265306, "grad_norm": 30.507570266723633, "learning_rate": 3.231224489795919e-05, "loss": 2.3839, "step": 8674 }, { "epoch": 35.40816326530612, "grad_norm": 21.515199661254883, "learning_rate": 3.231020408163266e-05, "loss": 2.8278, "step": 8675 }, { "epoch": 35.412244897959184, "grad_norm": 35.865684509277344, "learning_rate": 3.2308163265306124e-05, "loss": 1.9573, "step": 8676 }, { "epoch": 35.416326530612245, "grad_norm": 12.92955207824707, "learning_rate": 3.230612244897959e-05, "loss": 3.1015, "step": 8677 }, { "epoch": 35.42040816326531, "grad_norm": 13.79304313659668, "learning_rate": 3.230408163265306e-05, "loss": 2.5557, "step": 8678 }, { "epoch": 35.42448979591837, "grad_norm": 18.899038314819336, "learning_rate": 3.230204081632653e-05, "loss": 2.5501, "step": 8679 }, { "epoch": 35.42857142857143, "grad_norm": 18.444061279296875, "learning_rate": 3.2300000000000006e-05, "loss": 2.5827, "step": 8680 }, { "epoch": 35.43265306122449, "grad_norm": 24.62149429321289, "learning_rate": 3.229795918367347e-05, "loss": 1.732, "step": 8681 }, { "epoch": 35.436734693877554, "grad_norm": 27.44190216064453, "learning_rate": 3.229591836734694e-05, "loss": 0.8969, "step": 8682 }, { "epoch": 35.440816326530616, "grad_norm": 25.502042770385742, "learning_rate": 3.229387755102041e-05, "loss": 1.9131, "step": 8683 }, { "epoch": 35.44489795918367, "grad_norm": 27.9564151763916, "learning_rate": 3.229183673469388e-05, "loss": 2.1229, "step": 8684 }, { "epoch": 35.44897959183673, "grad_norm": 16.8466739654541, "learning_rate": 3.228979591836735e-05, "loss": 2.3062, "step": 8685 }, { "epoch": 35.453061224489794, "grad_norm": 10.172727584838867, "learning_rate": 3.2287755102040815e-05, "loss": 3.1771, "step": 8686 }, { "epoch": 35.457142857142856, "grad_norm": 24.55603790283203, "learning_rate": 3.228571428571428e-05, "loss": 2.5076, "step": 8687 }, { "epoch": 35.46122448979592, "grad_norm": 40.504825592041016, "learning_rate": 3.2283673469387756e-05, "loss": 1.5983, "step": 8688 }, { "epoch": 35.46530612244898, "grad_norm": 14.632794380187988, "learning_rate": 3.228163265306123e-05, "loss": 2.9827, "step": 8689 }, { "epoch": 35.46938775510204, "grad_norm": 22.747339248657227, "learning_rate": 3.22795918367347e-05, "loss": 2.2815, "step": 8690 }, { "epoch": 35.4734693877551, "grad_norm": 11.337689399719238, "learning_rate": 3.2277551020408165e-05, "loss": 2.9821, "step": 8691 }, { "epoch": 35.477551020408164, "grad_norm": 25.116666793823242, "learning_rate": 3.227551020408163e-05, "loss": 2.3257, "step": 8692 }, { "epoch": 35.481632653061226, "grad_norm": 29.87096405029297, "learning_rate": 3.22734693877551e-05, "loss": 1.7554, "step": 8693 }, { "epoch": 35.48571428571429, "grad_norm": 16.777658462524414, "learning_rate": 3.227142857142857e-05, "loss": 2.7596, "step": 8694 }, { "epoch": 35.48979591836735, "grad_norm": 16.955629348754883, "learning_rate": 3.226938775510204e-05, "loss": 2.7567, "step": 8695 }, { "epoch": 35.49387755102041, "grad_norm": 39.67006301879883, "learning_rate": 3.2267346938775514e-05, "loss": 1.0698, "step": 8696 }, { "epoch": 35.49795918367347, "grad_norm": 12.042694091796875, "learning_rate": 3.226530612244898e-05, "loss": 2.9401, "step": 8697 }, { "epoch": 35.50204081632653, "grad_norm": 18.377140045166016, "learning_rate": 3.2263265306122455e-05, "loss": 2.3459, "step": 8698 }, { "epoch": 35.50612244897959, "grad_norm": 9.748637199401855, "learning_rate": 3.226122448979592e-05, "loss": 3.1975, "step": 8699 }, { "epoch": 35.51020408163265, "grad_norm": 17.40862464904785, "learning_rate": 3.225918367346939e-05, "loss": 2.5085, "step": 8700 }, { "epoch": 35.51428571428571, "grad_norm": 31.250051498413086, "learning_rate": 3.2257142857142856e-05, "loss": 1.9512, "step": 8701 }, { "epoch": 35.518367346938774, "grad_norm": 28.797992706298828, "learning_rate": 3.225510204081632e-05, "loss": 0.8592, "step": 8702 }, { "epoch": 35.522448979591836, "grad_norm": 17.312467575073242, "learning_rate": 3.22530612244898e-05, "loss": 2.8044, "step": 8703 }, { "epoch": 35.5265306122449, "grad_norm": 34.23091125488281, "learning_rate": 3.225102040816327e-05, "loss": 0.8883, "step": 8704 }, { "epoch": 35.53061224489796, "grad_norm": 14.438969612121582, "learning_rate": 3.224897959183674e-05, "loss": 2.876, "step": 8705 }, { "epoch": 35.53469387755102, "grad_norm": 20.762977600097656, "learning_rate": 3.2246938775510205e-05, "loss": 2.4212, "step": 8706 }, { "epoch": 35.53877551020408, "grad_norm": 13.533738136291504, "learning_rate": 3.224489795918367e-05, "loss": 2.8976, "step": 8707 }, { "epoch": 35.542857142857144, "grad_norm": 27.14653205871582, "learning_rate": 3.2242857142857146e-05, "loss": 1.4423, "step": 8708 }, { "epoch": 35.546938775510206, "grad_norm": 13.393363952636719, "learning_rate": 3.2240816326530613e-05, "loss": 3.2732, "step": 8709 }, { "epoch": 35.55102040816327, "grad_norm": 18.229265213012695, "learning_rate": 3.223877551020408e-05, "loss": 2.8176, "step": 8710 }, { "epoch": 35.55510204081633, "grad_norm": 18.250843048095703, "learning_rate": 3.2236734693877554e-05, "loss": 2.7392, "step": 8711 }, { "epoch": 35.55918367346939, "grad_norm": 21.904394149780273, "learning_rate": 3.223469387755102e-05, "loss": 2.1854, "step": 8712 }, { "epoch": 35.563265306122446, "grad_norm": 25.5242862701416, "learning_rate": 3.2232653061224496e-05, "loss": 2.0503, "step": 8713 }, { "epoch": 35.56734693877551, "grad_norm": 9.643579483032227, "learning_rate": 3.223061224489796e-05, "loss": 3.0786, "step": 8714 }, { "epoch": 35.57142857142857, "grad_norm": 31.830425262451172, "learning_rate": 3.222857142857143e-05, "loss": 1.4619, "step": 8715 }, { "epoch": 35.57551020408163, "grad_norm": 13.436302185058594, "learning_rate": 3.22265306122449e-05, "loss": 2.6583, "step": 8716 }, { "epoch": 35.57959183673469, "grad_norm": 37.36548614501953, "learning_rate": 3.2224489795918364e-05, "loss": 1.7226, "step": 8717 }, { "epoch": 35.583673469387755, "grad_norm": 14.697395324707031, "learning_rate": 3.222244897959184e-05, "loss": 2.9198, "step": 8718 }, { "epoch": 35.587755102040816, "grad_norm": 33.52898406982422, "learning_rate": 3.222040816326531e-05, "loss": 1.0413, "step": 8719 }, { "epoch": 35.59183673469388, "grad_norm": 26.295900344848633, "learning_rate": 3.221836734693878e-05, "loss": 1.7705, "step": 8720 }, { "epoch": 35.59591836734694, "grad_norm": 22.8314208984375, "learning_rate": 3.2216326530612246e-05, "loss": 2.2931, "step": 8721 }, { "epoch": 35.6, "grad_norm": 14.218239784240723, "learning_rate": 3.221428571428571e-05, "loss": 2.8989, "step": 8722 }, { "epoch": 35.60408163265306, "grad_norm": 21.0687198638916, "learning_rate": 3.221224489795919e-05, "loss": 2.5237, "step": 8723 }, { "epoch": 35.608163265306125, "grad_norm": 21.55141830444336, "learning_rate": 3.2210204081632654e-05, "loss": 2.8132, "step": 8724 }, { "epoch": 35.61224489795919, "grad_norm": 12.684462547302246, "learning_rate": 3.220816326530612e-05, "loss": 2.7885, "step": 8725 }, { "epoch": 35.61632653061225, "grad_norm": 22.396095275878906, "learning_rate": 3.2206122448979595e-05, "loss": 2.2189, "step": 8726 }, { "epoch": 35.6204081632653, "grad_norm": 33.11471939086914, "learning_rate": 3.220408163265306e-05, "loss": 1.7622, "step": 8727 }, { "epoch": 35.624489795918365, "grad_norm": 24.493961334228516, "learning_rate": 3.2202040816326536e-05, "loss": 1.6901, "step": 8728 }, { "epoch": 35.628571428571426, "grad_norm": 40.64608383178711, "learning_rate": 3.2200000000000003e-05, "loss": 1.4712, "step": 8729 }, { "epoch": 35.63265306122449, "grad_norm": 28.847198486328125, "learning_rate": 3.219795918367347e-05, "loss": 1.4833, "step": 8730 }, { "epoch": 35.63673469387755, "grad_norm": 27.46587371826172, "learning_rate": 3.219591836734694e-05, "loss": 1.6744, "step": 8731 }, { "epoch": 35.64081632653061, "grad_norm": 37.34915542602539, "learning_rate": 3.2193877551020405e-05, "loss": 1.6989, "step": 8732 }, { "epoch": 35.64489795918367, "grad_norm": 27.636444091796875, "learning_rate": 3.219183673469388e-05, "loss": 1.2588, "step": 8733 }, { "epoch": 35.648979591836735, "grad_norm": 19.83462142944336, "learning_rate": 3.2189795918367346e-05, "loss": 2.5049, "step": 8734 }, { "epoch": 35.6530612244898, "grad_norm": 29.511234283447266, "learning_rate": 3.218775510204082e-05, "loss": 2.1506, "step": 8735 }, { "epoch": 35.65714285714286, "grad_norm": 23.23855972290039, "learning_rate": 3.218571428571429e-05, "loss": 1.6022, "step": 8736 }, { "epoch": 35.66122448979592, "grad_norm": 30.569881439208984, "learning_rate": 3.2183673469387754e-05, "loss": 1.9844, "step": 8737 }, { "epoch": 35.66530612244898, "grad_norm": 15.324026107788086, "learning_rate": 3.218163265306123e-05, "loss": 2.6205, "step": 8738 }, { "epoch": 35.66938775510204, "grad_norm": 16.137481689453125, "learning_rate": 3.2179591836734695e-05, "loss": 2.768, "step": 8739 }, { "epoch": 35.673469387755105, "grad_norm": 20.510923385620117, "learning_rate": 3.217755102040816e-05, "loss": 2.4891, "step": 8740 }, { "epoch": 35.67755102040816, "grad_norm": 26.59943962097168, "learning_rate": 3.217551020408163e-05, "loss": 1.6376, "step": 8741 }, { "epoch": 35.68163265306122, "grad_norm": 24.09360122680664, "learning_rate": 3.21734693877551e-05, "loss": 2.2228, "step": 8742 }, { "epoch": 35.68571428571428, "grad_norm": 16.170921325683594, "learning_rate": 3.217142857142858e-05, "loss": 2.6974, "step": 8743 }, { "epoch": 35.689795918367345, "grad_norm": 28.70375633239746, "learning_rate": 3.2169387755102044e-05, "loss": 1.7252, "step": 8744 }, { "epoch": 35.69387755102041, "grad_norm": 10.062454223632812, "learning_rate": 3.216734693877551e-05, "loss": 3.1388, "step": 8745 }, { "epoch": 35.69795918367347, "grad_norm": 24.04151153564453, "learning_rate": 3.216530612244898e-05, "loss": 2.503, "step": 8746 }, { "epoch": 35.70204081632653, "grad_norm": 23.41143035888672, "learning_rate": 3.216326530612245e-05, "loss": 2.1607, "step": 8747 }, { "epoch": 35.70612244897959, "grad_norm": 26.2150936126709, "learning_rate": 3.216122448979592e-05, "loss": 1.5548, "step": 8748 }, { "epoch": 35.710204081632654, "grad_norm": 38.003204345703125, "learning_rate": 3.2159183673469387e-05, "loss": 0.8192, "step": 8749 }, { "epoch": 35.714285714285715, "grad_norm": 21.242979049682617, "learning_rate": 3.215714285714286e-05, "loss": 1.8212, "step": 8750 }, { "epoch": 35.71836734693878, "grad_norm": 15.165603637695312, "learning_rate": 3.215510204081633e-05, "loss": 2.8536, "step": 8751 }, { "epoch": 35.72244897959184, "grad_norm": 28.300127029418945, "learning_rate": 3.21530612244898e-05, "loss": 1.6326, "step": 8752 }, { "epoch": 35.7265306122449, "grad_norm": 30.41887855529785, "learning_rate": 3.215102040816327e-05, "loss": 1.8015, "step": 8753 }, { "epoch": 35.73061224489796, "grad_norm": 37.187477111816406, "learning_rate": 3.2148979591836736e-05, "loss": 1.0743, "step": 8754 }, { "epoch": 35.734693877551024, "grad_norm": 30.684009552001953, "learning_rate": 3.21469387755102e-05, "loss": 1.896, "step": 8755 }, { "epoch": 35.73877551020408, "grad_norm": 17.81430435180664, "learning_rate": 3.214489795918367e-05, "loss": 2.2556, "step": 8756 }, { "epoch": 35.74285714285714, "grad_norm": 12.878438949584961, "learning_rate": 3.2142857142857144e-05, "loss": 2.9857, "step": 8757 }, { "epoch": 35.7469387755102, "grad_norm": 12.733549118041992, "learning_rate": 3.214081632653062e-05, "loss": 2.7734, "step": 8758 }, { "epoch": 35.751020408163264, "grad_norm": 17.36026382446289, "learning_rate": 3.2138775510204085e-05, "loss": 2.8621, "step": 8759 }, { "epoch": 35.755102040816325, "grad_norm": 16.842958450317383, "learning_rate": 3.213673469387755e-05, "loss": 2.5366, "step": 8760 }, { "epoch": 35.75918367346939, "grad_norm": 13.380146026611328, "learning_rate": 3.213469387755102e-05, "loss": 2.9694, "step": 8761 }, { "epoch": 35.76326530612245, "grad_norm": 33.32242202758789, "learning_rate": 3.213265306122449e-05, "loss": 2.2951, "step": 8762 }, { "epoch": 35.76734693877551, "grad_norm": 36.397029876708984, "learning_rate": 3.213061224489796e-05, "loss": 1.7997, "step": 8763 }, { "epoch": 35.77142857142857, "grad_norm": 21.899890899658203, "learning_rate": 3.212857142857143e-05, "loss": 2.2243, "step": 8764 }, { "epoch": 35.775510204081634, "grad_norm": 27.366514205932617, "learning_rate": 3.21265306122449e-05, "loss": 1.5134, "step": 8765 }, { "epoch": 35.779591836734696, "grad_norm": 13.597817420959473, "learning_rate": 3.212448979591837e-05, "loss": 2.8368, "step": 8766 }, { "epoch": 35.78367346938776, "grad_norm": 14.312515258789062, "learning_rate": 3.212244897959184e-05, "loss": 2.6003, "step": 8767 }, { "epoch": 35.78775510204082, "grad_norm": 22.7169246673584, "learning_rate": 3.212040816326531e-05, "loss": 2.1837, "step": 8768 }, { "epoch": 35.79183673469388, "grad_norm": 21.010099411010742, "learning_rate": 3.2118367346938777e-05, "loss": 2.2296, "step": 8769 }, { "epoch": 35.795918367346935, "grad_norm": 27.616132736206055, "learning_rate": 3.2116326530612244e-05, "loss": 2.1966, "step": 8770 }, { "epoch": 35.8, "grad_norm": 16.823028564453125, "learning_rate": 3.211428571428571e-05, "loss": 2.7325, "step": 8771 }, { "epoch": 35.80408163265306, "grad_norm": 13.14574909210205, "learning_rate": 3.2112244897959185e-05, "loss": 2.9916, "step": 8772 }, { "epoch": 35.80816326530612, "grad_norm": 20.43597984313965, "learning_rate": 3.211020408163266e-05, "loss": 2.6348, "step": 8773 }, { "epoch": 35.81224489795918, "grad_norm": 18.255678176879883, "learning_rate": 3.2108163265306126e-05, "loss": 2.4702, "step": 8774 }, { "epoch": 35.816326530612244, "grad_norm": 22.841699600219727, "learning_rate": 3.210612244897959e-05, "loss": 2.3405, "step": 8775 }, { "epoch": 35.820408163265306, "grad_norm": 12.980295181274414, "learning_rate": 3.210408163265306e-05, "loss": 2.8923, "step": 8776 }, { "epoch": 35.82448979591837, "grad_norm": 42.04844284057617, "learning_rate": 3.2102040816326534e-05, "loss": 0.7369, "step": 8777 }, { "epoch": 35.82857142857143, "grad_norm": 14.469022750854492, "learning_rate": 3.21e-05, "loss": 3.1932, "step": 8778 }, { "epoch": 35.83265306122449, "grad_norm": 36.45124435424805, "learning_rate": 3.209795918367347e-05, "loss": 1.2874, "step": 8779 }, { "epoch": 35.83673469387755, "grad_norm": 18.53529167175293, "learning_rate": 3.209591836734694e-05, "loss": 2.2796, "step": 8780 }, { "epoch": 35.840816326530614, "grad_norm": 13.909157752990723, "learning_rate": 3.209387755102041e-05, "loss": 3.0252, "step": 8781 }, { "epoch": 35.844897959183676, "grad_norm": 29.88106918334961, "learning_rate": 3.209183673469388e-05, "loss": 1.8556, "step": 8782 }, { "epoch": 35.84897959183674, "grad_norm": 11.130410194396973, "learning_rate": 3.208979591836735e-05, "loss": 3.2972, "step": 8783 }, { "epoch": 35.85306122448979, "grad_norm": 10.587654113769531, "learning_rate": 3.208775510204082e-05, "loss": 3.3107, "step": 8784 }, { "epoch": 35.857142857142854, "grad_norm": 25.49094581604004, "learning_rate": 3.2085714285714284e-05, "loss": 2.0476, "step": 8785 }, { "epoch": 35.861224489795916, "grad_norm": 17.638227462768555, "learning_rate": 3.208367346938775e-05, "loss": 2.4547, "step": 8786 }, { "epoch": 35.86530612244898, "grad_norm": 22.510013580322266, "learning_rate": 3.2081632653061225e-05, "loss": 2.5584, "step": 8787 }, { "epoch": 35.86938775510204, "grad_norm": 28.34165382385254, "learning_rate": 3.207959183673469e-05, "loss": 0.9826, "step": 8788 }, { "epoch": 35.8734693877551, "grad_norm": 17.160015106201172, "learning_rate": 3.2077551020408166e-05, "loss": 2.7324, "step": 8789 }, { "epoch": 35.87755102040816, "grad_norm": 16.821334838867188, "learning_rate": 3.2075510204081634e-05, "loss": 2.7175, "step": 8790 }, { "epoch": 35.881632653061224, "grad_norm": 36.752811431884766, "learning_rate": 3.207346938775511e-05, "loss": 1.0145, "step": 8791 }, { "epoch": 35.885714285714286, "grad_norm": 23.30975914001465, "learning_rate": 3.2071428571428575e-05, "loss": 2.3707, "step": 8792 }, { "epoch": 35.88979591836735, "grad_norm": 6.834610462188721, "learning_rate": 3.206938775510204e-05, "loss": 3.5831, "step": 8793 }, { "epoch": 35.89387755102041, "grad_norm": 20.2476863861084, "learning_rate": 3.206734693877551e-05, "loss": 2.1809, "step": 8794 }, { "epoch": 35.89795918367347, "grad_norm": 29.855445861816406, "learning_rate": 3.2065306122448976e-05, "loss": 2.1002, "step": 8795 }, { "epoch": 35.90204081632653, "grad_norm": 14.750408172607422, "learning_rate": 3.206326530612245e-05, "loss": 2.8767, "step": 8796 }, { "epoch": 35.906122448979595, "grad_norm": 27.5535831451416, "learning_rate": 3.2061224489795924e-05, "loss": 1.5296, "step": 8797 }, { "epoch": 35.910204081632656, "grad_norm": 11.271982192993164, "learning_rate": 3.205918367346939e-05, "loss": 2.9847, "step": 8798 }, { "epoch": 35.91428571428571, "grad_norm": 19.195249557495117, "learning_rate": 3.205714285714286e-05, "loss": 2.4277, "step": 8799 }, { "epoch": 35.91836734693877, "grad_norm": 13.783639907836914, "learning_rate": 3.2055102040816325e-05, "loss": 2.8672, "step": 8800 }, { "epoch": 35.922448979591834, "grad_norm": 32.67879867553711, "learning_rate": 3.20530612244898e-05, "loss": 2.5626, "step": 8801 }, { "epoch": 35.926530612244896, "grad_norm": 21.219200134277344, "learning_rate": 3.2051020408163266e-05, "loss": 1.9777, "step": 8802 }, { "epoch": 35.93061224489796, "grad_norm": 16.16938591003418, "learning_rate": 3.204897959183673e-05, "loss": 2.741, "step": 8803 }, { "epoch": 35.93469387755102, "grad_norm": 16.557292938232422, "learning_rate": 3.204693877551021e-05, "loss": 2.6206, "step": 8804 }, { "epoch": 35.93877551020408, "grad_norm": 25.674633026123047, "learning_rate": 3.2044897959183674e-05, "loss": 2.1724, "step": 8805 }, { "epoch": 35.94285714285714, "grad_norm": 13.441021919250488, "learning_rate": 3.204285714285715e-05, "loss": 2.8407, "step": 8806 }, { "epoch": 35.946938775510205, "grad_norm": 33.71595001220703, "learning_rate": 3.2040816326530615e-05, "loss": 2.4143, "step": 8807 }, { "epoch": 35.95102040816327, "grad_norm": 26.503856658935547, "learning_rate": 3.203877551020408e-05, "loss": 1.6639, "step": 8808 }, { "epoch": 35.95510204081633, "grad_norm": 32.6041374206543, "learning_rate": 3.203673469387755e-05, "loss": 0.8593, "step": 8809 }, { "epoch": 35.95918367346939, "grad_norm": 13.146708488464355, "learning_rate": 3.203469387755102e-05, "loss": 3.1681, "step": 8810 }, { "epoch": 35.96326530612245, "grad_norm": 25.671573638916016, "learning_rate": 3.203265306122449e-05, "loss": 1.8684, "step": 8811 }, { "epoch": 35.96734693877551, "grad_norm": 9.944584846496582, "learning_rate": 3.2030612244897965e-05, "loss": 3.2584, "step": 8812 }, { "epoch": 35.97142857142857, "grad_norm": 11.378994941711426, "learning_rate": 3.202857142857143e-05, "loss": 3.0502, "step": 8813 }, { "epoch": 35.97551020408163, "grad_norm": 12.92449951171875, "learning_rate": 3.20265306122449e-05, "loss": 2.9124, "step": 8814 }, { "epoch": 35.97959183673469, "grad_norm": 20.43273162841797, "learning_rate": 3.2024489795918366e-05, "loss": 2.4091, "step": 8815 }, { "epoch": 35.98367346938775, "grad_norm": 24.765668869018555, "learning_rate": 3.202244897959184e-05, "loss": 2.2465, "step": 8816 }, { "epoch": 35.987755102040815, "grad_norm": 24.441162109375, "learning_rate": 3.202040816326531e-05, "loss": 1.7695, "step": 8817 }, { "epoch": 35.99183673469388, "grad_norm": 36.81863021850586, "learning_rate": 3.2018367346938774e-05, "loss": 1.6285, "step": 8818 }, { "epoch": 35.99591836734694, "grad_norm": 16.298709869384766, "learning_rate": 3.201632653061225e-05, "loss": 2.4234, "step": 8819 }, { "epoch": 36.0, "grad_norm": 30.09847640991211, "learning_rate": 3.2014285714285715e-05, "loss": 0.9387, "step": 8820 }, { "epoch": 36.00408163265306, "grad_norm": 13.510285377502441, "learning_rate": 3.201224489795919e-05, "loss": 2.6563, "step": 8821 }, { "epoch": 36.00816326530612, "grad_norm": 16.259017944335938, "learning_rate": 3.2010204081632656e-05, "loss": 2.7064, "step": 8822 }, { "epoch": 36.012244897959185, "grad_norm": 21.136686325073242, "learning_rate": 3.200816326530612e-05, "loss": 2.3746, "step": 8823 }, { "epoch": 36.01632653061225, "grad_norm": 10.782669067382812, "learning_rate": 3.200612244897959e-05, "loss": 2.9081, "step": 8824 }, { "epoch": 36.02040816326531, "grad_norm": 22.743513107299805, "learning_rate": 3.200408163265306e-05, "loss": 1.5996, "step": 8825 }, { "epoch": 36.02448979591837, "grad_norm": 15.458463668823242, "learning_rate": 3.200204081632653e-05, "loss": 2.4651, "step": 8826 }, { "epoch": 36.02857142857143, "grad_norm": 21.854248046875, "learning_rate": 3.2000000000000005e-05, "loss": 1.6412, "step": 8827 }, { "epoch": 36.03265306122449, "grad_norm": 28.064245223999023, "learning_rate": 3.199795918367347e-05, "loss": 1.8339, "step": 8828 }, { "epoch": 36.03673469387755, "grad_norm": 13.785000801086426, "learning_rate": 3.199591836734694e-05, "loss": 3.0442, "step": 8829 }, { "epoch": 36.04081632653061, "grad_norm": 22.809396743774414, "learning_rate": 3.199387755102041e-05, "loss": 2.2356, "step": 8830 }, { "epoch": 36.04489795918367, "grad_norm": 18.588838577270508, "learning_rate": 3.199183673469388e-05, "loss": 2.4139, "step": 8831 }, { "epoch": 36.04897959183673, "grad_norm": 17.005552291870117, "learning_rate": 3.198979591836735e-05, "loss": 2.5958, "step": 8832 }, { "epoch": 36.053061224489795, "grad_norm": 26.235647201538086, "learning_rate": 3.1987755102040815e-05, "loss": 1.4872, "step": 8833 }, { "epoch": 36.05714285714286, "grad_norm": 9.474143028259277, "learning_rate": 3.198571428571429e-05, "loss": 3.5532, "step": 8834 }, { "epoch": 36.06122448979592, "grad_norm": 18.631725311279297, "learning_rate": 3.198367346938776e-05, "loss": 2.8687, "step": 8835 }, { "epoch": 36.06530612244898, "grad_norm": 17.707284927368164, "learning_rate": 3.198163265306123e-05, "loss": 2.3081, "step": 8836 }, { "epoch": 36.06938775510204, "grad_norm": 35.20999526977539, "learning_rate": 3.19795918367347e-05, "loss": 1.8617, "step": 8837 }, { "epoch": 36.073469387755104, "grad_norm": 14.106186866760254, "learning_rate": 3.1977551020408164e-05, "loss": 2.5526, "step": 8838 }, { "epoch": 36.077551020408166, "grad_norm": 33.85637664794922, "learning_rate": 3.197551020408163e-05, "loss": 0.9678, "step": 8839 }, { "epoch": 36.08163265306123, "grad_norm": 16.545412063598633, "learning_rate": 3.1973469387755105e-05, "loss": 2.3315, "step": 8840 }, { "epoch": 36.08571428571429, "grad_norm": 27.12277603149414, "learning_rate": 3.197142857142857e-05, "loss": 2.1373, "step": 8841 }, { "epoch": 36.089795918367344, "grad_norm": 12.150263786315918, "learning_rate": 3.196938775510204e-05, "loss": 2.7162, "step": 8842 }, { "epoch": 36.093877551020405, "grad_norm": 22.63973045349121, "learning_rate": 3.196734693877551e-05, "loss": 2.3646, "step": 8843 }, { "epoch": 36.09795918367347, "grad_norm": 9.957548141479492, "learning_rate": 3.196530612244898e-05, "loss": 3.123, "step": 8844 }, { "epoch": 36.10204081632653, "grad_norm": 24.530324935913086, "learning_rate": 3.1963265306122454e-05, "loss": 2.4234, "step": 8845 }, { "epoch": 36.10612244897959, "grad_norm": 27.727964401245117, "learning_rate": 3.196122448979592e-05, "loss": 0.4964, "step": 8846 }, { "epoch": 36.11020408163265, "grad_norm": 28.570493698120117, "learning_rate": 3.195918367346939e-05, "loss": 1.6315, "step": 8847 }, { "epoch": 36.114285714285714, "grad_norm": 24.80472755432129, "learning_rate": 3.1957142857142856e-05, "loss": 1.5569, "step": 8848 }, { "epoch": 36.118367346938776, "grad_norm": 14.799158096313477, "learning_rate": 3.195510204081632e-05, "loss": 2.7676, "step": 8849 }, { "epoch": 36.12244897959184, "grad_norm": 30.535646438598633, "learning_rate": 3.19530612244898e-05, "loss": 1.754, "step": 8850 }, { "epoch": 36.1265306122449, "grad_norm": 28.268951416015625, "learning_rate": 3.195102040816327e-05, "loss": 2.5708, "step": 8851 }, { "epoch": 36.13061224489796, "grad_norm": 23.50152015686035, "learning_rate": 3.194897959183674e-05, "loss": 2.4011, "step": 8852 }, { "epoch": 36.13469387755102, "grad_norm": 16.832021713256836, "learning_rate": 3.1946938775510205e-05, "loss": 2.9967, "step": 8853 }, { "epoch": 36.138775510204084, "grad_norm": 28.913286209106445, "learning_rate": 3.194489795918367e-05, "loss": 2.0492, "step": 8854 }, { "epoch": 36.142857142857146, "grad_norm": 22.57180404663086, "learning_rate": 3.1942857142857146e-05, "loss": 1.6921, "step": 8855 }, { "epoch": 36.14693877551021, "grad_norm": 17.114442825317383, "learning_rate": 3.194081632653061e-05, "loss": 2.5954, "step": 8856 }, { "epoch": 36.15102040816326, "grad_norm": 11.745878219604492, "learning_rate": 3.193877551020408e-05, "loss": 3.2284, "step": 8857 }, { "epoch": 36.155102040816324, "grad_norm": 26.59943199157715, "learning_rate": 3.1936734693877554e-05, "loss": 2.4303, "step": 8858 }, { "epoch": 36.159183673469386, "grad_norm": 13.040274620056152, "learning_rate": 3.193469387755102e-05, "loss": 3.0461, "step": 8859 }, { "epoch": 36.16326530612245, "grad_norm": 18.069364547729492, "learning_rate": 3.1932653061224495e-05, "loss": 2.3437, "step": 8860 }, { "epoch": 36.16734693877551, "grad_norm": 24.923437118530273, "learning_rate": 3.193061224489796e-05, "loss": 1.8549, "step": 8861 }, { "epoch": 36.17142857142857, "grad_norm": 49.55575942993164, "learning_rate": 3.192857142857143e-05, "loss": 1.0815, "step": 8862 }, { "epoch": 36.17551020408163, "grad_norm": 36.757041931152344, "learning_rate": 3.1926530612244896e-05, "loss": 1.4495, "step": 8863 }, { "epoch": 36.179591836734694, "grad_norm": 38.38723373413086, "learning_rate": 3.1924489795918364e-05, "loss": 0.2975, "step": 8864 }, { "epoch": 36.183673469387756, "grad_norm": 39.414222717285156, "learning_rate": 3.192244897959184e-05, "loss": 0.7159, "step": 8865 }, { "epoch": 36.18775510204082, "grad_norm": 41.71603012084961, "learning_rate": 3.192040816326531e-05, "loss": 2.5249, "step": 8866 }, { "epoch": 36.19183673469388, "grad_norm": 30.624610900878906, "learning_rate": 3.191836734693878e-05, "loss": 1.5637, "step": 8867 }, { "epoch": 36.19591836734694, "grad_norm": 37.3682861328125, "learning_rate": 3.1916326530612246e-05, "loss": 0.8218, "step": 8868 }, { "epoch": 36.2, "grad_norm": 14.236174583435059, "learning_rate": 3.191428571428571e-05, "loss": 2.9077, "step": 8869 }, { "epoch": 36.204081632653065, "grad_norm": 30.86981773376465, "learning_rate": 3.1912244897959187e-05, "loss": 1.475, "step": 8870 }, { "epoch": 36.20816326530612, "grad_norm": 13.290759086608887, "learning_rate": 3.1910204081632654e-05, "loss": 2.7216, "step": 8871 }, { "epoch": 36.21224489795918, "grad_norm": 9.048957824707031, "learning_rate": 3.190816326530612e-05, "loss": 3.505, "step": 8872 }, { "epoch": 36.21632653061224, "grad_norm": 34.819759368896484, "learning_rate": 3.1906122448979595e-05, "loss": 0.9625, "step": 8873 }, { "epoch": 36.220408163265304, "grad_norm": 11.746479034423828, "learning_rate": 3.190408163265306e-05, "loss": 2.7692, "step": 8874 }, { "epoch": 36.224489795918366, "grad_norm": 23.111984252929688, "learning_rate": 3.1902040816326536e-05, "loss": 2.4461, "step": 8875 }, { "epoch": 36.22857142857143, "grad_norm": 14.246902465820312, "learning_rate": 3.19e-05, "loss": 2.7104, "step": 8876 }, { "epoch": 36.23265306122449, "grad_norm": 23.698041915893555, "learning_rate": 3.189795918367347e-05, "loss": 2.2376, "step": 8877 }, { "epoch": 36.23673469387755, "grad_norm": 22.04266357421875, "learning_rate": 3.189591836734694e-05, "loss": 2.4299, "step": 8878 }, { "epoch": 36.24081632653061, "grad_norm": 15.383990287780762, "learning_rate": 3.189387755102041e-05, "loss": 2.8539, "step": 8879 }, { "epoch": 36.244897959183675, "grad_norm": 27.47145652770996, "learning_rate": 3.189183673469388e-05, "loss": 1.5651, "step": 8880 }, { "epoch": 36.248979591836736, "grad_norm": 19.02092742919922, "learning_rate": 3.188979591836735e-05, "loss": 2.7051, "step": 8881 }, { "epoch": 36.2530612244898, "grad_norm": 37.90599060058594, "learning_rate": 3.188775510204082e-05, "loss": 0.8078, "step": 8882 }, { "epoch": 36.25714285714286, "grad_norm": 25.71035385131836, "learning_rate": 3.1885714285714286e-05, "loss": 2.1801, "step": 8883 }, { "epoch": 36.26122448979592, "grad_norm": 16.81964683532715, "learning_rate": 3.188367346938776e-05, "loss": 2.1814, "step": 8884 }, { "epoch": 36.265306122448976, "grad_norm": 25.759614944458008, "learning_rate": 3.188163265306123e-05, "loss": 2.0105, "step": 8885 }, { "epoch": 36.26938775510204, "grad_norm": 20.085189819335938, "learning_rate": 3.1879591836734694e-05, "loss": 2.2525, "step": 8886 }, { "epoch": 36.2734693877551, "grad_norm": 28.571720123291016, "learning_rate": 3.187755102040816e-05, "loss": 1.3837, "step": 8887 }, { "epoch": 36.27755102040816, "grad_norm": 13.676822662353516, "learning_rate": 3.1875510204081636e-05, "loss": 2.8482, "step": 8888 }, { "epoch": 36.28163265306122, "grad_norm": 11.240846633911133, "learning_rate": 3.187346938775511e-05, "loss": 2.6456, "step": 8889 }, { "epoch": 36.285714285714285, "grad_norm": 14.429059982299805, "learning_rate": 3.1871428571428577e-05, "loss": 2.8093, "step": 8890 }, { "epoch": 36.289795918367346, "grad_norm": 13.731757164001465, "learning_rate": 3.1869387755102044e-05, "loss": 2.7922, "step": 8891 }, { "epoch": 36.29387755102041, "grad_norm": 15.869174003601074, "learning_rate": 3.186734693877551e-05, "loss": 2.9092, "step": 8892 }, { "epoch": 36.29795918367347, "grad_norm": 12.017372131347656, "learning_rate": 3.186530612244898e-05, "loss": 2.708, "step": 8893 }, { "epoch": 36.30204081632653, "grad_norm": 38.44761657714844, "learning_rate": 3.186326530612245e-05, "loss": 1.6587, "step": 8894 }, { "epoch": 36.30612244897959, "grad_norm": 28.38031005859375, "learning_rate": 3.186122448979592e-05, "loss": 1.8283, "step": 8895 }, { "epoch": 36.310204081632655, "grad_norm": 22.567914962768555, "learning_rate": 3.1859183673469386e-05, "loss": 2.1538, "step": 8896 }, { "epoch": 36.31428571428572, "grad_norm": 39.766700744628906, "learning_rate": 3.185714285714286e-05, "loss": 0.7995, "step": 8897 }, { "epoch": 36.31836734693878, "grad_norm": 24.074560165405273, "learning_rate": 3.185510204081633e-05, "loss": 2.0983, "step": 8898 }, { "epoch": 36.32244897959184, "grad_norm": 22.51327133178711, "learning_rate": 3.18530612244898e-05, "loss": 2.0428, "step": 8899 }, { "epoch": 36.326530612244895, "grad_norm": 18.31655502319336, "learning_rate": 3.185102040816327e-05, "loss": 2.61, "step": 8900 }, { "epoch": 36.33061224489796, "grad_norm": 13.50167179107666, "learning_rate": 3.1848979591836735e-05, "loss": 3.0999, "step": 8901 }, { "epoch": 36.33469387755102, "grad_norm": 41.1103630065918, "learning_rate": 3.18469387755102e-05, "loss": 0.933, "step": 8902 }, { "epoch": 36.33877551020408, "grad_norm": 27.70138931274414, "learning_rate": 3.184489795918367e-05, "loss": 1.7452, "step": 8903 }, { "epoch": 36.34285714285714, "grad_norm": 14.996089935302734, "learning_rate": 3.1842857142857143e-05, "loss": 2.906, "step": 8904 }, { "epoch": 36.3469387755102, "grad_norm": 24.485620498657227, "learning_rate": 3.184081632653062e-05, "loss": 2.2999, "step": 8905 }, { "epoch": 36.351020408163265, "grad_norm": 33.0539665222168, "learning_rate": 3.1838775510204084e-05, "loss": 2.0114, "step": 8906 }, { "epoch": 36.35510204081633, "grad_norm": 11.69351863861084, "learning_rate": 3.183673469387755e-05, "loss": 2.9278, "step": 8907 }, { "epoch": 36.35918367346939, "grad_norm": 29.921709060668945, "learning_rate": 3.183469387755102e-05, "loss": 2.3806, "step": 8908 }, { "epoch": 36.36326530612245, "grad_norm": 13.442781448364258, "learning_rate": 3.183265306122449e-05, "loss": 2.9379, "step": 8909 }, { "epoch": 36.36734693877551, "grad_norm": 20.219358444213867, "learning_rate": 3.183061224489796e-05, "loss": 2.152, "step": 8910 }, { "epoch": 36.371428571428574, "grad_norm": 15.774872779846191, "learning_rate": 3.182857142857143e-05, "loss": 2.7421, "step": 8911 }, { "epoch": 36.375510204081635, "grad_norm": 13.909914016723633, "learning_rate": 3.18265306122449e-05, "loss": 2.764, "step": 8912 }, { "epoch": 36.3795918367347, "grad_norm": 21.832191467285156, "learning_rate": 3.182448979591837e-05, "loss": 2.9028, "step": 8913 }, { "epoch": 36.38367346938775, "grad_norm": 30.491939544677734, "learning_rate": 3.182244897959184e-05, "loss": 1.585, "step": 8914 }, { "epoch": 36.38775510204081, "grad_norm": 34.35236358642578, "learning_rate": 3.182040816326531e-05, "loss": 1.9154, "step": 8915 }, { "epoch": 36.391836734693875, "grad_norm": 17.810531616210938, "learning_rate": 3.1818367346938776e-05, "loss": 2.5228, "step": 8916 }, { "epoch": 36.39591836734694, "grad_norm": 22.367082595825195, "learning_rate": 3.181632653061224e-05, "loss": 2.477, "step": 8917 }, { "epoch": 36.4, "grad_norm": 27.953369140625, "learning_rate": 3.181428571428571e-05, "loss": 0.8658, "step": 8918 }, { "epoch": 36.40408163265306, "grad_norm": 18.1716251373291, "learning_rate": 3.1812244897959184e-05, "loss": 2.7876, "step": 8919 }, { "epoch": 36.40816326530612, "grad_norm": 34.312416076660156, "learning_rate": 3.181020408163266e-05, "loss": 1.7008, "step": 8920 }, { "epoch": 36.412244897959184, "grad_norm": 10.816664695739746, "learning_rate": 3.1808163265306125e-05, "loss": 3.2925, "step": 8921 }, { "epoch": 36.416326530612245, "grad_norm": 24.44845199584961, "learning_rate": 3.180612244897959e-05, "loss": 2.104, "step": 8922 }, { "epoch": 36.42040816326531, "grad_norm": 12.29004192352295, "learning_rate": 3.1804081632653066e-05, "loss": 2.8903, "step": 8923 }, { "epoch": 36.42448979591837, "grad_norm": 19.888887405395508, "learning_rate": 3.180204081632653e-05, "loss": 2.5028, "step": 8924 }, { "epoch": 36.42857142857143, "grad_norm": 44.68607711791992, "learning_rate": 3.18e-05, "loss": 2.0138, "step": 8925 }, { "epoch": 36.43265306122449, "grad_norm": 32.84951400756836, "learning_rate": 3.179795918367347e-05, "loss": 1.4366, "step": 8926 }, { "epoch": 36.436734693877554, "grad_norm": 9.666711807250977, "learning_rate": 3.179591836734694e-05, "loss": 3.516, "step": 8927 }, { "epoch": 36.440816326530616, "grad_norm": 14.247481346130371, "learning_rate": 3.1793877551020415e-05, "loss": 2.7395, "step": 8928 }, { "epoch": 36.44489795918367, "grad_norm": 13.455948829650879, "learning_rate": 3.179183673469388e-05, "loss": 2.7813, "step": 8929 }, { "epoch": 36.44897959183673, "grad_norm": 25.16204261779785, "learning_rate": 3.178979591836735e-05, "loss": 1.9745, "step": 8930 }, { "epoch": 36.453061224489794, "grad_norm": 18.828720092773438, "learning_rate": 3.178775510204082e-05, "loss": 2.4944, "step": 8931 }, { "epoch": 36.457142857142856, "grad_norm": 16.363147735595703, "learning_rate": 3.1785714285714284e-05, "loss": 2.9879, "step": 8932 }, { "epoch": 36.46122448979592, "grad_norm": 19.176254272460938, "learning_rate": 3.178367346938776e-05, "loss": 2.1934, "step": 8933 }, { "epoch": 36.46530612244898, "grad_norm": 20.976802825927734, "learning_rate": 3.1781632653061225e-05, "loss": 2.3979, "step": 8934 }, { "epoch": 36.46938775510204, "grad_norm": 27.498464584350586, "learning_rate": 3.17795918367347e-05, "loss": 2.0223, "step": 8935 }, { "epoch": 36.4734693877551, "grad_norm": 29.676755905151367, "learning_rate": 3.1777551020408166e-05, "loss": 0.8413, "step": 8936 }, { "epoch": 36.477551020408164, "grad_norm": 22.520469665527344, "learning_rate": 3.177551020408163e-05, "loss": 2.2859, "step": 8937 }, { "epoch": 36.481632653061226, "grad_norm": 41.74522018432617, "learning_rate": 3.177346938775511e-05, "loss": 0.7998, "step": 8938 }, { "epoch": 36.48571428571429, "grad_norm": 33.90296173095703, "learning_rate": 3.1771428571428574e-05, "loss": 1.7156, "step": 8939 }, { "epoch": 36.48979591836735, "grad_norm": 23.969114303588867, "learning_rate": 3.176938775510204e-05, "loss": 2.1378, "step": 8940 }, { "epoch": 36.49387755102041, "grad_norm": 10.51596450805664, "learning_rate": 3.176734693877551e-05, "loss": 3.3407, "step": 8941 }, { "epoch": 36.49795918367347, "grad_norm": 21.3309268951416, "learning_rate": 3.176530612244898e-05, "loss": 2.1937, "step": 8942 }, { "epoch": 36.50204081632653, "grad_norm": 16.431354522705078, "learning_rate": 3.176326530612245e-05, "loss": 2.6782, "step": 8943 }, { "epoch": 36.50612244897959, "grad_norm": 34.05454635620117, "learning_rate": 3.176122448979592e-05, "loss": 1.4968, "step": 8944 }, { "epoch": 36.51020408163265, "grad_norm": 35.217262268066406, "learning_rate": 3.175918367346939e-05, "loss": 1.531, "step": 8945 }, { "epoch": 36.51428571428571, "grad_norm": 13.857718467712402, "learning_rate": 3.175714285714286e-05, "loss": 3.1942, "step": 8946 }, { "epoch": 36.518367346938774, "grad_norm": 16.41679573059082, "learning_rate": 3.1755102040816325e-05, "loss": 2.8863, "step": 8947 }, { "epoch": 36.522448979591836, "grad_norm": 19.055706024169922, "learning_rate": 3.17530612244898e-05, "loss": 2.5069, "step": 8948 }, { "epoch": 36.5265306122449, "grad_norm": 28.092552185058594, "learning_rate": 3.1751020408163266e-05, "loss": 2.4474, "step": 8949 }, { "epoch": 36.53061224489796, "grad_norm": 22.113859176635742, "learning_rate": 3.174897959183673e-05, "loss": 2.1025, "step": 8950 }, { "epoch": 36.53469387755102, "grad_norm": 20.8596248626709, "learning_rate": 3.174693877551021e-05, "loss": 1.985, "step": 8951 }, { "epoch": 36.53877551020408, "grad_norm": 19.758167266845703, "learning_rate": 3.1744897959183674e-05, "loss": 2.7446, "step": 8952 }, { "epoch": 36.542857142857144, "grad_norm": 18.921579360961914, "learning_rate": 3.174285714285715e-05, "loss": 2.3045, "step": 8953 }, { "epoch": 36.546938775510206, "grad_norm": 31.636301040649414, "learning_rate": 3.1740816326530615e-05, "loss": 1.1971, "step": 8954 }, { "epoch": 36.55102040816327, "grad_norm": 10.201943397521973, "learning_rate": 3.173877551020408e-05, "loss": 3.0516, "step": 8955 }, { "epoch": 36.55510204081633, "grad_norm": 30.136049270629883, "learning_rate": 3.173673469387755e-05, "loss": 1.8203, "step": 8956 }, { "epoch": 36.55918367346939, "grad_norm": 15.774182319641113, "learning_rate": 3.1734693877551016e-05, "loss": 2.552, "step": 8957 }, { "epoch": 36.563265306122446, "grad_norm": 20.085540771484375, "learning_rate": 3.173265306122449e-05, "loss": 2.4579, "step": 8958 }, { "epoch": 36.56734693877551, "grad_norm": 14.20785140991211, "learning_rate": 3.1730612244897964e-05, "loss": 2.672, "step": 8959 }, { "epoch": 36.57142857142857, "grad_norm": 15.612555503845215, "learning_rate": 3.172857142857143e-05, "loss": 2.2813, "step": 8960 }, { "epoch": 36.57551020408163, "grad_norm": 20.021251678466797, "learning_rate": 3.17265306122449e-05, "loss": 2.3658, "step": 8961 }, { "epoch": 36.57959183673469, "grad_norm": 17.81877326965332, "learning_rate": 3.1724489795918365e-05, "loss": 2.6625, "step": 8962 }, { "epoch": 36.583673469387755, "grad_norm": 17.378572463989258, "learning_rate": 3.172244897959184e-05, "loss": 2.731, "step": 8963 }, { "epoch": 36.587755102040816, "grad_norm": 18.642797470092773, "learning_rate": 3.1720408163265306e-05, "loss": 2.6359, "step": 8964 }, { "epoch": 36.59183673469388, "grad_norm": 28.130380630493164, "learning_rate": 3.1718367346938774e-05, "loss": 2.4162, "step": 8965 }, { "epoch": 36.59591836734694, "grad_norm": 13.315828323364258, "learning_rate": 3.171632653061225e-05, "loss": 2.7813, "step": 8966 }, { "epoch": 36.6, "grad_norm": 18.35321044921875, "learning_rate": 3.1714285714285715e-05, "loss": 2.7704, "step": 8967 }, { "epoch": 36.60408163265306, "grad_norm": 19.093124389648438, "learning_rate": 3.171224489795919e-05, "loss": 2.3694, "step": 8968 }, { "epoch": 36.608163265306125, "grad_norm": 14.841259002685547, "learning_rate": 3.1710204081632656e-05, "loss": 2.6375, "step": 8969 }, { "epoch": 36.61224489795919, "grad_norm": 33.20487976074219, "learning_rate": 3.170816326530612e-05, "loss": 1.7263, "step": 8970 }, { "epoch": 36.61632653061225, "grad_norm": 29.256206512451172, "learning_rate": 3.170612244897959e-05, "loss": 1.6665, "step": 8971 }, { "epoch": 36.6204081632653, "grad_norm": 34.89817428588867, "learning_rate": 3.1704081632653064e-05, "loss": 2.5419, "step": 8972 }, { "epoch": 36.624489795918365, "grad_norm": 18.942832946777344, "learning_rate": 3.170204081632653e-05, "loss": 2.5482, "step": 8973 }, { "epoch": 36.628571428571426, "grad_norm": 20.53012466430664, "learning_rate": 3.1700000000000005e-05, "loss": 2.5328, "step": 8974 }, { "epoch": 36.63265306122449, "grad_norm": 26.54608917236328, "learning_rate": 3.169795918367347e-05, "loss": 1.7452, "step": 8975 }, { "epoch": 36.63673469387755, "grad_norm": 20.2497501373291, "learning_rate": 3.169591836734694e-05, "loss": 2.4513, "step": 8976 }, { "epoch": 36.64081632653061, "grad_norm": 23.139556884765625, "learning_rate": 3.169387755102041e-05, "loss": 2.0101, "step": 8977 }, { "epoch": 36.64489795918367, "grad_norm": 30.178958892822266, "learning_rate": 3.169183673469388e-05, "loss": 1.407, "step": 8978 }, { "epoch": 36.648979591836735, "grad_norm": 17.87272834777832, "learning_rate": 3.168979591836735e-05, "loss": 2.2814, "step": 8979 }, { "epoch": 36.6530612244898, "grad_norm": 16.59813117980957, "learning_rate": 3.1687755102040814e-05, "loss": 2.6345, "step": 8980 }, { "epoch": 36.65714285714286, "grad_norm": 47.86736297607422, "learning_rate": 3.168571428571429e-05, "loss": 1.7863, "step": 8981 }, { "epoch": 36.66122448979592, "grad_norm": 15.29373550415039, "learning_rate": 3.168367346938776e-05, "loss": 2.6542, "step": 8982 }, { "epoch": 36.66530612244898, "grad_norm": 16.160911560058594, "learning_rate": 3.168163265306123e-05, "loss": 2.8925, "step": 8983 }, { "epoch": 36.66938775510204, "grad_norm": 29.878053665161133, "learning_rate": 3.1679591836734696e-05, "loss": 1.5051, "step": 8984 }, { "epoch": 36.673469387755105, "grad_norm": 34.82160186767578, "learning_rate": 3.1677551020408164e-05, "loss": 1.4963, "step": 8985 }, { "epoch": 36.67755102040816, "grad_norm": 31.547361373901367, "learning_rate": 3.167551020408163e-05, "loss": 2.2017, "step": 8986 }, { "epoch": 36.68163265306122, "grad_norm": 25.849191665649414, "learning_rate": 3.1673469387755105e-05, "loss": 2.0923, "step": 8987 }, { "epoch": 36.68571428571428, "grad_norm": 41.84989929199219, "learning_rate": 3.167142857142857e-05, "loss": 1.9356, "step": 8988 }, { "epoch": 36.689795918367345, "grad_norm": 17.2860164642334, "learning_rate": 3.1669387755102046e-05, "loss": 2.4339, "step": 8989 }, { "epoch": 36.69387755102041, "grad_norm": 23.31496810913086, "learning_rate": 3.166734693877551e-05, "loss": 2.457, "step": 8990 }, { "epoch": 36.69795918367347, "grad_norm": 15.769783020019531, "learning_rate": 3.166530612244898e-05, "loss": 2.7744, "step": 8991 }, { "epoch": 36.70204081632653, "grad_norm": 15.497669219970703, "learning_rate": 3.1663265306122454e-05, "loss": 2.8584, "step": 8992 }, { "epoch": 36.70612244897959, "grad_norm": 13.758878707885742, "learning_rate": 3.166122448979592e-05, "loss": 3.0263, "step": 8993 }, { "epoch": 36.710204081632654, "grad_norm": 15.248748779296875, "learning_rate": 3.165918367346939e-05, "loss": 2.8804, "step": 8994 }, { "epoch": 36.714285714285715, "grad_norm": 28.685012817382812, "learning_rate": 3.1657142857142855e-05, "loss": 1.7188, "step": 8995 }, { "epoch": 36.71836734693878, "grad_norm": 32.502716064453125, "learning_rate": 3.165510204081633e-05, "loss": 1.9972, "step": 8996 }, { "epoch": 36.72244897959184, "grad_norm": 17.063068389892578, "learning_rate": 3.1653061224489796e-05, "loss": 2.5602, "step": 8997 }, { "epoch": 36.7265306122449, "grad_norm": 22.453577041625977, "learning_rate": 3.165102040816327e-05, "loss": 2.4096, "step": 8998 }, { "epoch": 36.73061224489796, "grad_norm": 16.855695724487305, "learning_rate": 3.164897959183674e-05, "loss": 2.7559, "step": 8999 }, { "epoch": 36.734693877551024, "grad_norm": 15.665678024291992, "learning_rate": 3.1646938775510204e-05, "loss": 2.6176, "step": 9000 }, { "epoch": 36.73877551020408, "grad_norm": 19.389873504638672, "learning_rate": 3.164489795918367e-05, "loss": 2.0613, "step": 9001 }, { "epoch": 36.74285714285714, "grad_norm": 15.940102577209473, "learning_rate": 3.1642857142857145e-05, "loss": 2.553, "step": 9002 }, { "epoch": 36.7469387755102, "grad_norm": 12.397176742553711, "learning_rate": 3.164081632653061e-05, "loss": 3.3215, "step": 9003 }, { "epoch": 36.751020408163264, "grad_norm": 10.853214263916016, "learning_rate": 3.163877551020408e-05, "loss": 3.191, "step": 9004 }, { "epoch": 36.755102040816325, "grad_norm": 14.223611831665039, "learning_rate": 3.1636734693877553e-05, "loss": 2.904, "step": 9005 }, { "epoch": 36.75918367346939, "grad_norm": 17.99357795715332, "learning_rate": 3.163469387755102e-05, "loss": 2.6809, "step": 9006 }, { "epoch": 36.76326530612245, "grad_norm": 23.232057571411133, "learning_rate": 3.1632653061224494e-05, "loss": 1.9955, "step": 9007 }, { "epoch": 36.76734693877551, "grad_norm": 14.104649543762207, "learning_rate": 3.163061224489796e-05, "loss": 3.1087, "step": 9008 }, { "epoch": 36.77142857142857, "grad_norm": 19.044260025024414, "learning_rate": 3.162857142857143e-05, "loss": 2.7572, "step": 9009 }, { "epoch": 36.775510204081634, "grad_norm": 24.124792098999023, "learning_rate": 3.1626530612244896e-05, "loss": 2.1125, "step": 9010 }, { "epoch": 36.779591836734696, "grad_norm": 24.29998016357422, "learning_rate": 3.162448979591836e-05, "loss": 1.6854, "step": 9011 }, { "epoch": 36.78367346938776, "grad_norm": 23.416677474975586, "learning_rate": 3.162244897959184e-05, "loss": 2.1552, "step": 9012 }, { "epoch": 36.78775510204082, "grad_norm": 50.21798324584961, "learning_rate": 3.162040816326531e-05, "loss": 1.6002, "step": 9013 }, { "epoch": 36.79183673469388, "grad_norm": 10.771097183227539, "learning_rate": 3.161836734693878e-05, "loss": 3.2044, "step": 9014 }, { "epoch": 36.795918367346935, "grad_norm": 27.785655975341797, "learning_rate": 3.1616326530612245e-05, "loss": 2.2701, "step": 9015 }, { "epoch": 36.8, "grad_norm": 19.894887924194336, "learning_rate": 3.161428571428572e-05, "loss": 2.0599, "step": 9016 }, { "epoch": 36.80408163265306, "grad_norm": 29.069177627563477, "learning_rate": 3.1612244897959186e-05, "loss": 1.5456, "step": 9017 }, { "epoch": 36.80816326530612, "grad_norm": 23.35994529724121, "learning_rate": 3.161020408163265e-05, "loss": 2.3054, "step": 9018 }, { "epoch": 36.81224489795918, "grad_norm": 19.57378578186035, "learning_rate": 3.160816326530612e-05, "loss": 2.6625, "step": 9019 }, { "epoch": 36.816326530612244, "grad_norm": 38.2786750793457, "learning_rate": 3.1606122448979594e-05, "loss": 1.0502, "step": 9020 }, { "epoch": 36.820408163265306, "grad_norm": 13.163663864135742, "learning_rate": 3.160408163265307e-05, "loss": 2.9009, "step": 9021 }, { "epoch": 36.82448979591837, "grad_norm": 20.98906898498535, "learning_rate": 3.1602040816326535e-05, "loss": 2.74, "step": 9022 }, { "epoch": 36.82857142857143, "grad_norm": 11.67607307434082, "learning_rate": 3.16e-05, "loss": 3.1342, "step": 9023 }, { "epoch": 36.83265306122449, "grad_norm": 15.588510513305664, "learning_rate": 3.159795918367347e-05, "loss": 2.8081, "step": 9024 }, { "epoch": 36.83673469387755, "grad_norm": 42.67729949951172, "learning_rate": 3.159591836734694e-05, "loss": 1.5162, "step": 9025 }, { "epoch": 36.840816326530614, "grad_norm": 15.563336372375488, "learning_rate": 3.159387755102041e-05, "loss": 2.7962, "step": 9026 }, { "epoch": 36.844897959183676, "grad_norm": 13.711341857910156, "learning_rate": 3.159183673469388e-05, "loss": 2.8891, "step": 9027 }, { "epoch": 36.84897959183674, "grad_norm": 23.635292053222656, "learning_rate": 3.158979591836735e-05, "loss": 2.0692, "step": 9028 }, { "epoch": 36.85306122448979, "grad_norm": 12.146669387817383, "learning_rate": 3.158775510204082e-05, "loss": 3.204, "step": 9029 }, { "epoch": 36.857142857142854, "grad_norm": 13.413804054260254, "learning_rate": 3.1585714285714286e-05, "loss": 2.7682, "step": 9030 }, { "epoch": 36.861224489795916, "grad_norm": 27.949718475341797, "learning_rate": 3.158367346938776e-05, "loss": 2.0686, "step": 9031 }, { "epoch": 36.86530612244898, "grad_norm": 33.30353927612305, "learning_rate": 3.158163265306123e-05, "loss": 2.3175, "step": 9032 }, { "epoch": 36.86938775510204, "grad_norm": 18.036523818969727, "learning_rate": 3.1579591836734694e-05, "loss": 2.3957, "step": 9033 }, { "epoch": 36.8734693877551, "grad_norm": 14.99231243133545, "learning_rate": 3.157755102040816e-05, "loss": 2.7808, "step": 9034 }, { "epoch": 36.87755102040816, "grad_norm": 18.668617248535156, "learning_rate": 3.1575510204081635e-05, "loss": 2.5925, "step": 9035 }, { "epoch": 36.881632653061224, "grad_norm": 14.164766311645508, "learning_rate": 3.157346938775511e-05, "loss": 2.5363, "step": 9036 }, { "epoch": 36.885714285714286, "grad_norm": 36.1424446105957, "learning_rate": 3.1571428571428576e-05, "loss": 1.2127, "step": 9037 }, { "epoch": 36.88979591836735, "grad_norm": 21.18681526184082, "learning_rate": 3.156938775510204e-05, "loss": 2.1171, "step": 9038 }, { "epoch": 36.89387755102041, "grad_norm": 23.500856399536133, "learning_rate": 3.156734693877551e-05, "loss": 2.2025, "step": 9039 }, { "epoch": 36.89795918367347, "grad_norm": 20.460857391357422, "learning_rate": 3.156530612244898e-05, "loss": 2.5898, "step": 9040 }, { "epoch": 36.90204081632653, "grad_norm": 30.87279510498047, "learning_rate": 3.156326530612245e-05, "loss": 1.628, "step": 9041 }, { "epoch": 36.906122448979595, "grad_norm": 11.192578315734863, "learning_rate": 3.156122448979592e-05, "loss": 3.1336, "step": 9042 }, { "epoch": 36.910204081632656, "grad_norm": 21.0073299407959, "learning_rate": 3.155918367346939e-05, "loss": 2.4511, "step": 9043 }, { "epoch": 36.91428571428571, "grad_norm": 12.42131519317627, "learning_rate": 3.155714285714286e-05, "loss": 3.2334, "step": 9044 }, { "epoch": 36.91836734693877, "grad_norm": 19.121944427490234, "learning_rate": 3.1555102040816327e-05, "loss": 2.1575, "step": 9045 }, { "epoch": 36.922448979591834, "grad_norm": 17.768712997436523, "learning_rate": 3.15530612244898e-05, "loss": 2.3593, "step": 9046 }, { "epoch": 36.926530612244896, "grad_norm": 13.092504501342773, "learning_rate": 3.155102040816327e-05, "loss": 2.9553, "step": 9047 }, { "epoch": 36.93061224489796, "grad_norm": 20.06194305419922, "learning_rate": 3.1548979591836735e-05, "loss": 2.6007, "step": 9048 }, { "epoch": 36.93469387755102, "grad_norm": 39.00282287597656, "learning_rate": 3.15469387755102e-05, "loss": 1.4579, "step": 9049 }, { "epoch": 36.93877551020408, "grad_norm": 13.405571937561035, "learning_rate": 3.1544897959183676e-05, "loss": 2.8408, "step": 9050 }, { "epoch": 36.94285714285714, "grad_norm": 35.205074310302734, "learning_rate": 3.154285714285714e-05, "loss": 1.7959, "step": 9051 }, { "epoch": 36.946938775510205, "grad_norm": 22.06031036376953, "learning_rate": 3.154081632653062e-05, "loss": 2.7929, "step": 9052 }, { "epoch": 36.95102040816327, "grad_norm": 26.972959518432617, "learning_rate": 3.1538775510204084e-05, "loss": 1.4525, "step": 9053 }, { "epoch": 36.95510204081633, "grad_norm": 34.22995376586914, "learning_rate": 3.153673469387755e-05, "loss": 1.034, "step": 9054 }, { "epoch": 36.95918367346939, "grad_norm": 24.159360885620117, "learning_rate": 3.153469387755102e-05, "loss": 1.9397, "step": 9055 }, { "epoch": 36.96326530612245, "grad_norm": 20.061416625976562, "learning_rate": 3.153265306122449e-05, "loss": 2.3405, "step": 9056 }, { "epoch": 36.96734693877551, "grad_norm": 25.66337776184082, "learning_rate": 3.153061224489796e-05, "loss": 1.8037, "step": 9057 }, { "epoch": 36.97142857142857, "grad_norm": 63.59450912475586, "learning_rate": 3.1528571428571426e-05, "loss": 1.6785, "step": 9058 }, { "epoch": 36.97551020408163, "grad_norm": 24.640544891357422, "learning_rate": 3.15265306122449e-05, "loss": 1.6645, "step": 9059 }, { "epoch": 36.97959183673469, "grad_norm": 8.95534610748291, "learning_rate": 3.1524489795918374e-05, "loss": 3.257, "step": 9060 }, { "epoch": 36.98367346938775, "grad_norm": 28.151369094848633, "learning_rate": 3.152244897959184e-05, "loss": 1.8985, "step": 9061 }, { "epoch": 36.987755102040815, "grad_norm": 44.95875930786133, "learning_rate": 3.152040816326531e-05, "loss": 0.7808, "step": 9062 }, { "epoch": 36.99183673469388, "grad_norm": 11.556235313415527, "learning_rate": 3.1518367346938775e-05, "loss": 3.1511, "step": 9063 }, { "epoch": 36.99591836734694, "grad_norm": 23.378398895263672, "learning_rate": 3.151632653061224e-05, "loss": 2.2578, "step": 9064 }, { "epoch": 37.0, "grad_norm": 26.287382125854492, "learning_rate": 3.1514285714285717e-05, "loss": 2.4074, "step": 9065 }, { "epoch": 37.00408163265306, "grad_norm": NaN, "learning_rate": 3.1512244897959184e-05, "loss": 0.9268, "step": 9066 }, { "epoch": 37.00816326530612, "grad_norm": 17.46730613708496, "learning_rate": 3.1512244897959184e-05, "loss": 2.6629, "step": 9067 }, { "epoch": 37.012244897959185, "grad_norm": 22.191503524780273, "learning_rate": 3.151020408163266e-05, "loss": 2.3637, "step": 9068 }, { "epoch": 37.01632653061225, "grad_norm": 23.359575271606445, "learning_rate": 3.1508163265306125e-05, "loss": 2.6309, "step": 9069 }, { "epoch": 37.02040816326531, "grad_norm": 8.91557502746582, "learning_rate": 3.150612244897959e-05, "loss": 3.4626, "step": 9070 }, { "epoch": 37.02448979591837, "grad_norm": 29.113521575927734, "learning_rate": 3.1504081632653066e-05, "loss": 1.9849, "step": 9071 }, { "epoch": 37.02857142857143, "grad_norm": 24.019824981689453, "learning_rate": 3.150204081632653e-05, "loss": 1.5585, "step": 9072 }, { "epoch": 37.03265306122449, "grad_norm": 10.796038627624512, "learning_rate": 3.15e-05, "loss": 3.1358, "step": 9073 }, { "epoch": 37.03673469387755, "grad_norm": 26.327171325683594, "learning_rate": 3.149795918367347e-05, "loss": 1.8177, "step": 9074 }, { "epoch": 37.04081632653061, "grad_norm": 12.699931144714355, "learning_rate": 3.149591836734694e-05, "loss": 2.7591, "step": 9075 }, { "epoch": 37.04489795918367, "grad_norm": 32.87741470336914, "learning_rate": 3.1493877551020415e-05, "loss": 1.6546, "step": 9076 }, { "epoch": 37.04897959183673, "grad_norm": 20.438983917236328, "learning_rate": 3.149183673469388e-05, "loss": 1.9481, "step": 9077 }, { "epoch": 37.053061224489795, "grad_norm": 37.00336456298828, "learning_rate": 3.148979591836735e-05, "loss": 0.9041, "step": 9078 }, { "epoch": 37.05714285714286, "grad_norm": 17.41078758239746, "learning_rate": 3.1487755102040816e-05, "loss": 2.4264, "step": 9079 }, { "epoch": 37.06122448979592, "grad_norm": 17.01318359375, "learning_rate": 3.148571428571428e-05, "loss": 2.2837, "step": 9080 }, { "epoch": 37.06530612244898, "grad_norm": 20.53771209716797, "learning_rate": 3.148367346938776e-05, "loss": 2.3609, "step": 9081 }, { "epoch": 37.06938775510204, "grad_norm": 14.00755500793457, "learning_rate": 3.1481632653061224e-05, "loss": 2.9263, "step": 9082 }, { "epoch": 37.073469387755104, "grad_norm": 26.329439163208008, "learning_rate": 3.14795918367347e-05, "loss": 2.6274, "step": 9083 }, { "epoch": 37.077551020408166, "grad_norm": 22.056961059570312, "learning_rate": 3.1477551020408165e-05, "loss": 2.4248, "step": 9084 }, { "epoch": 37.08163265306123, "grad_norm": 36.80606460571289, "learning_rate": 3.147551020408163e-05, "loss": 1.5495, "step": 9085 }, { "epoch": 37.08571428571429, "grad_norm": 15.655630111694336, "learning_rate": 3.1473469387755106e-05, "loss": 2.5426, "step": 9086 }, { "epoch": 37.089795918367344, "grad_norm": 29.324106216430664, "learning_rate": 3.1471428571428574e-05, "loss": 1.7031, "step": 9087 }, { "epoch": 37.093877551020405, "grad_norm": 18.391347885131836, "learning_rate": 3.146938775510204e-05, "loss": 2.4538, "step": 9088 }, { "epoch": 37.09795918367347, "grad_norm": 16.174211502075195, "learning_rate": 3.146734693877551e-05, "loss": 2.7252, "step": 9089 }, { "epoch": 37.10204081632653, "grad_norm": 30.798362731933594, "learning_rate": 3.146530612244898e-05, "loss": 0.8912, "step": 9090 }, { "epoch": 37.10612244897959, "grad_norm": 44.45064163208008, "learning_rate": 3.1463265306122456e-05, "loss": 0.7123, "step": 9091 }, { "epoch": 37.11020408163265, "grad_norm": 16.566118240356445, "learning_rate": 3.146122448979592e-05, "loss": 2.5197, "step": 9092 }, { "epoch": 37.114285714285714, "grad_norm": 19.946815490722656, "learning_rate": 3.145918367346939e-05, "loss": 2.7006, "step": 9093 }, { "epoch": 37.118367346938776, "grad_norm": 24.375328063964844, "learning_rate": 3.145714285714286e-05, "loss": 2.1981, "step": 9094 }, { "epoch": 37.12244897959184, "grad_norm": 14.667604446411133, "learning_rate": 3.1455102040816324e-05, "loss": 3.0634, "step": 9095 }, { "epoch": 37.1265306122449, "grad_norm": 24.131420135498047, "learning_rate": 3.14530612244898e-05, "loss": 2.0694, "step": 9096 }, { "epoch": 37.13061224489796, "grad_norm": 21.81142807006836, "learning_rate": 3.1451020408163265e-05, "loss": 2.2373, "step": 9097 }, { "epoch": 37.13469387755102, "grad_norm": 19.3470516204834, "learning_rate": 3.144897959183674e-05, "loss": 2.4177, "step": 9098 }, { "epoch": 37.138775510204084, "grad_norm": 36.733009338378906, "learning_rate": 3.1446938775510206e-05, "loss": 1.37, "step": 9099 }, { "epoch": 37.142857142857146, "grad_norm": 19.32262420654297, "learning_rate": 3.144489795918367e-05, "loss": 2.3475, "step": 9100 }, { "epoch": 37.14693877551021, "grad_norm": 23.090593338012695, "learning_rate": 3.144285714285715e-05, "loss": 2.399, "step": 9101 }, { "epoch": 37.15102040816326, "grad_norm": 14.459794044494629, "learning_rate": 3.1440816326530614e-05, "loss": 2.7821, "step": 9102 }, { "epoch": 37.155102040816324, "grad_norm": 15.695764541625977, "learning_rate": 3.143877551020408e-05, "loss": 2.8356, "step": 9103 }, { "epoch": 37.159183673469386, "grad_norm": 15.954536437988281, "learning_rate": 3.143673469387755e-05, "loss": 2.7322, "step": 9104 }, { "epoch": 37.16326530612245, "grad_norm": 19.905853271484375, "learning_rate": 3.143469387755102e-05, "loss": 2.5634, "step": 9105 }, { "epoch": 37.16734693877551, "grad_norm": 14.823992729187012, "learning_rate": 3.143265306122449e-05, "loss": 2.7187, "step": 9106 }, { "epoch": 37.17142857142857, "grad_norm": 13.232272148132324, "learning_rate": 3.1430612244897964e-05, "loss": 2.8434, "step": 9107 }, { "epoch": 37.17551020408163, "grad_norm": 17.223966598510742, "learning_rate": 3.142857142857143e-05, "loss": 2.7956, "step": 9108 }, { "epoch": 37.179591836734694, "grad_norm": 30.415361404418945, "learning_rate": 3.14265306122449e-05, "loss": 1.4987, "step": 9109 }, { "epoch": 37.183673469387756, "grad_norm": 18.73544692993164, "learning_rate": 3.142448979591837e-05, "loss": 2.4244, "step": 9110 }, { "epoch": 37.18775510204082, "grad_norm": 31.76274299621582, "learning_rate": 3.142244897959184e-05, "loss": 2.4444, "step": 9111 }, { "epoch": 37.19183673469388, "grad_norm": 24.198062896728516, "learning_rate": 3.1420408163265306e-05, "loss": 2.007, "step": 9112 }, { "epoch": 37.19591836734694, "grad_norm": 19.518508911132812, "learning_rate": 3.141836734693877e-05, "loss": 2.3267, "step": 9113 }, { "epoch": 37.2, "grad_norm": 19.87332534790039, "learning_rate": 3.141632653061225e-05, "loss": 2.4943, "step": 9114 }, { "epoch": 37.204081632653065, "grad_norm": 31.67030143737793, "learning_rate": 3.141428571428572e-05, "loss": 1.9684, "step": 9115 }, { "epoch": 37.20816326530612, "grad_norm": 22.203323364257812, "learning_rate": 3.141224489795919e-05, "loss": 2.168, "step": 9116 }, { "epoch": 37.21224489795918, "grad_norm": 11.02291202545166, "learning_rate": 3.1410204081632655e-05, "loss": 3.1087, "step": 9117 }, { "epoch": 37.21632653061224, "grad_norm": 16.52153205871582, "learning_rate": 3.140816326530612e-05, "loss": 2.1948, "step": 9118 }, { "epoch": 37.220408163265304, "grad_norm": 35.782596588134766, "learning_rate": 3.140612244897959e-05, "loss": 1.4075, "step": 9119 }, { "epoch": 37.224489795918366, "grad_norm": 9.62512493133545, "learning_rate": 3.140408163265306e-05, "loss": 3.2266, "step": 9120 }, { "epoch": 37.22857142857143, "grad_norm": 11.406339645385742, "learning_rate": 3.140204081632653e-05, "loss": 3.2647, "step": 9121 }, { "epoch": 37.23265306122449, "grad_norm": 25.11699676513672, "learning_rate": 3.1400000000000004e-05, "loss": 1.9979, "step": 9122 }, { "epoch": 37.23673469387755, "grad_norm": 15.870360374450684, "learning_rate": 3.139795918367347e-05, "loss": 2.9627, "step": 9123 }, { "epoch": 37.24081632653061, "grad_norm": 16.808259963989258, "learning_rate": 3.139591836734694e-05, "loss": 2.704, "step": 9124 }, { "epoch": 37.244897959183675, "grad_norm": 16.12295913696289, "learning_rate": 3.139387755102041e-05, "loss": 2.9379, "step": 9125 }, { "epoch": 37.248979591836736, "grad_norm": 17.00575065612793, "learning_rate": 3.139183673469388e-05, "loss": 2.7977, "step": 9126 }, { "epoch": 37.2530612244898, "grad_norm": 33.98732376098633, "learning_rate": 3.138979591836735e-05, "loss": 2.4215, "step": 9127 }, { "epoch": 37.25714285714286, "grad_norm": 27.179668426513672, "learning_rate": 3.1387755102040814e-05, "loss": 1.92, "step": 9128 }, { "epoch": 37.26122448979592, "grad_norm": 41.046443939208984, "learning_rate": 3.138571428571429e-05, "loss": 1.6181, "step": 9129 }, { "epoch": 37.265306122448976, "grad_norm": 35.941246032714844, "learning_rate": 3.138367346938776e-05, "loss": 2.2439, "step": 9130 }, { "epoch": 37.26938775510204, "grad_norm": 21.973190307617188, "learning_rate": 3.138163265306123e-05, "loss": 2.0898, "step": 9131 }, { "epoch": 37.2734693877551, "grad_norm": 32.13368225097656, "learning_rate": 3.1379591836734696e-05, "loss": 1.3508, "step": 9132 }, { "epoch": 37.27755102040816, "grad_norm": 13.950509071350098, "learning_rate": 3.137755102040816e-05, "loss": 2.7499, "step": 9133 }, { "epoch": 37.28163265306122, "grad_norm": 21.441991806030273, "learning_rate": 3.137551020408163e-05, "loss": 2.2484, "step": 9134 }, { "epoch": 37.285714285714285, "grad_norm": 32.16775131225586, "learning_rate": 3.1373469387755104e-05, "loss": 1.3923, "step": 9135 }, { "epoch": 37.289795918367346, "grad_norm": 20.136581420898438, "learning_rate": 3.137142857142857e-05, "loss": 2.615, "step": 9136 }, { "epoch": 37.29387755102041, "grad_norm": 8.569703102111816, "learning_rate": 3.1369387755102045e-05, "loss": 3.1501, "step": 9137 }, { "epoch": 37.29795918367347, "grad_norm": 13.274460792541504, "learning_rate": 3.136734693877551e-05, "loss": 2.7893, "step": 9138 }, { "epoch": 37.30204081632653, "grad_norm": 16.704938888549805, "learning_rate": 3.136530612244898e-05, "loss": 2.8569, "step": 9139 }, { "epoch": 37.30612244897959, "grad_norm": 16.983348846435547, "learning_rate": 3.136326530612245e-05, "loss": 2.6894, "step": 9140 }, { "epoch": 37.310204081632655, "grad_norm": 15.539382934570312, "learning_rate": 3.136122448979592e-05, "loss": 2.8333, "step": 9141 }, { "epoch": 37.31428571428572, "grad_norm": 37.59119415283203, "learning_rate": 3.135918367346939e-05, "loss": 1.6612, "step": 9142 }, { "epoch": 37.31836734693878, "grad_norm": 22.02695083618164, "learning_rate": 3.1357142857142855e-05, "loss": 2.495, "step": 9143 }, { "epoch": 37.32244897959184, "grad_norm": 18.21954917907715, "learning_rate": 3.135510204081633e-05, "loss": 1.9933, "step": 9144 }, { "epoch": 37.326530612244895, "grad_norm": 42.58170700073242, "learning_rate": 3.13530612244898e-05, "loss": 1.369, "step": 9145 }, { "epoch": 37.33061224489796, "grad_norm": 24.841638565063477, "learning_rate": 3.135102040816327e-05, "loss": 2.4111, "step": 9146 }, { "epoch": 37.33469387755102, "grad_norm": 25.68368911743164, "learning_rate": 3.134897959183674e-05, "loss": 1.8945, "step": 9147 }, { "epoch": 37.33877551020408, "grad_norm": 31.222211837768555, "learning_rate": 3.1346938775510204e-05, "loss": 1.8515, "step": 9148 }, { "epoch": 37.34285714285714, "grad_norm": 55.78367614746094, "learning_rate": 3.134489795918367e-05, "loss": 1.4023, "step": 9149 }, { "epoch": 37.3469387755102, "grad_norm": 18.205394744873047, "learning_rate": 3.1342857142857145e-05, "loss": 2.142, "step": 9150 }, { "epoch": 37.351020408163265, "grad_norm": 33.07643127441406, "learning_rate": 3.134081632653061e-05, "loss": 1.5051, "step": 9151 }, { "epoch": 37.35510204081633, "grad_norm": 26.252779006958008, "learning_rate": 3.1338775510204086e-05, "loss": 2.058, "step": 9152 }, { "epoch": 37.35918367346939, "grad_norm": 24.082565307617188, "learning_rate": 3.133673469387755e-05, "loss": 2.1285, "step": 9153 }, { "epoch": 37.36326530612245, "grad_norm": 29.318283081054688, "learning_rate": 3.133469387755103e-05, "loss": 1.6604, "step": 9154 }, { "epoch": 37.36734693877551, "grad_norm": 19.815523147583008, "learning_rate": 3.1332653061224494e-05, "loss": 2.3771, "step": 9155 }, { "epoch": 37.371428571428574, "grad_norm": 23.728971481323242, "learning_rate": 3.133061224489796e-05, "loss": 1.3788, "step": 9156 }, { "epoch": 37.375510204081635, "grad_norm": 20.18566131591797, "learning_rate": 3.132857142857143e-05, "loss": 2.3139, "step": 9157 }, { "epoch": 37.3795918367347, "grad_norm": 20.672061920166016, "learning_rate": 3.1326530612244895e-05, "loss": 2.2513, "step": 9158 }, { "epoch": 37.38367346938775, "grad_norm": 24.38507080078125, "learning_rate": 3.132448979591837e-05, "loss": 1.682, "step": 9159 }, { "epoch": 37.38775510204081, "grad_norm": 30.515949249267578, "learning_rate": 3.1322448979591836e-05, "loss": 2.2974, "step": 9160 }, { "epoch": 37.391836734693875, "grad_norm": 20.95818328857422, "learning_rate": 3.132040816326531e-05, "loss": 2.7478, "step": 9161 }, { "epoch": 37.39591836734694, "grad_norm": 43.09282302856445, "learning_rate": 3.131836734693878e-05, "loss": 0.9069, "step": 9162 }, { "epoch": 37.4, "grad_norm": 28.576854705810547, "learning_rate": 3.1316326530612245e-05, "loss": 1.5556, "step": 9163 }, { "epoch": 37.40408163265306, "grad_norm": 21.173660278320312, "learning_rate": 3.131428571428572e-05, "loss": 2.3214, "step": 9164 }, { "epoch": 37.40816326530612, "grad_norm": 14.849388122558594, "learning_rate": 3.1312244897959186e-05, "loss": 2.8168, "step": 9165 }, { "epoch": 37.412244897959184, "grad_norm": 16.50362777709961, "learning_rate": 3.131020408163265e-05, "loss": 2.6234, "step": 9166 }, { "epoch": 37.416326530612245, "grad_norm": 18.757308959960938, "learning_rate": 3.130816326530612e-05, "loss": 2.6193, "step": 9167 }, { "epoch": 37.42040816326531, "grad_norm": 12.321294784545898, "learning_rate": 3.1306122448979594e-05, "loss": 3.1409, "step": 9168 }, { "epoch": 37.42448979591837, "grad_norm": 13.731840133666992, "learning_rate": 3.130408163265307e-05, "loss": 2.6973, "step": 9169 }, { "epoch": 37.42857142857143, "grad_norm": 33.2808952331543, "learning_rate": 3.1302040816326535e-05, "loss": 1.5947, "step": 9170 }, { "epoch": 37.43265306122449, "grad_norm": 30.76287841796875, "learning_rate": 3.13e-05, "loss": 1.3703, "step": 9171 }, { "epoch": 37.436734693877554, "grad_norm": 31.111303329467773, "learning_rate": 3.129795918367347e-05, "loss": 1.716, "step": 9172 }, { "epoch": 37.440816326530616, "grad_norm": 16.13011360168457, "learning_rate": 3.1295918367346936e-05, "loss": 2.5906, "step": 9173 }, { "epoch": 37.44489795918367, "grad_norm": 20.999618530273438, "learning_rate": 3.129387755102041e-05, "loss": 2.4881, "step": 9174 }, { "epoch": 37.44897959183673, "grad_norm": 22.624645233154297, "learning_rate": 3.129183673469388e-05, "loss": 2.5336, "step": 9175 }, { "epoch": 37.453061224489794, "grad_norm": 12.957050323486328, "learning_rate": 3.128979591836735e-05, "loss": 2.8849, "step": 9176 }, { "epoch": 37.457142857142856, "grad_norm": 14.618096351623535, "learning_rate": 3.128775510204082e-05, "loss": 2.7236, "step": 9177 }, { "epoch": 37.46122448979592, "grad_norm": 26.942819595336914, "learning_rate": 3.1285714285714285e-05, "loss": 2.8258, "step": 9178 }, { "epoch": 37.46530612244898, "grad_norm": 36.18353271484375, "learning_rate": 3.128367346938776e-05, "loss": 1.6596, "step": 9179 }, { "epoch": 37.46938775510204, "grad_norm": 9.93194580078125, "learning_rate": 3.1281632653061226e-05, "loss": 3.4729, "step": 9180 }, { "epoch": 37.4734693877551, "grad_norm": 18.520233154296875, "learning_rate": 3.1279591836734693e-05, "loss": 2.1009, "step": 9181 }, { "epoch": 37.477551020408164, "grad_norm": 24.781505584716797, "learning_rate": 3.127755102040816e-05, "loss": 2.3144, "step": 9182 }, { "epoch": 37.481632653061226, "grad_norm": 11.625343322753906, "learning_rate": 3.1275510204081634e-05, "loss": 2.9272, "step": 9183 }, { "epoch": 37.48571428571429, "grad_norm": 29.458147048950195, "learning_rate": 3.127346938775511e-05, "loss": 2.0957, "step": 9184 }, { "epoch": 37.48979591836735, "grad_norm": 21.199331283569336, "learning_rate": 3.1271428571428576e-05, "loss": 2.0842, "step": 9185 }, { "epoch": 37.49387755102041, "grad_norm": 24.462675094604492, "learning_rate": 3.126938775510204e-05, "loss": 1.6606, "step": 9186 }, { "epoch": 37.49795918367347, "grad_norm": 26.53451919555664, "learning_rate": 3.126734693877551e-05, "loss": 1.8579, "step": 9187 }, { "epoch": 37.50204081632653, "grad_norm": 20.75285530090332, "learning_rate": 3.126530612244898e-05, "loss": 2.6704, "step": 9188 }, { "epoch": 37.50612244897959, "grad_norm": 26.97390365600586, "learning_rate": 3.126326530612245e-05, "loss": 2.3478, "step": 9189 }, { "epoch": 37.51020408163265, "grad_norm": 16.016624450683594, "learning_rate": 3.126122448979592e-05, "loss": 2.8129, "step": 9190 }, { "epoch": 37.51428571428571, "grad_norm": 19.906829833984375, "learning_rate": 3.125918367346939e-05, "loss": 2.2492, "step": 9191 }, { "epoch": 37.518367346938774, "grad_norm": 27.900175094604492, "learning_rate": 3.125714285714286e-05, "loss": 1.7675, "step": 9192 }, { "epoch": 37.522448979591836, "grad_norm": 23.889142990112305, "learning_rate": 3.1255102040816326e-05, "loss": 1.9472, "step": 9193 }, { "epoch": 37.5265306122449, "grad_norm": 14.86342716217041, "learning_rate": 3.12530612244898e-05, "loss": 2.7631, "step": 9194 }, { "epoch": 37.53061224489796, "grad_norm": 33.39315414428711, "learning_rate": 3.125102040816327e-05, "loss": 1.5281, "step": 9195 }, { "epoch": 37.53469387755102, "grad_norm": 38.39816665649414, "learning_rate": 3.1248979591836734e-05, "loss": 1.0074, "step": 9196 }, { "epoch": 37.53877551020408, "grad_norm": 16.018543243408203, "learning_rate": 3.12469387755102e-05, "loss": 2.6806, "step": 9197 }, { "epoch": 37.542857142857144, "grad_norm": 26.153461456298828, "learning_rate": 3.1244897959183675e-05, "loss": 2.3389, "step": 9198 }, { "epoch": 37.546938775510206, "grad_norm": 21.1763858795166, "learning_rate": 3.124285714285715e-05, "loss": 2.1562, "step": 9199 }, { "epoch": 37.55102040816327, "grad_norm": 19.774599075317383, "learning_rate": 3.1240816326530616e-05, "loss": 2.2199, "step": 9200 }, { "epoch": 37.55510204081633, "grad_norm": 33.378177642822266, "learning_rate": 3.1238775510204083e-05, "loss": 1.7456, "step": 9201 }, { "epoch": 37.55918367346939, "grad_norm": 18.81965446472168, "learning_rate": 3.123673469387755e-05, "loss": 2.5732, "step": 9202 }, { "epoch": 37.563265306122446, "grad_norm": 16.09052276611328, "learning_rate": 3.1234693877551024e-05, "loss": 2.4842, "step": 9203 }, { "epoch": 37.56734693877551, "grad_norm": 24.508995056152344, "learning_rate": 3.123265306122449e-05, "loss": 2.2476, "step": 9204 }, { "epoch": 37.57142857142857, "grad_norm": 36.21653366088867, "learning_rate": 3.123061224489796e-05, "loss": 1.4822, "step": 9205 }, { "epoch": 37.57551020408163, "grad_norm": 16.115890502929688, "learning_rate": 3.122857142857143e-05, "loss": 2.6171, "step": 9206 }, { "epoch": 37.57959183673469, "grad_norm": 27.794754028320312, "learning_rate": 3.12265306122449e-05, "loss": 1.4672, "step": 9207 }, { "epoch": 37.583673469387755, "grad_norm": 15.175254821777344, "learning_rate": 3.1224489795918374e-05, "loss": 2.9154, "step": 9208 }, { "epoch": 37.587755102040816, "grad_norm": 25.093496322631836, "learning_rate": 3.122244897959184e-05, "loss": 2.1601, "step": 9209 }, { "epoch": 37.59183673469388, "grad_norm": 26.856929779052734, "learning_rate": 3.122040816326531e-05, "loss": 1.6484, "step": 9210 }, { "epoch": 37.59591836734694, "grad_norm": 13.996114730834961, "learning_rate": 3.1218367346938775e-05, "loss": 2.8568, "step": 9211 }, { "epoch": 37.6, "grad_norm": 27.648496627807617, "learning_rate": 3.121632653061224e-05, "loss": 2.0928, "step": 9212 }, { "epoch": 37.60408163265306, "grad_norm": 24.865196228027344, "learning_rate": 3.1214285714285716e-05, "loss": 2.0164, "step": 9213 }, { "epoch": 37.608163265306125, "grad_norm": 30.240947723388672, "learning_rate": 3.121224489795918e-05, "loss": 0.793, "step": 9214 }, { "epoch": 37.61224489795919, "grad_norm": 20.689390182495117, "learning_rate": 3.121020408163266e-05, "loss": 2.6567, "step": 9215 }, { "epoch": 37.61632653061225, "grad_norm": 18.438690185546875, "learning_rate": 3.1208163265306124e-05, "loss": 2.4949, "step": 9216 }, { "epoch": 37.6204081632653, "grad_norm": 17.567319869995117, "learning_rate": 3.120612244897959e-05, "loss": 2.6672, "step": 9217 }, { "epoch": 37.624489795918365, "grad_norm": 46.272216796875, "learning_rate": 3.1204081632653065e-05, "loss": 1.9392, "step": 9218 }, { "epoch": 37.628571428571426, "grad_norm": 25.527694702148438, "learning_rate": 3.120204081632653e-05, "loss": 2.3766, "step": 9219 }, { "epoch": 37.63265306122449, "grad_norm": 8.298959732055664, "learning_rate": 3.12e-05, "loss": 3.5503, "step": 9220 }, { "epoch": 37.63673469387755, "grad_norm": 11.317220687866211, "learning_rate": 3.1197959183673467e-05, "loss": 3.0247, "step": 9221 }, { "epoch": 37.64081632653061, "grad_norm": 39.72385025024414, "learning_rate": 3.119591836734694e-05, "loss": 0.9895, "step": 9222 }, { "epoch": 37.64489795918367, "grad_norm": 28.117033004760742, "learning_rate": 3.1193877551020414e-05, "loss": 2.0161, "step": 9223 }, { "epoch": 37.648979591836735, "grad_norm": 34.840782165527344, "learning_rate": 3.119183673469388e-05, "loss": 0.9872, "step": 9224 }, { "epoch": 37.6530612244898, "grad_norm": 13.504500389099121, "learning_rate": 3.118979591836735e-05, "loss": 3.0921, "step": 9225 }, { "epoch": 37.65714285714286, "grad_norm": 18.827524185180664, "learning_rate": 3.1187755102040816e-05, "loss": 2.7094, "step": 9226 }, { "epoch": 37.66122448979592, "grad_norm": 19.949857711791992, "learning_rate": 3.118571428571428e-05, "loss": 2.2141, "step": 9227 }, { "epoch": 37.66530612244898, "grad_norm": 17.33610725402832, "learning_rate": 3.118367346938776e-05, "loss": 3.1434, "step": 9228 }, { "epoch": 37.66938775510204, "grad_norm": 23.30012321472168, "learning_rate": 3.1181632653061224e-05, "loss": 2.4111, "step": 9229 }, { "epoch": 37.673469387755105, "grad_norm": 12.265673637390137, "learning_rate": 3.11795918367347e-05, "loss": 2.7633, "step": 9230 }, { "epoch": 37.67755102040816, "grad_norm": 23.45245361328125, "learning_rate": 3.1177551020408165e-05, "loss": 1.9936, "step": 9231 }, { "epoch": 37.68163265306122, "grad_norm": 27.826156616210938, "learning_rate": 3.117551020408163e-05, "loss": 1.5291, "step": 9232 }, { "epoch": 37.68571428571428, "grad_norm": 20.614757537841797, "learning_rate": 3.1173469387755106e-05, "loss": 2.147, "step": 9233 }, { "epoch": 37.689795918367345, "grad_norm": 18.97332000732422, "learning_rate": 3.117142857142857e-05, "loss": 2.6133, "step": 9234 }, { "epoch": 37.69387755102041, "grad_norm": 37.497920989990234, "learning_rate": 3.116938775510204e-05, "loss": 1.5168, "step": 9235 }, { "epoch": 37.69795918367347, "grad_norm": 14.629691123962402, "learning_rate": 3.116734693877551e-05, "loss": 2.8652, "step": 9236 }, { "epoch": 37.70204081632653, "grad_norm": 33.509056091308594, "learning_rate": 3.116530612244898e-05, "loss": 1.7227, "step": 9237 }, { "epoch": 37.70612244897959, "grad_norm": 25.39766502380371, "learning_rate": 3.1163265306122455e-05, "loss": 2.4939, "step": 9238 }, { "epoch": 37.710204081632654, "grad_norm": 18.002836227416992, "learning_rate": 3.116122448979592e-05, "loss": 2.5231, "step": 9239 }, { "epoch": 37.714285714285715, "grad_norm": 16.577367782592773, "learning_rate": 3.115918367346939e-05, "loss": 2.6601, "step": 9240 }, { "epoch": 37.71836734693878, "grad_norm": 23.769975662231445, "learning_rate": 3.1157142857142857e-05, "loss": 2.4277, "step": 9241 }, { "epoch": 37.72244897959184, "grad_norm": 27.53723907470703, "learning_rate": 3.115510204081633e-05, "loss": 1.8381, "step": 9242 }, { "epoch": 37.7265306122449, "grad_norm": 13.334917068481445, "learning_rate": 3.11530612244898e-05, "loss": 2.607, "step": 9243 }, { "epoch": 37.73061224489796, "grad_norm": 18.20914077758789, "learning_rate": 3.1151020408163265e-05, "loss": 2.6831, "step": 9244 }, { "epoch": 37.734693877551024, "grad_norm": 10.718067169189453, "learning_rate": 3.114897959183674e-05, "loss": 3.082, "step": 9245 }, { "epoch": 37.73877551020408, "grad_norm": 22.465173721313477, "learning_rate": 3.1146938775510206e-05, "loss": 2.2115, "step": 9246 }, { "epoch": 37.74285714285714, "grad_norm": 26.25851821899414, "learning_rate": 3.114489795918368e-05, "loss": 2.248, "step": 9247 }, { "epoch": 37.7469387755102, "grad_norm": 21.531667709350586, "learning_rate": 3.114285714285715e-05, "loss": 1.6438, "step": 9248 }, { "epoch": 37.751020408163264, "grad_norm": 19.937593460083008, "learning_rate": 3.1140816326530614e-05, "loss": 2.0821, "step": 9249 }, { "epoch": 37.755102040816325, "grad_norm": 13.3495512008667, "learning_rate": 3.113877551020408e-05, "loss": 2.7588, "step": 9250 }, { "epoch": 37.75918367346939, "grad_norm": 11.565564155578613, "learning_rate": 3.113673469387755e-05, "loss": 3.1581, "step": 9251 }, { "epoch": 37.76326530612245, "grad_norm": 31.689300537109375, "learning_rate": 3.113469387755102e-05, "loss": 0.8884, "step": 9252 }, { "epoch": 37.76734693877551, "grad_norm": 28.80850601196289, "learning_rate": 3.1132653061224496e-05, "loss": 1.9627, "step": 9253 }, { "epoch": 37.77142857142857, "grad_norm": 24.390100479125977, "learning_rate": 3.113061224489796e-05, "loss": 2.3949, "step": 9254 }, { "epoch": 37.775510204081634, "grad_norm": 14.09070110321045, "learning_rate": 3.112857142857143e-05, "loss": 2.8913, "step": 9255 }, { "epoch": 37.779591836734696, "grad_norm": 20.719392776489258, "learning_rate": 3.11265306122449e-05, "loss": 2.4259, "step": 9256 }, { "epoch": 37.78367346938776, "grad_norm": 14.451277732849121, "learning_rate": 3.112448979591837e-05, "loss": 2.6409, "step": 9257 }, { "epoch": 37.78775510204082, "grad_norm": 26.55877113342285, "learning_rate": 3.112244897959184e-05, "loss": 2.4409, "step": 9258 }, { "epoch": 37.79183673469388, "grad_norm": 22.864315032958984, "learning_rate": 3.1120408163265305e-05, "loss": 2.2361, "step": 9259 }, { "epoch": 37.795918367346935, "grad_norm": 19.21999168395996, "learning_rate": 3.111836734693878e-05, "loss": 1.9164, "step": 9260 }, { "epoch": 37.8, "grad_norm": 25.1397647857666, "learning_rate": 3.1116326530612246e-05, "loss": 1.5608, "step": 9261 }, { "epoch": 37.80408163265306, "grad_norm": 24.8808650970459, "learning_rate": 3.111428571428572e-05, "loss": 1.989, "step": 9262 }, { "epoch": 37.80816326530612, "grad_norm": 20.536258697509766, "learning_rate": 3.111224489795919e-05, "loss": 0.2236, "step": 9263 }, { "epoch": 37.81224489795918, "grad_norm": 13.367637634277344, "learning_rate": 3.1110204081632655e-05, "loss": 2.7013, "step": 9264 }, { "epoch": 37.816326530612244, "grad_norm": 14.435465812683105, "learning_rate": 3.110816326530612e-05, "loss": 2.8942, "step": 9265 }, { "epoch": 37.820408163265306, "grad_norm": 25.474763870239258, "learning_rate": 3.110612244897959e-05, "loss": 1.5945, "step": 9266 }, { "epoch": 37.82448979591837, "grad_norm": 29.245820999145508, "learning_rate": 3.110408163265306e-05, "loss": 1.7898, "step": 9267 }, { "epoch": 37.82857142857143, "grad_norm": 10.573956489562988, "learning_rate": 3.110204081632653e-05, "loss": 3.3464, "step": 9268 }, { "epoch": 37.83265306122449, "grad_norm": 36.02583312988281, "learning_rate": 3.1100000000000004e-05, "loss": 1.7055, "step": 9269 }, { "epoch": 37.83673469387755, "grad_norm": 14.49129581451416, "learning_rate": 3.109795918367347e-05, "loss": 2.7329, "step": 9270 }, { "epoch": 37.840816326530614, "grad_norm": 33.03559875488281, "learning_rate": 3.109591836734694e-05, "loss": 1.6154, "step": 9271 }, { "epoch": 37.844897959183676, "grad_norm": 25.34282684326172, "learning_rate": 3.109387755102041e-05, "loss": 2.0521, "step": 9272 }, { "epoch": 37.84897959183674, "grad_norm": 19.953357696533203, "learning_rate": 3.109183673469388e-05, "loss": 2.3574, "step": 9273 }, { "epoch": 37.85306122448979, "grad_norm": 17.394296646118164, "learning_rate": 3.1089795918367346e-05, "loss": 2.6904, "step": 9274 }, { "epoch": 37.857142857142854, "grad_norm": 23.109651565551758, "learning_rate": 3.108775510204081e-05, "loss": 2.3857, "step": 9275 }, { "epoch": 37.861224489795916, "grad_norm": 30.235071182250977, "learning_rate": 3.108571428571429e-05, "loss": 0.7911, "step": 9276 }, { "epoch": 37.86530612244898, "grad_norm": 23.403806686401367, "learning_rate": 3.108367346938776e-05, "loss": 2.3338, "step": 9277 }, { "epoch": 37.86938775510204, "grad_norm": 13.085829734802246, "learning_rate": 3.108163265306123e-05, "loss": 2.7144, "step": 9278 }, { "epoch": 37.8734693877551, "grad_norm": 32.49026870727539, "learning_rate": 3.1079591836734695e-05, "loss": 0.6353, "step": 9279 }, { "epoch": 37.87755102040816, "grad_norm": 10.405059814453125, "learning_rate": 3.107755102040816e-05, "loss": 3.1545, "step": 9280 }, { "epoch": 37.881632653061224, "grad_norm": 13.898362159729004, "learning_rate": 3.107551020408163e-05, "loss": 3.1671, "step": 9281 }, { "epoch": 37.885714285714286, "grad_norm": 32.84690475463867, "learning_rate": 3.1073469387755104e-05, "loss": 1.1598, "step": 9282 }, { "epoch": 37.88979591836735, "grad_norm": 11.667882919311523, "learning_rate": 3.107142857142857e-05, "loss": 3.2645, "step": 9283 }, { "epoch": 37.89387755102041, "grad_norm": 16.903362274169922, "learning_rate": 3.1069387755102045e-05, "loss": 2.2214, "step": 9284 }, { "epoch": 37.89795918367347, "grad_norm": 13.793554306030273, "learning_rate": 3.106734693877551e-05, "loss": 3.11, "step": 9285 }, { "epoch": 37.90204081632653, "grad_norm": 40.98135757446289, "learning_rate": 3.106530612244898e-05, "loss": 0.8168, "step": 9286 }, { "epoch": 37.906122448979595, "grad_norm": 17.472169876098633, "learning_rate": 3.106326530612245e-05, "loss": 2.5387, "step": 9287 }, { "epoch": 37.910204081632656, "grad_norm": 23.329137802124023, "learning_rate": 3.106122448979592e-05, "loss": 2.0938, "step": 9288 }, { "epoch": 37.91428571428571, "grad_norm": 39.480613708496094, "learning_rate": 3.105918367346939e-05, "loss": 1.8945, "step": 9289 }, { "epoch": 37.91836734693877, "grad_norm": 11.479887962341309, "learning_rate": 3.1057142857142854e-05, "loss": 3.0896, "step": 9290 }, { "epoch": 37.922448979591834, "grad_norm": 15.18136215209961, "learning_rate": 3.105510204081633e-05, "loss": 2.9178, "step": 9291 }, { "epoch": 37.926530612244896, "grad_norm": 15.604975700378418, "learning_rate": 3.10530612244898e-05, "loss": 2.5081, "step": 9292 }, { "epoch": 37.93061224489796, "grad_norm": 18.756074905395508, "learning_rate": 3.105102040816327e-05, "loss": 1.8162, "step": 9293 }, { "epoch": 37.93469387755102, "grad_norm": 29.569416046142578, "learning_rate": 3.1048979591836736e-05, "loss": 1.7284, "step": 9294 }, { "epoch": 37.93877551020408, "grad_norm": 20.819095611572266, "learning_rate": 3.10469387755102e-05, "loss": 2.5025, "step": 9295 }, { "epoch": 37.94285714285714, "grad_norm": 32.424224853515625, "learning_rate": 3.104489795918368e-05, "loss": 1.1499, "step": 9296 }, { "epoch": 37.946938775510205, "grad_norm": 37.4986686706543, "learning_rate": 3.1042857142857144e-05, "loss": 2.2129, "step": 9297 }, { "epoch": 37.95102040816327, "grad_norm": 15.310603141784668, "learning_rate": 3.104081632653061e-05, "loss": 2.7562, "step": 9298 }, { "epoch": 37.95510204081633, "grad_norm": 34.79362869262695, "learning_rate": 3.1038775510204085e-05, "loss": 0.7777, "step": 9299 }, { "epoch": 37.95918367346939, "grad_norm": 16.845935821533203, "learning_rate": 3.103673469387755e-05, "loss": 2.6362, "step": 9300 }, { "epoch": 37.96326530612245, "grad_norm": 15.573484420776367, "learning_rate": 3.1034693877551026e-05, "loss": 2.7199, "step": 9301 }, { "epoch": 37.96734693877551, "grad_norm": 33.953487396240234, "learning_rate": 3.1032653061224493e-05, "loss": 1.523, "step": 9302 }, { "epoch": 37.97142857142857, "grad_norm": 32.067020416259766, "learning_rate": 3.103061224489796e-05, "loss": 0.519, "step": 9303 }, { "epoch": 37.97551020408163, "grad_norm": 34.89740753173828, "learning_rate": 3.102857142857143e-05, "loss": 1.449, "step": 9304 }, { "epoch": 37.97959183673469, "grad_norm": 22.982114791870117, "learning_rate": 3.1026530612244895e-05, "loss": 2.3645, "step": 9305 }, { "epoch": 37.98367346938775, "grad_norm": 17.017072677612305, "learning_rate": 3.102448979591837e-05, "loss": 2.8067, "step": 9306 }, { "epoch": 37.987755102040815, "grad_norm": 34.41337203979492, "learning_rate": 3.102244897959184e-05, "loss": 2.0162, "step": 9307 }, { "epoch": 37.99183673469388, "grad_norm": 33.609500885009766, "learning_rate": 3.102040816326531e-05, "loss": 1.6115, "step": 9308 }, { "epoch": 37.99591836734694, "grad_norm": 12.574256896972656, "learning_rate": 3.101836734693878e-05, "loss": 2.7233, "step": 9309 }, { "epoch": 38.0, "grad_norm": 13.09484577178955, "learning_rate": 3.1016326530612244e-05, "loss": 3.0741, "step": 9310 }, { "epoch": 38.00408163265306, "grad_norm": 10.409232139587402, "learning_rate": 3.101428571428572e-05, "loss": 3.1847, "step": 9311 }, { "epoch": 38.00816326530612, "grad_norm": 20.72646141052246, "learning_rate": 3.1012244897959185e-05, "loss": 1.9783, "step": 9312 }, { "epoch": 38.012244897959185, "grad_norm": 18.24199104309082, "learning_rate": 3.101020408163265e-05, "loss": 2.359, "step": 9313 }, { "epoch": 38.01632653061225, "grad_norm": 17.737152099609375, "learning_rate": 3.1008163265306126e-05, "loss": 0.1696, "step": 9314 }, { "epoch": 38.02040816326531, "grad_norm": 17.864320755004883, "learning_rate": 3.100612244897959e-05, "loss": 2.4028, "step": 9315 }, { "epoch": 38.02448979591837, "grad_norm": 40.371829986572266, "learning_rate": 3.100408163265307e-05, "loss": 1.8805, "step": 9316 }, { "epoch": 38.02857142857143, "grad_norm": 10.93863296508789, "learning_rate": 3.1002040816326534e-05, "loss": 3.0578, "step": 9317 }, { "epoch": 38.03265306122449, "grad_norm": 32.37895584106445, "learning_rate": 3.1e-05, "loss": 1.9735, "step": 9318 }, { "epoch": 38.03673469387755, "grad_norm": 13.748659133911133, "learning_rate": 3.099795918367347e-05, "loss": 2.6356, "step": 9319 }, { "epoch": 38.04081632653061, "grad_norm": 14.559189796447754, "learning_rate": 3.0995918367346936e-05, "loss": 2.7071, "step": 9320 }, { "epoch": 38.04489795918367, "grad_norm": 19.797252655029297, "learning_rate": 3.099387755102041e-05, "loss": 2.0729, "step": 9321 }, { "epoch": 38.04897959183673, "grad_norm": 35.87255859375, "learning_rate": 3.099183673469388e-05, "loss": 1.7117, "step": 9322 }, { "epoch": 38.053061224489795, "grad_norm": 35.74061584472656, "learning_rate": 3.098979591836735e-05, "loss": 1.3969, "step": 9323 }, { "epoch": 38.05714285714286, "grad_norm": 35.60564041137695, "learning_rate": 3.098775510204082e-05, "loss": 1.3301, "step": 9324 }, { "epoch": 38.06122448979592, "grad_norm": 23.700428009033203, "learning_rate": 3.0985714285714285e-05, "loss": 1.8579, "step": 9325 }, { "epoch": 38.06530612244898, "grad_norm": 16.681488037109375, "learning_rate": 3.098367346938776e-05, "loss": 2.6868, "step": 9326 }, { "epoch": 38.06938775510204, "grad_norm": 13.806883811950684, "learning_rate": 3.0981632653061226e-05, "loss": 2.7448, "step": 9327 }, { "epoch": 38.073469387755104, "grad_norm": 27.934682846069336, "learning_rate": 3.097959183673469e-05, "loss": 1.8843, "step": 9328 }, { "epoch": 38.077551020408166, "grad_norm": 21.767101287841797, "learning_rate": 3.097755102040816e-05, "loss": 2.678, "step": 9329 }, { "epoch": 38.08163265306123, "grad_norm": 29.673633575439453, "learning_rate": 3.0975510204081634e-05, "loss": 1.8516, "step": 9330 }, { "epoch": 38.08571428571429, "grad_norm": 38.970680236816406, "learning_rate": 3.097346938775511e-05, "loss": 2.2285, "step": 9331 }, { "epoch": 38.089795918367344, "grad_norm": 16.539281845092773, "learning_rate": 3.0971428571428575e-05, "loss": 3.0514, "step": 9332 }, { "epoch": 38.093877551020405, "grad_norm": 17.484006881713867, "learning_rate": 3.096938775510204e-05, "loss": 2.8797, "step": 9333 }, { "epoch": 38.09795918367347, "grad_norm": 23.351699829101562, "learning_rate": 3.096734693877551e-05, "loss": 2.2945, "step": 9334 }, { "epoch": 38.10204081632653, "grad_norm": 14.507972717285156, "learning_rate": 3.096530612244898e-05, "loss": 2.7562, "step": 9335 }, { "epoch": 38.10612244897959, "grad_norm": 34.36561965942383, "learning_rate": 3.096326530612245e-05, "loss": 1.7619, "step": 9336 }, { "epoch": 38.11020408163265, "grad_norm": 26.512596130371094, "learning_rate": 3.096122448979592e-05, "loss": 2.391, "step": 9337 }, { "epoch": 38.114285714285714, "grad_norm": 28.695634841918945, "learning_rate": 3.095918367346939e-05, "loss": 1.282, "step": 9338 }, { "epoch": 38.118367346938776, "grad_norm": 22.15573501586914, "learning_rate": 3.095714285714286e-05, "loss": 2.365, "step": 9339 }, { "epoch": 38.12244897959184, "grad_norm": 13.401910781860352, "learning_rate": 3.095510204081633e-05, "loss": 2.6013, "step": 9340 }, { "epoch": 38.1265306122449, "grad_norm": 18.995304107666016, "learning_rate": 3.09530612244898e-05, "loss": 2.2367, "step": 9341 }, { "epoch": 38.13061224489796, "grad_norm": 13.70223331451416, "learning_rate": 3.0951020408163267e-05, "loss": 2.7717, "step": 9342 }, { "epoch": 38.13469387755102, "grad_norm": 16.05571746826172, "learning_rate": 3.0948979591836734e-05, "loss": 2.5041, "step": 9343 }, { "epoch": 38.138775510204084, "grad_norm": 13.48037052154541, "learning_rate": 3.09469387755102e-05, "loss": 3.026, "step": 9344 }, { "epoch": 38.142857142857146, "grad_norm": 37.25597381591797, "learning_rate": 3.0944897959183675e-05, "loss": 1.5643, "step": 9345 }, { "epoch": 38.14693877551021, "grad_norm": 14.850790023803711, "learning_rate": 3.094285714285715e-05, "loss": 2.5017, "step": 9346 }, { "epoch": 38.15102040816326, "grad_norm": 10.027629852294922, "learning_rate": 3.0940816326530616e-05, "loss": 3.0363, "step": 9347 }, { "epoch": 38.155102040816324, "grad_norm": 11.995121002197266, "learning_rate": 3.093877551020408e-05, "loss": 3.0887, "step": 9348 }, { "epoch": 38.159183673469386, "grad_norm": 17.767871856689453, "learning_rate": 3.093673469387755e-05, "loss": 2.7697, "step": 9349 }, { "epoch": 38.16326530612245, "grad_norm": 14.798981666564941, "learning_rate": 3.0934693877551024e-05, "loss": 2.7624, "step": 9350 }, { "epoch": 38.16734693877551, "grad_norm": 12.941404342651367, "learning_rate": 3.093265306122449e-05, "loss": 2.8849, "step": 9351 }, { "epoch": 38.17142857142857, "grad_norm": 14.445703506469727, "learning_rate": 3.093061224489796e-05, "loss": 2.8221, "step": 9352 }, { "epoch": 38.17551020408163, "grad_norm": 30.307313919067383, "learning_rate": 3.092857142857143e-05, "loss": 1.3055, "step": 9353 }, { "epoch": 38.179591836734694, "grad_norm": 23.70625114440918, "learning_rate": 3.09265306122449e-05, "loss": 2.0657, "step": 9354 }, { "epoch": 38.183673469387756, "grad_norm": 18.4305362701416, "learning_rate": 3.092448979591837e-05, "loss": 2.5887, "step": 9355 }, { "epoch": 38.18775510204082, "grad_norm": 37.639408111572266, "learning_rate": 3.092244897959184e-05, "loss": 2.0508, "step": 9356 }, { "epoch": 38.19183673469388, "grad_norm": 18.32086753845215, "learning_rate": 3.092040816326531e-05, "loss": 2.5396, "step": 9357 }, { "epoch": 38.19591836734694, "grad_norm": 35.36088943481445, "learning_rate": 3.0918367346938774e-05, "loss": 0.8929, "step": 9358 }, { "epoch": 38.2, "grad_norm": 15.818178176879883, "learning_rate": 3.091632653061224e-05, "loss": 2.7005, "step": 9359 }, { "epoch": 38.204081632653065, "grad_norm": 24.770822525024414, "learning_rate": 3.0914285714285715e-05, "loss": 2.5037, "step": 9360 }, { "epoch": 38.20816326530612, "grad_norm": 34.829986572265625, "learning_rate": 3.091224489795919e-05, "loss": 1.4955, "step": 9361 }, { "epoch": 38.21224489795918, "grad_norm": 16.528358459472656, "learning_rate": 3.0910204081632657e-05, "loss": 2.452, "step": 9362 }, { "epoch": 38.21632653061224, "grad_norm": 26.562467575073242, "learning_rate": 3.0908163265306124e-05, "loss": 1.8861, "step": 9363 }, { "epoch": 38.220408163265304, "grad_norm": 16.75884246826172, "learning_rate": 3.090612244897959e-05, "loss": 2.3897, "step": 9364 }, { "epoch": 38.224489795918366, "grad_norm": 17.539405822753906, "learning_rate": 3.0904081632653065e-05, "loss": 2.1999, "step": 9365 }, { "epoch": 38.22857142857143, "grad_norm": 41.64536666870117, "learning_rate": 3.090204081632653e-05, "loss": 1.3338, "step": 9366 }, { "epoch": 38.23265306122449, "grad_norm": 12.423835754394531, "learning_rate": 3.09e-05, "loss": 3.0349, "step": 9367 }, { "epoch": 38.23673469387755, "grad_norm": 26.718788146972656, "learning_rate": 3.0897959183673466e-05, "loss": 1.9324, "step": 9368 }, { "epoch": 38.24081632653061, "grad_norm": 24.69156837463379, "learning_rate": 3.089591836734694e-05, "loss": 2.1475, "step": 9369 }, { "epoch": 38.244897959183675, "grad_norm": 29.305814743041992, "learning_rate": 3.0893877551020414e-05, "loss": 1.4838, "step": 9370 }, { "epoch": 38.248979591836736, "grad_norm": 18.564674377441406, "learning_rate": 3.089183673469388e-05, "loss": 2.4277, "step": 9371 }, { "epoch": 38.2530612244898, "grad_norm": 29.38020896911621, "learning_rate": 3.088979591836735e-05, "loss": 1.5957, "step": 9372 }, { "epoch": 38.25714285714286, "grad_norm": 37.596378326416016, "learning_rate": 3.0887755102040815e-05, "loss": 1.3401, "step": 9373 }, { "epoch": 38.26122448979592, "grad_norm": 27.159473419189453, "learning_rate": 3.088571428571428e-05, "loss": 1.9071, "step": 9374 }, { "epoch": 38.265306122448976, "grad_norm": 17.589637756347656, "learning_rate": 3.0883673469387756e-05, "loss": 2.4924, "step": 9375 }, { "epoch": 38.26938775510204, "grad_norm": 46.63060760498047, "learning_rate": 3.088163265306122e-05, "loss": 0.7243, "step": 9376 }, { "epoch": 38.2734693877551, "grad_norm": 23.97014045715332, "learning_rate": 3.08795918367347e-05, "loss": 2.282, "step": 9377 }, { "epoch": 38.27755102040816, "grad_norm": 23.24009895324707, "learning_rate": 3.0877551020408164e-05, "loss": 2.4902, "step": 9378 }, { "epoch": 38.28163265306122, "grad_norm": 55.393592834472656, "learning_rate": 3.087551020408164e-05, "loss": 1.4885, "step": 9379 }, { "epoch": 38.285714285714285, "grad_norm": 19.0430908203125, "learning_rate": 3.0873469387755105e-05, "loss": 2.1322, "step": 9380 }, { "epoch": 38.289795918367346, "grad_norm": 39.60184860229492, "learning_rate": 3.087142857142857e-05, "loss": 0.8826, "step": 9381 }, { "epoch": 38.29387755102041, "grad_norm": 26.42776107788086, "learning_rate": 3.086938775510204e-05, "loss": 2.3955, "step": 9382 }, { "epoch": 38.29795918367347, "grad_norm": 40.840579986572266, "learning_rate": 3.086734693877551e-05, "loss": 1.5966, "step": 9383 }, { "epoch": 38.30204081632653, "grad_norm": 28.931400299072266, "learning_rate": 3.086530612244898e-05, "loss": 1.5791, "step": 9384 }, { "epoch": 38.30612244897959, "grad_norm": 19.44843292236328, "learning_rate": 3.0863265306122455e-05, "loss": 2.1723, "step": 9385 }, { "epoch": 38.310204081632655, "grad_norm": 22.237319946289062, "learning_rate": 3.086122448979592e-05, "loss": 1.5328, "step": 9386 }, { "epoch": 38.31428571428572, "grad_norm": 15.435912132263184, "learning_rate": 3.085918367346939e-05, "loss": 2.7237, "step": 9387 }, { "epoch": 38.31836734693878, "grad_norm": 17.547489166259766, "learning_rate": 3.0857142857142856e-05, "loss": 2.6326, "step": 9388 }, { "epoch": 38.32244897959184, "grad_norm": 22.822751998901367, "learning_rate": 3.085510204081633e-05, "loss": 2.1065, "step": 9389 }, { "epoch": 38.326530612244895, "grad_norm": 16.524381637573242, "learning_rate": 3.08530612244898e-05, "loss": 2.1058, "step": 9390 }, { "epoch": 38.33061224489796, "grad_norm": 39.60444259643555, "learning_rate": 3.0851020408163264e-05, "loss": 1.5911, "step": 9391 }, { "epoch": 38.33469387755102, "grad_norm": 24.31441879272461, "learning_rate": 3.084897959183674e-05, "loss": 2.6923, "step": 9392 }, { "epoch": 38.33877551020408, "grad_norm": 14.96029281616211, "learning_rate": 3.0846938775510205e-05, "loss": 3.1012, "step": 9393 }, { "epoch": 38.34285714285714, "grad_norm": 22.84246826171875, "learning_rate": 3.084489795918368e-05, "loss": 2.231, "step": 9394 }, { "epoch": 38.3469387755102, "grad_norm": 41.15449142456055, "learning_rate": 3.0842857142857146e-05, "loss": 2.5013, "step": 9395 }, { "epoch": 38.351020408163265, "grad_norm": 18.97922706604004, "learning_rate": 3.084081632653061e-05, "loss": 2.5989, "step": 9396 }, { "epoch": 38.35510204081633, "grad_norm": 37.7940559387207, "learning_rate": 3.083877551020408e-05, "loss": 1.621, "step": 9397 }, { "epoch": 38.35918367346939, "grad_norm": 15.977751731872559, "learning_rate": 3.083673469387755e-05, "loss": 2.9296, "step": 9398 }, { "epoch": 38.36326530612245, "grad_norm": 32.384132385253906, "learning_rate": 3.083469387755102e-05, "loss": 1.4417, "step": 9399 }, { "epoch": 38.36734693877551, "grad_norm": 22.131752014160156, "learning_rate": 3.0832653061224495e-05, "loss": 2.4792, "step": 9400 }, { "epoch": 38.371428571428574, "grad_norm": 26.940359115600586, "learning_rate": 3.083061224489796e-05, "loss": 2.4042, "step": 9401 }, { "epoch": 38.375510204081635, "grad_norm": 26.975204467773438, "learning_rate": 3.082857142857143e-05, "loss": 1.6724, "step": 9402 }, { "epoch": 38.3795918367347, "grad_norm": 28.53169059753418, "learning_rate": 3.08265306122449e-05, "loss": 0.7099, "step": 9403 }, { "epoch": 38.38367346938775, "grad_norm": 16.787071228027344, "learning_rate": 3.082448979591837e-05, "loss": 2.5497, "step": 9404 }, { "epoch": 38.38775510204081, "grad_norm": 29.54938316345215, "learning_rate": 3.082244897959184e-05, "loss": 1.6332, "step": 9405 }, { "epoch": 38.391836734693875, "grad_norm": 31.97853660583496, "learning_rate": 3.0820408163265305e-05, "loss": 1.106, "step": 9406 }, { "epoch": 38.39591836734694, "grad_norm": 29.89733123779297, "learning_rate": 3.081836734693878e-05, "loss": 1.9621, "step": 9407 }, { "epoch": 38.4, "grad_norm": 42.5813102722168, "learning_rate": 3.0816326530612246e-05, "loss": 0.8282, "step": 9408 }, { "epoch": 38.40408163265306, "grad_norm": 30.42380142211914, "learning_rate": 3.081428571428572e-05, "loss": 0.8458, "step": 9409 }, { "epoch": 38.40816326530612, "grad_norm": 56.96806335449219, "learning_rate": 3.081224489795919e-05, "loss": 0.6887, "step": 9410 }, { "epoch": 38.412244897959184, "grad_norm": 20.644302368164062, "learning_rate": 3.0810204081632654e-05, "loss": 2.5513, "step": 9411 }, { "epoch": 38.416326530612245, "grad_norm": 20.22519302368164, "learning_rate": 3.080816326530612e-05, "loss": 2.5804, "step": 9412 }, { "epoch": 38.42040816326531, "grad_norm": 32.777374267578125, "learning_rate": 3.080612244897959e-05, "loss": 1.9558, "step": 9413 }, { "epoch": 38.42448979591837, "grad_norm": 16.400760650634766, "learning_rate": 3.080408163265306e-05, "loss": 2.8078, "step": 9414 }, { "epoch": 38.42857142857143, "grad_norm": 36.34579849243164, "learning_rate": 3.0802040816326536e-05, "loss": 2.2966, "step": 9415 }, { "epoch": 38.43265306122449, "grad_norm": 28.98971939086914, "learning_rate": 3.08e-05, "loss": 1.617, "step": 9416 }, { "epoch": 38.436734693877554, "grad_norm": 13.000876426696777, "learning_rate": 3.079795918367347e-05, "loss": 2.6359, "step": 9417 }, { "epoch": 38.440816326530616, "grad_norm": 31.481399536132812, "learning_rate": 3.079591836734694e-05, "loss": 1.6256, "step": 9418 }, { "epoch": 38.44489795918367, "grad_norm": 13.071050643920898, "learning_rate": 3.079387755102041e-05, "loss": 2.7476, "step": 9419 }, { "epoch": 38.44897959183673, "grad_norm": 11.553114891052246, "learning_rate": 3.079183673469388e-05, "loss": 3.2788, "step": 9420 }, { "epoch": 38.453061224489794, "grad_norm": 25.569833755493164, "learning_rate": 3.0789795918367346e-05, "loss": 2.045, "step": 9421 }, { "epoch": 38.457142857142856, "grad_norm": 11.858344078063965, "learning_rate": 3.078775510204081e-05, "loss": 3.1822, "step": 9422 }, { "epoch": 38.46122448979592, "grad_norm": 37.25859832763672, "learning_rate": 3.078571428571429e-05, "loss": 2.475, "step": 9423 }, { "epoch": 38.46530612244898, "grad_norm": 19.45204734802246, "learning_rate": 3.078367346938776e-05, "loss": 2.5933, "step": 9424 }, { "epoch": 38.46938775510204, "grad_norm": 9.762797355651855, "learning_rate": 3.078163265306123e-05, "loss": 3.4423, "step": 9425 }, { "epoch": 38.4734693877551, "grad_norm": 31.16579246520996, "learning_rate": 3.0779591836734695e-05, "loss": 1.3549, "step": 9426 }, { "epoch": 38.477551020408164, "grad_norm": 19.959596633911133, "learning_rate": 3.077755102040816e-05, "loss": 2.2797, "step": 9427 }, { "epoch": 38.481632653061226, "grad_norm": 9.21632194519043, "learning_rate": 3.0775510204081636e-05, "loss": 3.1757, "step": 9428 }, { "epoch": 38.48571428571429, "grad_norm": 18.647550582885742, "learning_rate": 3.07734693877551e-05, "loss": 2.4528, "step": 9429 }, { "epoch": 38.48979591836735, "grad_norm": 25.16813850402832, "learning_rate": 3.077142857142857e-05, "loss": 1.8102, "step": 9430 }, { "epoch": 38.49387755102041, "grad_norm": 19.937973022460938, "learning_rate": 3.0769387755102044e-05, "loss": 2.2077, "step": 9431 }, { "epoch": 38.49795918367347, "grad_norm": 15.828826904296875, "learning_rate": 3.076734693877551e-05, "loss": 2.6787, "step": 9432 }, { "epoch": 38.50204081632653, "grad_norm": 27.459735870361328, "learning_rate": 3.0765306122448985e-05, "loss": 1.5513, "step": 9433 }, { "epoch": 38.50612244897959, "grad_norm": 18.337465286254883, "learning_rate": 3.076326530612245e-05, "loss": 2.6173, "step": 9434 }, { "epoch": 38.51020408163265, "grad_norm": 22.094404220581055, "learning_rate": 3.076122448979592e-05, "loss": 2.1722, "step": 9435 }, { "epoch": 38.51428571428571, "grad_norm": 16.27924156188965, "learning_rate": 3.0759183673469386e-05, "loss": 2.8035, "step": 9436 }, { "epoch": 38.518367346938774, "grad_norm": 23.660388946533203, "learning_rate": 3.0757142857142854e-05, "loss": 2.3046, "step": 9437 }, { "epoch": 38.522448979591836, "grad_norm": 21.879335403442383, "learning_rate": 3.075510204081633e-05, "loss": 1.7159, "step": 9438 }, { "epoch": 38.5265306122449, "grad_norm": 28.412052154541016, "learning_rate": 3.07530612244898e-05, "loss": 2.1554, "step": 9439 }, { "epoch": 38.53061224489796, "grad_norm": 26.55883026123047, "learning_rate": 3.075102040816327e-05, "loss": 2.1314, "step": 9440 }, { "epoch": 38.53469387755102, "grad_norm": 23.80962562561035, "learning_rate": 3.0748979591836736e-05, "loss": 2.3237, "step": 9441 }, { "epoch": 38.53877551020408, "grad_norm": 37.87883377075195, "learning_rate": 3.07469387755102e-05, "loss": 1.398, "step": 9442 }, { "epoch": 38.542857142857144, "grad_norm": 15.192158699035645, "learning_rate": 3.074489795918368e-05, "loss": 2.567, "step": 9443 }, { "epoch": 38.546938775510206, "grad_norm": 26.44880485534668, "learning_rate": 3.0742857142857144e-05, "loss": 2.0387, "step": 9444 }, { "epoch": 38.55102040816327, "grad_norm": 22.541576385498047, "learning_rate": 3.074081632653061e-05, "loss": 1.9867, "step": 9445 }, { "epoch": 38.55510204081633, "grad_norm": 13.505271911621094, "learning_rate": 3.0738775510204085e-05, "loss": 2.7079, "step": 9446 }, { "epoch": 38.55918367346939, "grad_norm": 26.493864059448242, "learning_rate": 3.073673469387755e-05, "loss": 2.3424, "step": 9447 }, { "epoch": 38.563265306122446, "grad_norm": 38.8054084777832, "learning_rate": 3.0734693877551026e-05, "loss": 1.527, "step": 9448 }, { "epoch": 38.56734693877551, "grad_norm": 37.2396240234375, "learning_rate": 3.073265306122449e-05, "loss": 0.7086, "step": 9449 }, { "epoch": 38.57142857142857, "grad_norm": 15.105864524841309, "learning_rate": 3.073061224489796e-05, "loss": 2.8438, "step": 9450 }, { "epoch": 38.57551020408163, "grad_norm": 15.672220230102539, "learning_rate": 3.072857142857143e-05, "loss": 2.7606, "step": 9451 }, { "epoch": 38.57959183673469, "grad_norm": 27.503108978271484, "learning_rate": 3.0726530612244894e-05, "loss": 2.3373, "step": 9452 }, { "epoch": 38.583673469387755, "grad_norm": 15.401588439941406, "learning_rate": 3.072448979591837e-05, "loss": 2.7834, "step": 9453 }, { "epoch": 38.587755102040816, "grad_norm": 25.360265731811523, "learning_rate": 3.072244897959184e-05, "loss": 2.1939, "step": 9454 }, { "epoch": 38.59183673469388, "grad_norm": 25.26331901550293, "learning_rate": 3.072040816326531e-05, "loss": 2.2925, "step": 9455 }, { "epoch": 38.59591836734694, "grad_norm": 19.84300422668457, "learning_rate": 3.0718367346938776e-05, "loss": 2.3884, "step": 9456 }, { "epoch": 38.6, "grad_norm": 12.64482307434082, "learning_rate": 3.0716326530612244e-05, "loss": 2.8809, "step": 9457 }, { "epoch": 38.60408163265306, "grad_norm": 28.004119873046875, "learning_rate": 3.071428571428572e-05, "loss": 1.9201, "step": 9458 }, { "epoch": 38.608163265306125, "grad_norm": 12.768526077270508, "learning_rate": 3.0712244897959185e-05, "loss": 2.8377, "step": 9459 }, { "epoch": 38.61224489795919, "grad_norm": 18.94879150390625, "learning_rate": 3.071020408163265e-05, "loss": 2.132, "step": 9460 }, { "epoch": 38.61632653061225, "grad_norm": 33.40476989746094, "learning_rate": 3.0708163265306126e-05, "loss": 0.7812, "step": 9461 }, { "epoch": 38.6204081632653, "grad_norm": 35.669673919677734, "learning_rate": 3.070612244897959e-05, "loss": 0.7652, "step": 9462 }, { "epoch": 38.624489795918365, "grad_norm": 26.542509078979492, "learning_rate": 3.0704081632653067e-05, "loss": 2.1393, "step": 9463 }, { "epoch": 38.628571428571426, "grad_norm": 11.466429710388184, "learning_rate": 3.0702040816326534e-05, "loss": 3.163, "step": 9464 }, { "epoch": 38.63265306122449, "grad_norm": 13.287069320678711, "learning_rate": 3.07e-05, "loss": 2.6599, "step": 9465 }, { "epoch": 38.63673469387755, "grad_norm": 17.082334518432617, "learning_rate": 3.069795918367347e-05, "loss": 2.8423, "step": 9466 }, { "epoch": 38.64081632653061, "grad_norm": 28.991975784301758, "learning_rate": 3.0695918367346935e-05, "loss": 2.0922, "step": 9467 }, { "epoch": 38.64489795918367, "grad_norm": 26.830659866333008, "learning_rate": 3.069387755102041e-05, "loss": 1.9658, "step": 9468 }, { "epoch": 38.648979591836735, "grad_norm": 37.23932647705078, "learning_rate": 3.069183673469388e-05, "loss": 1.7009, "step": 9469 }, { "epoch": 38.6530612244898, "grad_norm": 16.500228881835938, "learning_rate": 3.068979591836735e-05, "loss": 3.0637, "step": 9470 }, { "epoch": 38.65714285714286, "grad_norm": 47.1360969543457, "learning_rate": 3.068775510204082e-05, "loss": 1.4436, "step": 9471 }, { "epoch": 38.66122448979592, "grad_norm": 30.989572525024414, "learning_rate": 3.068571428571429e-05, "loss": 2.4783, "step": 9472 }, { "epoch": 38.66530612244898, "grad_norm": 28.025400161743164, "learning_rate": 3.068367346938776e-05, "loss": 1.7166, "step": 9473 }, { "epoch": 38.66938775510204, "grad_norm": 24.892507553100586, "learning_rate": 3.0681632653061225e-05, "loss": 2.4282, "step": 9474 }, { "epoch": 38.673469387755105, "grad_norm": 16.572551727294922, "learning_rate": 3.067959183673469e-05, "loss": 2.7211, "step": 9475 }, { "epoch": 38.67755102040816, "grad_norm": 15.155830383300781, "learning_rate": 3.067755102040816e-05, "loss": 2.8572, "step": 9476 }, { "epoch": 38.68163265306122, "grad_norm": 7.921699047088623, "learning_rate": 3.0675510204081633e-05, "loss": 3.5332, "step": 9477 }, { "epoch": 38.68571428571428, "grad_norm": 21.123666763305664, "learning_rate": 3.067346938775511e-05, "loss": 2.6503, "step": 9478 }, { "epoch": 38.689795918367345, "grad_norm": 16.820600509643555, "learning_rate": 3.0671428571428574e-05, "loss": 2.4565, "step": 9479 }, { "epoch": 38.69387755102041, "grad_norm": 34.606590270996094, "learning_rate": 3.066938775510204e-05, "loss": 0.5059, "step": 9480 }, { "epoch": 38.69795918367347, "grad_norm": 27.083778381347656, "learning_rate": 3.066734693877551e-05, "loss": 1.5085, "step": 9481 }, { "epoch": 38.70204081632653, "grad_norm": 13.396772384643555, "learning_rate": 3.066530612244898e-05, "loss": 3.0374, "step": 9482 }, { "epoch": 38.70612244897959, "grad_norm": 34.578887939453125, "learning_rate": 3.066326530612245e-05, "loss": 1.4181, "step": 9483 }, { "epoch": 38.710204081632654, "grad_norm": 21.15598487854004, "learning_rate": 3.066122448979592e-05, "loss": 2.7149, "step": 9484 }, { "epoch": 38.714285714285715, "grad_norm": 14.425854682922363, "learning_rate": 3.065918367346939e-05, "loss": 2.7164, "step": 9485 }, { "epoch": 38.71836734693878, "grad_norm": 55.90850830078125, "learning_rate": 3.065714285714286e-05, "loss": 0.9948, "step": 9486 }, { "epoch": 38.72244897959184, "grad_norm": 17.8941707611084, "learning_rate": 3.065510204081633e-05, "loss": 2.5745, "step": 9487 }, { "epoch": 38.7265306122449, "grad_norm": 28.352706909179688, "learning_rate": 3.06530612244898e-05, "loss": 1.7223, "step": 9488 }, { "epoch": 38.73061224489796, "grad_norm": 23.422971725463867, "learning_rate": 3.0651020408163266e-05, "loss": 2.8386, "step": 9489 }, { "epoch": 38.734693877551024, "grad_norm": 12.985272407531738, "learning_rate": 3.064897959183673e-05, "loss": 3.1236, "step": 9490 }, { "epoch": 38.73877551020408, "grad_norm": 39.211185455322266, "learning_rate": 3.06469387755102e-05, "loss": 1.3058, "step": 9491 }, { "epoch": 38.74285714285714, "grad_norm": 22.60769271850586, "learning_rate": 3.0644897959183674e-05, "loss": 2.0389, "step": 9492 }, { "epoch": 38.7469387755102, "grad_norm": 26.608104705810547, "learning_rate": 3.064285714285715e-05, "loss": 2.3456, "step": 9493 }, { "epoch": 38.751020408163264, "grad_norm": 23.442243576049805, "learning_rate": 3.0640816326530615e-05, "loss": 2.0202, "step": 9494 }, { "epoch": 38.755102040816325, "grad_norm": 32.32286071777344, "learning_rate": 3.063877551020408e-05, "loss": 2.1111, "step": 9495 }, { "epoch": 38.75918367346939, "grad_norm": 15.011212348937988, "learning_rate": 3.063673469387755e-05, "loss": 2.8789, "step": 9496 }, { "epoch": 38.76326530612245, "grad_norm": 16.34295082092285, "learning_rate": 3.0634693877551023e-05, "loss": 2.526, "step": 9497 }, { "epoch": 38.76734693877551, "grad_norm": 22.501142501831055, "learning_rate": 3.063265306122449e-05, "loss": 2.3078, "step": 9498 }, { "epoch": 38.77142857142857, "grad_norm": 16.727590560913086, "learning_rate": 3.063061224489796e-05, "loss": 2.7062, "step": 9499 }, { "epoch": 38.775510204081634, "grad_norm": 30.322616577148438, "learning_rate": 3.062857142857143e-05, "loss": 2.3005, "step": 9500 }, { "epoch": 38.779591836734696, "grad_norm": 28.43752098083496, "learning_rate": 3.06265306122449e-05, "loss": 1.6307, "step": 9501 }, { "epoch": 38.78367346938776, "grad_norm": 15.851269721984863, "learning_rate": 3.062448979591837e-05, "loss": 2.6409, "step": 9502 }, { "epoch": 38.78775510204082, "grad_norm": 30.094303131103516, "learning_rate": 3.062244897959184e-05, "loss": 1.6913, "step": 9503 }, { "epoch": 38.79183673469388, "grad_norm": 18.190296173095703, "learning_rate": 3.062040816326531e-05, "loss": 2.6201, "step": 9504 }, { "epoch": 38.795918367346935, "grad_norm": 25.86635971069336, "learning_rate": 3.0618367346938774e-05, "loss": 2.5196, "step": 9505 }, { "epoch": 38.8, "grad_norm": 21.343393325805664, "learning_rate": 3.061632653061224e-05, "loss": 2.2711, "step": 9506 }, { "epoch": 38.80408163265306, "grad_norm": 21.030933380126953, "learning_rate": 3.0614285714285715e-05, "loss": 2.5364, "step": 9507 }, { "epoch": 38.80816326530612, "grad_norm": 34.2291259765625, "learning_rate": 3.061224489795919e-05, "loss": 0.9375, "step": 9508 }, { "epoch": 38.81224489795918, "grad_norm": 32.841156005859375, "learning_rate": 3.0610204081632656e-05, "loss": 1.098, "step": 9509 }, { "epoch": 38.816326530612244, "grad_norm": 36.464866638183594, "learning_rate": 3.060816326530612e-05, "loss": 1.8511, "step": 9510 }, { "epoch": 38.820408163265306, "grad_norm": 10.403007507324219, "learning_rate": 3.060612244897959e-05, "loss": 2.9856, "step": 9511 }, { "epoch": 38.82448979591837, "grad_norm": 15.17838191986084, "learning_rate": 3.0604081632653064e-05, "loss": 2.655, "step": 9512 }, { "epoch": 38.82857142857143, "grad_norm": 9.070748329162598, "learning_rate": 3.060204081632653e-05, "loss": 3.4732, "step": 9513 }, { "epoch": 38.83265306122449, "grad_norm": 20.270366668701172, "learning_rate": 3.06e-05, "loss": 2.3786, "step": 9514 }, { "epoch": 38.83673469387755, "grad_norm": 17.16356086730957, "learning_rate": 3.059795918367347e-05, "loss": 2.7746, "step": 9515 }, { "epoch": 38.840816326530614, "grad_norm": 27.516254425048828, "learning_rate": 3.0595918367346946e-05, "loss": 2.1031, "step": 9516 }, { "epoch": 38.844897959183676, "grad_norm": 34.94010925292969, "learning_rate": 3.059387755102041e-05, "loss": 1.8796, "step": 9517 }, { "epoch": 38.84897959183674, "grad_norm": 27.99662971496582, "learning_rate": 3.059183673469388e-05, "loss": 1.7734, "step": 9518 }, { "epoch": 38.85306122448979, "grad_norm": 22.8017635345459, "learning_rate": 3.058979591836735e-05, "loss": 2.6607, "step": 9519 }, { "epoch": 38.857142857142854, "grad_norm": 18.057659149169922, "learning_rate": 3.0587755102040815e-05, "loss": 2.4617, "step": 9520 }, { "epoch": 38.861224489795916, "grad_norm": 26.05553436279297, "learning_rate": 3.058571428571429e-05, "loss": 2.3612, "step": 9521 }, { "epoch": 38.86530612244898, "grad_norm": 30.527271270751953, "learning_rate": 3.0583673469387756e-05, "loss": 2.1813, "step": 9522 }, { "epoch": 38.86938775510204, "grad_norm": 20.14811134338379, "learning_rate": 3.058163265306123e-05, "loss": 2.348, "step": 9523 }, { "epoch": 38.8734693877551, "grad_norm": 23.100261688232422, "learning_rate": 3.05795918367347e-05, "loss": 2.0791, "step": 9524 }, { "epoch": 38.87755102040816, "grad_norm": 18.51665687561035, "learning_rate": 3.0577551020408164e-05, "loss": 2.3554, "step": 9525 }, { "epoch": 38.881632653061224, "grad_norm": 25.70443344116211, "learning_rate": 3.057551020408164e-05, "loss": 2.124, "step": 9526 }, { "epoch": 38.885714285714286, "grad_norm": 17.758581161499023, "learning_rate": 3.0573469387755105e-05, "loss": 1.9219, "step": 9527 }, { "epoch": 38.88979591836735, "grad_norm": 32.55727005004883, "learning_rate": 3.057142857142857e-05, "loss": 1.4947, "step": 9528 }, { "epoch": 38.89387755102041, "grad_norm": 30.7603816986084, "learning_rate": 3.056938775510204e-05, "loss": 1.5556, "step": 9529 }, { "epoch": 38.89795918367347, "grad_norm": 27.641355514526367, "learning_rate": 3.0567346938775506e-05, "loss": 1.3871, "step": 9530 }, { "epoch": 38.90204081632653, "grad_norm": 15.44349479675293, "learning_rate": 3.056530612244898e-05, "loss": 2.8195, "step": 9531 }, { "epoch": 38.906122448979595, "grad_norm": 18.250036239624023, "learning_rate": 3.0563265306122454e-05, "loss": 2.5745, "step": 9532 }, { "epoch": 38.910204081632656, "grad_norm": 33.55278778076172, "learning_rate": 3.056122448979592e-05, "loss": 2.0358, "step": 9533 }, { "epoch": 38.91428571428571, "grad_norm": 21.2901611328125, "learning_rate": 3.055918367346939e-05, "loss": 2.6239, "step": 9534 }, { "epoch": 38.91836734693877, "grad_norm": 12.516779899597168, "learning_rate": 3.0557142857142855e-05, "loss": 3.2809, "step": 9535 }, { "epoch": 38.922448979591834, "grad_norm": 25.88994789123535, "learning_rate": 3.055510204081633e-05, "loss": 2.2608, "step": 9536 }, { "epoch": 38.926530612244896, "grad_norm": 47.300254821777344, "learning_rate": 3.0553061224489796e-05, "loss": 0.8483, "step": 9537 }, { "epoch": 38.93061224489796, "grad_norm": 24.292028427124023, "learning_rate": 3.0551020408163264e-05, "loss": 1.5422, "step": 9538 }, { "epoch": 38.93469387755102, "grad_norm": 17.861108779907227, "learning_rate": 3.054897959183674e-05, "loss": 2.7087, "step": 9539 }, { "epoch": 38.93877551020408, "grad_norm": 12.673452377319336, "learning_rate": 3.0546938775510205e-05, "loss": 2.8933, "step": 9540 }, { "epoch": 38.94285714285714, "grad_norm": 27.866779327392578, "learning_rate": 3.054489795918368e-05, "loss": 0.7768, "step": 9541 }, { "epoch": 38.946938775510205, "grad_norm": 25.132469177246094, "learning_rate": 3.0542857142857146e-05, "loss": 2.4425, "step": 9542 }, { "epoch": 38.95102040816327, "grad_norm": 15.072392463684082, "learning_rate": 3.054081632653061e-05, "loss": 2.4489, "step": 9543 }, { "epoch": 38.95510204081633, "grad_norm": 11.135324478149414, "learning_rate": 3.053877551020408e-05, "loss": 3.0676, "step": 9544 }, { "epoch": 38.95918367346939, "grad_norm": 18.70354461669922, "learning_rate": 3.053673469387755e-05, "loss": 2.2426, "step": 9545 }, { "epoch": 38.96326530612245, "grad_norm": 12.428506851196289, "learning_rate": 3.053469387755102e-05, "loss": 3.0979, "step": 9546 }, { "epoch": 38.96734693877551, "grad_norm": 16.6960506439209, "learning_rate": 3.0532653061224495e-05, "loss": 2.7034, "step": 9547 }, { "epoch": 38.97142857142857, "grad_norm": 18.903749465942383, "learning_rate": 3.053061224489796e-05, "loss": 2.1941, "step": 9548 }, { "epoch": 38.97551020408163, "grad_norm": 23.156808853149414, "learning_rate": 3.052857142857143e-05, "loss": 1.8639, "step": 9549 }, { "epoch": 38.97959183673469, "grad_norm": 22.532136917114258, "learning_rate": 3.0526530612244896e-05, "loss": 2.7529, "step": 9550 }, { "epoch": 38.98367346938775, "grad_norm": 12.996650695800781, "learning_rate": 3.052448979591837e-05, "loss": 2.8506, "step": 9551 }, { "epoch": 38.987755102040815, "grad_norm": 38.34108352661133, "learning_rate": 3.052244897959184e-05, "loss": 1.3549, "step": 9552 }, { "epoch": 38.99183673469388, "grad_norm": 29.947940826416016, "learning_rate": 3.0520408163265304e-05, "loss": 1.4935, "step": 9553 }, { "epoch": 38.99591836734694, "grad_norm": 49.847530364990234, "learning_rate": 3.051836734693878e-05, "loss": 1.6812, "step": 9554 }, { "epoch": 39.0, "grad_norm": 29.834671020507812, "learning_rate": 3.0516326530612242e-05, "loss": 1.833, "step": 9555 }, { "epoch": 39.00408163265306, "grad_norm": 15.066614151000977, "learning_rate": 3.0514285714285716e-05, "loss": 2.7714, "step": 9556 }, { "epoch": 39.00816326530612, "grad_norm": 18.05379295349121, "learning_rate": 3.0512244897959186e-05, "loss": 2.5888, "step": 9557 }, { "epoch": 39.012244897959185, "grad_norm": 15.902301788330078, "learning_rate": 3.0510204081632654e-05, "loss": 2.6846, "step": 9558 }, { "epoch": 39.01632653061225, "grad_norm": 19.050262451171875, "learning_rate": 3.050816326530612e-05, "loss": 2.2771, "step": 9559 }, { "epoch": 39.02040816326531, "grad_norm": 20.98780632019043, "learning_rate": 3.0506122448979595e-05, "loss": 2.5292, "step": 9560 }, { "epoch": 39.02448979591837, "grad_norm": 22.69915199279785, "learning_rate": 3.0504081632653065e-05, "loss": 1.8599, "step": 9561 }, { "epoch": 39.02857142857143, "grad_norm": 26.27195930480957, "learning_rate": 3.0502040816326532e-05, "loss": 1.494, "step": 9562 }, { "epoch": 39.03265306122449, "grad_norm": 9.617923736572266, "learning_rate": 3.05e-05, "loss": 3.3643, "step": 9563 }, { "epoch": 39.03673469387755, "grad_norm": 15.308195114135742, "learning_rate": 3.049795918367347e-05, "loss": 2.6707, "step": 9564 }, { "epoch": 39.04081632653061, "grad_norm": 9.362201690673828, "learning_rate": 3.0495918367346944e-05, "loss": 3.1011, "step": 9565 }, { "epoch": 39.04489795918367, "grad_norm": 12.836491584777832, "learning_rate": 3.049387755102041e-05, "loss": 2.5676, "step": 9566 }, { "epoch": 39.04897959183673, "grad_norm": 31.20772361755371, "learning_rate": 3.0491836734693878e-05, "loss": 1.5492, "step": 9567 }, { "epoch": 39.053061224489795, "grad_norm": 7.830653667449951, "learning_rate": 3.048979591836735e-05, "loss": 3.4851, "step": 9568 }, { "epoch": 39.05714285714286, "grad_norm": 15.807497024536133, "learning_rate": 3.0487755102040816e-05, "loss": 2.3055, "step": 9569 }, { "epoch": 39.06122448979592, "grad_norm": 38.20315170288086, "learning_rate": 3.048571428571429e-05, "loss": 2.3603, "step": 9570 }, { "epoch": 39.06530612244898, "grad_norm": 17.421232223510742, "learning_rate": 3.0483673469387757e-05, "loss": 2.5013, "step": 9571 }, { "epoch": 39.06938775510204, "grad_norm": 43.087188720703125, "learning_rate": 3.0481632653061227e-05, "loss": 1.2638, "step": 9572 }, { "epoch": 39.073469387755104, "grad_norm": 19.039567947387695, "learning_rate": 3.0479591836734694e-05, "loss": 2.4587, "step": 9573 }, { "epoch": 39.077551020408166, "grad_norm": 26.75591278076172, "learning_rate": 3.047755102040816e-05, "loss": 1.7545, "step": 9574 }, { "epoch": 39.08163265306123, "grad_norm": 43.026527404785156, "learning_rate": 3.0475510204081635e-05, "loss": 1.269, "step": 9575 }, { "epoch": 39.08571428571429, "grad_norm": 21.6102237701416, "learning_rate": 3.0473469387755106e-05, "loss": 2.1819, "step": 9576 }, { "epoch": 39.089795918367344, "grad_norm": 36.362396240234375, "learning_rate": 3.0471428571428573e-05, "loss": 1.5517, "step": 9577 }, { "epoch": 39.093877551020405, "grad_norm": 18.48907470703125, "learning_rate": 3.046938775510204e-05, "loss": 2.4956, "step": 9578 }, { "epoch": 39.09795918367347, "grad_norm": 18.037147521972656, "learning_rate": 3.046734693877551e-05, "loss": 2.4449, "step": 9579 }, { "epoch": 39.10204081632653, "grad_norm": 26.66692543029785, "learning_rate": 3.0465306122448985e-05, "loss": 1.9434, "step": 9580 }, { "epoch": 39.10612244897959, "grad_norm": 19.131505966186523, "learning_rate": 3.046326530612245e-05, "loss": 1.8586, "step": 9581 }, { "epoch": 39.11020408163265, "grad_norm": 25.535484313964844, "learning_rate": 3.046122448979592e-05, "loss": 1.3128, "step": 9582 }, { "epoch": 39.114285714285714, "grad_norm": 19.216554641723633, "learning_rate": 3.045918367346939e-05, "loss": 2.5295, "step": 9583 }, { "epoch": 39.118367346938776, "grad_norm": 37.88225555419922, "learning_rate": 3.0457142857142856e-05, "loss": 1.3563, "step": 9584 }, { "epoch": 39.12244897959184, "grad_norm": 19.5559024810791, "learning_rate": 3.045510204081633e-05, "loss": 2.8631, "step": 9585 }, { "epoch": 39.1265306122449, "grad_norm": 16.41151237487793, "learning_rate": 3.0453061224489797e-05, "loss": 2.6831, "step": 9586 }, { "epoch": 39.13061224489796, "grad_norm": 31.099950790405273, "learning_rate": 3.0451020408163268e-05, "loss": 1.7252, "step": 9587 }, { "epoch": 39.13469387755102, "grad_norm": 14.767273902893066, "learning_rate": 3.0448979591836735e-05, "loss": 2.9642, "step": 9588 }, { "epoch": 39.138775510204084, "grad_norm": 15.450581550598145, "learning_rate": 3.0446938775510202e-05, "loss": 3.083, "step": 9589 }, { "epoch": 39.142857142857146, "grad_norm": 25.908430099487305, "learning_rate": 3.0444897959183676e-05, "loss": 1.8245, "step": 9590 }, { "epoch": 39.14693877551021, "grad_norm": 17.652124404907227, "learning_rate": 3.0442857142857147e-05, "loss": 2.3405, "step": 9591 }, { "epoch": 39.15102040816326, "grad_norm": 20.183673858642578, "learning_rate": 3.0440816326530614e-05, "loss": 2.1134, "step": 9592 }, { "epoch": 39.155102040816324, "grad_norm": 24.40712547302246, "learning_rate": 3.043877551020408e-05, "loss": 2.0352, "step": 9593 }, { "epoch": 39.159183673469386, "grad_norm": 25.051311492919922, "learning_rate": 3.043673469387755e-05, "loss": 1.8782, "step": 9594 }, { "epoch": 39.16326530612245, "grad_norm": 19.287994384765625, "learning_rate": 3.0434693877551025e-05, "loss": 2.4898, "step": 9595 }, { "epoch": 39.16734693877551, "grad_norm": 34.45481491088867, "learning_rate": 3.0432653061224492e-05, "loss": 1.3573, "step": 9596 }, { "epoch": 39.17142857142857, "grad_norm": 19.849550247192383, "learning_rate": 3.043061224489796e-05, "loss": 2.507, "step": 9597 }, { "epoch": 39.17551020408163, "grad_norm": 34.52272415161133, "learning_rate": 3.042857142857143e-05, "loss": 1.6164, "step": 9598 }, { "epoch": 39.179591836734694, "grad_norm": 33.92204284667969, "learning_rate": 3.0426530612244897e-05, "loss": 0.7964, "step": 9599 }, { "epoch": 39.183673469387756, "grad_norm": 11.122833251953125, "learning_rate": 3.042448979591837e-05, "loss": 3.1805, "step": 9600 }, { "epoch": 39.18775510204082, "grad_norm": 40.3621826171875, "learning_rate": 3.0422448979591838e-05, "loss": 2.3913, "step": 9601 }, { "epoch": 39.19183673469388, "grad_norm": 21.882230758666992, "learning_rate": 3.042040816326531e-05, "loss": 2.5714, "step": 9602 }, { "epoch": 39.19591836734694, "grad_norm": 18.107402801513672, "learning_rate": 3.0418367346938776e-05, "loss": 2.5257, "step": 9603 }, { "epoch": 39.2, "grad_norm": 10.587052345275879, "learning_rate": 3.041632653061225e-05, "loss": 2.8962, "step": 9604 }, { "epoch": 39.204081632653065, "grad_norm": 51.84862518310547, "learning_rate": 3.0414285714285717e-05, "loss": 1.3961, "step": 9605 }, { "epoch": 39.20816326530612, "grad_norm": 42.46242141723633, "learning_rate": 3.0412244897959184e-05, "loss": 1.6183, "step": 9606 }, { "epoch": 39.21224489795918, "grad_norm": 17.923105239868164, "learning_rate": 3.0410204081632655e-05, "loss": 2.6053, "step": 9607 }, { "epoch": 39.21632653061224, "grad_norm": 37.445655822753906, "learning_rate": 3.040816326530612e-05, "loss": 1.3489, "step": 9608 }, { "epoch": 39.220408163265304, "grad_norm": 21.138967514038086, "learning_rate": 3.0406122448979596e-05, "loss": 2.6345, "step": 9609 }, { "epoch": 39.224489795918366, "grad_norm": 22.544036865234375, "learning_rate": 3.0404081632653063e-05, "loss": 2.3451, "step": 9610 }, { "epoch": 39.22857142857143, "grad_norm": 27.5393009185791, "learning_rate": 3.0402040816326533e-05, "loss": 0.6718, "step": 9611 }, { "epoch": 39.23265306122449, "grad_norm": 34.06199645996094, "learning_rate": 3.04e-05, "loss": 1.8984, "step": 9612 }, { "epoch": 39.23673469387755, "grad_norm": 37.26346206665039, "learning_rate": 3.0397959183673467e-05, "loss": 0.9305, "step": 9613 }, { "epoch": 39.24081632653061, "grad_norm": 55.972633361816406, "learning_rate": 3.039591836734694e-05, "loss": 1.5949, "step": 9614 }, { "epoch": 39.244897959183675, "grad_norm": 18.899551391601562, "learning_rate": 3.0393877551020412e-05, "loss": 2.5125, "step": 9615 }, { "epoch": 39.248979591836736, "grad_norm": 15.117095947265625, "learning_rate": 3.039183673469388e-05, "loss": 2.5993, "step": 9616 }, { "epoch": 39.2530612244898, "grad_norm": 18.587587356567383, "learning_rate": 3.0389795918367346e-05, "loss": 2.1933, "step": 9617 }, { "epoch": 39.25714285714286, "grad_norm": 25.349761962890625, "learning_rate": 3.0387755102040817e-05, "loss": 2.0043, "step": 9618 }, { "epoch": 39.26122448979592, "grad_norm": 24.1358642578125, "learning_rate": 3.038571428571429e-05, "loss": 2.3225, "step": 9619 }, { "epoch": 39.265306122448976, "grad_norm": 18.977357864379883, "learning_rate": 3.0383673469387758e-05, "loss": 2.2909, "step": 9620 }, { "epoch": 39.26938775510204, "grad_norm": 23.119279861450195, "learning_rate": 3.0381632653061225e-05, "loss": 2.2403, "step": 9621 }, { "epoch": 39.2734693877551, "grad_norm": 21.317729949951172, "learning_rate": 3.0379591836734695e-05, "loss": 2.1928, "step": 9622 }, { "epoch": 39.27755102040816, "grad_norm": 25.975788116455078, "learning_rate": 3.0377551020408162e-05, "loss": 1.4162, "step": 9623 }, { "epoch": 39.28163265306122, "grad_norm": 25.133136749267578, "learning_rate": 3.0375510204081636e-05, "loss": 2.2697, "step": 9624 }, { "epoch": 39.285714285714285, "grad_norm": 10.664273262023926, "learning_rate": 3.0373469387755103e-05, "loss": 3.237, "step": 9625 }, { "epoch": 39.289795918367346, "grad_norm": 27.177457809448242, "learning_rate": 3.0371428571428574e-05, "loss": 1.7167, "step": 9626 }, { "epoch": 39.29387755102041, "grad_norm": 19.63646697998047, "learning_rate": 3.036938775510204e-05, "loss": 2.5842, "step": 9627 }, { "epoch": 39.29795918367347, "grad_norm": 31.20327377319336, "learning_rate": 3.0367346938775508e-05, "loss": 1.3693, "step": 9628 }, { "epoch": 39.30204081632653, "grad_norm": 20.86068344116211, "learning_rate": 3.0365306122448982e-05, "loss": 2.5225, "step": 9629 }, { "epoch": 39.30612244897959, "grad_norm": 30.748735427856445, "learning_rate": 3.0363265306122453e-05, "loss": 1.6086, "step": 9630 }, { "epoch": 39.310204081632655, "grad_norm": 26.303598403930664, "learning_rate": 3.036122448979592e-05, "loss": 2.1847, "step": 9631 }, { "epoch": 39.31428571428572, "grad_norm": 34.64694595336914, "learning_rate": 3.0359183673469387e-05, "loss": 1.9111, "step": 9632 }, { "epoch": 39.31836734693878, "grad_norm": 16.005748748779297, "learning_rate": 3.0357142857142857e-05, "loss": 2.7036, "step": 9633 }, { "epoch": 39.32244897959184, "grad_norm": 23.199710845947266, "learning_rate": 3.035510204081633e-05, "loss": 2.0693, "step": 9634 }, { "epoch": 39.326530612244895, "grad_norm": 26.19234275817871, "learning_rate": 3.03530612244898e-05, "loss": 2.2975, "step": 9635 }, { "epoch": 39.33061224489796, "grad_norm": 24.087181091308594, "learning_rate": 3.0351020408163266e-05, "loss": 1.7046, "step": 9636 }, { "epoch": 39.33469387755102, "grad_norm": 11.149733543395996, "learning_rate": 3.0348979591836736e-05, "loss": 3.1036, "step": 9637 }, { "epoch": 39.33877551020408, "grad_norm": 36.099769592285156, "learning_rate": 3.0346938775510203e-05, "loss": 1.482, "step": 9638 }, { "epoch": 39.34285714285714, "grad_norm": 28.031309127807617, "learning_rate": 3.0344897959183677e-05, "loss": 1.985, "step": 9639 }, { "epoch": 39.3469387755102, "grad_norm": 14.101820945739746, "learning_rate": 3.0342857142857144e-05, "loss": 2.528, "step": 9640 }, { "epoch": 39.351020408163265, "grad_norm": 36.741371154785156, "learning_rate": 3.0340816326530615e-05, "loss": 2.2145, "step": 9641 }, { "epoch": 39.35510204081633, "grad_norm": 18.752334594726562, "learning_rate": 3.0338775510204082e-05, "loss": 2.4943, "step": 9642 }, { "epoch": 39.35918367346939, "grad_norm": 39.004737854003906, "learning_rate": 3.033673469387755e-05, "loss": 0.498, "step": 9643 }, { "epoch": 39.36326530612245, "grad_norm": 32.34730529785156, "learning_rate": 3.0334693877551023e-05, "loss": 1.03, "step": 9644 }, { "epoch": 39.36734693877551, "grad_norm": 23.332475662231445, "learning_rate": 3.0332653061224493e-05, "loss": 2.4251, "step": 9645 }, { "epoch": 39.371428571428574, "grad_norm": 53.516605377197266, "learning_rate": 3.033061224489796e-05, "loss": 1.0501, "step": 9646 }, { "epoch": 39.375510204081635, "grad_norm": 13.358990669250488, "learning_rate": 3.0328571428571428e-05, "loss": 3.0014, "step": 9647 }, { "epoch": 39.3795918367347, "grad_norm": 21.53733253479004, "learning_rate": 3.0326530612244898e-05, "loss": 2.7822, "step": 9648 }, { "epoch": 39.38367346938775, "grad_norm": 17.673315048217773, "learning_rate": 3.0324489795918372e-05, "loss": 2.5909, "step": 9649 }, { "epoch": 39.38775510204081, "grad_norm": 14.563375473022461, "learning_rate": 3.032244897959184e-05, "loss": 2.6141, "step": 9650 }, { "epoch": 39.391836734693875, "grad_norm": 31.07369613647461, "learning_rate": 3.0320408163265306e-05, "loss": 0.8463, "step": 9651 }, { "epoch": 39.39591836734694, "grad_norm": 14.522749900817871, "learning_rate": 3.0318367346938777e-05, "loss": 3.0223, "step": 9652 }, { "epoch": 39.4, "grad_norm": 16.536888122558594, "learning_rate": 3.031632653061225e-05, "loss": 2.6544, "step": 9653 }, { "epoch": 39.40408163265306, "grad_norm": 23.756973266601562, "learning_rate": 3.0314285714285718e-05, "loss": 2.1555, "step": 9654 }, { "epoch": 39.40816326530612, "grad_norm": 11.367154121398926, "learning_rate": 3.0312244897959185e-05, "loss": 2.9927, "step": 9655 }, { "epoch": 39.412244897959184, "grad_norm": 31.9989070892334, "learning_rate": 3.0310204081632655e-05, "loss": 1.3219, "step": 9656 }, { "epoch": 39.416326530612245, "grad_norm": 27.64427375793457, "learning_rate": 3.0308163265306123e-05, "loss": 2.2757, "step": 9657 }, { "epoch": 39.42040816326531, "grad_norm": 11.621661186218262, "learning_rate": 3.0306122448979597e-05, "loss": 3.0593, "step": 9658 }, { "epoch": 39.42448979591837, "grad_norm": 17.266530990600586, "learning_rate": 3.0304081632653064e-05, "loss": 2.0594, "step": 9659 }, { "epoch": 39.42857142857143, "grad_norm": 17.277908325195312, "learning_rate": 3.030204081632653e-05, "loss": 2.7184, "step": 9660 }, { "epoch": 39.43265306122449, "grad_norm": 21.792612075805664, "learning_rate": 3.03e-05, "loss": 2.2487, "step": 9661 }, { "epoch": 39.436734693877554, "grad_norm": 29.429763793945312, "learning_rate": 3.029795918367347e-05, "loss": 0.7017, "step": 9662 }, { "epoch": 39.440816326530616, "grad_norm": 14.777301788330078, "learning_rate": 3.0295918367346942e-05, "loss": 2.8932, "step": 9663 }, { "epoch": 39.44489795918367, "grad_norm": 40.431095123291016, "learning_rate": 3.029387755102041e-05, "loss": 0.9223, "step": 9664 }, { "epoch": 39.44897959183673, "grad_norm": 39.398277282714844, "learning_rate": 3.029183673469388e-05, "loss": 0.7272, "step": 9665 }, { "epoch": 39.453061224489794, "grad_norm": 18.879615783691406, "learning_rate": 3.0289795918367347e-05, "loss": 0.1808, "step": 9666 }, { "epoch": 39.457142857142856, "grad_norm": 19.442138671875, "learning_rate": 3.0287755102040814e-05, "loss": 2.6159, "step": 9667 }, { "epoch": 39.46122448979592, "grad_norm": 13.396661758422852, "learning_rate": 3.0285714285714288e-05, "loss": 3.1538, "step": 9668 }, { "epoch": 39.46530612244898, "grad_norm": 12.724165916442871, "learning_rate": 3.028367346938776e-05, "loss": 3.023, "step": 9669 }, { "epoch": 39.46938775510204, "grad_norm": 28.894399642944336, "learning_rate": 3.0281632653061226e-05, "loss": 2.355, "step": 9670 }, { "epoch": 39.4734693877551, "grad_norm": 14.868799209594727, "learning_rate": 3.0279591836734693e-05, "loss": 3.0479, "step": 9671 }, { "epoch": 39.477551020408164, "grad_norm": 14.857144355773926, "learning_rate": 3.0277551020408163e-05, "loss": 2.7298, "step": 9672 }, { "epoch": 39.481632653061226, "grad_norm": 21.791507720947266, "learning_rate": 3.0275510204081637e-05, "loss": 2.0764, "step": 9673 }, { "epoch": 39.48571428571429, "grad_norm": 27.921274185180664, "learning_rate": 3.0273469387755104e-05, "loss": 1.4835, "step": 9674 }, { "epoch": 39.48979591836735, "grad_norm": 14.034320831298828, "learning_rate": 3.027142857142857e-05, "loss": 2.6663, "step": 9675 }, { "epoch": 39.49387755102041, "grad_norm": 20.608325958251953, "learning_rate": 3.0269387755102042e-05, "loss": 1.9566, "step": 9676 }, { "epoch": 39.49795918367347, "grad_norm": 33.1755485534668, "learning_rate": 3.026734693877551e-05, "loss": 1.6531, "step": 9677 }, { "epoch": 39.50204081632653, "grad_norm": 11.737351417541504, "learning_rate": 3.0265306122448983e-05, "loss": 2.7651, "step": 9678 }, { "epoch": 39.50612244897959, "grad_norm": 26.44711685180664, "learning_rate": 3.026326530612245e-05, "loss": 1.7216, "step": 9679 }, { "epoch": 39.51020408163265, "grad_norm": 30.37832260131836, "learning_rate": 3.026122448979592e-05, "loss": 1.5114, "step": 9680 }, { "epoch": 39.51428571428571, "grad_norm": 15.280044555664062, "learning_rate": 3.0259183673469388e-05, "loss": 2.8386, "step": 9681 }, { "epoch": 39.518367346938774, "grad_norm": 27.30408477783203, "learning_rate": 3.0257142857142855e-05, "loss": 2.1566, "step": 9682 }, { "epoch": 39.522448979591836, "grad_norm": 31.619089126586914, "learning_rate": 3.025510204081633e-05, "loss": 1.7366, "step": 9683 }, { "epoch": 39.5265306122449, "grad_norm": 28.626928329467773, "learning_rate": 3.02530612244898e-05, "loss": 1.4773, "step": 9684 }, { "epoch": 39.53061224489796, "grad_norm": 28.77327537536621, "learning_rate": 3.0251020408163266e-05, "loss": 2.1554, "step": 9685 }, { "epoch": 39.53469387755102, "grad_norm": 35.10686111450195, "learning_rate": 3.0248979591836734e-05, "loss": 2.166, "step": 9686 }, { "epoch": 39.53877551020408, "grad_norm": 16.817277908325195, "learning_rate": 3.0246938775510204e-05, "loss": 2.5915, "step": 9687 }, { "epoch": 39.542857142857144, "grad_norm": 26.246244430541992, "learning_rate": 3.0244897959183678e-05, "loss": 2.4713, "step": 9688 }, { "epoch": 39.546938775510206, "grad_norm": 17.783790588378906, "learning_rate": 3.0242857142857145e-05, "loss": 2.3721, "step": 9689 }, { "epoch": 39.55102040816327, "grad_norm": 19.767017364501953, "learning_rate": 3.0240816326530612e-05, "loss": 2.6017, "step": 9690 }, { "epoch": 39.55510204081633, "grad_norm": 16.764787673950195, "learning_rate": 3.0238775510204083e-05, "loss": 2.4519, "step": 9691 }, { "epoch": 39.55918367346939, "grad_norm": 24.109577178955078, "learning_rate": 3.023673469387755e-05, "loss": 2.4525, "step": 9692 }, { "epoch": 39.563265306122446, "grad_norm": 32.84016418457031, "learning_rate": 3.0234693877551024e-05, "loss": 1.4925, "step": 9693 }, { "epoch": 39.56734693877551, "grad_norm": 18.732694625854492, "learning_rate": 3.023265306122449e-05, "loss": 2.1783, "step": 9694 }, { "epoch": 39.57142857142857, "grad_norm": 40.89655303955078, "learning_rate": 3.023061224489796e-05, "loss": 0.6149, "step": 9695 }, { "epoch": 39.57551020408163, "grad_norm": 44.81401824951172, "learning_rate": 3.022857142857143e-05, "loss": 0.7225, "step": 9696 }, { "epoch": 39.57959183673469, "grad_norm": 24.22666358947754, "learning_rate": 3.0226530612244902e-05, "loss": 2.326, "step": 9697 }, { "epoch": 39.583673469387755, "grad_norm": 15.667490005493164, "learning_rate": 3.022448979591837e-05, "loss": 2.7484, "step": 9698 }, { "epoch": 39.587755102040816, "grad_norm": 26.80087661743164, "learning_rate": 3.022244897959184e-05, "loss": 2.3923, "step": 9699 }, { "epoch": 39.59183673469388, "grad_norm": 12.194964408874512, "learning_rate": 3.0220408163265307e-05, "loss": 2.8423, "step": 9700 }, { "epoch": 39.59591836734694, "grad_norm": 20.66592788696289, "learning_rate": 3.0218367346938774e-05, "loss": 2.6112, "step": 9701 }, { "epoch": 39.6, "grad_norm": 38.04030990600586, "learning_rate": 3.0216326530612248e-05, "loss": 1.7231, "step": 9702 }, { "epoch": 39.60408163265306, "grad_norm": 26.68189239501953, "learning_rate": 3.021428571428572e-05, "loss": 1.8432, "step": 9703 }, { "epoch": 39.608163265306125, "grad_norm": 24.467544555664062, "learning_rate": 3.0212244897959186e-05, "loss": 2.0986, "step": 9704 }, { "epoch": 39.61224489795919, "grad_norm": 49.913597106933594, "learning_rate": 3.0210204081632653e-05, "loss": 0.8174, "step": 9705 }, { "epoch": 39.61632653061225, "grad_norm": 19.452518463134766, "learning_rate": 3.0208163265306124e-05, "loss": 2.6865, "step": 9706 }, { "epoch": 39.6204081632653, "grad_norm": 18.744924545288086, "learning_rate": 3.0206122448979594e-05, "loss": 2.5575, "step": 9707 }, { "epoch": 39.624489795918365, "grad_norm": 23.607908248901367, "learning_rate": 3.0204081632653065e-05, "loss": 2.379, "step": 9708 }, { "epoch": 39.628571428571426, "grad_norm": 19.773426055908203, "learning_rate": 3.0202040816326532e-05, "loss": 2.4706, "step": 9709 }, { "epoch": 39.63265306122449, "grad_norm": 27.588220596313477, "learning_rate": 3.02e-05, "loss": 2.2567, "step": 9710 }, { "epoch": 39.63673469387755, "grad_norm": 21.374719619750977, "learning_rate": 3.019795918367347e-05, "loss": 2.4304, "step": 9711 }, { "epoch": 39.64081632653061, "grad_norm": 15.207618713378906, "learning_rate": 3.0195918367346943e-05, "loss": 2.7839, "step": 9712 }, { "epoch": 39.64489795918367, "grad_norm": 35.39070510864258, "learning_rate": 3.019387755102041e-05, "loss": 1.7387, "step": 9713 }, { "epoch": 39.648979591836735, "grad_norm": 21.389141082763672, "learning_rate": 3.0191836734693878e-05, "loss": 2.405, "step": 9714 }, { "epoch": 39.6530612244898, "grad_norm": 16.083343505859375, "learning_rate": 3.0189795918367348e-05, "loss": 2.6403, "step": 9715 }, { "epoch": 39.65714285714286, "grad_norm": 26.461320877075195, "learning_rate": 3.0187755102040815e-05, "loss": 1.8928, "step": 9716 }, { "epoch": 39.66122448979592, "grad_norm": 14.339253425598145, "learning_rate": 3.018571428571429e-05, "loss": 2.6999, "step": 9717 }, { "epoch": 39.66530612244898, "grad_norm": 12.751911163330078, "learning_rate": 3.0183673469387756e-05, "loss": 3.2321, "step": 9718 }, { "epoch": 39.66938775510204, "grad_norm": 20.755765914916992, "learning_rate": 3.0181632653061227e-05, "loss": 2.4181, "step": 9719 }, { "epoch": 39.673469387755105, "grad_norm": 17.60875129699707, "learning_rate": 3.0179591836734694e-05, "loss": 2.7947, "step": 9720 }, { "epoch": 39.67755102040816, "grad_norm": 44.32619857788086, "learning_rate": 3.017755102040816e-05, "loss": 1.9216, "step": 9721 }, { "epoch": 39.68163265306122, "grad_norm": 36.63011169433594, "learning_rate": 3.0175510204081635e-05, "loss": 1.5768, "step": 9722 }, { "epoch": 39.68571428571428, "grad_norm": 37.56277084350586, "learning_rate": 3.0173469387755105e-05, "loss": 1.3515, "step": 9723 }, { "epoch": 39.689795918367345, "grad_norm": 29.265806198120117, "learning_rate": 3.0171428571428572e-05, "loss": 1.576, "step": 9724 }, { "epoch": 39.69387755102041, "grad_norm": 22.131301879882812, "learning_rate": 3.016938775510204e-05, "loss": 2.4196, "step": 9725 }, { "epoch": 39.69795918367347, "grad_norm": 30.583629608154297, "learning_rate": 3.016734693877551e-05, "loss": 1.0261, "step": 9726 }, { "epoch": 39.70204081632653, "grad_norm": 13.58428955078125, "learning_rate": 3.0165306122448984e-05, "loss": 2.7883, "step": 9727 }, { "epoch": 39.70612244897959, "grad_norm": 37.917274475097656, "learning_rate": 3.016326530612245e-05, "loss": 0.6803, "step": 9728 }, { "epoch": 39.710204081632654, "grad_norm": 31.312402725219727, "learning_rate": 3.0161224489795918e-05, "loss": 1.6402, "step": 9729 }, { "epoch": 39.714285714285715, "grad_norm": 13.672954559326172, "learning_rate": 3.015918367346939e-05, "loss": 2.7272, "step": 9730 }, { "epoch": 39.71836734693878, "grad_norm": 24.910676956176758, "learning_rate": 3.0157142857142856e-05, "loss": 1.5242, "step": 9731 }, { "epoch": 39.72244897959184, "grad_norm": 32.09110641479492, "learning_rate": 3.015510204081633e-05, "loss": 1.5428, "step": 9732 }, { "epoch": 39.7265306122449, "grad_norm": 20.76525115966797, "learning_rate": 3.0153061224489797e-05, "loss": 1.8602, "step": 9733 }, { "epoch": 39.73061224489796, "grad_norm": 28.952505111694336, "learning_rate": 3.0151020408163267e-05, "loss": 2.0819, "step": 9734 }, { "epoch": 39.734693877551024, "grad_norm": 11.257001876831055, "learning_rate": 3.0148979591836735e-05, "loss": 3.0826, "step": 9735 }, { "epoch": 39.73877551020408, "grad_norm": 35.014129638671875, "learning_rate": 3.0146938775510202e-05, "loss": 1.4935, "step": 9736 }, { "epoch": 39.74285714285714, "grad_norm": 23.191713333129883, "learning_rate": 3.0144897959183676e-05, "loss": 2.2929, "step": 9737 }, { "epoch": 39.7469387755102, "grad_norm": 18.49673843383789, "learning_rate": 3.0142857142857146e-05, "loss": 2.9068, "step": 9738 }, { "epoch": 39.751020408163264, "grad_norm": 26.50708770751953, "learning_rate": 3.0140816326530613e-05, "loss": 1.9572, "step": 9739 }, { "epoch": 39.755102040816325, "grad_norm": 17.05728530883789, "learning_rate": 3.013877551020408e-05, "loss": 2.7702, "step": 9740 }, { "epoch": 39.75918367346939, "grad_norm": 35.60956573486328, "learning_rate": 3.0136734693877554e-05, "loss": 2.0208, "step": 9741 }, { "epoch": 39.76326530612245, "grad_norm": 26.12047576904297, "learning_rate": 3.0134693877551025e-05, "loss": 1.9976, "step": 9742 }, { "epoch": 39.76734693877551, "grad_norm": 23.320449829101562, "learning_rate": 3.0132653061224492e-05, "loss": 2.3096, "step": 9743 }, { "epoch": 39.77142857142857, "grad_norm": 50.88465118408203, "learning_rate": 3.013061224489796e-05, "loss": 0.7296, "step": 9744 }, { "epoch": 39.775510204081634, "grad_norm": 36.23051452636719, "learning_rate": 3.012857142857143e-05, "loss": 1.45, "step": 9745 }, { "epoch": 39.779591836734696, "grad_norm": 22.986434936523438, "learning_rate": 3.0126530612244903e-05, "loss": 2.7207, "step": 9746 }, { "epoch": 39.78367346938776, "grad_norm": 18.37788200378418, "learning_rate": 3.012448979591837e-05, "loss": 2.0931, "step": 9747 }, { "epoch": 39.78775510204082, "grad_norm": 33.98236083984375, "learning_rate": 3.0122448979591838e-05, "loss": 1.7654, "step": 9748 }, { "epoch": 39.79183673469388, "grad_norm": 15.008545875549316, "learning_rate": 3.0120408163265308e-05, "loss": 2.8023, "step": 9749 }, { "epoch": 39.795918367346935, "grad_norm": 15.598270416259766, "learning_rate": 3.0118367346938775e-05, "loss": 2.7022, "step": 9750 }, { "epoch": 39.8, "grad_norm": 29.766551971435547, "learning_rate": 3.011632653061225e-05, "loss": 1.6726, "step": 9751 }, { "epoch": 39.80408163265306, "grad_norm": 25.88998794555664, "learning_rate": 3.0114285714285716e-05, "loss": 2.0771, "step": 9752 }, { "epoch": 39.80816326530612, "grad_norm": 32.77756881713867, "learning_rate": 3.0112244897959187e-05, "loss": 1.261, "step": 9753 }, { "epoch": 39.81224489795918, "grad_norm": 24.4893741607666, "learning_rate": 3.0110204081632654e-05, "loss": 2.0417, "step": 9754 }, { "epoch": 39.816326530612244, "grad_norm": 33.21974182128906, "learning_rate": 3.010816326530612e-05, "loss": 1.5012, "step": 9755 }, { "epoch": 39.820408163265306, "grad_norm": 33.8552360534668, "learning_rate": 3.0106122448979595e-05, "loss": 1.3359, "step": 9756 }, { "epoch": 39.82448979591837, "grad_norm": 26.163713455200195, "learning_rate": 3.0104081632653066e-05, "loss": 2.2218, "step": 9757 }, { "epoch": 39.82857142857143, "grad_norm": 23.447006225585938, "learning_rate": 3.0102040816326533e-05, "loss": 2.3018, "step": 9758 }, { "epoch": 39.83265306122449, "grad_norm": 12.137322425842285, "learning_rate": 3.01e-05, "loss": 3.1223, "step": 9759 }, { "epoch": 39.83673469387755, "grad_norm": 15.421728134155273, "learning_rate": 3.009795918367347e-05, "loss": 2.8157, "step": 9760 }, { "epoch": 39.840816326530614, "grad_norm": 21.511932373046875, "learning_rate": 3.009591836734694e-05, "loss": 2.6402, "step": 9761 }, { "epoch": 39.844897959183676, "grad_norm": 21.100805282592773, "learning_rate": 3.009387755102041e-05, "loss": 2.1887, "step": 9762 }, { "epoch": 39.84897959183674, "grad_norm": 20.50568199157715, "learning_rate": 3.009183673469388e-05, "loss": 2.0386, "step": 9763 }, { "epoch": 39.85306122448979, "grad_norm": 17.362577438354492, "learning_rate": 3.0089795918367346e-05, "loss": 2.1653, "step": 9764 }, { "epoch": 39.857142857142854, "grad_norm": 24.630300521850586, "learning_rate": 3.0087755102040816e-05, "loss": 1.9211, "step": 9765 }, { "epoch": 39.861224489795916, "grad_norm": 20.883258819580078, "learning_rate": 3.008571428571429e-05, "loss": 2.4949, "step": 9766 }, { "epoch": 39.86530612244898, "grad_norm": 17.975357055664062, "learning_rate": 3.0083673469387757e-05, "loss": 2.5817, "step": 9767 }, { "epoch": 39.86938775510204, "grad_norm": 19.40508270263672, "learning_rate": 3.0081632653061224e-05, "loss": 2.4088, "step": 9768 }, { "epoch": 39.8734693877551, "grad_norm": 31.921335220336914, "learning_rate": 3.0079591836734695e-05, "loss": 1.5912, "step": 9769 }, { "epoch": 39.87755102040816, "grad_norm": 10.99854850769043, "learning_rate": 3.0077551020408162e-05, "loss": 3.4377, "step": 9770 }, { "epoch": 39.881632653061224, "grad_norm": 24.326597213745117, "learning_rate": 3.0075510204081636e-05, "loss": 1.8381, "step": 9771 }, { "epoch": 39.885714285714286, "grad_norm": 13.379347801208496, "learning_rate": 3.0073469387755103e-05, "loss": 2.797, "step": 9772 }, { "epoch": 39.88979591836735, "grad_norm": 23.667625427246094, "learning_rate": 3.0071428571428573e-05, "loss": 2.0596, "step": 9773 }, { "epoch": 39.89387755102041, "grad_norm": 28.940452575683594, "learning_rate": 3.006938775510204e-05, "loss": 2.2956, "step": 9774 }, { "epoch": 39.89795918367347, "grad_norm": 26.90225601196289, "learning_rate": 3.0067346938775508e-05, "loss": 2.0048, "step": 9775 }, { "epoch": 39.90204081632653, "grad_norm": 15.689345359802246, "learning_rate": 3.006530612244898e-05, "loss": 2.7759, "step": 9776 }, { "epoch": 39.906122448979595, "grad_norm": 20.226316452026367, "learning_rate": 3.0063265306122452e-05, "loss": 2.2899, "step": 9777 }, { "epoch": 39.910204081632656, "grad_norm": 24.567672729492188, "learning_rate": 3.006122448979592e-05, "loss": 2.3865, "step": 9778 }, { "epoch": 39.91428571428571, "grad_norm": 16.54248809814453, "learning_rate": 3.0059183673469386e-05, "loss": 2.6484, "step": 9779 }, { "epoch": 39.91836734693877, "grad_norm": 29.41356658935547, "learning_rate": 3.0057142857142857e-05, "loss": 1.3315, "step": 9780 }, { "epoch": 39.922448979591834, "grad_norm": 19.897172927856445, "learning_rate": 3.005510204081633e-05, "loss": 2.437, "step": 9781 }, { "epoch": 39.926530612244896, "grad_norm": 23.6657657623291, "learning_rate": 3.0053061224489798e-05, "loss": 1.9059, "step": 9782 }, { "epoch": 39.93061224489796, "grad_norm": 14.27773666381836, "learning_rate": 3.0051020408163265e-05, "loss": 3.091, "step": 9783 }, { "epoch": 39.93469387755102, "grad_norm": 23.508895874023438, "learning_rate": 3.0048979591836736e-05, "loss": 2.5149, "step": 9784 }, { "epoch": 39.93877551020408, "grad_norm": 24.343019485473633, "learning_rate": 3.0046938775510203e-05, "loss": 1.9343, "step": 9785 }, { "epoch": 39.94285714285714, "grad_norm": 13.542839050292969, "learning_rate": 3.0044897959183677e-05, "loss": 3.0302, "step": 9786 }, { "epoch": 39.946938775510205, "grad_norm": 27.746797561645508, "learning_rate": 3.0042857142857144e-05, "loss": 1.4149, "step": 9787 }, { "epoch": 39.95102040816327, "grad_norm": 14.01254653930664, "learning_rate": 3.0040816326530614e-05, "loss": 2.7776, "step": 9788 }, { "epoch": 39.95510204081633, "grad_norm": 38.755611419677734, "learning_rate": 3.003877551020408e-05, "loss": 0.6649, "step": 9789 }, { "epoch": 39.95918367346939, "grad_norm": 18.12276268005371, "learning_rate": 3.0036734693877555e-05, "loss": 2.7247, "step": 9790 }, { "epoch": 39.96326530612245, "grad_norm": 36.75114440917969, "learning_rate": 3.0034693877551022e-05, "loss": 1.5064, "step": 9791 }, { "epoch": 39.96734693877551, "grad_norm": 27.400203704833984, "learning_rate": 3.0032653061224493e-05, "loss": 1.475, "step": 9792 }, { "epoch": 39.97142857142857, "grad_norm": 14.28831958770752, "learning_rate": 3.003061224489796e-05, "loss": 2.7935, "step": 9793 }, { "epoch": 39.97551020408163, "grad_norm": 19.01906967163086, "learning_rate": 3.0028571428571427e-05, "loss": 2.1534, "step": 9794 }, { "epoch": 39.97959183673469, "grad_norm": 21.3363094329834, "learning_rate": 3.00265306122449e-05, "loss": 2.2324, "step": 9795 }, { "epoch": 39.98367346938775, "grad_norm": 34.88427734375, "learning_rate": 3.002448979591837e-05, "loss": 1.9494, "step": 9796 }, { "epoch": 39.987755102040815, "grad_norm": 12.73086166381836, "learning_rate": 3.002244897959184e-05, "loss": 2.8284, "step": 9797 }, { "epoch": 39.99183673469388, "grad_norm": 29.66861915588379, "learning_rate": 3.0020408163265306e-05, "loss": 2.1874, "step": 9798 }, { "epoch": 39.99591836734694, "grad_norm": 28.2200927734375, "learning_rate": 3.0018367346938776e-05, "loss": 1.6774, "step": 9799 }, { "epoch": 40.0, "grad_norm": 36.8502311706543, "learning_rate": 3.001632653061225e-05, "loss": 2.294, "step": 9800 }, { "epoch": 40.00408163265306, "grad_norm": 21.76411247253418, "learning_rate": 3.0014285714285717e-05, "loss": 2.66, "step": 9801 }, { "epoch": 40.00816326530612, "grad_norm": 13.831974983215332, "learning_rate": 3.0012244897959184e-05, "loss": 3.0284, "step": 9802 }, { "epoch": 40.012244897959185, "grad_norm": 22.664995193481445, "learning_rate": 3.0010204081632655e-05, "loss": 2.0789, "step": 9803 }, { "epoch": 40.01632653061225, "grad_norm": 31.41679573059082, "learning_rate": 3.0008163265306122e-05, "loss": 0.5912, "step": 9804 }, { "epoch": 40.02040816326531, "grad_norm": 24.203067779541016, "learning_rate": 3.0006122448979596e-05, "loss": 1.985, "step": 9805 }, { "epoch": 40.02448979591837, "grad_norm": 24.438743591308594, "learning_rate": 3.0004081632653063e-05, "loss": 2.3404, "step": 9806 }, { "epoch": 40.02857142857143, "grad_norm": 28.219345092773438, "learning_rate": 3.0002040816326534e-05, "loss": 2.05, "step": 9807 }, { "epoch": 40.03265306122449, "grad_norm": 24.622060775756836, "learning_rate": 3e-05, "loss": 1.8083, "step": 9808 }, { "epoch": 40.03673469387755, "grad_norm": 39.250770568847656, "learning_rate": 2.9997959183673468e-05, "loss": 1.604, "step": 9809 }, { "epoch": 40.04081632653061, "grad_norm": 16.316436767578125, "learning_rate": 2.9995918367346942e-05, "loss": 2.8292, "step": 9810 }, { "epoch": 40.04489795918367, "grad_norm": 19.747154235839844, "learning_rate": 2.9993877551020412e-05, "loss": 1.9637, "step": 9811 }, { "epoch": 40.04897959183673, "grad_norm": 38.129364013671875, "learning_rate": 2.999183673469388e-05, "loss": 1.4286, "step": 9812 }, { "epoch": 40.053061224489795, "grad_norm": 26.93644142150879, "learning_rate": 2.9989795918367347e-05, "loss": 2.1998, "step": 9813 }, { "epoch": 40.05714285714286, "grad_norm": 30.034788131713867, "learning_rate": 2.9987755102040817e-05, "loss": 2.0694, "step": 9814 }, { "epoch": 40.06122448979592, "grad_norm": 17.81770896911621, "learning_rate": 2.9985714285714288e-05, "loss": 2.8374, "step": 9815 }, { "epoch": 40.06530612244898, "grad_norm": 29.760913848876953, "learning_rate": 2.9983673469387758e-05, "loss": 1.5773, "step": 9816 }, { "epoch": 40.06938775510204, "grad_norm": 19.018077850341797, "learning_rate": 2.9981632653061225e-05, "loss": 2.1819, "step": 9817 }, { "epoch": 40.073469387755104, "grad_norm": 17.795448303222656, "learning_rate": 2.9979591836734692e-05, "loss": 2.6909, "step": 9818 }, { "epoch": 40.077551020408166, "grad_norm": 18.903047561645508, "learning_rate": 2.9977551020408163e-05, "loss": 2.5543, "step": 9819 }, { "epoch": 40.08163265306123, "grad_norm": 25.321712493896484, "learning_rate": 2.9975510204081637e-05, "loss": 2.539, "step": 9820 }, { "epoch": 40.08571428571429, "grad_norm": 14.801848411560059, "learning_rate": 2.9973469387755104e-05, "loss": 2.6211, "step": 9821 }, { "epoch": 40.089795918367344, "grad_norm": 17.83098793029785, "learning_rate": 2.997142857142857e-05, "loss": 2.0017, "step": 9822 }, { "epoch": 40.093877551020405, "grad_norm": 17.09080696105957, "learning_rate": 2.996938775510204e-05, "loss": 2.4734, "step": 9823 }, { "epoch": 40.09795918367347, "grad_norm": 26.97104835510254, "learning_rate": 2.996734693877551e-05, "loss": 2.213, "step": 9824 }, { "epoch": 40.10204081632653, "grad_norm": 23.483917236328125, "learning_rate": 2.9965306122448983e-05, "loss": 2.4835, "step": 9825 }, { "epoch": 40.10612244897959, "grad_norm": 12.442994117736816, "learning_rate": 2.996326530612245e-05, "loss": 2.9668, "step": 9826 }, { "epoch": 40.11020408163265, "grad_norm": 12.78480339050293, "learning_rate": 2.996122448979592e-05, "loss": 2.6926, "step": 9827 }, { "epoch": 40.114285714285714, "grad_norm": 39.94268798828125, "learning_rate": 2.9959183673469387e-05, "loss": 1.3875, "step": 9828 }, { "epoch": 40.118367346938776, "grad_norm": 22.60536766052246, "learning_rate": 2.9957142857142854e-05, "loss": 1.792, "step": 9829 }, { "epoch": 40.12244897959184, "grad_norm": 16.68794822692871, "learning_rate": 2.995510204081633e-05, "loss": 2.3283, "step": 9830 }, { "epoch": 40.1265306122449, "grad_norm": 15.873313903808594, "learning_rate": 2.99530612244898e-05, "loss": 2.9994, "step": 9831 }, { "epoch": 40.13061224489796, "grad_norm": 16.641420364379883, "learning_rate": 2.9951020408163266e-05, "loss": 2.4897, "step": 9832 }, { "epoch": 40.13469387755102, "grad_norm": 18.802358627319336, "learning_rate": 2.9948979591836733e-05, "loss": 2.277, "step": 9833 }, { "epoch": 40.138775510204084, "grad_norm": 19.48831558227539, "learning_rate": 2.9946938775510207e-05, "loss": 2.2603, "step": 9834 }, { "epoch": 40.142857142857146, "grad_norm": 13.877335548400879, "learning_rate": 2.9944897959183678e-05, "loss": 3.1131, "step": 9835 }, { "epoch": 40.14693877551021, "grad_norm": 24.91948699951172, "learning_rate": 2.9942857142857145e-05, "loss": 2.3917, "step": 9836 }, { "epoch": 40.15102040816326, "grad_norm": 23.2894287109375, "learning_rate": 2.9940816326530612e-05, "loss": 2.5446, "step": 9837 }, { "epoch": 40.155102040816324, "grad_norm": 35.31298065185547, "learning_rate": 2.9938775510204082e-05, "loss": 1.4462, "step": 9838 }, { "epoch": 40.159183673469386, "grad_norm": 12.536365509033203, "learning_rate": 2.9936734693877556e-05, "loss": 2.8889, "step": 9839 }, { "epoch": 40.16326530612245, "grad_norm": 34.9715576171875, "learning_rate": 2.9934693877551023e-05, "loss": 0.8198, "step": 9840 }, { "epoch": 40.16734693877551, "grad_norm": 39.8417854309082, "learning_rate": 2.993265306122449e-05, "loss": 0.962, "step": 9841 }, { "epoch": 40.17142857142857, "grad_norm": 28.138397216796875, "learning_rate": 2.993061224489796e-05, "loss": 2.0566, "step": 9842 }, { "epoch": 40.17551020408163, "grad_norm": 34.592498779296875, "learning_rate": 2.9928571428571428e-05, "loss": 1.8461, "step": 9843 }, { "epoch": 40.179591836734694, "grad_norm": 31.746381759643555, "learning_rate": 2.9926530612244902e-05, "loss": 1.8851, "step": 9844 }, { "epoch": 40.183673469387756, "grad_norm": 49.03145217895508, "learning_rate": 2.992448979591837e-05, "loss": 0.5614, "step": 9845 }, { "epoch": 40.18775510204082, "grad_norm": 12.840174674987793, "learning_rate": 2.992244897959184e-05, "loss": 3.1495, "step": 9846 }, { "epoch": 40.19183673469388, "grad_norm": 39.46322250366211, "learning_rate": 2.9920408163265307e-05, "loss": 1.6585, "step": 9847 }, { "epoch": 40.19591836734694, "grad_norm": 39.51191711425781, "learning_rate": 2.9918367346938774e-05, "loss": 2.3357, "step": 9848 }, { "epoch": 40.2, "grad_norm": 22.054067611694336, "learning_rate": 2.9916326530612248e-05, "loss": 2.3407, "step": 9849 }, { "epoch": 40.204081632653065, "grad_norm": 19.84799575805664, "learning_rate": 2.9914285714285718e-05, "loss": 2.0826, "step": 9850 }, { "epoch": 40.20816326530612, "grad_norm": 16.00128936767578, "learning_rate": 2.9912244897959185e-05, "loss": 2.6591, "step": 9851 }, { "epoch": 40.21224489795918, "grad_norm": 46.72755432128906, "learning_rate": 2.9910204081632653e-05, "loss": 1.8774, "step": 9852 }, { "epoch": 40.21632653061224, "grad_norm": 42.25045394897461, "learning_rate": 2.9908163265306123e-05, "loss": 1.5213, "step": 9853 }, { "epoch": 40.220408163265304, "grad_norm": 10.751128196716309, "learning_rate": 2.9906122448979597e-05, "loss": 3.1261, "step": 9854 }, { "epoch": 40.224489795918366, "grad_norm": 29.42551040649414, "learning_rate": 2.9904081632653064e-05, "loss": 1.5146, "step": 9855 }, { "epoch": 40.22857142857143, "grad_norm": 21.76936149597168, "learning_rate": 2.990204081632653e-05, "loss": 1.9967, "step": 9856 }, { "epoch": 40.23265306122449, "grad_norm": 32.54249954223633, "learning_rate": 2.9900000000000002e-05, "loss": 1.1909, "step": 9857 }, { "epoch": 40.23673469387755, "grad_norm": 29.909385681152344, "learning_rate": 2.989795918367347e-05, "loss": 2.2714, "step": 9858 }, { "epoch": 40.24081632653061, "grad_norm": 35.95024871826172, "learning_rate": 2.9895918367346943e-05, "loss": 2.381, "step": 9859 }, { "epoch": 40.244897959183675, "grad_norm": 22.477087020874023, "learning_rate": 2.989387755102041e-05, "loss": 2.517, "step": 9860 }, { "epoch": 40.248979591836736, "grad_norm": 20.920658111572266, "learning_rate": 2.989183673469388e-05, "loss": 2.4462, "step": 9861 }, { "epoch": 40.2530612244898, "grad_norm": 12.689844131469727, "learning_rate": 2.9889795918367348e-05, "loss": 3.0151, "step": 9862 }, { "epoch": 40.25714285714286, "grad_norm": 29.716129302978516, "learning_rate": 2.9887755102040815e-05, "loss": 2.2578, "step": 9863 }, { "epoch": 40.26122448979592, "grad_norm": 15.433313369750977, "learning_rate": 2.988571428571429e-05, "loss": 2.7256, "step": 9864 }, { "epoch": 40.265306122448976, "grad_norm": 21.73919105529785, "learning_rate": 2.988367346938776e-05, "loss": 2.5971, "step": 9865 }, { "epoch": 40.26938775510204, "grad_norm": 12.358136177062988, "learning_rate": 2.9881632653061226e-05, "loss": 2.9738, "step": 9866 }, { "epoch": 40.2734693877551, "grad_norm": 23.95865821838379, "learning_rate": 2.9879591836734693e-05, "loss": 1.9717, "step": 9867 }, { "epoch": 40.27755102040816, "grad_norm": 30.494068145751953, "learning_rate": 2.987755102040816e-05, "loss": 1.869, "step": 9868 }, { "epoch": 40.28163265306122, "grad_norm": 21.040359497070312, "learning_rate": 2.9875510204081634e-05, "loss": 2.4954, "step": 9869 }, { "epoch": 40.285714285714285, "grad_norm": 58.635746002197266, "learning_rate": 2.9873469387755105e-05, "loss": 0.9785, "step": 9870 }, { "epoch": 40.289795918367346, "grad_norm": 19.02621841430664, "learning_rate": 2.9871428571428572e-05, "loss": 2.456, "step": 9871 }, { "epoch": 40.29387755102041, "grad_norm": 15.690359115600586, "learning_rate": 2.986938775510204e-05, "loss": 2.6471, "step": 9872 }, { "epoch": 40.29795918367347, "grad_norm": 20.51329803466797, "learning_rate": 2.986734693877551e-05, "loss": 2.0874, "step": 9873 }, { "epoch": 40.30204081632653, "grad_norm": 22.78043556213379, "learning_rate": 2.9865306122448984e-05, "loss": 1.7946, "step": 9874 }, { "epoch": 40.30612244897959, "grad_norm": 34.7205696105957, "learning_rate": 2.986326530612245e-05, "loss": 2.2156, "step": 9875 }, { "epoch": 40.310204081632655, "grad_norm": 23.1200008392334, "learning_rate": 2.9861224489795918e-05, "loss": 2.4847, "step": 9876 }, { "epoch": 40.31428571428572, "grad_norm": 38.7383918762207, "learning_rate": 2.9859183673469388e-05, "loss": 1.4708, "step": 9877 }, { "epoch": 40.31836734693878, "grad_norm": 20.181089401245117, "learning_rate": 2.9857142857142862e-05, "loss": 2.291, "step": 9878 }, { "epoch": 40.32244897959184, "grad_norm": 34.81210708618164, "learning_rate": 2.985510204081633e-05, "loss": 1.3004, "step": 9879 }, { "epoch": 40.326530612244895, "grad_norm": 27.481691360473633, "learning_rate": 2.9853061224489796e-05, "loss": 1.7024, "step": 9880 }, { "epoch": 40.33061224489796, "grad_norm": 16.35770606994629, "learning_rate": 2.9851020408163267e-05, "loss": 2.6237, "step": 9881 }, { "epoch": 40.33469387755102, "grad_norm": 28.45629119873047, "learning_rate": 2.9848979591836734e-05, "loss": 1.9288, "step": 9882 }, { "epoch": 40.33877551020408, "grad_norm": 19.871437072753906, "learning_rate": 2.9846938775510208e-05, "loss": 2.5253, "step": 9883 }, { "epoch": 40.34285714285714, "grad_norm": 26.197208404541016, "learning_rate": 2.9844897959183675e-05, "loss": 2.0748, "step": 9884 }, { "epoch": 40.3469387755102, "grad_norm": 22.88514518737793, "learning_rate": 2.9842857142857146e-05, "loss": 1.8129, "step": 9885 }, { "epoch": 40.351020408163265, "grad_norm": 17.596355438232422, "learning_rate": 2.9840816326530613e-05, "loss": 2.6838, "step": 9886 }, { "epoch": 40.35510204081633, "grad_norm": 23.243906021118164, "learning_rate": 2.983877551020408e-05, "loss": 1.9374, "step": 9887 }, { "epoch": 40.35918367346939, "grad_norm": 29.03093719482422, "learning_rate": 2.9836734693877554e-05, "loss": 1.2111, "step": 9888 }, { "epoch": 40.36326530612245, "grad_norm": 19.910982131958008, "learning_rate": 2.9834693877551024e-05, "loss": 2.5714, "step": 9889 }, { "epoch": 40.36734693877551, "grad_norm": 28.88772964477539, "learning_rate": 2.983265306122449e-05, "loss": 1.9487, "step": 9890 }, { "epoch": 40.371428571428574, "grad_norm": 21.147933959960938, "learning_rate": 2.983061224489796e-05, "loss": 2.2098, "step": 9891 }, { "epoch": 40.375510204081635, "grad_norm": 33.38015365600586, "learning_rate": 2.982857142857143e-05, "loss": 0.8029, "step": 9892 }, { "epoch": 40.3795918367347, "grad_norm": 53.82832336425781, "learning_rate": 2.9826530612244903e-05, "loss": 1.274, "step": 9893 }, { "epoch": 40.38367346938775, "grad_norm": 18.365388870239258, "learning_rate": 2.982448979591837e-05, "loss": 2.5414, "step": 9894 }, { "epoch": 40.38775510204081, "grad_norm": 16.46255111694336, "learning_rate": 2.9822448979591837e-05, "loss": 2.5997, "step": 9895 }, { "epoch": 40.391836734693875, "grad_norm": 28.38609504699707, "learning_rate": 2.9820408163265308e-05, "loss": 1.3812, "step": 9896 }, { "epoch": 40.39591836734694, "grad_norm": 26.737340927124023, "learning_rate": 2.9818367346938775e-05, "loss": 1.2595, "step": 9897 }, { "epoch": 40.4, "grad_norm": 17.16948127746582, "learning_rate": 2.981632653061225e-05, "loss": 2.3973, "step": 9898 }, { "epoch": 40.40408163265306, "grad_norm": 43.06195068359375, "learning_rate": 2.9814285714285716e-05, "loss": 1.559, "step": 9899 }, { "epoch": 40.40816326530612, "grad_norm": 30.496986389160156, "learning_rate": 2.9812244897959186e-05, "loss": 2.1811, "step": 9900 }, { "epoch": 40.412244897959184, "grad_norm": 10.270215034484863, "learning_rate": 2.9810204081632653e-05, "loss": 3.1307, "step": 9901 }, { "epoch": 40.416326530612245, "grad_norm": 35.71529769897461, "learning_rate": 2.980816326530612e-05, "loss": 1.2382, "step": 9902 }, { "epoch": 40.42040816326531, "grad_norm": 10.931726455688477, "learning_rate": 2.9806122448979595e-05, "loss": 3.3614, "step": 9903 }, { "epoch": 40.42448979591837, "grad_norm": 14.331191062927246, "learning_rate": 2.9804081632653065e-05, "loss": 2.9414, "step": 9904 }, { "epoch": 40.42857142857143, "grad_norm": 24.437801361083984, "learning_rate": 2.9802040816326532e-05, "loss": 1.9916, "step": 9905 }, { "epoch": 40.43265306122449, "grad_norm": 32.056861877441406, "learning_rate": 2.98e-05, "loss": 1.5866, "step": 9906 }, { "epoch": 40.436734693877554, "grad_norm": 21.196382522583008, "learning_rate": 2.979795918367347e-05, "loss": 1.7592, "step": 9907 }, { "epoch": 40.440816326530616, "grad_norm": 23.27487564086914, "learning_rate": 2.9795918367346944e-05, "loss": 2.2837, "step": 9908 }, { "epoch": 40.44489795918367, "grad_norm": 48.42829132080078, "learning_rate": 2.979387755102041e-05, "loss": 1.3752, "step": 9909 }, { "epoch": 40.44897959183673, "grad_norm": 24.865188598632812, "learning_rate": 2.9791836734693878e-05, "loss": 2.2771, "step": 9910 }, { "epoch": 40.453061224489794, "grad_norm": 15.430898666381836, "learning_rate": 2.978979591836735e-05, "loss": 2.7284, "step": 9911 }, { "epoch": 40.457142857142856, "grad_norm": 26.271757125854492, "learning_rate": 2.9787755102040816e-05, "loss": 1.5451, "step": 9912 }, { "epoch": 40.46122448979592, "grad_norm": 33.20426559448242, "learning_rate": 2.978571428571429e-05, "loss": 1.0031, "step": 9913 }, { "epoch": 40.46530612244898, "grad_norm": 13.571796417236328, "learning_rate": 2.9783673469387757e-05, "loss": 2.7614, "step": 9914 }, { "epoch": 40.46938775510204, "grad_norm": 36.37934875488281, "learning_rate": 2.9781632653061227e-05, "loss": 1.2354, "step": 9915 }, { "epoch": 40.4734693877551, "grad_norm": 47.175567626953125, "learning_rate": 2.9779591836734694e-05, "loss": 1.7978, "step": 9916 }, { "epoch": 40.477551020408164, "grad_norm": 43.64710235595703, "learning_rate": 2.977755102040816e-05, "loss": 2.0027, "step": 9917 }, { "epoch": 40.481632653061226, "grad_norm": 28.140722274780273, "learning_rate": 2.9775510204081635e-05, "loss": 1.6066, "step": 9918 }, { "epoch": 40.48571428571429, "grad_norm": 18.200929641723633, "learning_rate": 2.9773469387755102e-05, "loss": 2.5634, "step": 9919 }, { "epoch": 40.48979591836735, "grad_norm": 27.051816940307617, "learning_rate": 2.9771428571428573e-05, "loss": 2.3036, "step": 9920 }, { "epoch": 40.49387755102041, "grad_norm": 24.383230209350586, "learning_rate": 2.976938775510204e-05, "loss": 2.242, "step": 9921 }, { "epoch": 40.49795918367347, "grad_norm": 18.737443923950195, "learning_rate": 2.9767346938775514e-05, "loss": 2.7191, "step": 9922 }, { "epoch": 40.50204081632653, "grad_norm": 27.40213966369629, "learning_rate": 2.976530612244898e-05, "loss": 1.8174, "step": 9923 }, { "epoch": 40.50612244897959, "grad_norm": 38.632896423339844, "learning_rate": 2.976326530612245e-05, "loss": 1.3661, "step": 9924 }, { "epoch": 40.51020408163265, "grad_norm": 22.49775505065918, "learning_rate": 2.976122448979592e-05, "loss": 1.8745, "step": 9925 }, { "epoch": 40.51428571428571, "grad_norm": 18.612136840820312, "learning_rate": 2.9759183673469386e-05, "loss": 2.629, "step": 9926 }, { "epoch": 40.518367346938774, "grad_norm": 17.325481414794922, "learning_rate": 2.975714285714286e-05, "loss": 2.4845, "step": 9927 }, { "epoch": 40.522448979591836, "grad_norm": 46.19401550292969, "learning_rate": 2.975510204081633e-05, "loss": 0.5498, "step": 9928 }, { "epoch": 40.5265306122449, "grad_norm": 24.302444458007812, "learning_rate": 2.9753061224489797e-05, "loss": 2.6423, "step": 9929 }, { "epoch": 40.53061224489796, "grad_norm": 11.88295841217041, "learning_rate": 2.9751020408163265e-05, "loss": 3.0601, "step": 9930 }, { "epoch": 40.53469387755102, "grad_norm": 19.67642593383789, "learning_rate": 2.9748979591836735e-05, "loss": 0.1887, "step": 9931 }, { "epoch": 40.53877551020408, "grad_norm": 22.61467933654785, "learning_rate": 2.974693877551021e-05, "loss": 2.216, "step": 9932 }, { "epoch": 40.542857142857144, "grad_norm": 17.784791946411133, "learning_rate": 2.9744897959183676e-05, "loss": 2.6659, "step": 9933 }, { "epoch": 40.546938775510206, "grad_norm": 39.90578079223633, "learning_rate": 2.9742857142857143e-05, "loss": 1.3588, "step": 9934 }, { "epoch": 40.55102040816327, "grad_norm": 13.519196510314941, "learning_rate": 2.9740816326530614e-05, "loss": 2.8174, "step": 9935 }, { "epoch": 40.55510204081633, "grad_norm": 39.178932189941406, "learning_rate": 2.973877551020408e-05, "loss": 1.3428, "step": 9936 }, { "epoch": 40.55918367346939, "grad_norm": 16.90998077392578, "learning_rate": 2.9736734693877555e-05, "loss": 2.7271, "step": 9937 }, { "epoch": 40.563265306122446, "grad_norm": 35.49149703979492, "learning_rate": 2.9734693877551022e-05, "loss": 0.7396, "step": 9938 }, { "epoch": 40.56734693877551, "grad_norm": 19.53778648376465, "learning_rate": 2.9732653061224492e-05, "loss": 2.4267, "step": 9939 }, { "epoch": 40.57142857142857, "grad_norm": 28.690404891967773, "learning_rate": 2.973061224489796e-05, "loss": 0.6987, "step": 9940 }, { "epoch": 40.57551020408163, "grad_norm": 15.287598609924316, "learning_rate": 2.9728571428571427e-05, "loss": 2.7691, "step": 9941 }, { "epoch": 40.57959183673469, "grad_norm": 27.370962142944336, "learning_rate": 2.97265306122449e-05, "loss": 1.8283, "step": 9942 }, { "epoch": 40.583673469387755, "grad_norm": 17.276390075683594, "learning_rate": 2.972448979591837e-05, "loss": 2.0295, "step": 9943 }, { "epoch": 40.587755102040816, "grad_norm": 24.621856689453125, "learning_rate": 2.9722448979591838e-05, "loss": 2.035, "step": 9944 }, { "epoch": 40.59183673469388, "grad_norm": 16.267122268676758, "learning_rate": 2.9720408163265305e-05, "loss": 2.3852, "step": 9945 }, { "epoch": 40.59591836734694, "grad_norm": 31.23583221435547, "learning_rate": 2.9718367346938776e-05, "loss": 1.7259, "step": 9946 }, { "epoch": 40.6, "grad_norm": 23.76442527770996, "learning_rate": 2.971632653061225e-05, "loss": 2.3667, "step": 9947 }, { "epoch": 40.60408163265306, "grad_norm": 28.729625701904297, "learning_rate": 2.9714285714285717e-05, "loss": 2.0136, "step": 9948 }, { "epoch": 40.608163265306125, "grad_norm": 41.53856658935547, "learning_rate": 2.9712244897959184e-05, "loss": 2.1562, "step": 9949 }, { "epoch": 40.61224489795919, "grad_norm": 20.045665740966797, "learning_rate": 2.9710204081632654e-05, "loss": 2.0788, "step": 9950 }, { "epoch": 40.61632653061225, "grad_norm": 43.78807067871094, "learning_rate": 2.970816326530612e-05, "loss": 0.6634, "step": 9951 }, { "epoch": 40.6204081632653, "grad_norm": 25.171184539794922, "learning_rate": 2.9706122448979595e-05, "loss": 2.6593, "step": 9952 }, { "epoch": 40.624489795918365, "grad_norm": 41.00587844848633, "learning_rate": 2.9704081632653063e-05, "loss": 0.7244, "step": 9953 }, { "epoch": 40.628571428571426, "grad_norm": 17.535242080688477, "learning_rate": 2.9702040816326533e-05, "loss": 2.6005, "step": 9954 }, { "epoch": 40.63265306122449, "grad_norm": 17.101558685302734, "learning_rate": 2.97e-05, "loss": 2.7029, "step": 9955 }, { "epoch": 40.63673469387755, "grad_norm": 34.87258529663086, "learning_rate": 2.9697959183673467e-05, "loss": 1.352, "step": 9956 }, { "epoch": 40.64081632653061, "grad_norm": 25.218284606933594, "learning_rate": 2.969591836734694e-05, "loss": 2.2328, "step": 9957 }, { "epoch": 40.64489795918367, "grad_norm": 52.6053581237793, "learning_rate": 2.9693877551020412e-05, "loss": 1.5152, "step": 9958 }, { "epoch": 40.648979591836735, "grad_norm": 18.675485610961914, "learning_rate": 2.969183673469388e-05, "loss": 2.6565, "step": 9959 }, { "epoch": 40.6530612244898, "grad_norm": 16.82158088684082, "learning_rate": 2.9689795918367346e-05, "loss": 2.7566, "step": 9960 }, { "epoch": 40.65714285714286, "grad_norm": 22.256999969482422, "learning_rate": 2.9687755102040817e-05, "loss": 1.4672, "step": 9961 }, { "epoch": 40.66122448979592, "grad_norm": 11.979194641113281, "learning_rate": 2.968571428571429e-05, "loss": 3.2531, "step": 9962 }, { "epoch": 40.66530612244898, "grad_norm": 25.826824188232422, "learning_rate": 2.9683673469387758e-05, "loss": 1.8591, "step": 9963 }, { "epoch": 40.66938775510204, "grad_norm": 28.899154663085938, "learning_rate": 2.9681632653061225e-05, "loss": 1.3355, "step": 9964 }, { "epoch": 40.673469387755105, "grad_norm": 19.93602180480957, "learning_rate": 2.9679591836734695e-05, "loss": 2.2643, "step": 9965 }, { "epoch": 40.67755102040816, "grad_norm": 18.108217239379883, "learning_rate": 2.9677551020408162e-05, "loss": 2.5466, "step": 9966 }, { "epoch": 40.68163265306122, "grad_norm": 29.236074447631836, "learning_rate": 2.9675510204081636e-05, "loss": 2.1539, "step": 9967 }, { "epoch": 40.68571428571428, "grad_norm": 28.82597541809082, "learning_rate": 2.9673469387755103e-05, "loss": 1.5442, "step": 9968 }, { "epoch": 40.689795918367345, "grad_norm": 32.03996658325195, "learning_rate": 2.9671428571428574e-05, "loss": 1.4634, "step": 9969 }, { "epoch": 40.69387755102041, "grad_norm": 25.793922424316406, "learning_rate": 2.966938775510204e-05, "loss": 1.9409, "step": 9970 }, { "epoch": 40.69795918367347, "grad_norm": 39.779415130615234, "learning_rate": 2.9667346938775515e-05, "loss": 1.4606, "step": 9971 }, { "epoch": 40.70204081632653, "grad_norm": 25.78291893005371, "learning_rate": 2.9665306122448982e-05, "loss": 1.8235, "step": 9972 }, { "epoch": 40.70612244897959, "grad_norm": 41.844215393066406, "learning_rate": 2.966326530612245e-05, "loss": 0.88, "step": 9973 }, { "epoch": 40.710204081632654, "grad_norm": 16.238265991210938, "learning_rate": 2.966122448979592e-05, "loss": 2.5595, "step": 9974 }, { "epoch": 40.714285714285715, "grad_norm": 11.254767417907715, "learning_rate": 2.9659183673469387e-05, "loss": 3.4324, "step": 9975 }, { "epoch": 40.71836734693878, "grad_norm": 30.520639419555664, "learning_rate": 2.965714285714286e-05, "loss": 0.7939, "step": 9976 }, { "epoch": 40.72244897959184, "grad_norm": 37.8429069519043, "learning_rate": 2.9655102040816328e-05, "loss": 1.7181, "step": 9977 }, { "epoch": 40.7265306122449, "grad_norm": 36.060951232910156, "learning_rate": 2.96530612244898e-05, "loss": 0.6558, "step": 9978 }, { "epoch": 40.73061224489796, "grad_norm": 35.19839859008789, "learning_rate": 2.9651020408163265e-05, "loss": 1.5883, "step": 9979 }, { "epoch": 40.734693877551024, "grad_norm": 13.77615737915039, "learning_rate": 2.9648979591836733e-05, "loss": 2.6802, "step": 9980 }, { "epoch": 40.73877551020408, "grad_norm": 11.434947967529297, "learning_rate": 2.9646938775510206e-05, "loss": 3.0285, "step": 9981 }, { "epoch": 40.74285714285714, "grad_norm": 16.144010543823242, "learning_rate": 2.9644897959183677e-05, "loss": 2.7865, "step": 9982 }, { "epoch": 40.7469387755102, "grad_norm": 43.253787994384766, "learning_rate": 2.9642857142857144e-05, "loss": 1.3148, "step": 9983 }, { "epoch": 40.751020408163264, "grad_norm": 23.87767791748047, "learning_rate": 2.964081632653061e-05, "loss": 2.2162, "step": 9984 }, { "epoch": 40.755102040816325, "grad_norm": 27.66019630432129, "learning_rate": 2.9638775510204082e-05, "loss": 2.2106, "step": 9985 }, { "epoch": 40.75918367346939, "grad_norm": 15.633894920349121, "learning_rate": 2.9636734693877556e-05, "loss": 2.4683, "step": 9986 }, { "epoch": 40.76326530612245, "grad_norm": 36.3570671081543, "learning_rate": 2.9634693877551023e-05, "loss": 1.6321, "step": 9987 }, { "epoch": 40.76734693877551, "grad_norm": 7.374781608581543, "learning_rate": 2.963265306122449e-05, "loss": 3.4866, "step": 9988 }, { "epoch": 40.77142857142857, "grad_norm": 15.049338340759277, "learning_rate": 2.963061224489796e-05, "loss": 2.6799, "step": 9989 }, { "epoch": 40.775510204081634, "grad_norm": 22.41917610168457, "learning_rate": 2.9628571428571428e-05, "loss": 1.6823, "step": 9990 }, { "epoch": 40.779591836734696, "grad_norm": 27.809907913208008, "learning_rate": 2.96265306122449e-05, "loss": 1.469, "step": 9991 }, { "epoch": 40.78367346938776, "grad_norm": 25.871013641357422, "learning_rate": 2.962448979591837e-05, "loss": 1.8246, "step": 9992 }, { "epoch": 40.78775510204082, "grad_norm": 14.63335132598877, "learning_rate": 2.962244897959184e-05, "loss": 2.646, "step": 9993 }, { "epoch": 40.79183673469388, "grad_norm": 29.350099563598633, "learning_rate": 2.9620408163265306e-05, "loss": 1.8714, "step": 9994 }, { "epoch": 40.795918367346935, "grad_norm": 30.950632095336914, "learning_rate": 2.9618367346938773e-05, "loss": 1.5712, "step": 9995 }, { "epoch": 40.8, "grad_norm": 27.968050003051758, "learning_rate": 2.9616326530612247e-05, "loss": 2.1042, "step": 9996 }, { "epoch": 40.80408163265306, "grad_norm": 27.984617233276367, "learning_rate": 2.9614285714285718e-05, "loss": 1.5059, "step": 9997 }, { "epoch": 40.80816326530612, "grad_norm": 19.957462310791016, "learning_rate": 2.9612244897959185e-05, "loss": 2.0193, "step": 9998 }, { "epoch": 40.81224489795918, "grad_norm": 18.38855743408203, "learning_rate": 2.9610204081632652e-05, "loss": 2.6387, "step": 9999 }, { "epoch": 40.816326530612244, "grad_norm": 38.8245735168457, "learning_rate": 2.9608163265306123e-05, "loss": 2.4088, "step": 10000 }, { "epoch": 40.820408163265306, "grad_norm": 31.85787010192871, "learning_rate": 2.9606122448979596e-05, "loss": 1.6331, "step": 10001 }, { "epoch": 40.82448979591837, "grad_norm": 23.101360321044922, "learning_rate": 2.9604081632653064e-05, "loss": 1.9078, "step": 10002 }, { "epoch": 40.82857142857143, "grad_norm": 15.006377220153809, "learning_rate": 2.960204081632653e-05, "loss": 2.8476, "step": 10003 }, { "epoch": 40.83265306122449, "grad_norm": 14.540410041809082, "learning_rate": 2.96e-05, "loss": 2.9944, "step": 10004 }, { "epoch": 40.83673469387755, "grad_norm": 20.040443420410156, "learning_rate": 2.959795918367347e-05, "loss": 2.378, "step": 10005 }, { "epoch": 40.840816326530614, "grad_norm": 16.19895362854004, "learning_rate": 2.9595918367346942e-05, "loss": 2.639, "step": 10006 }, { "epoch": 40.844897959183676, "grad_norm": 16.235727310180664, "learning_rate": 2.959387755102041e-05, "loss": 2.9886, "step": 10007 }, { "epoch": 40.84897959183674, "grad_norm": 19.220308303833008, "learning_rate": 2.959183673469388e-05, "loss": 2.7163, "step": 10008 }, { "epoch": 40.85306122448979, "grad_norm": 22.482257843017578, "learning_rate": 2.9589795918367347e-05, "loss": 2.4294, "step": 10009 }, { "epoch": 40.857142857142854, "grad_norm": 17.38543701171875, "learning_rate": 2.9587755102040814e-05, "loss": 2.5103, "step": 10010 }, { "epoch": 40.861224489795916, "grad_norm": 28.386676788330078, "learning_rate": 2.9585714285714288e-05, "loss": 1.4104, "step": 10011 }, { "epoch": 40.86530612244898, "grad_norm": 16.193614959716797, "learning_rate": 2.958367346938776e-05, "loss": 2.7043, "step": 10012 }, { "epoch": 40.86938775510204, "grad_norm": 25.24837303161621, "learning_rate": 2.9581632653061226e-05, "loss": 1.803, "step": 10013 }, { "epoch": 40.8734693877551, "grad_norm": 18.31875991821289, "learning_rate": 2.9579591836734693e-05, "loss": 2.4831, "step": 10014 }, { "epoch": 40.87755102040816, "grad_norm": 32.46467208862305, "learning_rate": 2.9577551020408167e-05, "loss": 2.3014, "step": 10015 }, { "epoch": 40.881632653061224, "grad_norm": 12.34933853149414, "learning_rate": 2.9575510204081637e-05, "loss": 2.5528, "step": 10016 }, { "epoch": 40.885714285714286, "grad_norm": 34.55997085571289, "learning_rate": 2.9573469387755104e-05, "loss": 1.633, "step": 10017 }, { "epoch": 40.88979591836735, "grad_norm": 16.224271774291992, "learning_rate": 2.957142857142857e-05, "loss": 3.0184, "step": 10018 }, { "epoch": 40.89387755102041, "grad_norm": 15.225815773010254, "learning_rate": 2.9569387755102042e-05, "loss": 2.7868, "step": 10019 }, { "epoch": 40.89795918367347, "grad_norm": 17.1093692779541, "learning_rate": 2.9567346938775516e-05, "loss": 2.7987, "step": 10020 }, { "epoch": 40.90204081632653, "grad_norm": 27.535436630249023, "learning_rate": 2.9565306122448983e-05, "loss": 2.0357, "step": 10021 }, { "epoch": 40.906122448979595, "grad_norm": 12.247686386108398, "learning_rate": 2.956326530612245e-05, "loss": 2.9877, "step": 10022 }, { "epoch": 40.910204081632656, "grad_norm": 19.349637985229492, "learning_rate": 2.956122448979592e-05, "loss": 2.3626, "step": 10023 }, { "epoch": 40.91428571428571, "grad_norm": 27.88827896118164, "learning_rate": 2.9559183673469388e-05, "loss": 2.2358, "step": 10024 }, { "epoch": 40.91836734693877, "grad_norm": 23.268163681030273, "learning_rate": 2.955714285714286e-05, "loss": 1.2603, "step": 10025 }, { "epoch": 40.922448979591834, "grad_norm": 20.53775405883789, "learning_rate": 2.955510204081633e-05, "loss": 2.3865, "step": 10026 }, { "epoch": 40.926530612244896, "grad_norm": 48.88850402832031, "learning_rate": 2.9553061224489796e-05, "loss": 1.442, "step": 10027 }, { "epoch": 40.93061224489796, "grad_norm": 39.759281158447266, "learning_rate": 2.9551020408163266e-05, "loss": 1.8129, "step": 10028 }, { "epoch": 40.93469387755102, "grad_norm": 19.614368438720703, "learning_rate": 2.9548979591836734e-05, "loss": 2.1424, "step": 10029 }, { "epoch": 40.93877551020408, "grad_norm": 21.64149284362793, "learning_rate": 2.9546938775510207e-05, "loss": 2.4524, "step": 10030 }, { "epoch": 40.94285714285714, "grad_norm": 22.051036834716797, "learning_rate": 2.9544897959183675e-05, "loss": 2.3382, "step": 10031 }, { "epoch": 40.946938775510205, "grad_norm": 46.267093658447266, "learning_rate": 2.9542857142857145e-05, "loss": 0.8284, "step": 10032 }, { "epoch": 40.95102040816327, "grad_norm": 20.80196762084961, "learning_rate": 2.9540816326530612e-05, "loss": 2.5064, "step": 10033 }, { "epoch": 40.95510204081633, "grad_norm": 39.3648567199707, "learning_rate": 2.953877551020408e-05, "loss": 0.7033, "step": 10034 }, { "epoch": 40.95918367346939, "grad_norm": 15.708866119384766, "learning_rate": 2.9536734693877553e-05, "loss": 2.7434, "step": 10035 }, { "epoch": 40.96326530612245, "grad_norm": 15.889719009399414, "learning_rate": 2.9534693877551024e-05, "loss": 2.6756, "step": 10036 }, { "epoch": 40.96734693877551, "grad_norm": 20.42378044128418, "learning_rate": 2.953265306122449e-05, "loss": 2.3271, "step": 10037 }, { "epoch": 40.97142857142857, "grad_norm": 22.3851318359375, "learning_rate": 2.9530612244897958e-05, "loss": 2.2232, "step": 10038 }, { "epoch": 40.97551020408163, "grad_norm": 22.0161075592041, "learning_rate": 2.952857142857143e-05, "loss": 2.1818, "step": 10039 }, { "epoch": 40.97959183673469, "grad_norm": 26.5289306640625, "learning_rate": 2.9526530612244902e-05, "loss": 1.9997, "step": 10040 }, { "epoch": 40.98367346938775, "grad_norm": 35.5722541809082, "learning_rate": 2.952448979591837e-05, "loss": 1.4967, "step": 10041 }, { "epoch": 40.987755102040815, "grad_norm": 11.567827224731445, "learning_rate": 2.9522448979591837e-05, "loss": 3.1128, "step": 10042 }, { "epoch": 40.99183673469388, "grad_norm": 22.31427574157715, "learning_rate": 2.9520408163265307e-05, "loss": 2.2148, "step": 10043 }, { "epoch": 40.99591836734694, "grad_norm": 16.341533660888672, "learning_rate": 2.9518367346938774e-05, "loss": 2.6337, "step": 10044 }, { "epoch": 41.0, "grad_norm": 16.568511962890625, "learning_rate": 2.9516326530612248e-05, "loss": 2.5938, "step": 10045 }, { "epoch": 41.00408163265306, "grad_norm": 15.381704330444336, "learning_rate": 2.9514285714285715e-05, "loss": 2.5927, "step": 10046 }, { "epoch": 41.00816326530612, "grad_norm": 21.86948585510254, "learning_rate": 2.9512244897959186e-05, "loss": 1.9809, "step": 10047 }, { "epoch": 41.012244897959185, "grad_norm": 11.777074813842773, "learning_rate": 2.9510204081632653e-05, "loss": 2.8673, "step": 10048 }, { "epoch": 41.01632653061225, "grad_norm": 35.699989318847656, "learning_rate": 2.950816326530612e-05, "loss": 1.3402, "step": 10049 }, { "epoch": 41.02040816326531, "grad_norm": 18.141992568969727, "learning_rate": 2.9506122448979594e-05, "loss": 2.1582, "step": 10050 }, { "epoch": 41.02448979591837, "grad_norm": 89.11966705322266, "learning_rate": 2.9504081632653065e-05, "loss": 0.9258, "step": 10051 }, { "epoch": 41.02857142857143, "grad_norm": 39.08005905151367, "learning_rate": 2.950204081632653e-05, "loss": 1.4561, "step": 10052 }, { "epoch": 41.03265306122449, "grad_norm": 24.186914443969727, "learning_rate": 2.95e-05, "loss": 2.3791, "step": 10053 }, { "epoch": 41.03673469387755, "grad_norm": 18.197242736816406, "learning_rate": 2.949795918367347e-05, "loss": 2.3463, "step": 10054 }, { "epoch": 41.04081632653061, "grad_norm": 13.910383224487305, "learning_rate": 2.9495918367346943e-05, "loss": 2.5697, "step": 10055 }, { "epoch": 41.04489795918367, "grad_norm": 19.858585357666016, "learning_rate": 2.949387755102041e-05, "loss": 2.5284, "step": 10056 }, { "epoch": 41.04897959183673, "grad_norm": 23.46088218688965, "learning_rate": 2.9491836734693877e-05, "loss": 2.0093, "step": 10057 }, { "epoch": 41.053061224489795, "grad_norm": 21.751991271972656, "learning_rate": 2.9489795918367348e-05, "loss": 2.166, "step": 10058 }, { "epoch": 41.05714285714286, "grad_norm": 30.265792846679688, "learning_rate": 2.9487755102040822e-05, "loss": 1.7997, "step": 10059 }, { "epoch": 41.06122448979592, "grad_norm": 20.014659881591797, "learning_rate": 2.948571428571429e-05, "loss": 2.2805, "step": 10060 }, { "epoch": 41.06530612244898, "grad_norm": 14.459507942199707, "learning_rate": 2.9483673469387756e-05, "loss": 2.8555, "step": 10061 }, { "epoch": 41.06938775510204, "grad_norm": 24.345266342163086, "learning_rate": 2.9481632653061227e-05, "loss": 2.2592, "step": 10062 }, { "epoch": 41.073469387755104, "grad_norm": 24.214981079101562, "learning_rate": 2.9479591836734694e-05, "loss": 1.8076, "step": 10063 }, { "epoch": 41.077551020408166, "grad_norm": 18.620803833007812, "learning_rate": 2.9477551020408168e-05, "loss": 2.5626, "step": 10064 }, { "epoch": 41.08163265306123, "grad_norm": 23.372493743896484, "learning_rate": 2.9475510204081635e-05, "loss": 2.3227, "step": 10065 }, { "epoch": 41.08571428571429, "grad_norm": 26.388002395629883, "learning_rate": 2.9473469387755105e-05, "loss": 2.1423, "step": 10066 }, { "epoch": 41.089795918367344, "grad_norm": 30.55522346496582, "learning_rate": 2.9471428571428572e-05, "loss": 1.1122, "step": 10067 }, { "epoch": 41.093877551020405, "grad_norm": 17.281152725219727, "learning_rate": 2.946938775510204e-05, "loss": 1.9394, "step": 10068 }, { "epoch": 41.09795918367347, "grad_norm": 24.365434646606445, "learning_rate": 2.9467346938775513e-05, "loss": 1.9375, "step": 10069 }, { "epoch": 41.10204081632653, "grad_norm": 20.821956634521484, "learning_rate": 2.9465306122448984e-05, "loss": 2.254, "step": 10070 }, { "epoch": 41.10612244897959, "grad_norm": 16.410011291503906, "learning_rate": 2.946326530612245e-05, "loss": 3.0215, "step": 10071 }, { "epoch": 41.11020408163265, "grad_norm": 47.698402404785156, "learning_rate": 2.9461224489795918e-05, "loss": 1.3234, "step": 10072 }, { "epoch": 41.114285714285714, "grad_norm": 36.38002014160156, "learning_rate": 2.945918367346939e-05, "loss": 1.5014, "step": 10073 }, { "epoch": 41.118367346938776, "grad_norm": 31.262868881225586, "learning_rate": 2.9457142857142863e-05, "loss": 1.6227, "step": 10074 }, { "epoch": 41.12244897959184, "grad_norm": 37.86070251464844, "learning_rate": 2.945510204081633e-05, "loss": 1.3951, "step": 10075 }, { "epoch": 41.1265306122449, "grad_norm": 34.78986740112305, "learning_rate": 2.9453061224489797e-05, "loss": 1.3318, "step": 10076 }, { "epoch": 41.13061224489796, "grad_norm": 21.531526565551758, "learning_rate": 2.9451020408163264e-05, "loss": 2.052, "step": 10077 }, { "epoch": 41.13469387755102, "grad_norm": 27.49171257019043, "learning_rate": 2.9448979591836735e-05, "loss": 1.8538, "step": 10078 }, { "epoch": 41.138775510204084, "grad_norm": 18.816816329956055, "learning_rate": 2.944693877551021e-05, "loss": 2.4881, "step": 10079 }, { "epoch": 41.142857142857146, "grad_norm": 49.334163665771484, "learning_rate": 2.9444897959183676e-05, "loss": 1.2205, "step": 10080 }, { "epoch": 41.14693877551021, "grad_norm": 39.28561782836914, "learning_rate": 2.9442857142857143e-05, "loss": 1.6555, "step": 10081 }, { "epoch": 41.15102040816326, "grad_norm": 35.22496795654297, "learning_rate": 2.9440816326530613e-05, "loss": 0.9637, "step": 10082 }, { "epoch": 41.155102040816324, "grad_norm": 43.56450271606445, "learning_rate": 2.943877551020408e-05, "loss": 1.7298, "step": 10083 }, { "epoch": 41.159183673469386, "grad_norm": 24.33047103881836, "learning_rate": 2.9436734693877554e-05, "loss": 1.3699, "step": 10084 }, { "epoch": 41.16326530612245, "grad_norm": 31.739736557006836, "learning_rate": 2.943469387755102e-05, "loss": 1.2546, "step": 10085 }, { "epoch": 41.16734693877551, "grad_norm": 32.227840423583984, "learning_rate": 2.9432653061224492e-05, "loss": 2.0822, "step": 10086 }, { "epoch": 41.17142857142857, "grad_norm": 44.14352035522461, "learning_rate": 2.943061224489796e-05, "loss": 2.4142, "step": 10087 }, { "epoch": 41.17551020408163, "grad_norm": 21.620655059814453, "learning_rate": 2.9428571428571426e-05, "loss": 2.4274, "step": 10088 }, { "epoch": 41.179591836734694, "grad_norm": 33.12514877319336, "learning_rate": 2.94265306122449e-05, "loss": 0.6887, "step": 10089 }, { "epoch": 41.183673469387756, "grad_norm": 19.02181625366211, "learning_rate": 2.942448979591837e-05, "loss": 2.4445, "step": 10090 }, { "epoch": 41.18775510204082, "grad_norm": 42.8150520324707, "learning_rate": 2.9422448979591838e-05, "loss": 0.8389, "step": 10091 }, { "epoch": 41.19183673469388, "grad_norm": 24.023094177246094, "learning_rate": 2.9420408163265305e-05, "loss": 2.2589, "step": 10092 }, { "epoch": 41.19591836734694, "grad_norm": 32.258758544921875, "learning_rate": 2.9418367346938775e-05, "loss": 2.1226, "step": 10093 }, { "epoch": 41.2, "grad_norm": 14.617766380310059, "learning_rate": 2.941632653061225e-05, "loss": 2.7005, "step": 10094 }, { "epoch": 41.204081632653065, "grad_norm": 14.878119468688965, "learning_rate": 2.9414285714285716e-05, "loss": 2.5206, "step": 10095 }, { "epoch": 41.20816326530612, "grad_norm": 29.322311401367188, "learning_rate": 2.9412244897959183e-05, "loss": 2.0483, "step": 10096 }, { "epoch": 41.21224489795918, "grad_norm": 35.544334411621094, "learning_rate": 2.9410204081632654e-05, "loss": 1.426, "step": 10097 }, { "epoch": 41.21632653061224, "grad_norm": 17.812976837158203, "learning_rate": 2.940816326530612e-05, "loss": 2.4701, "step": 10098 }, { "epoch": 41.220408163265304, "grad_norm": 16.23309898376465, "learning_rate": 2.9406122448979595e-05, "loss": 2.69, "step": 10099 }, { "epoch": 41.224489795918366, "grad_norm": 18.106355667114258, "learning_rate": 2.9404081632653062e-05, "loss": 2.4842, "step": 10100 }, { "epoch": 41.22857142857143, "grad_norm": 10.151494979858398, "learning_rate": 2.9402040816326533e-05, "loss": 3.3447, "step": 10101 }, { "epoch": 41.23265306122449, "grad_norm": 14.956993103027344, "learning_rate": 2.94e-05, "loss": 3.1058, "step": 10102 }, { "epoch": 41.23673469387755, "grad_norm": 22.95768928527832, "learning_rate": 2.9397959183673474e-05, "loss": 1.9477, "step": 10103 }, { "epoch": 41.24081632653061, "grad_norm": 14.619185447692871, "learning_rate": 2.939591836734694e-05, "loss": 2.7293, "step": 10104 }, { "epoch": 41.244897959183675, "grad_norm": 24.614940643310547, "learning_rate": 2.939387755102041e-05, "loss": 2.1914, "step": 10105 }, { "epoch": 41.248979591836736, "grad_norm": 35.16924285888672, "learning_rate": 2.939183673469388e-05, "loss": 1.5537, "step": 10106 }, { "epoch": 41.2530612244898, "grad_norm": 28.096832275390625, "learning_rate": 2.9389795918367346e-05, "loss": 1.4093, "step": 10107 }, { "epoch": 41.25714285714286, "grad_norm": 15.990683555603027, "learning_rate": 2.938775510204082e-05, "loss": 2.273, "step": 10108 }, { "epoch": 41.26122448979592, "grad_norm": 15.33842945098877, "learning_rate": 2.938571428571429e-05, "loss": 2.6236, "step": 10109 }, { "epoch": 41.265306122448976, "grad_norm": 12.671036720275879, "learning_rate": 2.9383673469387757e-05, "loss": 2.9599, "step": 10110 }, { "epoch": 41.26938775510204, "grad_norm": 17.193931579589844, "learning_rate": 2.9381632653061224e-05, "loss": 2.3897, "step": 10111 }, { "epoch": 41.2734693877551, "grad_norm": 25.091388702392578, "learning_rate": 2.9379591836734695e-05, "loss": 2.3151, "step": 10112 }, { "epoch": 41.27755102040816, "grad_norm": 15.556203842163086, "learning_rate": 2.937755102040817e-05, "loss": 2.5526, "step": 10113 }, { "epoch": 41.28163265306122, "grad_norm": 16.282033920288086, "learning_rate": 2.9375510204081636e-05, "loss": 2.5114, "step": 10114 }, { "epoch": 41.285714285714285, "grad_norm": 15.715739250183105, "learning_rate": 2.9373469387755103e-05, "loss": 2.6128, "step": 10115 }, { "epoch": 41.289795918367346, "grad_norm": 33.98699188232422, "learning_rate": 2.9371428571428573e-05, "loss": 1.2121, "step": 10116 }, { "epoch": 41.29387755102041, "grad_norm": 36.453243255615234, "learning_rate": 2.936938775510204e-05, "loss": 1.515, "step": 10117 }, { "epoch": 41.29795918367347, "grad_norm": 15.787219047546387, "learning_rate": 2.9367346938775514e-05, "loss": 2.6501, "step": 10118 }, { "epoch": 41.30204081632653, "grad_norm": 21.643163681030273, "learning_rate": 2.936530612244898e-05, "loss": 2.3979, "step": 10119 }, { "epoch": 41.30612244897959, "grad_norm": 31.53287696838379, "learning_rate": 2.9363265306122452e-05, "loss": 1.4219, "step": 10120 }, { "epoch": 41.310204081632655, "grad_norm": 24.18386459350586, "learning_rate": 2.936122448979592e-05, "loss": 2.1222, "step": 10121 }, { "epoch": 41.31428571428572, "grad_norm": 32.5942497253418, "learning_rate": 2.9359183673469386e-05, "loss": 1.8997, "step": 10122 }, { "epoch": 41.31836734693878, "grad_norm": 25.945266723632812, "learning_rate": 2.935714285714286e-05, "loss": 1.7801, "step": 10123 }, { "epoch": 41.32244897959184, "grad_norm": 41.87873840332031, "learning_rate": 2.935510204081633e-05, "loss": 0.7301, "step": 10124 }, { "epoch": 41.326530612244895, "grad_norm": 28.85091781616211, "learning_rate": 2.9353061224489798e-05, "loss": 0.6265, "step": 10125 }, { "epoch": 41.33061224489796, "grad_norm": 13.129374504089355, "learning_rate": 2.9351020408163265e-05, "loss": 2.9366, "step": 10126 }, { "epoch": 41.33469387755102, "grad_norm": 20.711912155151367, "learning_rate": 2.9348979591836735e-05, "loss": 2.2166, "step": 10127 }, { "epoch": 41.33877551020408, "grad_norm": 30.830015182495117, "learning_rate": 2.9346938775510206e-05, "loss": 1.3285, "step": 10128 }, { "epoch": 41.34285714285714, "grad_norm": 11.575800895690918, "learning_rate": 2.9344897959183676e-05, "loss": 3.0029, "step": 10129 }, { "epoch": 41.3469387755102, "grad_norm": 16.85903549194336, "learning_rate": 2.9342857142857144e-05, "loss": 2.8495, "step": 10130 }, { "epoch": 41.351020408163265, "grad_norm": 27.425785064697266, "learning_rate": 2.934081632653061e-05, "loss": 1.7613, "step": 10131 }, { "epoch": 41.35510204081633, "grad_norm": 30.201162338256836, "learning_rate": 2.933877551020408e-05, "loss": 1.7527, "step": 10132 }, { "epoch": 41.35918367346939, "grad_norm": 13.346004486083984, "learning_rate": 2.9336734693877555e-05, "loss": 3.1553, "step": 10133 }, { "epoch": 41.36326530612245, "grad_norm": 25.239545822143555, "learning_rate": 2.9334693877551022e-05, "loss": 1.9933, "step": 10134 }, { "epoch": 41.36734693877551, "grad_norm": 30.31134796142578, "learning_rate": 2.933265306122449e-05, "loss": 1.7128, "step": 10135 }, { "epoch": 41.371428571428574, "grad_norm": 16.02507972717285, "learning_rate": 2.933061224489796e-05, "loss": 3.0315, "step": 10136 }, { "epoch": 41.375510204081635, "grad_norm": 9.675204277038574, "learning_rate": 2.9328571428571427e-05, "loss": 3.0838, "step": 10137 }, { "epoch": 41.3795918367347, "grad_norm": 46.94830322265625, "learning_rate": 2.93265306122449e-05, "loss": 0.9265, "step": 10138 }, { "epoch": 41.38367346938775, "grad_norm": 20.1091365814209, "learning_rate": 2.9324489795918368e-05, "loss": 2.3541, "step": 10139 }, { "epoch": 41.38775510204081, "grad_norm": 30.515024185180664, "learning_rate": 2.932244897959184e-05, "loss": 1.932, "step": 10140 }, { "epoch": 41.391836734693875, "grad_norm": 24.66908836364746, "learning_rate": 2.9320408163265306e-05, "loss": 2.0078, "step": 10141 }, { "epoch": 41.39591836734694, "grad_norm": 53.366363525390625, "learning_rate": 2.9318367346938773e-05, "loss": 1.6285, "step": 10142 }, { "epoch": 41.4, "grad_norm": 49.869239807128906, "learning_rate": 2.9316326530612247e-05, "loss": 1.207, "step": 10143 }, { "epoch": 41.40408163265306, "grad_norm": 27.016191482543945, "learning_rate": 2.9314285714285717e-05, "loss": 1.4566, "step": 10144 }, { "epoch": 41.40816326530612, "grad_norm": 32.94169998168945, "learning_rate": 2.9312244897959184e-05, "loss": 1.9625, "step": 10145 }, { "epoch": 41.412244897959184, "grad_norm": 14.531841278076172, "learning_rate": 2.931020408163265e-05, "loss": 2.7647, "step": 10146 }, { "epoch": 41.416326530612245, "grad_norm": 15.028035163879395, "learning_rate": 2.9308163265306122e-05, "loss": 2.9149, "step": 10147 }, { "epoch": 41.42040816326531, "grad_norm": 15.293386459350586, "learning_rate": 2.9306122448979596e-05, "loss": 2.6074, "step": 10148 }, { "epoch": 41.42448979591837, "grad_norm": 18.920204162597656, "learning_rate": 2.9304081632653063e-05, "loss": 2.593, "step": 10149 }, { "epoch": 41.42857142857143, "grad_norm": 15.787599563598633, "learning_rate": 2.930204081632653e-05, "loss": 2.5945, "step": 10150 }, { "epoch": 41.43265306122449, "grad_norm": 20.24238395690918, "learning_rate": 2.93e-05, "loss": 2.6411, "step": 10151 }, { "epoch": 41.436734693877554, "grad_norm": 35.277366638183594, "learning_rate": 2.9297959183673475e-05, "loss": 2.0562, "step": 10152 }, { "epoch": 41.440816326530616, "grad_norm": 27.265331268310547, "learning_rate": 2.9295918367346942e-05, "loss": 1.7361, "step": 10153 }, { "epoch": 41.44489795918367, "grad_norm": 17.17949867248535, "learning_rate": 2.929387755102041e-05, "loss": 2.5253, "step": 10154 }, { "epoch": 41.44897959183673, "grad_norm": 35.947509765625, "learning_rate": 2.929183673469388e-05, "loss": 1.5074, "step": 10155 }, { "epoch": 41.453061224489794, "grad_norm": 24.33963394165039, "learning_rate": 2.9289795918367346e-05, "loss": 1.3286, "step": 10156 }, { "epoch": 41.457142857142856, "grad_norm": 22.69255256652832, "learning_rate": 2.928775510204082e-05, "loss": 1.6898, "step": 10157 }, { "epoch": 41.46122448979592, "grad_norm": 14.657861709594727, "learning_rate": 2.9285714285714288e-05, "loss": 2.6098, "step": 10158 }, { "epoch": 41.46530612244898, "grad_norm": 37.456459045410156, "learning_rate": 2.9283673469387758e-05, "loss": 0.544, "step": 10159 }, { "epoch": 41.46938775510204, "grad_norm": 15.007633209228516, "learning_rate": 2.9281632653061225e-05, "loss": 2.5459, "step": 10160 }, { "epoch": 41.4734693877551, "grad_norm": 33.654300689697266, "learning_rate": 2.9279591836734692e-05, "loss": 1.1984, "step": 10161 }, { "epoch": 41.477551020408164, "grad_norm": 19.726055145263672, "learning_rate": 2.9277551020408166e-05, "loss": 2.4238, "step": 10162 }, { "epoch": 41.481632653061226, "grad_norm": 38.727901458740234, "learning_rate": 2.9275510204081637e-05, "loss": 1.8519, "step": 10163 }, { "epoch": 41.48571428571429, "grad_norm": 29.851905822753906, "learning_rate": 2.9273469387755104e-05, "loss": 1.7144, "step": 10164 }, { "epoch": 41.48979591836735, "grad_norm": 15.801854133605957, "learning_rate": 2.927142857142857e-05, "loss": 2.9394, "step": 10165 }, { "epoch": 41.49387755102041, "grad_norm": 29.131975173950195, "learning_rate": 2.926938775510204e-05, "loss": 1.2761, "step": 10166 }, { "epoch": 41.49795918367347, "grad_norm": 27.722640991210938, "learning_rate": 2.9267346938775515e-05, "loss": 2.2979, "step": 10167 }, { "epoch": 41.50204081632653, "grad_norm": 18.27143096923828, "learning_rate": 2.9265306122448982e-05, "loss": 2.4789, "step": 10168 }, { "epoch": 41.50612244897959, "grad_norm": 20.62036895751953, "learning_rate": 2.926326530612245e-05, "loss": 2.0586, "step": 10169 }, { "epoch": 41.51020408163265, "grad_norm": 18.62322235107422, "learning_rate": 2.926122448979592e-05, "loss": 2.6778, "step": 10170 }, { "epoch": 41.51428571428571, "grad_norm": 18.136058807373047, "learning_rate": 2.9259183673469387e-05, "loss": 2.6778, "step": 10171 }, { "epoch": 41.518367346938774, "grad_norm": 23.90801429748535, "learning_rate": 2.925714285714286e-05, "loss": 1.9488, "step": 10172 }, { "epoch": 41.522448979591836, "grad_norm": 25.850446701049805, "learning_rate": 2.9255102040816328e-05, "loss": 1.3194, "step": 10173 }, { "epoch": 41.5265306122449, "grad_norm": 41.537940979003906, "learning_rate": 2.92530612244898e-05, "loss": 0.8198, "step": 10174 }, { "epoch": 41.53061224489796, "grad_norm": 13.008673667907715, "learning_rate": 2.9251020408163266e-05, "loss": 3.1998, "step": 10175 }, { "epoch": 41.53469387755102, "grad_norm": 25.694583892822266, "learning_rate": 2.9248979591836733e-05, "loss": 1.9501, "step": 10176 }, { "epoch": 41.53877551020408, "grad_norm": 29.72562026977539, "learning_rate": 2.9246938775510207e-05, "loss": 2.0973, "step": 10177 }, { "epoch": 41.542857142857144, "grad_norm": 21.95079231262207, "learning_rate": 2.9244897959183677e-05, "loss": 2.5642, "step": 10178 }, { "epoch": 41.546938775510206, "grad_norm": 22.761587142944336, "learning_rate": 2.9242857142857145e-05, "loss": 1.8584, "step": 10179 }, { "epoch": 41.55102040816327, "grad_norm": 14.114585876464844, "learning_rate": 2.9240816326530612e-05, "loss": 2.9851, "step": 10180 }, { "epoch": 41.55510204081633, "grad_norm": 30.397390365600586, "learning_rate": 2.9238775510204082e-05, "loss": 1.2153, "step": 10181 }, { "epoch": 41.55918367346939, "grad_norm": 19.794797897338867, "learning_rate": 2.9236734693877553e-05, "loss": 2.0675, "step": 10182 }, { "epoch": 41.563265306122446, "grad_norm": 10.591946601867676, "learning_rate": 2.9234693877551023e-05, "loss": 3.3463, "step": 10183 }, { "epoch": 41.56734693877551, "grad_norm": 25.61660385131836, "learning_rate": 2.923265306122449e-05, "loss": 2.0575, "step": 10184 }, { "epoch": 41.57142857142857, "grad_norm": 26.25838851928711, "learning_rate": 2.9230612244897957e-05, "loss": 2.3961, "step": 10185 }, { "epoch": 41.57551020408163, "grad_norm": 26.984275817871094, "learning_rate": 2.9228571428571428e-05, "loss": 1.9929, "step": 10186 }, { "epoch": 41.57959183673469, "grad_norm": 22.427751541137695, "learning_rate": 2.9226530612244902e-05, "loss": 2.6984, "step": 10187 }, { "epoch": 41.583673469387755, "grad_norm": 16.501182556152344, "learning_rate": 2.922448979591837e-05, "loss": 2.6477, "step": 10188 }, { "epoch": 41.587755102040816, "grad_norm": 25.924009323120117, "learning_rate": 2.9222448979591836e-05, "loss": 2.4812, "step": 10189 }, { "epoch": 41.59183673469388, "grad_norm": 21.879362106323242, "learning_rate": 2.9220408163265307e-05, "loss": 1.8119, "step": 10190 }, { "epoch": 41.59591836734694, "grad_norm": 26.597057342529297, "learning_rate": 2.9218367346938774e-05, "loss": 1.8041, "step": 10191 }, { "epoch": 41.6, "grad_norm": 14.246583938598633, "learning_rate": 2.9216326530612248e-05, "loss": 2.5641, "step": 10192 }, { "epoch": 41.60408163265306, "grad_norm": 24.221595764160156, "learning_rate": 2.9214285714285715e-05, "loss": 2.3194, "step": 10193 }, { "epoch": 41.608163265306125, "grad_norm": 37.70946502685547, "learning_rate": 2.9212244897959185e-05, "loss": 1.454, "step": 10194 }, { "epoch": 41.61224489795919, "grad_norm": 23.559123992919922, "learning_rate": 2.9210204081632652e-05, "loss": 1.6026, "step": 10195 }, { "epoch": 41.61632653061225, "grad_norm": 39.00459671020508, "learning_rate": 2.9208163265306126e-05, "loss": 0.6281, "step": 10196 }, { "epoch": 41.6204081632653, "grad_norm": 18.142593383789062, "learning_rate": 2.9206122448979593e-05, "loss": 2.7187, "step": 10197 }, { "epoch": 41.624489795918365, "grad_norm": 15.689102172851562, "learning_rate": 2.9204081632653064e-05, "loss": 2.7099, "step": 10198 }, { "epoch": 41.628571428571426, "grad_norm": 34.562156677246094, "learning_rate": 2.920204081632653e-05, "loss": 1.9889, "step": 10199 }, { "epoch": 41.63265306122449, "grad_norm": 28.35755157470703, "learning_rate": 2.9199999999999998e-05, "loss": 1.5552, "step": 10200 }, { "epoch": 41.63673469387755, "grad_norm": 33.333290100097656, "learning_rate": 2.9197959183673472e-05, "loss": 0.9868, "step": 10201 }, { "epoch": 41.64081632653061, "grad_norm": 41.97866439819336, "learning_rate": 2.9195918367346943e-05, "loss": 1.6027, "step": 10202 }, { "epoch": 41.64489795918367, "grad_norm": 33.24817657470703, "learning_rate": 2.919387755102041e-05, "loss": 1.3726, "step": 10203 }, { "epoch": 41.648979591836735, "grad_norm": 29.14314079284668, "learning_rate": 2.9191836734693877e-05, "loss": 1.9037, "step": 10204 }, { "epoch": 41.6530612244898, "grad_norm": 8.983920097351074, "learning_rate": 2.9189795918367347e-05, "loss": 3.4834, "step": 10205 }, { "epoch": 41.65714285714286, "grad_norm": 39.226585388183594, "learning_rate": 2.918775510204082e-05, "loss": 0.8515, "step": 10206 }, { "epoch": 41.66122448979592, "grad_norm": 37.47362518310547, "learning_rate": 2.918571428571429e-05, "loss": 1.5228, "step": 10207 }, { "epoch": 41.66530612244898, "grad_norm": 11.677203178405762, "learning_rate": 2.9183673469387756e-05, "loss": 3.1264, "step": 10208 }, { "epoch": 41.66938775510204, "grad_norm": 22.474706649780273, "learning_rate": 2.9181632653061226e-05, "loss": 1.9229, "step": 10209 }, { "epoch": 41.673469387755105, "grad_norm": 11.592259407043457, "learning_rate": 2.9179591836734693e-05, "loss": 3.0252, "step": 10210 }, { "epoch": 41.67755102040816, "grad_norm": 36.41264343261719, "learning_rate": 2.9177551020408167e-05, "loss": 1.3437, "step": 10211 }, { "epoch": 41.68163265306122, "grad_norm": 20.43474769592285, "learning_rate": 2.9175510204081634e-05, "loss": 2.5329, "step": 10212 }, { "epoch": 41.68571428571428, "grad_norm": 24.865392684936523, "learning_rate": 2.9173469387755105e-05, "loss": 2.2135, "step": 10213 }, { "epoch": 41.689795918367345, "grad_norm": 22.109920501708984, "learning_rate": 2.9171428571428572e-05, "loss": 2.4916, "step": 10214 }, { "epoch": 41.69387755102041, "grad_norm": 12.66132926940918, "learning_rate": 2.916938775510204e-05, "loss": 2.9468, "step": 10215 }, { "epoch": 41.69795918367347, "grad_norm": 21.954570770263672, "learning_rate": 2.9167346938775513e-05, "loss": 1.802, "step": 10216 }, { "epoch": 41.70204081632653, "grad_norm": 29.179485321044922, "learning_rate": 2.9165306122448983e-05, "loss": 2.2901, "step": 10217 }, { "epoch": 41.70612244897959, "grad_norm": 36.800567626953125, "learning_rate": 2.916326530612245e-05, "loss": 2.3287, "step": 10218 }, { "epoch": 41.710204081632654, "grad_norm": 24.950586318969727, "learning_rate": 2.9161224489795918e-05, "loss": 2.5584, "step": 10219 }, { "epoch": 41.714285714285715, "grad_norm": 22.332233428955078, "learning_rate": 2.9159183673469388e-05, "loss": 2.0098, "step": 10220 }, { "epoch": 41.71836734693878, "grad_norm": 16.721784591674805, "learning_rate": 2.9157142857142862e-05, "loss": 2.9845, "step": 10221 }, { "epoch": 41.72244897959184, "grad_norm": 20.76087188720703, "learning_rate": 2.915510204081633e-05, "loss": 2.2318, "step": 10222 }, { "epoch": 41.7265306122449, "grad_norm": 33.35964584350586, "learning_rate": 2.9153061224489796e-05, "loss": 1.6103, "step": 10223 }, { "epoch": 41.73061224489796, "grad_norm": 22.24188995361328, "learning_rate": 2.9151020408163267e-05, "loss": 2.193, "step": 10224 }, { "epoch": 41.734693877551024, "grad_norm": 24.869054794311523, "learning_rate": 2.9148979591836734e-05, "loss": 2.2645, "step": 10225 }, { "epoch": 41.73877551020408, "grad_norm": 15.014370918273926, "learning_rate": 2.9146938775510208e-05, "loss": 2.471, "step": 10226 }, { "epoch": 41.74285714285714, "grad_norm": 13.290362358093262, "learning_rate": 2.9144897959183675e-05, "loss": 2.683, "step": 10227 }, { "epoch": 41.7469387755102, "grad_norm": 29.155075073242188, "learning_rate": 2.9142857142857146e-05, "loss": 0.5982, "step": 10228 }, { "epoch": 41.751020408163264, "grad_norm": 40.524070739746094, "learning_rate": 2.9140816326530613e-05, "loss": 1.797, "step": 10229 }, { "epoch": 41.755102040816325, "grad_norm": 35.025814056396484, "learning_rate": 2.913877551020408e-05, "loss": 1.2288, "step": 10230 }, { "epoch": 41.75918367346939, "grad_norm": 13.771296501159668, "learning_rate": 2.9136734693877554e-05, "loss": 2.4965, "step": 10231 }, { "epoch": 41.76326530612245, "grad_norm": 16.892269134521484, "learning_rate": 2.9134693877551024e-05, "loss": 2.5545, "step": 10232 }, { "epoch": 41.76734693877551, "grad_norm": 16.64534568786621, "learning_rate": 2.913265306122449e-05, "loss": 2.6031, "step": 10233 }, { "epoch": 41.77142857142857, "grad_norm": 35.19681930541992, "learning_rate": 2.913061224489796e-05, "loss": 1.4064, "step": 10234 }, { "epoch": 41.775510204081634, "grad_norm": 20.877851486206055, "learning_rate": 2.912857142857143e-05, "loss": 2.3109, "step": 10235 }, { "epoch": 41.779591836734696, "grad_norm": 29.038270950317383, "learning_rate": 2.91265306122449e-05, "loss": 0.4125, "step": 10236 }, { "epoch": 41.78367346938776, "grad_norm": 60.22811508178711, "learning_rate": 2.912448979591837e-05, "loss": 1.3779, "step": 10237 }, { "epoch": 41.78775510204082, "grad_norm": 28.081628799438477, "learning_rate": 2.9122448979591837e-05, "loss": 2.3889, "step": 10238 }, { "epoch": 41.79183673469388, "grad_norm": 28.686826705932617, "learning_rate": 2.9120408163265304e-05, "loss": 1.6084, "step": 10239 }, { "epoch": 41.795918367346935, "grad_norm": 62.120094299316406, "learning_rate": 2.9118367346938778e-05, "loss": 0.8174, "step": 10240 }, { "epoch": 41.8, "grad_norm": 22.051429748535156, "learning_rate": 2.911632653061225e-05, "loss": 2.4812, "step": 10241 }, { "epoch": 41.80408163265306, "grad_norm": 36.160736083984375, "learning_rate": 2.9114285714285716e-05, "loss": 1.2709, "step": 10242 }, { "epoch": 41.80816326530612, "grad_norm": 26.118419647216797, "learning_rate": 2.9112244897959183e-05, "loss": 1.3999, "step": 10243 }, { "epoch": 41.81224489795918, "grad_norm": 33.89806365966797, "learning_rate": 2.9110204081632653e-05, "loss": 2.2967, "step": 10244 }, { "epoch": 41.816326530612244, "grad_norm": 25.1740665435791, "learning_rate": 2.9108163265306127e-05, "loss": 2.4329, "step": 10245 }, { "epoch": 41.820408163265306, "grad_norm": 12.221461296081543, "learning_rate": 2.9106122448979594e-05, "loss": 0.1394, "step": 10246 }, { "epoch": 41.82448979591837, "grad_norm": 29.136363983154297, "learning_rate": 2.910408163265306e-05, "loss": 1.8637, "step": 10247 }, { "epoch": 41.82857142857143, "grad_norm": 13.867352485656738, "learning_rate": 2.9102040816326532e-05, "loss": 2.7793, "step": 10248 }, { "epoch": 41.83265306122449, "grad_norm": 31.003990173339844, "learning_rate": 2.91e-05, "loss": 1.7197, "step": 10249 }, { "epoch": 41.83673469387755, "grad_norm": 21.03230857849121, "learning_rate": 2.9097959183673473e-05, "loss": 2.5692, "step": 10250 }, { "epoch": 41.840816326530614, "grad_norm": 28.765432357788086, "learning_rate": 2.909591836734694e-05, "loss": 2.3468, "step": 10251 }, { "epoch": 41.844897959183676, "grad_norm": 11.430696487426758, "learning_rate": 2.909387755102041e-05, "loss": 3.0525, "step": 10252 }, { "epoch": 41.84897959183674, "grad_norm": 26.288148880004883, "learning_rate": 2.9091836734693878e-05, "loss": 1.854, "step": 10253 }, { "epoch": 41.85306122448979, "grad_norm": 20.889572143554688, "learning_rate": 2.9089795918367345e-05, "loss": 2.1239, "step": 10254 }, { "epoch": 41.857142857142854, "grad_norm": 17.096773147583008, "learning_rate": 2.908775510204082e-05, "loss": 2.7455, "step": 10255 }, { "epoch": 41.861224489795916, "grad_norm": 20.010868072509766, "learning_rate": 2.908571428571429e-05, "loss": 2.1047, "step": 10256 }, { "epoch": 41.86530612244898, "grad_norm": 26.48031234741211, "learning_rate": 2.9083673469387757e-05, "loss": 2.6224, "step": 10257 }, { "epoch": 41.86938775510204, "grad_norm": 17.25213050842285, "learning_rate": 2.9081632653061224e-05, "loss": 2.8111, "step": 10258 }, { "epoch": 41.8734693877551, "grad_norm": 22.45402717590332, "learning_rate": 2.9079591836734694e-05, "loss": 2.3677, "step": 10259 }, { "epoch": 41.87755102040816, "grad_norm": 25.40785789489746, "learning_rate": 2.9077551020408168e-05, "loss": 2.2237, "step": 10260 }, { "epoch": 41.881632653061224, "grad_norm": 23.111989974975586, "learning_rate": 2.9075510204081635e-05, "loss": 2.2432, "step": 10261 }, { "epoch": 41.885714285714286, "grad_norm": 16.295461654663086, "learning_rate": 2.9073469387755102e-05, "loss": 2.3549, "step": 10262 }, { "epoch": 41.88979591836735, "grad_norm": 14.844223976135254, "learning_rate": 2.9071428571428573e-05, "loss": 2.7253, "step": 10263 }, { "epoch": 41.89387755102041, "grad_norm": 29.362319946289062, "learning_rate": 2.906938775510204e-05, "loss": 1.2537, "step": 10264 }, { "epoch": 41.89795918367347, "grad_norm": 22.70291519165039, "learning_rate": 2.9067346938775514e-05, "loss": 2.1833, "step": 10265 }, { "epoch": 41.90204081632653, "grad_norm": 17.935855865478516, "learning_rate": 2.906530612244898e-05, "loss": 2.5394, "step": 10266 }, { "epoch": 41.906122448979595, "grad_norm": 30.40314292907715, "learning_rate": 2.906326530612245e-05, "loss": 2.1459, "step": 10267 }, { "epoch": 41.910204081632656, "grad_norm": 16.7530517578125, "learning_rate": 2.906122448979592e-05, "loss": 2.7379, "step": 10268 }, { "epoch": 41.91428571428571, "grad_norm": 22.543560028076172, "learning_rate": 2.9059183673469386e-05, "loss": 2.1811, "step": 10269 }, { "epoch": 41.91836734693877, "grad_norm": 34.32453536987305, "learning_rate": 2.905714285714286e-05, "loss": 1.3139, "step": 10270 }, { "epoch": 41.922448979591834, "grad_norm": 25.01123809814453, "learning_rate": 2.905510204081633e-05, "loss": 1.2072, "step": 10271 }, { "epoch": 41.926530612244896, "grad_norm": 38.98904800415039, "learning_rate": 2.9053061224489797e-05, "loss": 1.6111, "step": 10272 }, { "epoch": 41.93061224489796, "grad_norm": 38.234222412109375, "learning_rate": 2.9051020408163264e-05, "loss": 0.673, "step": 10273 }, { "epoch": 41.93469387755102, "grad_norm": 26.37119483947754, "learning_rate": 2.9048979591836735e-05, "loss": 2.0872, "step": 10274 }, { "epoch": 41.93877551020408, "grad_norm": 18.05921173095703, "learning_rate": 2.904693877551021e-05, "loss": 2.4913, "step": 10275 }, { "epoch": 41.94285714285714, "grad_norm": 29.706266403198242, "learning_rate": 2.9044897959183676e-05, "loss": 2.2797, "step": 10276 }, { "epoch": 41.946938775510205, "grad_norm": 19.18996810913086, "learning_rate": 2.9042857142857143e-05, "loss": 2.3751, "step": 10277 }, { "epoch": 41.95102040816327, "grad_norm": 19.23686981201172, "learning_rate": 2.9040816326530614e-05, "loss": 2.7557, "step": 10278 }, { "epoch": 41.95510204081633, "grad_norm": 55.30156326293945, "learning_rate": 2.903877551020408e-05, "loss": 0.6323, "step": 10279 }, { "epoch": 41.95918367346939, "grad_norm": 22.17546272277832, "learning_rate": 2.9036734693877555e-05, "loss": 2.5241, "step": 10280 }, { "epoch": 41.96326530612245, "grad_norm": 30.3015193939209, "learning_rate": 2.9034693877551022e-05, "loss": 1.5025, "step": 10281 }, { "epoch": 41.96734693877551, "grad_norm": 23.273494720458984, "learning_rate": 2.9032653061224492e-05, "loss": 1.8142, "step": 10282 }, { "epoch": 41.97142857142857, "grad_norm": 22.387157440185547, "learning_rate": 2.903061224489796e-05, "loss": 2.5925, "step": 10283 }, { "epoch": 41.97551020408163, "grad_norm": 15.79603385925293, "learning_rate": 2.9028571428571427e-05, "loss": 2.3719, "step": 10284 }, { "epoch": 41.97959183673469, "grad_norm": 15.628873825073242, "learning_rate": 2.90265306122449e-05, "loss": 2.7754, "step": 10285 }, { "epoch": 41.98367346938775, "grad_norm": 25.447437286376953, "learning_rate": 2.902448979591837e-05, "loss": 2.2022, "step": 10286 }, { "epoch": 41.987755102040815, "grad_norm": 21.161712646484375, "learning_rate": 2.9022448979591838e-05, "loss": 2.5089, "step": 10287 }, { "epoch": 41.99183673469388, "grad_norm": 23.80973243713379, "learning_rate": 2.9020408163265305e-05, "loss": 1.9225, "step": 10288 }, { "epoch": 41.99591836734694, "grad_norm": 21.73750114440918, "learning_rate": 2.901836734693878e-05, "loss": 1.8295, "step": 10289 }, { "epoch": 42.0, "grad_norm": 23.918359756469727, "learning_rate": 2.9016326530612246e-05, "loss": 2.1827, "step": 10290 }, { "epoch": 42.00408163265306, "grad_norm": 43.628150939941406, "learning_rate": 2.9014285714285717e-05, "loss": 1.4922, "step": 10291 }, { "epoch": 42.00816326530612, "grad_norm": 17.40302085876465, "learning_rate": 2.9012244897959184e-05, "loss": 2.5968, "step": 10292 }, { "epoch": 42.012244897959185, "grad_norm": 31.62129020690918, "learning_rate": 2.901020408163265e-05, "loss": 1.0713, "step": 10293 }, { "epoch": 42.01632653061225, "grad_norm": 36.65016555786133, "learning_rate": 2.9008163265306125e-05, "loss": 2.0416, "step": 10294 }, { "epoch": 42.02040816326531, "grad_norm": 20.324317932128906, "learning_rate": 2.9006122448979595e-05, "loss": 1.8885, "step": 10295 }, { "epoch": 42.02448979591837, "grad_norm": 25.693201065063477, "learning_rate": 2.9004081632653063e-05, "loss": 1.8142, "step": 10296 }, { "epoch": 42.02857142857143, "grad_norm": 32.304813385009766, "learning_rate": 2.900204081632653e-05, "loss": 1.577, "step": 10297 }, { "epoch": 42.03265306122449, "grad_norm": 36.407108306884766, "learning_rate": 2.9e-05, "loss": 1.7788, "step": 10298 }, { "epoch": 42.03673469387755, "grad_norm": 17.552940368652344, "learning_rate": 2.8997959183673474e-05, "loss": 2.1859, "step": 10299 }, { "epoch": 42.04081632653061, "grad_norm": 16.355497360229492, "learning_rate": 2.899591836734694e-05, "loss": 2.9963, "step": 10300 }, { "epoch": 42.04489795918367, "grad_norm": 34.70778274536133, "learning_rate": 2.899387755102041e-05, "loss": 1.8354, "step": 10301 }, { "epoch": 42.04897959183673, "grad_norm": 38.168800354003906, "learning_rate": 2.899183673469388e-05, "loss": 0.5491, "step": 10302 }, { "epoch": 42.053061224489795, "grad_norm": 29.79727554321289, "learning_rate": 2.8989795918367346e-05, "loss": 1.8642, "step": 10303 }, { "epoch": 42.05714285714286, "grad_norm": 29.54650115966797, "learning_rate": 2.898775510204082e-05, "loss": 1.9436, "step": 10304 }, { "epoch": 42.06122448979592, "grad_norm": 44.563453674316406, "learning_rate": 2.8985714285714287e-05, "loss": 0.5593, "step": 10305 }, { "epoch": 42.06530612244898, "grad_norm": 35.57579803466797, "learning_rate": 2.8983673469387758e-05, "loss": 1.4143, "step": 10306 }, { "epoch": 42.06938775510204, "grad_norm": 16.02328109741211, "learning_rate": 2.8981632653061225e-05, "loss": 2.4392, "step": 10307 }, { "epoch": 42.073469387755104, "grad_norm": 13.161002159118652, "learning_rate": 2.8979591836734692e-05, "loss": 2.9381, "step": 10308 }, { "epoch": 42.077551020408166, "grad_norm": 16.5378475189209, "learning_rate": 2.8977551020408166e-05, "loss": 2.6058, "step": 10309 }, { "epoch": 42.08163265306123, "grad_norm": 42.86449432373047, "learning_rate": 2.8975510204081636e-05, "loss": 0.6533, "step": 10310 }, { "epoch": 42.08571428571429, "grad_norm": 30.247676849365234, "learning_rate": 2.8973469387755103e-05, "loss": 1.1787, "step": 10311 }, { "epoch": 42.089795918367344, "grad_norm": 28.33612060546875, "learning_rate": 2.897142857142857e-05, "loss": 0.5449, "step": 10312 }, { "epoch": 42.093877551020405, "grad_norm": 37.120418548583984, "learning_rate": 2.896938775510204e-05, "loss": 1.855, "step": 10313 }, { "epoch": 42.09795918367347, "grad_norm": 26.079587936401367, "learning_rate": 2.8967346938775515e-05, "loss": 2.189, "step": 10314 }, { "epoch": 42.10204081632653, "grad_norm": 16.843551635742188, "learning_rate": 2.8965306122448982e-05, "loss": 2.652, "step": 10315 }, { "epoch": 42.10612244897959, "grad_norm": 13.896781921386719, "learning_rate": 2.896326530612245e-05, "loss": 3.0364, "step": 10316 }, { "epoch": 42.11020408163265, "grad_norm": 16.435565948486328, "learning_rate": 2.896122448979592e-05, "loss": 2.5966, "step": 10317 }, { "epoch": 42.114285714285714, "grad_norm": 25.246585845947266, "learning_rate": 2.8959183673469387e-05, "loss": 1.3578, "step": 10318 }, { "epoch": 42.118367346938776, "grad_norm": 24.13053321838379, "learning_rate": 2.895714285714286e-05, "loss": 1.7988, "step": 10319 }, { "epoch": 42.12244897959184, "grad_norm": 20.755332946777344, "learning_rate": 2.8955102040816328e-05, "loss": 2.3243, "step": 10320 }, { "epoch": 42.1265306122449, "grad_norm": 21.809062957763672, "learning_rate": 2.8953061224489798e-05, "loss": 2.4528, "step": 10321 }, { "epoch": 42.13061224489796, "grad_norm": 31.076995849609375, "learning_rate": 2.8951020408163265e-05, "loss": 1.1051, "step": 10322 }, { "epoch": 42.13469387755102, "grad_norm": 34.99375534057617, "learning_rate": 2.8948979591836733e-05, "loss": 2.2707, "step": 10323 }, { "epoch": 42.138775510204084, "grad_norm": 15.601325035095215, "learning_rate": 2.8946938775510206e-05, "loss": 2.6487, "step": 10324 }, { "epoch": 42.142857142857146, "grad_norm": 15.666924476623535, "learning_rate": 2.8944897959183677e-05, "loss": 2.5729, "step": 10325 }, { "epoch": 42.14693877551021, "grad_norm": 9.275988578796387, "learning_rate": 2.8942857142857144e-05, "loss": 3.4557, "step": 10326 }, { "epoch": 42.15102040816326, "grad_norm": 25.69205665588379, "learning_rate": 2.894081632653061e-05, "loss": 1.7426, "step": 10327 }, { "epoch": 42.155102040816324, "grad_norm": 38.46853256225586, "learning_rate": 2.8938775510204082e-05, "loss": 1.2147, "step": 10328 }, { "epoch": 42.159183673469386, "grad_norm": 17.6704158782959, "learning_rate": 2.8936734693877556e-05, "loss": 2.2566, "step": 10329 }, { "epoch": 42.16326530612245, "grad_norm": 16.973173141479492, "learning_rate": 2.8934693877551023e-05, "loss": 2.5444, "step": 10330 }, { "epoch": 42.16734693877551, "grad_norm": 18.504146575927734, "learning_rate": 2.893265306122449e-05, "loss": 2.5035, "step": 10331 }, { "epoch": 42.17142857142857, "grad_norm": 16.726499557495117, "learning_rate": 2.893061224489796e-05, "loss": 2.3788, "step": 10332 }, { "epoch": 42.17551020408163, "grad_norm": 30.44207763671875, "learning_rate": 2.8928571428571434e-05, "loss": 2.1003, "step": 10333 }, { "epoch": 42.179591836734694, "grad_norm": 34.18601608276367, "learning_rate": 2.89265306122449e-05, "loss": 1.3236, "step": 10334 }, { "epoch": 42.183673469387756, "grad_norm": 29.268274307250977, "learning_rate": 2.892448979591837e-05, "loss": 1.2136, "step": 10335 }, { "epoch": 42.18775510204082, "grad_norm": 13.832820892333984, "learning_rate": 2.892244897959184e-05, "loss": 2.9769, "step": 10336 }, { "epoch": 42.19183673469388, "grad_norm": 25.759599685668945, "learning_rate": 2.8920408163265306e-05, "loss": 1.7127, "step": 10337 }, { "epoch": 42.19591836734694, "grad_norm": 23.999582290649414, "learning_rate": 2.891836734693878e-05, "loss": 2.1271, "step": 10338 }, { "epoch": 42.2, "grad_norm": 26.274063110351562, "learning_rate": 2.8916326530612247e-05, "loss": 2.1383, "step": 10339 }, { "epoch": 42.204081632653065, "grad_norm": 20.631811141967773, "learning_rate": 2.8914285714285714e-05, "loss": 1.9432, "step": 10340 }, { "epoch": 42.20816326530612, "grad_norm": 21.751876831054688, "learning_rate": 2.8912244897959185e-05, "loss": 1.8527, "step": 10341 }, { "epoch": 42.21224489795918, "grad_norm": 26.022655487060547, "learning_rate": 2.8910204081632652e-05, "loss": 1.3271, "step": 10342 }, { "epoch": 42.21632653061224, "grad_norm": 13.921055793762207, "learning_rate": 2.8908163265306126e-05, "loss": 2.8952, "step": 10343 }, { "epoch": 42.220408163265304, "grad_norm": 24.468963623046875, "learning_rate": 2.8906122448979593e-05, "loss": 2.3347, "step": 10344 }, { "epoch": 42.224489795918366, "grad_norm": 19.9569091796875, "learning_rate": 2.8904081632653063e-05, "loss": 2.4714, "step": 10345 }, { "epoch": 42.22857142857143, "grad_norm": 13.587031364440918, "learning_rate": 2.890204081632653e-05, "loss": 2.9214, "step": 10346 }, { "epoch": 42.23265306122449, "grad_norm": 35.66020965576172, "learning_rate": 2.8899999999999998e-05, "loss": 1.4609, "step": 10347 }, { "epoch": 42.23673469387755, "grad_norm": 26.87389373779297, "learning_rate": 2.889795918367347e-05, "loss": 2.097, "step": 10348 }, { "epoch": 42.24081632653061, "grad_norm": 44.20173263549805, "learning_rate": 2.8895918367346942e-05, "loss": 2.34, "step": 10349 }, { "epoch": 42.244897959183675, "grad_norm": 26.640153884887695, "learning_rate": 2.889387755102041e-05, "loss": 2.0155, "step": 10350 }, { "epoch": 42.248979591836736, "grad_norm": 32.79252243041992, "learning_rate": 2.8891836734693876e-05, "loss": 1.6131, "step": 10351 }, { "epoch": 42.2530612244898, "grad_norm": 15.604381561279297, "learning_rate": 2.8889795918367347e-05, "loss": 2.6226, "step": 10352 }, { "epoch": 42.25714285714286, "grad_norm": 34.08061218261719, "learning_rate": 2.888775510204082e-05, "loss": 1.4583, "step": 10353 }, { "epoch": 42.26122448979592, "grad_norm": 26.681468963623047, "learning_rate": 2.8885714285714288e-05, "loss": 2.2117, "step": 10354 }, { "epoch": 42.265306122448976, "grad_norm": 21.770198822021484, "learning_rate": 2.8883673469387755e-05, "loss": 2.2996, "step": 10355 }, { "epoch": 42.26938775510204, "grad_norm": 15.311564445495605, "learning_rate": 2.8881632653061226e-05, "loss": 2.9217, "step": 10356 }, { "epoch": 42.2734693877551, "grad_norm": 29.096567153930664, "learning_rate": 2.8879591836734693e-05, "loss": 1.9024, "step": 10357 }, { "epoch": 42.27755102040816, "grad_norm": 24.463773727416992, "learning_rate": 2.8877551020408167e-05, "loss": 2.002, "step": 10358 }, { "epoch": 42.28163265306122, "grad_norm": 23.586795806884766, "learning_rate": 2.8875510204081634e-05, "loss": 1.7282, "step": 10359 }, { "epoch": 42.285714285714285, "grad_norm": 23.60204315185547, "learning_rate": 2.8873469387755104e-05, "loss": 1.7104, "step": 10360 }, { "epoch": 42.289795918367346, "grad_norm": 29.91648292541504, "learning_rate": 2.887142857142857e-05, "loss": 1.3935, "step": 10361 }, { "epoch": 42.29387755102041, "grad_norm": 31.005970001220703, "learning_rate": 2.886938775510204e-05, "loss": 2.1419, "step": 10362 }, { "epoch": 42.29795918367347, "grad_norm": 34.749488830566406, "learning_rate": 2.8867346938775512e-05, "loss": 1.5257, "step": 10363 }, { "epoch": 42.30204081632653, "grad_norm": 17.570219039916992, "learning_rate": 2.8865306122448983e-05, "loss": 2.6753, "step": 10364 }, { "epoch": 42.30612244897959, "grad_norm": 28.970212936401367, "learning_rate": 2.886326530612245e-05, "loss": 1.0343, "step": 10365 }, { "epoch": 42.310204081632655, "grad_norm": 44.160221099853516, "learning_rate": 2.8861224489795917e-05, "loss": 0.7982, "step": 10366 }, { "epoch": 42.31428571428572, "grad_norm": 18.49732208251953, "learning_rate": 2.8859183673469388e-05, "loss": 2.8014, "step": 10367 }, { "epoch": 42.31836734693878, "grad_norm": 25.03914451599121, "learning_rate": 2.885714285714286e-05, "loss": 2.0674, "step": 10368 }, { "epoch": 42.32244897959184, "grad_norm": 19.03462028503418, "learning_rate": 2.885510204081633e-05, "loss": 2.5406, "step": 10369 }, { "epoch": 42.326530612244895, "grad_norm": 30.606822967529297, "learning_rate": 2.8853061224489796e-05, "loss": 1.4873, "step": 10370 }, { "epoch": 42.33061224489796, "grad_norm": 30.403844833374023, "learning_rate": 2.8851020408163266e-05, "loss": 2.1505, "step": 10371 }, { "epoch": 42.33469387755102, "grad_norm": 19.969215393066406, "learning_rate": 2.8848979591836733e-05, "loss": 2.382, "step": 10372 }, { "epoch": 42.33877551020408, "grad_norm": 16.895658493041992, "learning_rate": 2.8846938775510207e-05, "loss": 2.6094, "step": 10373 }, { "epoch": 42.34285714285714, "grad_norm": 21.17259407043457, "learning_rate": 2.8844897959183675e-05, "loss": 2.5193, "step": 10374 }, { "epoch": 42.3469387755102, "grad_norm": 20.60866928100586, "learning_rate": 2.8842857142857145e-05, "loss": 2.3896, "step": 10375 }, { "epoch": 42.351020408163265, "grad_norm": 18.30318832397461, "learning_rate": 2.8840816326530612e-05, "loss": 2.6945, "step": 10376 }, { "epoch": 42.35510204081633, "grad_norm": 32.493804931640625, "learning_rate": 2.8838775510204086e-05, "loss": 1.8561, "step": 10377 }, { "epoch": 42.35918367346939, "grad_norm": 27.586904525756836, "learning_rate": 2.8836734693877553e-05, "loss": 1.9903, "step": 10378 }, { "epoch": 42.36326530612245, "grad_norm": 39.11788558959961, "learning_rate": 2.8834693877551024e-05, "loss": 1.3431, "step": 10379 }, { "epoch": 42.36734693877551, "grad_norm": 26.14811897277832, "learning_rate": 2.883265306122449e-05, "loss": 1.3225, "step": 10380 }, { "epoch": 42.371428571428574, "grad_norm": 11.039382934570312, "learning_rate": 2.8830612244897958e-05, "loss": 3.129, "step": 10381 }, { "epoch": 42.375510204081635, "grad_norm": 13.236429214477539, "learning_rate": 2.8828571428571432e-05, "loss": 2.8435, "step": 10382 }, { "epoch": 42.3795918367347, "grad_norm": 31.531644821166992, "learning_rate": 2.8826530612244902e-05, "loss": 1.081, "step": 10383 }, { "epoch": 42.38367346938775, "grad_norm": 21.151166915893555, "learning_rate": 2.882448979591837e-05, "loss": 2.6518, "step": 10384 }, { "epoch": 42.38775510204081, "grad_norm": 35.33429718017578, "learning_rate": 2.8822448979591837e-05, "loss": 1.1473, "step": 10385 }, { "epoch": 42.391836734693875, "grad_norm": 23.585012435913086, "learning_rate": 2.8820408163265307e-05, "loss": 2.4702, "step": 10386 }, { "epoch": 42.39591836734694, "grad_norm": 23.376455307006836, "learning_rate": 2.881836734693878e-05, "loss": 2.284, "step": 10387 }, { "epoch": 42.4, "grad_norm": 19.869915008544922, "learning_rate": 2.8816326530612248e-05, "loss": 2.2287, "step": 10388 }, { "epoch": 42.40408163265306, "grad_norm": 38.34715270996094, "learning_rate": 2.8814285714285715e-05, "loss": 0.7787, "step": 10389 }, { "epoch": 42.40816326530612, "grad_norm": 27.788379669189453, "learning_rate": 2.8812244897959186e-05, "loss": 1.7715, "step": 10390 }, { "epoch": 42.412244897959184, "grad_norm": 34.45817565917969, "learning_rate": 2.8810204081632653e-05, "loss": 0.6556, "step": 10391 }, { "epoch": 42.416326530612245, "grad_norm": 48.73828125, "learning_rate": 2.8808163265306127e-05, "loss": 1.3011, "step": 10392 }, { "epoch": 42.42040816326531, "grad_norm": 19.827396392822266, "learning_rate": 2.8806122448979594e-05, "loss": 2.3702, "step": 10393 }, { "epoch": 42.42448979591837, "grad_norm": 24.610857009887695, "learning_rate": 2.880408163265306e-05, "loss": 1.9756, "step": 10394 }, { "epoch": 42.42857142857143, "grad_norm": 31.046297073364258, "learning_rate": 2.880204081632653e-05, "loss": 1.3306, "step": 10395 }, { "epoch": 42.43265306122449, "grad_norm": 50.908958435058594, "learning_rate": 2.88e-05, "loss": 0.7889, "step": 10396 }, { "epoch": 42.436734693877554, "grad_norm": 21.602767944335938, "learning_rate": 2.8797959183673473e-05, "loss": 1.9787, "step": 10397 }, { "epoch": 42.440816326530616, "grad_norm": 23.160812377929688, "learning_rate": 2.879591836734694e-05, "loss": 2.4681, "step": 10398 }, { "epoch": 42.44489795918367, "grad_norm": 24.281219482421875, "learning_rate": 2.879387755102041e-05, "loss": 2.0201, "step": 10399 }, { "epoch": 42.44897959183673, "grad_norm": 26.90155792236328, "learning_rate": 2.8791836734693877e-05, "loss": 1.7268, "step": 10400 }, { "epoch": 42.453061224489794, "grad_norm": 26.86115264892578, "learning_rate": 2.8789795918367344e-05, "loss": 1.9642, "step": 10401 }, { "epoch": 42.457142857142856, "grad_norm": 15.507572174072266, "learning_rate": 2.878775510204082e-05, "loss": 2.4401, "step": 10402 }, { "epoch": 42.46122448979592, "grad_norm": 33.75511169433594, "learning_rate": 2.878571428571429e-05, "loss": 1.6496, "step": 10403 }, { "epoch": 42.46530612244898, "grad_norm": 17.45208168029785, "learning_rate": 2.8783673469387756e-05, "loss": 2.436, "step": 10404 }, { "epoch": 42.46938775510204, "grad_norm": 19.30938720703125, "learning_rate": 2.8781632653061223e-05, "loss": 2.4082, "step": 10405 }, { "epoch": 42.4734693877551, "grad_norm": 32.59688186645508, "learning_rate": 2.8779591836734694e-05, "loss": 1.4007, "step": 10406 }, { "epoch": 42.477551020408164, "grad_norm": 21.443798065185547, "learning_rate": 2.8777551020408168e-05, "loss": 2.4704, "step": 10407 }, { "epoch": 42.481632653061226, "grad_norm": 16.0758056640625, "learning_rate": 2.8775510204081635e-05, "loss": 2.544, "step": 10408 }, { "epoch": 42.48571428571429, "grad_norm": 35.056922912597656, "learning_rate": 2.8773469387755102e-05, "loss": 1.4381, "step": 10409 }, { "epoch": 42.48979591836735, "grad_norm": 16.87875747680664, "learning_rate": 2.8771428571428572e-05, "loss": 2.5358, "step": 10410 }, { "epoch": 42.49387755102041, "grad_norm": 17.781890869140625, "learning_rate": 2.876938775510204e-05, "loss": 2.9256, "step": 10411 }, { "epoch": 42.49795918367347, "grad_norm": 16.02819061279297, "learning_rate": 2.8767346938775513e-05, "loss": 2.287, "step": 10412 }, { "epoch": 42.50204081632653, "grad_norm": 29.743894577026367, "learning_rate": 2.876530612244898e-05, "loss": 0.9196, "step": 10413 }, { "epoch": 42.50612244897959, "grad_norm": 13.948201179504395, "learning_rate": 2.876326530612245e-05, "loss": 2.9857, "step": 10414 }, { "epoch": 42.51020408163265, "grad_norm": 28.75872230529785, "learning_rate": 2.8761224489795918e-05, "loss": 1.3326, "step": 10415 }, { "epoch": 42.51428571428571, "grad_norm": 22.664535522460938, "learning_rate": 2.8759183673469385e-05, "loss": 2.2806, "step": 10416 }, { "epoch": 42.518367346938774, "grad_norm": 42.66181182861328, "learning_rate": 2.875714285714286e-05, "loss": 1.2382, "step": 10417 }, { "epoch": 42.522448979591836, "grad_norm": 24.281702041625977, "learning_rate": 2.875510204081633e-05, "loss": 2.3073, "step": 10418 }, { "epoch": 42.5265306122449, "grad_norm": 23.139039993286133, "learning_rate": 2.8753061224489797e-05, "loss": 2.4209, "step": 10419 }, { "epoch": 42.53061224489796, "grad_norm": 52.854827880859375, "learning_rate": 2.8751020408163264e-05, "loss": 1.3051, "step": 10420 }, { "epoch": 42.53469387755102, "grad_norm": 31.593280792236328, "learning_rate": 2.8748979591836738e-05, "loss": 1.8674, "step": 10421 }, { "epoch": 42.53877551020408, "grad_norm": 28.66166114807129, "learning_rate": 2.874693877551021e-05, "loss": 2.3266, "step": 10422 }, { "epoch": 42.542857142857144, "grad_norm": 11.403092384338379, "learning_rate": 2.8744897959183675e-05, "loss": 3.3679, "step": 10423 }, { "epoch": 42.546938775510206, "grad_norm": 18.272518157958984, "learning_rate": 2.8742857142857143e-05, "loss": 2.4224, "step": 10424 }, { "epoch": 42.55102040816327, "grad_norm": 25.025943756103516, "learning_rate": 2.8740816326530613e-05, "loss": 2.1422, "step": 10425 }, { "epoch": 42.55510204081633, "grad_norm": 27.85076904296875, "learning_rate": 2.8738775510204087e-05, "loss": 1.9992, "step": 10426 }, { "epoch": 42.55918367346939, "grad_norm": 26.2943172454834, "learning_rate": 2.8736734693877554e-05, "loss": 2.2281, "step": 10427 }, { "epoch": 42.563265306122446, "grad_norm": 23.904403686523438, "learning_rate": 2.873469387755102e-05, "loss": 2.0195, "step": 10428 }, { "epoch": 42.56734693877551, "grad_norm": 55.975730895996094, "learning_rate": 2.8732653061224492e-05, "loss": 2.3239, "step": 10429 }, { "epoch": 42.57142857142857, "grad_norm": 14.685158729553223, "learning_rate": 2.873061224489796e-05, "loss": 2.9117, "step": 10430 }, { "epoch": 42.57551020408163, "grad_norm": 35.71390914916992, "learning_rate": 2.8728571428571433e-05, "loss": 1.5925, "step": 10431 }, { "epoch": 42.57959183673469, "grad_norm": 10.082625389099121, "learning_rate": 2.87265306122449e-05, "loss": 3.0689, "step": 10432 }, { "epoch": 42.583673469387755, "grad_norm": 26.704631805419922, "learning_rate": 2.872448979591837e-05, "loss": 1.794, "step": 10433 }, { "epoch": 42.587755102040816, "grad_norm": 15.345115661621094, "learning_rate": 2.8722448979591838e-05, "loss": 2.5315, "step": 10434 }, { "epoch": 42.59183673469388, "grad_norm": 43.17375946044922, "learning_rate": 2.8720408163265305e-05, "loss": 1.1978, "step": 10435 }, { "epoch": 42.59591836734694, "grad_norm": 45.2375373840332, "learning_rate": 2.871836734693878e-05, "loss": 1.184, "step": 10436 }, { "epoch": 42.6, "grad_norm": 58.795875549316406, "learning_rate": 2.871632653061225e-05, "loss": 0.6581, "step": 10437 }, { "epoch": 42.60408163265306, "grad_norm": 20.75320053100586, "learning_rate": 2.8714285714285716e-05, "loss": 2.3296, "step": 10438 }, { "epoch": 42.608163265306125, "grad_norm": 30.575376510620117, "learning_rate": 2.8712244897959183e-05, "loss": 1.8831, "step": 10439 }, { "epoch": 42.61224489795919, "grad_norm": 25.24622917175293, "learning_rate": 2.8710204081632654e-05, "loss": 1.8664, "step": 10440 }, { "epoch": 42.61632653061225, "grad_norm": 31.34749984741211, "learning_rate": 2.8708163265306128e-05, "loss": 2.0795, "step": 10441 }, { "epoch": 42.6204081632653, "grad_norm": 29.300153732299805, "learning_rate": 2.8706122448979595e-05, "loss": 1.7804, "step": 10442 }, { "epoch": 42.624489795918365, "grad_norm": 28.94234275817871, "learning_rate": 2.8704081632653062e-05, "loss": 2.269, "step": 10443 }, { "epoch": 42.628571428571426, "grad_norm": 27.657419204711914, "learning_rate": 2.8702040816326533e-05, "loss": 1.9108, "step": 10444 }, { "epoch": 42.63265306122449, "grad_norm": 21.054969787597656, "learning_rate": 2.87e-05, "loss": 2.4351, "step": 10445 }, { "epoch": 42.63673469387755, "grad_norm": 20.228071212768555, "learning_rate": 2.8697959183673474e-05, "loss": 2.583, "step": 10446 }, { "epoch": 42.64081632653061, "grad_norm": 23.768394470214844, "learning_rate": 2.869591836734694e-05, "loss": 2.0722, "step": 10447 }, { "epoch": 42.64489795918367, "grad_norm": 33.16232681274414, "learning_rate": 2.8693877551020408e-05, "loss": 2.122, "step": 10448 }, { "epoch": 42.648979591836735, "grad_norm": 28.95403480529785, "learning_rate": 2.869183673469388e-05, "loss": 1.7495, "step": 10449 }, { "epoch": 42.6530612244898, "grad_norm": 23.35213851928711, "learning_rate": 2.8689795918367345e-05, "loss": 2.2312, "step": 10450 }, { "epoch": 42.65714285714286, "grad_norm": 36.62937545776367, "learning_rate": 2.868775510204082e-05, "loss": 1.5802, "step": 10451 }, { "epoch": 42.66122448979592, "grad_norm": 36.96900939941406, "learning_rate": 2.8685714285714286e-05, "loss": 1.6208, "step": 10452 }, { "epoch": 42.66530612244898, "grad_norm": 17.018949508666992, "learning_rate": 2.8683673469387757e-05, "loss": 2.5156, "step": 10453 }, { "epoch": 42.66938775510204, "grad_norm": 18.38873291015625, "learning_rate": 2.8681632653061224e-05, "loss": 2.4092, "step": 10454 }, { "epoch": 42.673469387755105, "grad_norm": 14.589949607849121, "learning_rate": 2.867959183673469e-05, "loss": 2.5846, "step": 10455 }, { "epoch": 42.67755102040816, "grad_norm": 22.843524932861328, "learning_rate": 2.8677551020408165e-05, "loss": 2.3403, "step": 10456 }, { "epoch": 42.68163265306122, "grad_norm": 20.435379028320312, "learning_rate": 2.8675510204081636e-05, "loss": 2.2565, "step": 10457 }, { "epoch": 42.68571428571428, "grad_norm": 22.362855911254883, "learning_rate": 2.8673469387755103e-05, "loss": 2.6077, "step": 10458 }, { "epoch": 42.689795918367345, "grad_norm": 15.993514060974121, "learning_rate": 2.867142857142857e-05, "loss": 2.6021, "step": 10459 }, { "epoch": 42.69387755102041, "grad_norm": 15.878568649291992, "learning_rate": 2.866938775510204e-05, "loss": 2.6104, "step": 10460 }, { "epoch": 42.69795918367347, "grad_norm": 35.15595626831055, "learning_rate": 2.8667346938775514e-05, "loss": 1.2633, "step": 10461 }, { "epoch": 42.70204081632653, "grad_norm": 36.30720520019531, "learning_rate": 2.866530612244898e-05, "loss": 2.1739, "step": 10462 }, { "epoch": 42.70612244897959, "grad_norm": 17.94475555419922, "learning_rate": 2.866326530612245e-05, "loss": 2.6015, "step": 10463 }, { "epoch": 42.710204081632654, "grad_norm": 14.308430671691895, "learning_rate": 2.866122448979592e-05, "loss": 2.9164, "step": 10464 }, { "epoch": 42.714285714285715, "grad_norm": 11.767489433288574, "learning_rate": 2.8659183673469386e-05, "loss": 3.0993, "step": 10465 }, { "epoch": 42.71836734693878, "grad_norm": 35.20790100097656, "learning_rate": 2.865714285714286e-05, "loss": 0.434, "step": 10466 }, { "epoch": 42.72244897959184, "grad_norm": 14.22828197479248, "learning_rate": 2.8655102040816327e-05, "loss": 3.1518, "step": 10467 }, { "epoch": 42.7265306122449, "grad_norm": 18.613525390625, "learning_rate": 2.8653061224489798e-05, "loss": 2.0416, "step": 10468 }, { "epoch": 42.73061224489796, "grad_norm": 32.67840576171875, "learning_rate": 2.8651020408163265e-05, "loss": 0.5651, "step": 10469 }, { "epoch": 42.734693877551024, "grad_norm": 42.964073181152344, "learning_rate": 2.864897959183674e-05, "loss": 1.3323, "step": 10470 }, { "epoch": 42.73877551020408, "grad_norm": 39.79094696044922, "learning_rate": 2.8646938775510206e-05, "loss": 1.7191, "step": 10471 }, { "epoch": 42.74285714285714, "grad_norm": 38.145904541015625, "learning_rate": 2.8644897959183676e-05, "loss": 1.5799, "step": 10472 }, { "epoch": 42.7469387755102, "grad_norm": 27.02174186706543, "learning_rate": 2.8642857142857144e-05, "loss": 1.9758, "step": 10473 }, { "epoch": 42.751020408163264, "grad_norm": 17.88320541381836, "learning_rate": 2.864081632653061e-05, "loss": 2.6242, "step": 10474 }, { "epoch": 42.755102040816325, "grad_norm": 46.52811813354492, "learning_rate": 2.8638775510204085e-05, "loss": 0.8793, "step": 10475 }, { "epoch": 42.75918367346939, "grad_norm": 18.755056381225586, "learning_rate": 2.8636734693877555e-05, "loss": 2.4722, "step": 10476 }, { "epoch": 42.76326530612245, "grad_norm": 32.83094024658203, "learning_rate": 2.8634693877551022e-05, "loss": 2.218, "step": 10477 }, { "epoch": 42.76734693877551, "grad_norm": 22.49466896057129, "learning_rate": 2.863265306122449e-05, "loss": 1.6303, "step": 10478 }, { "epoch": 42.77142857142857, "grad_norm": 31.594261169433594, "learning_rate": 2.863061224489796e-05, "loss": 2.2965, "step": 10479 }, { "epoch": 42.775510204081634, "grad_norm": 32.23009490966797, "learning_rate": 2.8628571428571434e-05, "loss": 1.1845, "step": 10480 }, { "epoch": 42.779591836734696, "grad_norm": 15.522640228271484, "learning_rate": 2.86265306122449e-05, "loss": 3.0459, "step": 10481 }, { "epoch": 42.78367346938776, "grad_norm": 35.64439392089844, "learning_rate": 2.8624489795918368e-05, "loss": 1.3885, "step": 10482 }, { "epoch": 42.78775510204082, "grad_norm": 23.37929916381836, "learning_rate": 2.862244897959184e-05, "loss": 2.3071, "step": 10483 }, { "epoch": 42.79183673469388, "grad_norm": 35.8829345703125, "learning_rate": 2.8620408163265306e-05, "loss": 1.5576, "step": 10484 }, { "epoch": 42.795918367346935, "grad_norm": 20.677099227905273, "learning_rate": 2.861836734693878e-05, "loss": 2.4595, "step": 10485 }, { "epoch": 42.8, "grad_norm": 27.49112892150879, "learning_rate": 2.8616326530612247e-05, "loss": 2.2428, "step": 10486 }, { "epoch": 42.80408163265306, "grad_norm": 34.880653381347656, "learning_rate": 2.8614285714285717e-05, "loss": 1.7724, "step": 10487 }, { "epoch": 42.80816326530612, "grad_norm": 14.815361022949219, "learning_rate": 2.8612244897959184e-05, "loss": 2.6605, "step": 10488 }, { "epoch": 42.81224489795918, "grad_norm": 10.22048568725586, "learning_rate": 2.861020408163265e-05, "loss": 3.293, "step": 10489 }, { "epoch": 42.816326530612244, "grad_norm": 12.109619140625, "learning_rate": 2.8608163265306125e-05, "loss": 2.9537, "step": 10490 }, { "epoch": 42.820408163265306, "grad_norm": 22.343576431274414, "learning_rate": 2.8606122448979596e-05, "loss": 2.1472, "step": 10491 }, { "epoch": 42.82448979591837, "grad_norm": 21.55253791809082, "learning_rate": 2.8604081632653063e-05, "loss": 2.4722, "step": 10492 }, { "epoch": 42.82857142857143, "grad_norm": 23.288209915161133, "learning_rate": 2.860204081632653e-05, "loss": 2.0939, "step": 10493 }, { "epoch": 42.83265306122449, "grad_norm": 18.69296646118164, "learning_rate": 2.86e-05, "loss": 2.7859, "step": 10494 }, { "epoch": 42.83673469387755, "grad_norm": 19.13862419128418, "learning_rate": 2.8597959183673475e-05, "loss": 2.4849, "step": 10495 }, { "epoch": 42.840816326530614, "grad_norm": 19.782848358154297, "learning_rate": 2.859591836734694e-05, "loss": 2.0381, "step": 10496 }, { "epoch": 42.844897959183676, "grad_norm": 41.76889419555664, "learning_rate": 2.859387755102041e-05, "loss": 1.4648, "step": 10497 }, { "epoch": 42.84897959183674, "grad_norm": 15.875986099243164, "learning_rate": 2.8591836734693876e-05, "loss": 0.1285, "step": 10498 }, { "epoch": 42.85306122448979, "grad_norm": 13.59803295135498, "learning_rate": 2.8589795918367346e-05, "loss": 2.7561, "step": 10499 }, { "epoch": 42.857142857142854, "grad_norm": 29.042057037353516, "learning_rate": 2.858775510204082e-05, "loss": 1.9368, "step": 10500 }, { "epoch": 42.861224489795916, "grad_norm": 17.174457550048828, "learning_rate": 2.8585714285714287e-05, "loss": 2.6461, "step": 10501 }, { "epoch": 42.86530612244898, "grad_norm": 26.630847930908203, "learning_rate": 2.8583673469387755e-05, "loss": 2.1526, "step": 10502 }, { "epoch": 42.86938775510204, "grad_norm": 34.2310791015625, "learning_rate": 2.8581632653061225e-05, "loss": 2.0098, "step": 10503 }, { "epoch": 42.8734693877551, "grad_norm": 17.194427490234375, "learning_rate": 2.8579591836734692e-05, "loss": 2.5014, "step": 10504 }, { "epoch": 42.87755102040816, "grad_norm": 22.477975845336914, "learning_rate": 2.8577551020408166e-05, "loss": 2.1436, "step": 10505 }, { "epoch": 42.881632653061224, "grad_norm": 31.719696044921875, "learning_rate": 2.8575510204081633e-05, "loss": 1.7399, "step": 10506 }, { "epoch": 42.885714285714286, "grad_norm": 36.15385818481445, "learning_rate": 2.8573469387755104e-05, "loss": 1.3799, "step": 10507 }, { "epoch": 42.88979591836735, "grad_norm": 29.530080795288086, "learning_rate": 2.857142857142857e-05, "loss": 2.1103, "step": 10508 }, { "epoch": 42.89387755102041, "grad_norm": 20.188207626342773, "learning_rate": 2.8569387755102038e-05, "loss": 2.4855, "step": 10509 }, { "epoch": 42.89795918367347, "grad_norm": 32.613624572753906, "learning_rate": 2.8567346938775512e-05, "loss": 0.9061, "step": 10510 }, { "epoch": 42.90204081632653, "grad_norm": 28.180877685546875, "learning_rate": 2.8565306122448982e-05, "loss": 1.7601, "step": 10511 }, { "epoch": 42.906122448979595, "grad_norm": 18.203969955444336, "learning_rate": 2.856326530612245e-05, "loss": 2.4766, "step": 10512 }, { "epoch": 42.910204081632656, "grad_norm": 16.0964298248291, "learning_rate": 2.8561224489795917e-05, "loss": 2.6625, "step": 10513 }, { "epoch": 42.91428571428571, "grad_norm": 16.576610565185547, "learning_rate": 2.855918367346939e-05, "loss": 2.7197, "step": 10514 }, { "epoch": 42.91836734693877, "grad_norm": 20.304859161376953, "learning_rate": 2.855714285714286e-05, "loss": 2.6534, "step": 10515 }, { "epoch": 42.922448979591834, "grad_norm": 31.817995071411133, "learning_rate": 2.8555102040816328e-05, "loss": 2.0111, "step": 10516 }, { "epoch": 42.926530612244896, "grad_norm": 16.15941619873047, "learning_rate": 2.8553061224489795e-05, "loss": 2.6253, "step": 10517 }, { "epoch": 42.93061224489796, "grad_norm": 28.92976188659668, "learning_rate": 2.8551020408163266e-05, "loss": 1.2621, "step": 10518 }, { "epoch": 42.93469387755102, "grad_norm": 47.80323791503906, "learning_rate": 2.854897959183674e-05, "loss": 0.8067, "step": 10519 }, { "epoch": 42.93877551020408, "grad_norm": 56.30767822265625, "learning_rate": 2.8546938775510207e-05, "loss": 0.8267, "step": 10520 }, { "epoch": 42.94285714285714, "grad_norm": 19.46476173400879, "learning_rate": 2.8544897959183674e-05, "loss": 2.4552, "step": 10521 }, { "epoch": 42.946938775510205, "grad_norm": 23.371421813964844, "learning_rate": 2.8542857142857144e-05, "loss": 2.248, "step": 10522 }, { "epoch": 42.95102040816327, "grad_norm": 30.679237365722656, "learning_rate": 2.854081632653061e-05, "loss": 1.4575, "step": 10523 }, { "epoch": 42.95510204081633, "grad_norm": 27.060199737548828, "learning_rate": 2.8538775510204086e-05, "loss": 2.3723, "step": 10524 }, { "epoch": 42.95918367346939, "grad_norm": 17.856603622436523, "learning_rate": 2.8536734693877553e-05, "loss": 2.6809, "step": 10525 }, { "epoch": 42.96326530612245, "grad_norm": 39.92642593383789, "learning_rate": 2.8534693877551023e-05, "loss": 0.6389, "step": 10526 }, { "epoch": 42.96734693877551, "grad_norm": 32.17784118652344, "learning_rate": 2.853265306122449e-05, "loss": 1.7602, "step": 10527 }, { "epoch": 42.97142857142857, "grad_norm": 15.966217994689941, "learning_rate": 2.8530612244897957e-05, "loss": 2.9251, "step": 10528 }, { "epoch": 42.97551020408163, "grad_norm": 41.86806106567383, "learning_rate": 2.852857142857143e-05, "loss": 1.4086, "step": 10529 }, { "epoch": 42.97959183673469, "grad_norm": 39.006290435791016, "learning_rate": 2.8526530612244902e-05, "loss": 1.6876, "step": 10530 }, { "epoch": 42.98367346938775, "grad_norm": 16.323291778564453, "learning_rate": 2.852448979591837e-05, "loss": 2.7884, "step": 10531 }, { "epoch": 42.987755102040815, "grad_norm": 19.334463119506836, "learning_rate": 2.8522448979591836e-05, "loss": 2.4707, "step": 10532 }, { "epoch": 42.99183673469388, "grad_norm": 35.624847412109375, "learning_rate": 2.8520408163265307e-05, "loss": 1.4666, "step": 10533 }, { "epoch": 42.99591836734694, "grad_norm": 15.114763259887695, "learning_rate": 2.851836734693878e-05, "loss": 2.5442, "step": 10534 }, { "epoch": 43.0, "grad_norm": 30.976625442504883, "learning_rate": 2.8516326530612248e-05, "loss": 1.3217, "step": 10535 }, { "epoch": 43.00408163265306, "grad_norm": 27.42008399963379, "learning_rate": 2.8514285714285715e-05, "loss": 2.1, "step": 10536 }, { "epoch": 43.00816326530612, "grad_norm": 20.048477172851562, "learning_rate": 2.8512244897959185e-05, "loss": 2.3343, "step": 10537 }, { "epoch": 43.012244897959185, "grad_norm": 31.309982299804688, "learning_rate": 2.8510204081632652e-05, "loss": 1.8255, "step": 10538 }, { "epoch": 43.01632653061225, "grad_norm": 18.02945327758789, "learning_rate": 2.8508163265306126e-05, "loss": 2.2565, "step": 10539 }, { "epoch": 43.02040816326531, "grad_norm": 28.34691047668457, "learning_rate": 2.8506122448979593e-05, "loss": 2.1163, "step": 10540 }, { "epoch": 43.02448979591837, "grad_norm": 24.08938217163086, "learning_rate": 2.8504081632653064e-05, "loss": 2.2479, "step": 10541 }, { "epoch": 43.02857142857143, "grad_norm": 32.192474365234375, "learning_rate": 2.850204081632653e-05, "loss": 1.7554, "step": 10542 }, { "epoch": 43.03265306122449, "grad_norm": 22.304121017456055, "learning_rate": 2.8499999999999998e-05, "loss": 2.3598, "step": 10543 }, { "epoch": 43.03673469387755, "grad_norm": 29.533601760864258, "learning_rate": 2.8497959183673472e-05, "loss": 1.3797, "step": 10544 }, { "epoch": 43.04081632653061, "grad_norm": 71.59955596923828, "learning_rate": 2.8495918367346943e-05, "loss": 1.4851, "step": 10545 }, { "epoch": 43.04489795918367, "grad_norm": 66.72362518310547, "learning_rate": 2.849387755102041e-05, "loss": 0.5906, "step": 10546 }, { "epoch": 43.04897959183673, "grad_norm": 17.017749786376953, "learning_rate": 2.8491836734693877e-05, "loss": 2.6211, "step": 10547 }, { "epoch": 43.053061224489795, "grad_norm": 32.99668502807617, "learning_rate": 2.8489795918367347e-05, "loss": 2.1248, "step": 10548 }, { "epoch": 43.05714285714286, "grad_norm": 20.701913833618164, "learning_rate": 2.8487755102040818e-05, "loss": 1.9008, "step": 10549 }, { "epoch": 43.06122448979592, "grad_norm": 27.11467933654785, "learning_rate": 2.848571428571429e-05, "loss": 1.5763, "step": 10550 }, { "epoch": 43.06530612244898, "grad_norm": 26.52032470703125, "learning_rate": 2.8483673469387756e-05, "loss": 2.3609, "step": 10551 }, { "epoch": 43.06938775510204, "grad_norm": 48.823814392089844, "learning_rate": 2.8481632653061223e-05, "loss": 0.6835, "step": 10552 }, { "epoch": 43.073469387755104, "grad_norm": 32.53337478637695, "learning_rate": 2.8479591836734693e-05, "loss": 1.2489, "step": 10553 }, { "epoch": 43.077551020408166, "grad_norm": 30.561080932617188, "learning_rate": 2.8477551020408167e-05, "loss": 1.9311, "step": 10554 }, { "epoch": 43.08163265306123, "grad_norm": 32.32073974609375, "learning_rate": 2.8475510204081634e-05, "loss": 1.6222, "step": 10555 }, { "epoch": 43.08571428571429, "grad_norm": 36.269737243652344, "learning_rate": 2.84734693877551e-05, "loss": 1.3185, "step": 10556 }, { "epoch": 43.089795918367344, "grad_norm": 31.565799713134766, "learning_rate": 2.8471428571428572e-05, "loss": 1.1556, "step": 10557 }, { "epoch": 43.093877551020405, "grad_norm": 22.621679306030273, "learning_rate": 2.8469387755102046e-05, "loss": 2.1653, "step": 10558 }, { "epoch": 43.09795918367347, "grad_norm": 31.640600204467773, "learning_rate": 2.8467346938775513e-05, "loss": 1.6386, "step": 10559 }, { "epoch": 43.10204081632653, "grad_norm": 31.851158142089844, "learning_rate": 2.846530612244898e-05, "loss": 2.1443, "step": 10560 }, { "epoch": 43.10612244897959, "grad_norm": 33.50413513183594, "learning_rate": 2.846326530612245e-05, "loss": 1.5604, "step": 10561 }, { "epoch": 43.11020408163265, "grad_norm": 15.74501895904541, "learning_rate": 2.8461224489795918e-05, "loss": 2.4855, "step": 10562 }, { "epoch": 43.114285714285714, "grad_norm": 34.892974853515625, "learning_rate": 2.845918367346939e-05, "loss": 0.7244, "step": 10563 }, { "epoch": 43.118367346938776, "grad_norm": 24.518569946289062, "learning_rate": 2.845714285714286e-05, "loss": 2.1438, "step": 10564 }, { "epoch": 43.12244897959184, "grad_norm": 42.31025695800781, "learning_rate": 2.845510204081633e-05, "loss": 1.357, "step": 10565 }, { "epoch": 43.1265306122449, "grad_norm": 21.424325942993164, "learning_rate": 2.8453061224489796e-05, "loss": 2.2168, "step": 10566 }, { "epoch": 43.13061224489796, "grad_norm": 15.836846351623535, "learning_rate": 2.8451020408163263e-05, "loss": 2.4898, "step": 10567 }, { "epoch": 43.13469387755102, "grad_norm": 28.048595428466797, "learning_rate": 2.8448979591836737e-05, "loss": 0.5234, "step": 10568 }, { "epoch": 43.138775510204084, "grad_norm": 17.440811157226562, "learning_rate": 2.8446938775510208e-05, "loss": 2.6075, "step": 10569 }, { "epoch": 43.142857142857146, "grad_norm": 28.136999130249023, "learning_rate": 2.8444897959183675e-05, "loss": 2.4407, "step": 10570 }, { "epoch": 43.14693877551021, "grad_norm": 30.63652992248535, "learning_rate": 2.8442857142857142e-05, "loss": 1.5493, "step": 10571 }, { "epoch": 43.15102040816326, "grad_norm": 17.207719802856445, "learning_rate": 2.8440816326530613e-05, "loss": 2.4591, "step": 10572 }, { "epoch": 43.155102040816324, "grad_norm": 12.690600395202637, "learning_rate": 2.8438775510204086e-05, "loss": 2.796, "step": 10573 }, { "epoch": 43.159183673469386, "grad_norm": 27.06150245666504, "learning_rate": 2.8436734693877554e-05, "loss": 1.9391, "step": 10574 }, { "epoch": 43.16326530612245, "grad_norm": 19.57394790649414, "learning_rate": 2.843469387755102e-05, "loss": 2.418, "step": 10575 }, { "epoch": 43.16734693877551, "grad_norm": 20.270620346069336, "learning_rate": 2.843265306122449e-05, "loss": 1.8384, "step": 10576 }, { "epoch": 43.17142857142857, "grad_norm": 22.189973831176758, "learning_rate": 2.843061224489796e-05, "loss": 2.3934, "step": 10577 }, { "epoch": 43.17551020408163, "grad_norm": 36.73844528198242, "learning_rate": 2.8428571428571432e-05, "loss": 1.4183, "step": 10578 }, { "epoch": 43.179591836734694, "grad_norm": 25.519079208374023, "learning_rate": 2.84265306122449e-05, "loss": 1.7493, "step": 10579 }, { "epoch": 43.183673469387756, "grad_norm": 18.059444427490234, "learning_rate": 2.842448979591837e-05, "loss": 2.4561, "step": 10580 }, { "epoch": 43.18775510204082, "grad_norm": 13.533766746520996, "learning_rate": 2.8422448979591837e-05, "loss": 3.0291, "step": 10581 }, { "epoch": 43.19183673469388, "grad_norm": 26.296958923339844, "learning_rate": 2.8420408163265304e-05, "loss": 2.2217, "step": 10582 }, { "epoch": 43.19591836734694, "grad_norm": 21.14116668701172, "learning_rate": 2.8418367346938778e-05, "loss": 2.3869, "step": 10583 }, { "epoch": 43.2, "grad_norm": 23.008949279785156, "learning_rate": 2.841632653061225e-05, "loss": 2.2872, "step": 10584 }, { "epoch": 43.204081632653065, "grad_norm": 16.873577117919922, "learning_rate": 2.8414285714285716e-05, "loss": 2.6017, "step": 10585 }, { "epoch": 43.20816326530612, "grad_norm": 19.365558624267578, "learning_rate": 2.8412244897959183e-05, "loss": 2.3124, "step": 10586 }, { "epoch": 43.21224489795918, "grad_norm": 15.021235466003418, "learning_rate": 2.8410204081632653e-05, "loss": 2.6254, "step": 10587 }, { "epoch": 43.21632653061224, "grad_norm": 29.266437530517578, "learning_rate": 2.8408163265306127e-05, "loss": 2.0075, "step": 10588 }, { "epoch": 43.220408163265304, "grad_norm": 26.342260360717773, "learning_rate": 2.8406122448979594e-05, "loss": 1.5933, "step": 10589 }, { "epoch": 43.224489795918366, "grad_norm": 37.93718338012695, "learning_rate": 2.840408163265306e-05, "loss": 0.6964, "step": 10590 }, { "epoch": 43.22857142857143, "grad_norm": 33.601078033447266, "learning_rate": 2.8402040816326532e-05, "loss": 0.3472, "step": 10591 }, { "epoch": 43.23265306122449, "grad_norm": 24.61002540588379, "learning_rate": 2.84e-05, "loss": 2.4988, "step": 10592 }, { "epoch": 43.23673469387755, "grad_norm": 26.843477249145508, "learning_rate": 2.8397959183673473e-05, "loss": 1.9076, "step": 10593 }, { "epoch": 43.24081632653061, "grad_norm": 19.551517486572266, "learning_rate": 2.839591836734694e-05, "loss": 2.5734, "step": 10594 }, { "epoch": 43.244897959183675, "grad_norm": 37.812435150146484, "learning_rate": 2.839387755102041e-05, "loss": 1.2107, "step": 10595 }, { "epoch": 43.248979591836736, "grad_norm": 16.151351928710938, "learning_rate": 2.8391836734693878e-05, "loss": 2.5615, "step": 10596 }, { "epoch": 43.2530612244898, "grad_norm": 35.634212493896484, "learning_rate": 2.8389795918367345e-05, "loss": 1.7605, "step": 10597 }, { "epoch": 43.25714285714286, "grad_norm": 28.79726219177246, "learning_rate": 2.838775510204082e-05, "loss": 1.9077, "step": 10598 }, { "epoch": 43.26122448979592, "grad_norm": 22.517969131469727, "learning_rate": 2.838571428571429e-05, "loss": 1.6592, "step": 10599 }, { "epoch": 43.265306122448976, "grad_norm": 17.64750862121582, "learning_rate": 2.8383673469387756e-05, "loss": 2.2132, "step": 10600 }, { "epoch": 43.26938775510204, "grad_norm": 14.594613075256348, "learning_rate": 2.8381632653061224e-05, "loss": 2.53, "step": 10601 }, { "epoch": 43.2734693877551, "grad_norm": 41.04907989501953, "learning_rate": 2.8379591836734697e-05, "loss": 1.503, "step": 10602 }, { "epoch": 43.27755102040816, "grad_norm": 27.125728607177734, "learning_rate": 2.8377551020408165e-05, "loss": 1.9388, "step": 10603 }, { "epoch": 43.28163265306122, "grad_norm": 17.408323287963867, "learning_rate": 2.8375510204081635e-05, "loss": 2.6278, "step": 10604 }, { "epoch": 43.285714285714285, "grad_norm": 21.36342430114746, "learning_rate": 2.8373469387755102e-05, "loss": 2.2051, "step": 10605 }, { "epoch": 43.289795918367346, "grad_norm": 27.17134666442871, "learning_rate": 2.837142857142857e-05, "loss": 1.6543, "step": 10606 }, { "epoch": 43.29387755102041, "grad_norm": 11.815153121948242, "learning_rate": 2.8369387755102043e-05, "loss": 3.3532, "step": 10607 }, { "epoch": 43.29795918367347, "grad_norm": 38.03893280029297, "learning_rate": 2.8367346938775514e-05, "loss": 1.5151, "step": 10608 }, { "epoch": 43.30204081632653, "grad_norm": 40.22394943237305, "learning_rate": 2.836530612244898e-05, "loss": 2.0663, "step": 10609 }, { "epoch": 43.30612244897959, "grad_norm": 14.891701698303223, "learning_rate": 2.8363265306122448e-05, "loss": 3.0759, "step": 10610 }, { "epoch": 43.310204081632655, "grad_norm": 40.04580307006836, "learning_rate": 2.836122448979592e-05, "loss": 1.1705, "step": 10611 }, { "epoch": 43.31428571428572, "grad_norm": 23.176687240600586, "learning_rate": 2.8359183673469392e-05, "loss": 1.9268, "step": 10612 }, { "epoch": 43.31836734693878, "grad_norm": 14.322577476501465, "learning_rate": 2.835714285714286e-05, "loss": 3.0062, "step": 10613 }, { "epoch": 43.32244897959184, "grad_norm": 28.3021297454834, "learning_rate": 2.8355102040816327e-05, "loss": 2.1226, "step": 10614 }, { "epoch": 43.326530612244895, "grad_norm": 21.65425682067871, "learning_rate": 2.8353061224489797e-05, "loss": 1.9548, "step": 10615 }, { "epoch": 43.33061224489796, "grad_norm": 15.039839744567871, "learning_rate": 2.8351020408163264e-05, "loss": 2.6479, "step": 10616 }, { "epoch": 43.33469387755102, "grad_norm": 36.206478118896484, "learning_rate": 2.8348979591836738e-05, "loss": 1.1469, "step": 10617 }, { "epoch": 43.33877551020408, "grad_norm": 31.500396728515625, "learning_rate": 2.8346938775510205e-05, "loss": 1.8745, "step": 10618 }, { "epoch": 43.34285714285714, "grad_norm": 17.98172950744629, "learning_rate": 2.8344897959183676e-05, "loss": 2.4083, "step": 10619 }, { "epoch": 43.3469387755102, "grad_norm": 23.32461166381836, "learning_rate": 2.8342857142857143e-05, "loss": 2.0488, "step": 10620 }, { "epoch": 43.351020408163265, "grad_norm": 19.457258224487305, "learning_rate": 2.834081632653061e-05, "loss": 2.3542, "step": 10621 }, { "epoch": 43.35510204081633, "grad_norm": 40.71842956542969, "learning_rate": 2.8338775510204084e-05, "loss": 1.3538, "step": 10622 }, { "epoch": 43.35918367346939, "grad_norm": 42.93119430541992, "learning_rate": 2.8336734693877555e-05, "loss": 1.5097, "step": 10623 }, { "epoch": 43.36326530612245, "grad_norm": 27.730117797851562, "learning_rate": 2.833469387755102e-05, "loss": 2.2827, "step": 10624 }, { "epoch": 43.36734693877551, "grad_norm": 42.49693298339844, "learning_rate": 2.833265306122449e-05, "loss": 1.1707, "step": 10625 }, { "epoch": 43.371428571428574, "grad_norm": 37.766510009765625, "learning_rate": 2.833061224489796e-05, "loss": 2.2517, "step": 10626 }, { "epoch": 43.375510204081635, "grad_norm": 15.003886222839355, "learning_rate": 2.8328571428571433e-05, "loss": 2.9709, "step": 10627 }, { "epoch": 43.3795918367347, "grad_norm": 28.788236618041992, "learning_rate": 2.83265306122449e-05, "loss": 2.1896, "step": 10628 }, { "epoch": 43.38367346938775, "grad_norm": 20.851398468017578, "learning_rate": 2.8324489795918367e-05, "loss": 2.3477, "step": 10629 }, { "epoch": 43.38775510204081, "grad_norm": 38.51064682006836, "learning_rate": 2.8322448979591838e-05, "loss": 1.396, "step": 10630 }, { "epoch": 43.391836734693875, "grad_norm": 17.878053665161133, "learning_rate": 2.8320408163265305e-05, "loss": 2.3224, "step": 10631 }, { "epoch": 43.39591836734694, "grad_norm": 15.575406074523926, "learning_rate": 2.831836734693878e-05, "loss": 2.4941, "step": 10632 }, { "epoch": 43.4, "grad_norm": 27.537546157836914, "learning_rate": 2.8316326530612246e-05, "loss": 0.6036, "step": 10633 }, { "epoch": 43.40408163265306, "grad_norm": 22.444324493408203, "learning_rate": 2.8314285714285717e-05, "loss": 1.7113, "step": 10634 }, { "epoch": 43.40816326530612, "grad_norm": 15.978271484375, "learning_rate": 2.8312244897959184e-05, "loss": 2.236, "step": 10635 }, { "epoch": 43.412244897959184, "grad_norm": 38.781978607177734, "learning_rate": 2.831020408163265e-05, "loss": 0.5657, "step": 10636 }, { "epoch": 43.416326530612245, "grad_norm": 14.47799301147461, "learning_rate": 2.8308163265306125e-05, "loss": 2.905, "step": 10637 }, { "epoch": 43.42040816326531, "grad_norm": 26.579069137573242, "learning_rate": 2.8306122448979595e-05, "loss": 1.3257, "step": 10638 }, { "epoch": 43.42448979591837, "grad_norm": 13.914140701293945, "learning_rate": 2.8304081632653062e-05, "loss": 2.5196, "step": 10639 }, { "epoch": 43.42857142857143, "grad_norm": 39.45137405395508, "learning_rate": 2.830204081632653e-05, "loss": 1.5362, "step": 10640 }, { "epoch": 43.43265306122449, "grad_norm": 20.741443634033203, "learning_rate": 2.83e-05, "loss": 1.9144, "step": 10641 }, { "epoch": 43.436734693877554, "grad_norm": 29.097444534301758, "learning_rate": 2.8297959183673474e-05, "loss": 1.9194, "step": 10642 }, { "epoch": 43.440816326530616, "grad_norm": 29.744606018066406, "learning_rate": 2.829591836734694e-05, "loss": 2.1556, "step": 10643 }, { "epoch": 43.44489795918367, "grad_norm": 26.442001342773438, "learning_rate": 2.8293877551020408e-05, "loss": 2.0636, "step": 10644 }, { "epoch": 43.44897959183673, "grad_norm": 19.028230667114258, "learning_rate": 2.829183673469388e-05, "loss": 2.3788, "step": 10645 }, { "epoch": 43.453061224489794, "grad_norm": 18.551244735717773, "learning_rate": 2.8289795918367346e-05, "loss": 2.6306, "step": 10646 }, { "epoch": 43.457142857142856, "grad_norm": 48.49081802368164, "learning_rate": 2.828775510204082e-05, "loss": 1.1429, "step": 10647 }, { "epoch": 43.46122448979592, "grad_norm": 45.55302429199219, "learning_rate": 2.8285714285714287e-05, "loss": 1.2583, "step": 10648 }, { "epoch": 43.46530612244898, "grad_norm": 36.030723571777344, "learning_rate": 2.8283673469387757e-05, "loss": 1.4619, "step": 10649 }, { "epoch": 43.46938775510204, "grad_norm": 19.9819278717041, "learning_rate": 2.8281632653061225e-05, "loss": 2.5869, "step": 10650 }, { "epoch": 43.4734693877551, "grad_norm": 12.735492706298828, "learning_rate": 2.82795918367347e-05, "loss": 3.0415, "step": 10651 }, { "epoch": 43.477551020408164, "grad_norm": 43.580787658691406, "learning_rate": 2.8277551020408166e-05, "loss": 0.4687, "step": 10652 }, { "epoch": 43.481632653061226, "grad_norm": 22.82835578918457, "learning_rate": 2.8275510204081636e-05, "loss": 2.407, "step": 10653 }, { "epoch": 43.48571428571429, "grad_norm": 24.31235694885254, "learning_rate": 2.8273469387755103e-05, "loss": 2.4369, "step": 10654 }, { "epoch": 43.48979591836735, "grad_norm": 15.368983268737793, "learning_rate": 2.827142857142857e-05, "loss": 2.956, "step": 10655 }, { "epoch": 43.49387755102041, "grad_norm": 16.591360092163086, "learning_rate": 2.8269387755102044e-05, "loss": 2.4898, "step": 10656 }, { "epoch": 43.49795918367347, "grad_norm": 26.342267990112305, "learning_rate": 2.826734693877551e-05, "loss": 1.8283, "step": 10657 }, { "epoch": 43.50204081632653, "grad_norm": 28.653289794921875, "learning_rate": 2.8265306122448982e-05, "loss": 1.4369, "step": 10658 }, { "epoch": 43.50612244897959, "grad_norm": 20.827442169189453, "learning_rate": 2.826326530612245e-05, "loss": 2.7445, "step": 10659 }, { "epoch": 43.51020408163265, "grad_norm": 27.789012908935547, "learning_rate": 2.8261224489795916e-05, "loss": 2.1789, "step": 10660 }, { "epoch": 43.51428571428571, "grad_norm": 27.81277847290039, "learning_rate": 2.825918367346939e-05, "loss": 0.5204, "step": 10661 }, { "epoch": 43.518367346938774, "grad_norm": 21.24427032470703, "learning_rate": 2.825714285714286e-05, "loss": 1.7752, "step": 10662 }, { "epoch": 43.522448979591836, "grad_norm": 37.41818618774414, "learning_rate": 2.8255102040816328e-05, "loss": 1.1174, "step": 10663 }, { "epoch": 43.5265306122449, "grad_norm": 24.968017578125, "learning_rate": 2.8253061224489795e-05, "loss": 1.947, "step": 10664 }, { "epoch": 43.53061224489796, "grad_norm": 37.83137512207031, "learning_rate": 2.8251020408163265e-05, "loss": 1.3612, "step": 10665 }, { "epoch": 43.53469387755102, "grad_norm": 17.2081298828125, "learning_rate": 2.824897959183674e-05, "loss": 2.6007, "step": 10666 }, { "epoch": 43.53877551020408, "grad_norm": 17.343339920043945, "learning_rate": 2.8246938775510206e-05, "loss": 2.279, "step": 10667 }, { "epoch": 43.542857142857144, "grad_norm": 24.714406967163086, "learning_rate": 2.8244897959183673e-05, "loss": 1.5815, "step": 10668 }, { "epoch": 43.546938775510206, "grad_norm": 19.215478897094727, "learning_rate": 2.8242857142857144e-05, "loss": 2.3893, "step": 10669 }, { "epoch": 43.55102040816327, "grad_norm": 19.5826416015625, "learning_rate": 2.824081632653061e-05, "loss": 2.4978, "step": 10670 }, { "epoch": 43.55510204081633, "grad_norm": 43.880863189697266, "learning_rate": 2.8238775510204085e-05, "loss": 1.4535, "step": 10671 }, { "epoch": 43.55918367346939, "grad_norm": 11.445747375488281, "learning_rate": 2.8236734693877552e-05, "loss": 3.097, "step": 10672 }, { "epoch": 43.563265306122446, "grad_norm": 26.756500244140625, "learning_rate": 2.8234693877551023e-05, "loss": 2.6275, "step": 10673 }, { "epoch": 43.56734693877551, "grad_norm": 32.7728271484375, "learning_rate": 2.823265306122449e-05, "loss": 1.7441, "step": 10674 }, { "epoch": 43.57142857142857, "grad_norm": 18.80577850341797, "learning_rate": 2.8230612244897957e-05, "loss": 2.5242, "step": 10675 }, { "epoch": 43.57551020408163, "grad_norm": 14.964003562927246, "learning_rate": 2.822857142857143e-05, "loss": 0.1342, "step": 10676 }, { "epoch": 43.57959183673469, "grad_norm": 16.536216735839844, "learning_rate": 2.82265306122449e-05, "loss": 2.7106, "step": 10677 }, { "epoch": 43.583673469387755, "grad_norm": 29.615642547607422, "learning_rate": 2.822448979591837e-05, "loss": 1.78, "step": 10678 }, { "epoch": 43.587755102040816, "grad_norm": 30.219423294067383, "learning_rate": 2.8222448979591836e-05, "loss": 1.3109, "step": 10679 }, { "epoch": 43.59183673469388, "grad_norm": 35.905765533447266, "learning_rate": 2.8220408163265306e-05, "loss": 1.311, "step": 10680 }, { "epoch": 43.59591836734694, "grad_norm": 24.655805587768555, "learning_rate": 2.821836734693878e-05, "loss": 1.3466, "step": 10681 }, { "epoch": 43.6, "grad_norm": 16.62350845336914, "learning_rate": 2.8216326530612247e-05, "loss": 2.5585, "step": 10682 }, { "epoch": 43.60408163265306, "grad_norm": 19.538665771484375, "learning_rate": 2.8214285714285714e-05, "loss": 2.8753, "step": 10683 }, { "epoch": 43.608163265306125, "grad_norm": 45.263614654541016, "learning_rate": 2.8212244897959185e-05, "loss": 1.2828, "step": 10684 }, { "epoch": 43.61224489795919, "grad_norm": 34.64401626586914, "learning_rate": 2.8210204081632652e-05, "loss": 1.7072, "step": 10685 }, { "epoch": 43.61632653061225, "grad_norm": 30.876832962036133, "learning_rate": 2.8208163265306126e-05, "loss": 1.4789, "step": 10686 }, { "epoch": 43.6204081632653, "grad_norm": 39.77755355834961, "learning_rate": 2.8206122448979593e-05, "loss": 1.5412, "step": 10687 }, { "epoch": 43.624489795918365, "grad_norm": 24.659591674804688, "learning_rate": 2.8204081632653063e-05, "loss": 2.3876, "step": 10688 }, { "epoch": 43.628571428571426, "grad_norm": 13.88927173614502, "learning_rate": 2.820204081632653e-05, "loss": 2.3867, "step": 10689 }, { "epoch": 43.63265306122449, "grad_norm": 44.01846694946289, "learning_rate": 2.8199999999999998e-05, "loss": 1.1774, "step": 10690 }, { "epoch": 43.63673469387755, "grad_norm": 26.07117462158203, "learning_rate": 2.819795918367347e-05, "loss": 1.2688, "step": 10691 }, { "epoch": 43.64081632653061, "grad_norm": 32.64539337158203, "learning_rate": 2.8195918367346942e-05, "loss": 1.787, "step": 10692 }, { "epoch": 43.64489795918367, "grad_norm": 23.404813766479492, "learning_rate": 2.819387755102041e-05, "loss": 2.1967, "step": 10693 }, { "epoch": 43.648979591836735, "grad_norm": 20.785720825195312, "learning_rate": 2.8191836734693876e-05, "loss": 2.359, "step": 10694 }, { "epoch": 43.6530612244898, "grad_norm": 39.17352294921875, "learning_rate": 2.818979591836735e-05, "loss": 1.2584, "step": 10695 }, { "epoch": 43.65714285714286, "grad_norm": 25.970046997070312, "learning_rate": 2.818775510204082e-05, "loss": 2.2001, "step": 10696 }, { "epoch": 43.66122448979592, "grad_norm": 12.016322135925293, "learning_rate": 2.8185714285714288e-05, "loss": 3.2786, "step": 10697 }, { "epoch": 43.66530612244898, "grad_norm": 21.053693771362305, "learning_rate": 2.8183673469387755e-05, "loss": 2.4637, "step": 10698 }, { "epoch": 43.66938775510204, "grad_norm": 18.806901931762695, "learning_rate": 2.8181632653061226e-05, "loss": 2.7823, "step": 10699 }, { "epoch": 43.673469387755105, "grad_norm": 27.138559341430664, "learning_rate": 2.81795918367347e-05, "loss": 2.0315, "step": 10700 }, { "epoch": 43.67755102040816, "grad_norm": 30.82223129272461, "learning_rate": 2.8177551020408167e-05, "loss": 1.3353, "step": 10701 }, { "epoch": 43.68163265306122, "grad_norm": 19.59502601623535, "learning_rate": 2.8175510204081634e-05, "loss": 2.606, "step": 10702 }, { "epoch": 43.68571428571428, "grad_norm": 11.687819480895996, "learning_rate": 2.8173469387755104e-05, "loss": 3.0663, "step": 10703 }, { "epoch": 43.689795918367345, "grad_norm": 29.713287353515625, "learning_rate": 2.817142857142857e-05, "loss": 1.8261, "step": 10704 }, { "epoch": 43.69387755102041, "grad_norm": 21.874353408813477, "learning_rate": 2.8169387755102045e-05, "loss": 2.461, "step": 10705 }, { "epoch": 43.69795918367347, "grad_norm": 26.453353881835938, "learning_rate": 2.8167346938775512e-05, "loss": 2.3251, "step": 10706 }, { "epoch": 43.70204081632653, "grad_norm": 35.77001953125, "learning_rate": 2.816530612244898e-05, "loss": 1.3702, "step": 10707 }, { "epoch": 43.70612244897959, "grad_norm": 29.806697845458984, "learning_rate": 2.816326530612245e-05, "loss": 1.0552, "step": 10708 }, { "epoch": 43.710204081632654, "grad_norm": 25.368192672729492, "learning_rate": 2.8161224489795917e-05, "loss": 1.7665, "step": 10709 }, { "epoch": 43.714285714285715, "grad_norm": 24.125137329101562, "learning_rate": 2.815918367346939e-05, "loss": 1.9883, "step": 10710 }, { "epoch": 43.71836734693878, "grad_norm": 23.31096649169922, "learning_rate": 2.8157142857142858e-05, "loss": 2.6716, "step": 10711 }, { "epoch": 43.72244897959184, "grad_norm": 20.278596878051758, "learning_rate": 2.815510204081633e-05, "loss": 2.5122, "step": 10712 }, { "epoch": 43.7265306122449, "grad_norm": 27.873271942138672, "learning_rate": 2.8153061224489796e-05, "loss": 1.774, "step": 10713 }, { "epoch": 43.73061224489796, "grad_norm": 24.39888572692871, "learning_rate": 2.8151020408163263e-05, "loss": 2.0644, "step": 10714 }, { "epoch": 43.734693877551024, "grad_norm": 10.070240020751953, "learning_rate": 2.8148979591836737e-05, "loss": 3.4668, "step": 10715 }, { "epoch": 43.73877551020408, "grad_norm": 12.934663772583008, "learning_rate": 2.8146938775510207e-05, "loss": 2.9141, "step": 10716 }, { "epoch": 43.74285714285714, "grad_norm": 28.09228515625, "learning_rate": 2.8144897959183674e-05, "loss": 1.7605, "step": 10717 }, { "epoch": 43.7469387755102, "grad_norm": 38.45356369018555, "learning_rate": 2.814285714285714e-05, "loss": 1.9932, "step": 10718 }, { "epoch": 43.751020408163264, "grad_norm": 23.079259872436523, "learning_rate": 2.8140816326530612e-05, "loss": 2.1706, "step": 10719 }, { "epoch": 43.755102040816325, "grad_norm": 34.17673110961914, "learning_rate": 2.8138775510204086e-05, "loss": 1.7975, "step": 10720 }, { "epoch": 43.75918367346939, "grad_norm": 36.66066360473633, "learning_rate": 2.8136734693877553e-05, "loss": 0.5713, "step": 10721 }, { "epoch": 43.76326530612245, "grad_norm": 19.01149559020996, "learning_rate": 2.813469387755102e-05, "loss": 2.5368, "step": 10722 }, { "epoch": 43.76734693877551, "grad_norm": 13.797789573669434, "learning_rate": 2.813265306122449e-05, "loss": 2.9809, "step": 10723 }, { "epoch": 43.77142857142857, "grad_norm": 30.637218475341797, "learning_rate": 2.8130612244897958e-05, "loss": 1.8874, "step": 10724 }, { "epoch": 43.775510204081634, "grad_norm": 37.888126373291016, "learning_rate": 2.8128571428571432e-05, "loss": 0.8424, "step": 10725 }, { "epoch": 43.779591836734696, "grad_norm": 27.54792594909668, "learning_rate": 2.81265306122449e-05, "loss": 2.1314, "step": 10726 }, { "epoch": 43.78367346938776, "grad_norm": 19.680233001708984, "learning_rate": 2.812448979591837e-05, "loss": 2.6896, "step": 10727 }, { "epoch": 43.78775510204082, "grad_norm": 34.70523452758789, "learning_rate": 2.8122448979591837e-05, "loss": 0.8655, "step": 10728 }, { "epoch": 43.79183673469388, "grad_norm": 26.5794677734375, "learning_rate": 2.8120408163265304e-05, "loss": 1.9078, "step": 10729 }, { "epoch": 43.795918367346935, "grad_norm": 33.10405349731445, "learning_rate": 2.8118367346938778e-05, "loss": 0.7283, "step": 10730 }, { "epoch": 43.8, "grad_norm": 30.761423110961914, "learning_rate": 2.8116326530612248e-05, "loss": 1.6132, "step": 10731 }, { "epoch": 43.80408163265306, "grad_norm": 26.16842269897461, "learning_rate": 2.8114285714285715e-05, "loss": 1.9267, "step": 10732 }, { "epoch": 43.80816326530612, "grad_norm": 45.013309478759766, "learning_rate": 2.8112244897959182e-05, "loss": 1.1837, "step": 10733 }, { "epoch": 43.81224489795918, "grad_norm": 25.950767517089844, "learning_rate": 2.8110204081632653e-05, "loss": 1.9409, "step": 10734 }, { "epoch": 43.816326530612244, "grad_norm": 46.30533981323242, "learning_rate": 2.8108163265306127e-05, "loss": 0.8632, "step": 10735 }, { "epoch": 43.820408163265306, "grad_norm": 22.260276794433594, "learning_rate": 2.8106122448979594e-05, "loss": 2.2173, "step": 10736 }, { "epoch": 43.82448979591837, "grad_norm": 15.611761093139648, "learning_rate": 2.810408163265306e-05, "loss": 2.8881, "step": 10737 }, { "epoch": 43.82857142857143, "grad_norm": 16.232702255249023, "learning_rate": 2.810204081632653e-05, "loss": 2.618, "step": 10738 }, { "epoch": 43.83265306122449, "grad_norm": 22.250505447387695, "learning_rate": 2.8100000000000005e-05, "loss": 2.5163, "step": 10739 }, { "epoch": 43.83673469387755, "grad_norm": 22.297183990478516, "learning_rate": 2.8097959183673473e-05, "loss": 2.2012, "step": 10740 }, { "epoch": 43.840816326530614, "grad_norm": 16.081424713134766, "learning_rate": 2.809591836734694e-05, "loss": 2.8935, "step": 10741 }, { "epoch": 43.844897959183676, "grad_norm": 30.337871551513672, "learning_rate": 2.809387755102041e-05, "loss": 1.9036, "step": 10742 }, { "epoch": 43.84897959183674, "grad_norm": 18.359846115112305, "learning_rate": 2.8091836734693877e-05, "loss": 2.3716, "step": 10743 }, { "epoch": 43.85306122448979, "grad_norm": 25.833202362060547, "learning_rate": 2.808979591836735e-05, "loss": 2.1194, "step": 10744 }, { "epoch": 43.857142857142854, "grad_norm": 18.411733627319336, "learning_rate": 2.808775510204082e-05, "loss": 2.6428, "step": 10745 }, { "epoch": 43.861224489795916, "grad_norm": 35.48247528076172, "learning_rate": 2.808571428571429e-05, "loss": 0.5815, "step": 10746 }, { "epoch": 43.86530612244898, "grad_norm": 43.709617614746094, "learning_rate": 2.8083673469387756e-05, "loss": 1.3766, "step": 10747 }, { "epoch": 43.86938775510204, "grad_norm": 13.617627143859863, "learning_rate": 2.8081632653061223e-05, "loss": 2.7003, "step": 10748 }, { "epoch": 43.8734693877551, "grad_norm": 20.085922241210938, "learning_rate": 2.8079591836734697e-05, "loss": 2.28, "step": 10749 }, { "epoch": 43.87755102040816, "grad_norm": 43.33565139770508, "learning_rate": 2.8077551020408167e-05, "loss": 0.8095, "step": 10750 }, { "epoch": 43.881632653061224, "grad_norm": 18.850515365600586, "learning_rate": 2.8075510204081635e-05, "loss": 2.5481, "step": 10751 }, { "epoch": 43.885714285714286, "grad_norm": 34.976783752441406, "learning_rate": 2.8073469387755102e-05, "loss": 1.2084, "step": 10752 }, { "epoch": 43.88979591836735, "grad_norm": 21.56700897216797, "learning_rate": 2.8071428571428572e-05, "loss": 2.1004, "step": 10753 }, { "epoch": 43.89387755102041, "grad_norm": 25.80221939086914, "learning_rate": 2.8069387755102046e-05, "loss": 2.0705, "step": 10754 }, { "epoch": 43.89795918367347, "grad_norm": 31.910293579101562, "learning_rate": 2.8067346938775513e-05, "loss": 2.1651, "step": 10755 }, { "epoch": 43.90204081632653, "grad_norm": 25.94673728942871, "learning_rate": 2.806530612244898e-05, "loss": 2.4696, "step": 10756 }, { "epoch": 43.906122448979595, "grad_norm": 18.662887573242188, "learning_rate": 2.806326530612245e-05, "loss": 2.5766, "step": 10757 }, { "epoch": 43.910204081632656, "grad_norm": 14.007421493530273, "learning_rate": 2.8061224489795918e-05, "loss": 2.9926, "step": 10758 }, { "epoch": 43.91428571428571, "grad_norm": 15.310711860656738, "learning_rate": 2.8059183673469392e-05, "loss": 2.8767, "step": 10759 }, { "epoch": 43.91836734693877, "grad_norm": 36.39554214477539, "learning_rate": 2.805714285714286e-05, "loss": 1.4005, "step": 10760 }, { "epoch": 43.922448979591834, "grad_norm": 23.45915985107422, "learning_rate": 2.8055102040816326e-05, "loss": 1.6691, "step": 10761 }, { "epoch": 43.926530612244896, "grad_norm": 26.96693229675293, "learning_rate": 2.8053061224489797e-05, "loss": 1.398, "step": 10762 }, { "epoch": 43.93061224489796, "grad_norm": 14.826343536376953, "learning_rate": 2.8051020408163264e-05, "loss": 2.6548, "step": 10763 }, { "epoch": 43.93469387755102, "grad_norm": 21.43300437927246, "learning_rate": 2.8048979591836738e-05, "loss": 2.2028, "step": 10764 }, { "epoch": 43.93877551020408, "grad_norm": 34.5645866394043, "learning_rate": 2.8046938775510205e-05, "loss": 1.4513, "step": 10765 }, { "epoch": 43.94285714285714, "grad_norm": 33.370635986328125, "learning_rate": 2.8044897959183675e-05, "loss": 2.3421, "step": 10766 }, { "epoch": 43.946938775510205, "grad_norm": 22.75652313232422, "learning_rate": 2.8042857142857143e-05, "loss": 1.9895, "step": 10767 }, { "epoch": 43.95102040816327, "grad_norm": 35.988250732421875, "learning_rate": 2.804081632653061e-05, "loss": 1.2351, "step": 10768 }, { "epoch": 43.95510204081633, "grad_norm": 18.506637573242188, "learning_rate": 2.8038775510204084e-05, "loss": 2.4292, "step": 10769 }, { "epoch": 43.95918367346939, "grad_norm": 34.55293273925781, "learning_rate": 2.8036734693877554e-05, "loss": 2.3096, "step": 10770 }, { "epoch": 43.96326530612245, "grad_norm": 20.22757339477539, "learning_rate": 2.803469387755102e-05, "loss": 1.9979, "step": 10771 }, { "epoch": 43.96734693877551, "grad_norm": 52.26325607299805, "learning_rate": 2.8032653061224488e-05, "loss": 1.6726, "step": 10772 }, { "epoch": 43.97142857142857, "grad_norm": 16.52012062072754, "learning_rate": 2.803061224489796e-05, "loss": 2.6189, "step": 10773 }, { "epoch": 43.97551020408163, "grad_norm": 20.898096084594727, "learning_rate": 2.8028571428571433e-05, "loss": 2.3294, "step": 10774 }, { "epoch": 43.97959183673469, "grad_norm": 12.016936302185059, "learning_rate": 2.80265306122449e-05, "loss": 2.8982, "step": 10775 }, { "epoch": 43.98367346938775, "grad_norm": 33.50780487060547, "learning_rate": 2.8024489795918367e-05, "loss": 2.1261, "step": 10776 }, { "epoch": 43.987755102040815, "grad_norm": 38.00664138793945, "learning_rate": 2.8022448979591837e-05, "loss": 1.3001, "step": 10777 }, { "epoch": 43.99183673469388, "grad_norm": 28.39544677734375, "learning_rate": 2.8020408163265305e-05, "loss": 1.6704, "step": 10778 }, { "epoch": 43.99591836734694, "grad_norm": 24.458026885986328, "learning_rate": 2.801836734693878e-05, "loss": 2.0494, "step": 10779 }, { "epoch": 44.0, "grad_norm": 13.084766387939453, "learning_rate": 2.8016326530612246e-05, "loss": 2.9361, "step": 10780 }, { "epoch": 44.00408163265306, "grad_norm": 29.0417423248291, "learning_rate": 2.8014285714285716e-05, "loss": 2.1976, "step": 10781 }, { "epoch": 44.00816326530612, "grad_norm": 16.995702743530273, "learning_rate": 2.8012244897959183e-05, "loss": 2.4951, "step": 10782 }, { "epoch": 44.012244897959185, "grad_norm": 32.17266082763672, "learning_rate": 2.801020408163265e-05, "loss": 1.0744, "step": 10783 }, { "epoch": 44.01632653061225, "grad_norm": 32.12179946899414, "learning_rate": 2.8008163265306124e-05, "loss": 0.9466, "step": 10784 }, { "epoch": 44.02040816326531, "grad_norm": 24.42783546447754, "learning_rate": 2.8006122448979595e-05, "loss": 1.1765, "step": 10785 }, { "epoch": 44.02448979591837, "grad_norm": 23.57517433166504, "learning_rate": 2.8004081632653062e-05, "loss": 2.5343, "step": 10786 }, { "epoch": 44.02857142857143, "grad_norm": 41.53131103515625, "learning_rate": 2.800204081632653e-05, "loss": 2.1773, "step": 10787 }, { "epoch": 44.03265306122449, "grad_norm": 30.09495735168457, "learning_rate": 2.8000000000000003e-05, "loss": 1.6966, "step": 10788 }, { "epoch": 44.03673469387755, "grad_norm": 18.58335304260254, "learning_rate": 2.7997959183673473e-05, "loss": 2.6134, "step": 10789 }, { "epoch": 44.04081632653061, "grad_norm": 21.683584213256836, "learning_rate": 2.799591836734694e-05, "loss": 2.3186, "step": 10790 }, { "epoch": 44.04489795918367, "grad_norm": 37.31428909301758, "learning_rate": 2.7993877551020408e-05, "loss": 1.024, "step": 10791 }, { "epoch": 44.04897959183673, "grad_norm": 25.918041229248047, "learning_rate": 2.7991836734693878e-05, "loss": 1.6734, "step": 10792 }, { "epoch": 44.053061224489795, "grad_norm": 28.587141036987305, "learning_rate": 2.7989795918367352e-05, "loss": 1.4798, "step": 10793 }, { "epoch": 44.05714285714286, "grad_norm": 14.710546493530273, "learning_rate": 2.798775510204082e-05, "loss": 2.7427, "step": 10794 }, { "epoch": 44.06122448979592, "grad_norm": 36.21510314941406, "learning_rate": 2.7985714285714286e-05, "loss": 1.6847, "step": 10795 }, { "epoch": 44.06530612244898, "grad_norm": 39.99113845825195, "learning_rate": 2.7983673469387757e-05, "loss": 1.3226, "step": 10796 }, { "epoch": 44.06938775510204, "grad_norm": 28.553720474243164, "learning_rate": 2.7981632653061224e-05, "loss": 1.2189, "step": 10797 }, { "epoch": 44.073469387755104, "grad_norm": 22.819149017333984, "learning_rate": 2.7979591836734698e-05, "loss": 1.8619, "step": 10798 }, { "epoch": 44.077551020408166, "grad_norm": 13.467145919799805, "learning_rate": 2.7977551020408165e-05, "loss": 2.7675, "step": 10799 }, { "epoch": 44.08163265306123, "grad_norm": 30.10553550720215, "learning_rate": 2.7975510204081636e-05, "loss": 2.0983, "step": 10800 }, { "epoch": 44.08571428571429, "grad_norm": 12.642797470092773, "learning_rate": 2.7973469387755103e-05, "loss": 3.3113, "step": 10801 }, { "epoch": 44.089795918367344, "grad_norm": 34.1699104309082, "learning_rate": 2.797142857142857e-05, "loss": 0.693, "step": 10802 }, { "epoch": 44.093877551020405, "grad_norm": 28.97601318359375, "learning_rate": 2.7969387755102044e-05, "loss": 1.3446, "step": 10803 }, { "epoch": 44.09795918367347, "grad_norm": 25.493072509765625, "learning_rate": 2.7967346938775514e-05, "loss": 1.7125, "step": 10804 }, { "epoch": 44.10204081632653, "grad_norm": 26.799728393554688, "learning_rate": 2.796530612244898e-05, "loss": 1.6901, "step": 10805 }, { "epoch": 44.10612244897959, "grad_norm": 14.800848007202148, "learning_rate": 2.796326530612245e-05, "loss": 2.3223, "step": 10806 }, { "epoch": 44.11020408163265, "grad_norm": 37.04591369628906, "learning_rate": 2.796122448979592e-05, "loss": 0.4408, "step": 10807 }, { "epoch": 44.114285714285714, "grad_norm": 21.88445472717285, "learning_rate": 2.7959183673469393e-05, "loss": 1.8887, "step": 10808 }, { "epoch": 44.118367346938776, "grad_norm": 39.706886291503906, "learning_rate": 2.795714285714286e-05, "loss": 0.6522, "step": 10809 }, { "epoch": 44.12244897959184, "grad_norm": 33.64678955078125, "learning_rate": 2.7955102040816327e-05, "loss": 0.5172, "step": 10810 }, { "epoch": 44.1265306122449, "grad_norm": 18.672035217285156, "learning_rate": 2.7953061224489798e-05, "loss": 2.5634, "step": 10811 }, { "epoch": 44.13061224489796, "grad_norm": 37.2133903503418, "learning_rate": 2.7951020408163265e-05, "loss": 1.2432, "step": 10812 }, { "epoch": 44.13469387755102, "grad_norm": 20.577430725097656, "learning_rate": 2.794897959183674e-05, "loss": 2.6078, "step": 10813 }, { "epoch": 44.138775510204084, "grad_norm": 30.889455795288086, "learning_rate": 2.7946938775510206e-05, "loss": 1.6092, "step": 10814 }, { "epoch": 44.142857142857146, "grad_norm": 42.698333740234375, "learning_rate": 2.7944897959183673e-05, "loss": 1.1518, "step": 10815 }, { "epoch": 44.14693877551021, "grad_norm": 29.178503036499023, "learning_rate": 2.7942857142857143e-05, "loss": 1.6418, "step": 10816 }, { "epoch": 44.15102040816326, "grad_norm": 24.308425903320312, "learning_rate": 2.794081632653061e-05, "loss": 1.5072, "step": 10817 }, { "epoch": 44.155102040816324, "grad_norm": 39.64636993408203, "learning_rate": 2.7938775510204084e-05, "loss": 1.3291, "step": 10818 }, { "epoch": 44.159183673469386, "grad_norm": 36.775089263916016, "learning_rate": 2.793673469387755e-05, "loss": 2.1184, "step": 10819 }, { "epoch": 44.16326530612245, "grad_norm": 25.54303550720215, "learning_rate": 2.7934693877551022e-05, "loss": 2.0081, "step": 10820 }, { "epoch": 44.16734693877551, "grad_norm": 31.407819747924805, "learning_rate": 2.793265306122449e-05, "loss": 2.1334, "step": 10821 }, { "epoch": 44.17142857142857, "grad_norm": 21.21913719177246, "learning_rate": 2.7930612244897956e-05, "loss": 2.2876, "step": 10822 }, { "epoch": 44.17551020408163, "grad_norm": 42.31036376953125, "learning_rate": 2.792857142857143e-05, "loss": 1.0969, "step": 10823 }, { "epoch": 44.179591836734694, "grad_norm": 28.481884002685547, "learning_rate": 2.79265306122449e-05, "loss": 1.5849, "step": 10824 }, { "epoch": 44.183673469387756, "grad_norm": 49.20100402832031, "learning_rate": 2.7924489795918368e-05, "loss": 1.9495, "step": 10825 }, { "epoch": 44.18775510204082, "grad_norm": 23.980308532714844, "learning_rate": 2.7922448979591835e-05, "loss": 1.9271, "step": 10826 }, { "epoch": 44.19183673469388, "grad_norm": 22.554954528808594, "learning_rate": 2.7920408163265306e-05, "loss": 2.3501, "step": 10827 }, { "epoch": 44.19591836734694, "grad_norm": 34.19049072265625, "learning_rate": 2.791836734693878e-05, "loss": 1.9395, "step": 10828 }, { "epoch": 44.2, "grad_norm": 21.725831985473633, "learning_rate": 2.7916326530612247e-05, "loss": 2.3376, "step": 10829 }, { "epoch": 44.204081632653065, "grad_norm": 24.25489044189453, "learning_rate": 2.7914285714285714e-05, "loss": 1.8718, "step": 10830 }, { "epoch": 44.20816326530612, "grad_norm": 34.35353469848633, "learning_rate": 2.7912244897959184e-05, "loss": 1.2788, "step": 10831 }, { "epoch": 44.21224489795918, "grad_norm": 11.578814506530762, "learning_rate": 2.7910204081632658e-05, "loss": 2.9883, "step": 10832 }, { "epoch": 44.21632653061224, "grad_norm": 41.797515869140625, "learning_rate": 2.7908163265306125e-05, "loss": 0.5477, "step": 10833 }, { "epoch": 44.220408163265304, "grad_norm": 18.78895378112793, "learning_rate": 2.7906122448979592e-05, "loss": 2.4064, "step": 10834 }, { "epoch": 44.224489795918366, "grad_norm": 35.773380279541016, "learning_rate": 2.7904081632653063e-05, "loss": 1.3588, "step": 10835 }, { "epoch": 44.22857142857143, "grad_norm": 23.4143009185791, "learning_rate": 2.790204081632653e-05, "loss": 1.6959, "step": 10836 }, { "epoch": 44.23265306122449, "grad_norm": 23.122350692749023, "learning_rate": 2.7900000000000004e-05, "loss": 1.8623, "step": 10837 }, { "epoch": 44.23673469387755, "grad_norm": 31.21453857421875, "learning_rate": 2.789795918367347e-05, "loss": 1.7499, "step": 10838 }, { "epoch": 44.24081632653061, "grad_norm": 28.07850456237793, "learning_rate": 2.789591836734694e-05, "loss": 2.2157, "step": 10839 }, { "epoch": 44.244897959183675, "grad_norm": 19.141164779663086, "learning_rate": 2.789387755102041e-05, "loss": 2.596, "step": 10840 }, { "epoch": 44.248979591836736, "grad_norm": 17.83423614501953, "learning_rate": 2.7891836734693876e-05, "loss": 2.5815, "step": 10841 }, { "epoch": 44.2530612244898, "grad_norm": 21.171539306640625, "learning_rate": 2.788979591836735e-05, "loss": 2.5945, "step": 10842 }, { "epoch": 44.25714285714286, "grad_norm": 18.08321189880371, "learning_rate": 2.788775510204082e-05, "loss": 2.5446, "step": 10843 }, { "epoch": 44.26122448979592, "grad_norm": 31.16669464111328, "learning_rate": 2.7885714285714287e-05, "loss": 0.7591, "step": 10844 }, { "epoch": 44.265306122448976, "grad_norm": 26.744525909423828, "learning_rate": 2.7883673469387754e-05, "loss": 1.304, "step": 10845 }, { "epoch": 44.26938775510204, "grad_norm": 25.991836547851562, "learning_rate": 2.7881632653061225e-05, "loss": 2.0031, "step": 10846 }, { "epoch": 44.2734693877551, "grad_norm": 35.22581100463867, "learning_rate": 2.78795918367347e-05, "loss": 0.5917, "step": 10847 }, { "epoch": 44.27755102040816, "grad_norm": 14.6511869430542, "learning_rate": 2.7877551020408166e-05, "loss": 2.857, "step": 10848 }, { "epoch": 44.28163265306122, "grad_norm": 17.183515548706055, "learning_rate": 2.7875510204081633e-05, "loss": 2.5048, "step": 10849 }, { "epoch": 44.285714285714285, "grad_norm": 47.376502990722656, "learning_rate": 2.7873469387755104e-05, "loss": 1.5008, "step": 10850 }, { "epoch": 44.289795918367346, "grad_norm": 19.121721267700195, "learning_rate": 2.787142857142857e-05, "loss": 2.4793, "step": 10851 }, { "epoch": 44.29387755102041, "grad_norm": 29.993091583251953, "learning_rate": 2.7869387755102045e-05, "loss": 1.6195, "step": 10852 }, { "epoch": 44.29795918367347, "grad_norm": 27.724365234375, "learning_rate": 2.7867346938775512e-05, "loss": 1.8865, "step": 10853 }, { "epoch": 44.30204081632653, "grad_norm": 19.07225227355957, "learning_rate": 2.7865306122448982e-05, "loss": 2.3059, "step": 10854 }, { "epoch": 44.30612244897959, "grad_norm": 26.453094482421875, "learning_rate": 2.786326530612245e-05, "loss": 1.8521, "step": 10855 }, { "epoch": 44.310204081632655, "grad_norm": 39.38302993774414, "learning_rate": 2.7861224489795917e-05, "loss": 1.2653, "step": 10856 }, { "epoch": 44.31428571428572, "grad_norm": 24.55344009399414, "learning_rate": 2.785918367346939e-05, "loss": 1.9978, "step": 10857 }, { "epoch": 44.31836734693878, "grad_norm": 25.46135902404785, "learning_rate": 2.785714285714286e-05, "loss": 2.1506, "step": 10858 }, { "epoch": 44.32244897959184, "grad_norm": 20.371551513671875, "learning_rate": 2.7855102040816328e-05, "loss": 1.8969, "step": 10859 }, { "epoch": 44.326530612244895, "grad_norm": 34.894866943359375, "learning_rate": 2.7853061224489795e-05, "loss": 1.7348, "step": 10860 }, { "epoch": 44.33061224489796, "grad_norm": 26.735748291015625, "learning_rate": 2.7851020408163266e-05, "loss": 2.2195, "step": 10861 }, { "epoch": 44.33469387755102, "grad_norm": 28.942567825317383, "learning_rate": 2.784897959183674e-05, "loss": 2.2092, "step": 10862 }, { "epoch": 44.33877551020408, "grad_norm": 38.500064849853516, "learning_rate": 2.7846938775510207e-05, "loss": 1.5, "step": 10863 }, { "epoch": 44.34285714285714, "grad_norm": 19.198259353637695, "learning_rate": 2.7844897959183674e-05, "loss": 2.6828, "step": 10864 }, { "epoch": 44.3469387755102, "grad_norm": 17.49216079711914, "learning_rate": 2.7842857142857144e-05, "loss": 2.1748, "step": 10865 }, { "epoch": 44.351020408163265, "grad_norm": 20.462583541870117, "learning_rate": 2.784081632653061e-05, "loss": 2.1886, "step": 10866 }, { "epoch": 44.35510204081633, "grad_norm": 44.53276824951172, "learning_rate": 2.7838775510204085e-05, "loss": 0.4926, "step": 10867 }, { "epoch": 44.35918367346939, "grad_norm": 24.777790069580078, "learning_rate": 2.7836734693877553e-05, "loss": 2.2605, "step": 10868 }, { "epoch": 44.36326530612245, "grad_norm": 18.013648986816406, "learning_rate": 2.783469387755102e-05, "loss": 2.21, "step": 10869 }, { "epoch": 44.36734693877551, "grad_norm": 16.9941463470459, "learning_rate": 2.783265306122449e-05, "loss": 2.4271, "step": 10870 }, { "epoch": 44.371428571428574, "grad_norm": 15.2575101852417, "learning_rate": 2.7830612244897957e-05, "loss": 2.8435, "step": 10871 }, { "epoch": 44.375510204081635, "grad_norm": 59.93049240112305, "learning_rate": 2.782857142857143e-05, "loss": 0.6163, "step": 10872 }, { "epoch": 44.3795918367347, "grad_norm": 26.914426803588867, "learning_rate": 2.78265306122449e-05, "loss": 1.7783, "step": 10873 }, { "epoch": 44.38367346938775, "grad_norm": 34.658695220947266, "learning_rate": 2.782448979591837e-05, "loss": 1.1054, "step": 10874 }, { "epoch": 44.38775510204081, "grad_norm": 24.620895385742188, "learning_rate": 2.7822448979591836e-05, "loss": 2.0286, "step": 10875 }, { "epoch": 44.391836734693875, "grad_norm": 23.201101303100586, "learning_rate": 2.782040816326531e-05, "loss": 2.3884, "step": 10876 }, { "epoch": 44.39591836734694, "grad_norm": 20.150392532348633, "learning_rate": 2.7818367346938777e-05, "loss": 2.3944, "step": 10877 }, { "epoch": 44.4, "grad_norm": 13.201904296875, "learning_rate": 2.7816326530612248e-05, "loss": 2.9695, "step": 10878 }, { "epoch": 44.40408163265306, "grad_norm": 30.051204681396484, "learning_rate": 2.7814285714285715e-05, "loss": 2.0904, "step": 10879 }, { "epoch": 44.40816326530612, "grad_norm": 20.539508819580078, "learning_rate": 2.7812244897959182e-05, "loss": 1.8875, "step": 10880 }, { "epoch": 44.412244897959184, "grad_norm": 19.74814796447754, "learning_rate": 2.7810204081632656e-05, "loss": 2.3381, "step": 10881 }, { "epoch": 44.416326530612245, "grad_norm": 20.750614166259766, "learning_rate": 2.7808163265306126e-05, "loss": 2.4253, "step": 10882 }, { "epoch": 44.42040816326531, "grad_norm": 22.915740966796875, "learning_rate": 2.7806122448979593e-05, "loss": 1.8071, "step": 10883 }, { "epoch": 44.42448979591837, "grad_norm": 32.94709396362305, "learning_rate": 2.780408163265306e-05, "loss": 1.2387, "step": 10884 }, { "epoch": 44.42857142857143, "grad_norm": 20.949756622314453, "learning_rate": 2.780204081632653e-05, "loss": 2.6042, "step": 10885 }, { "epoch": 44.43265306122449, "grad_norm": 44.12384796142578, "learning_rate": 2.7800000000000005e-05, "loss": 1.4867, "step": 10886 }, { "epoch": 44.436734693877554, "grad_norm": 23.42473793029785, "learning_rate": 2.7797959183673472e-05, "loss": 2.2503, "step": 10887 }, { "epoch": 44.440816326530616, "grad_norm": 17.636737823486328, "learning_rate": 2.779591836734694e-05, "loss": 2.5807, "step": 10888 }, { "epoch": 44.44489795918367, "grad_norm": 24.53801155090332, "learning_rate": 2.779387755102041e-05, "loss": 2.2469, "step": 10889 }, { "epoch": 44.44897959183673, "grad_norm": 7.875754356384277, "learning_rate": 2.7791836734693877e-05, "loss": 3.4197, "step": 10890 }, { "epoch": 44.453061224489794, "grad_norm": 18.45414924621582, "learning_rate": 2.778979591836735e-05, "loss": 2.3348, "step": 10891 }, { "epoch": 44.457142857142856, "grad_norm": 17.107999801635742, "learning_rate": 2.7787755102040818e-05, "loss": 2.9203, "step": 10892 }, { "epoch": 44.46122448979592, "grad_norm": 37.198631286621094, "learning_rate": 2.778571428571429e-05, "loss": 1.2089, "step": 10893 }, { "epoch": 44.46530612244898, "grad_norm": 17.425811767578125, "learning_rate": 2.7783673469387755e-05, "loss": 2.4466, "step": 10894 }, { "epoch": 44.46938775510204, "grad_norm": 14.927389144897461, "learning_rate": 2.7781632653061223e-05, "loss": 2.8201, "step": 10895 }, { "epoch": 44.4734693877551, "grad_norm": 25.155345916748047, "learning_rate": 2.7779591836734696e-05, "loss": 2.3734, "step": 10896 }, { "epoch": 44.477551020408164, "grad_norm": 42.253456115722656, "learning_rate": 2.7777551020408167e-05, "loss": 1.3583, "step": 10897 }, { "epoch": 44.481632653061226, "grad_norm": 28.201078414916992, "learning_rate": 2.7775510204081634e-05, "loss": 1.2476, "step": 10898 }, { "epoch": 44.48571428571429, "grad_norm": 25.3084774017334, "learning_rate": 2.77734693877551e-05, "loss": 1.5339, "step": 10899 }, { "epoch": 44.48979591836735, "grad_norm": 16.729339599609375, "learning_rate": 2.7771428571428572e-05, "loss": 2.7308, "step": 10900 }, { "epoch": 44.49387755102041, "grad_norm": 18.076147079467773, "learning_rate": 2.7769387755102046e-05, "loss": 2.6008, "step": 10901 }, { "epoch": 44.49795918367347, "grad_norm": 27.561246871948242, "learning_rate": 2.7767346938775513e-05, "loss": 2.0561, "step": 10902 }, { "epoch": 44.50204081632653, "grad_norm": 30.033082962036133, "learning_rate": 2.776530612244898e-05, "loss": 1.6722, "step": 10903 }, { "epoch": 44.50612244897959, "grad_norm": 18.187807083129883, "learning_rate": 2.776326530612245e-05, "loss": 2.4199, "step": 10904 }, { "epoch": 44.51020408163265, "grad_norm": 24.71413803100586, "learning_rate": 2.7761224489795918e-05, "loss": 2.0869, "step": 10905 }, { "epoch": 44.51428571428571, "grad_norm": 29.3402156829834, "learning_rate": 2.775918367346939e-05, "loss": 0.7388, "step": 10906 }, { "epoch": 44.518367346938774, "grad_norm": 13.746403694152832, "learning_rate": 2.775714285714286e-05, "loss": 2.9997, "step": 10907 }, { "epoch": 44.522448979591836, "grad_norm": 31.68462371826172, "learning_rate": 2.775510204081633e-05, "loss": 1.5229, "step": 10908 }, { "epoch": 44.5265306122449, "grad_norm": 29.752294540405273, "learning_rate": 2.7753061224489796e-05, "loss": 1.3086, "step": 10909 }, { "epoch": 44.53061224489796, "grad_norm": 29.044429779052734, "learning_rate": 2.7751020408163263e-05, "loss": 2.1552, "step": 10910 }, { "epoch": 44.53469387755102, "grad_norm": 45.87163543701172, "learning_rate": 2.7748979591836737e-05, "loss": 1.4445, "step": 10911 }, { "epoch": 44.53877551020408, "grad_norm": 33.13603591918945, "learning_rate": 2.7746938775510208e-05, "loss": 2.0368, "step": 10912 }, { "epoch": 44.542857142857144, "grad_norm": 18.403722763061523, "learning_rate": 2.7744897959183675e-05, "loss": 2.4997, "step": 10913 }, { "epoch": 44.546938775510206, "grad_norm": 15.616583824157715, "learning_rate": 2.7742857142857142e-05, "loss": 2.6364, "step": 10914 }, { "epoch": 44.55102040816327, "grad_norm": 20.595802307128906, "learning_rate": 2.7740816326530613e-05, "loss": 2.3861, "step": 10915 }, { "epoch": 44.55510204081633, "grad_norm": 17.592134475708008, "learning_rate": 2.7738775510204083e-05, "loss": 2.3513, "step": 10916 }, { "epoch": 44.55918367346939, "grad_norm": 29.119802474975586, "learning_rate": 2.7736734693877554e-05, "loss": 1.7112, "step": 10917 }, { "epoch": 44.563265306122446, "grad_norm": 11.145257949829102, "learning_rate": 2.773469387755102e-05, "loss": 3.2316, "step": 10918 }, { "epoch": 44.56734693877551, "grad_norm": 23.495872497558594, "learning_rate": 2.7732653061224488e-05, "loss": 2.4476, "step": 10919 }, { "epoch": 44.57142857142857, "grad_norm": 16.216768264770508, "learning_rate": 2.773061224489796e-05, "loss": 2.5515, "step": 10920 }, { "epoch": 44.57551020408163, "grad_norm": 16.515377044677734, "learning_rate": 2.7728571428571432e-05, "loss": 2.519, "step": 10921 }, { "epoch": 44.57959183673469, "grad_norm": 33.15916442871094, "learning_rate": 2.77265306122449e-05, "loss": 1.4802, "step": 10922 }, { "epoch": 44.583673469387755, "grad_norm": 55.05351257324219, "learning_rate": 2.7724489795918366e-05, "loss": 1.346, "step": 10923 }, { "epoch": 44.587755102040816, "grad_norm": 16.452974319458008, "learning_rate": 2.7722448979591837e-05, "loss": 2.543, "step": 10924 }, { "epoch": 44.59183673469388, "grad_norm": 19.128787994384766, "learning_rate": 2.772040816326531e-05, "loss": 2.3456, "step": 10925 }, { "epoch": 44.59591836734694, "grad_norm": 17.2393856048584, "learning_rate": 2.7718367346938778e-05, "loss": 2.4851, "step": 10926 }, { "epoch": 44.6, "grad_norm": 15.10044002532959, "learning_rate": 2.7716326530612245e-05, "loss": 2.8771, "step": 10927 }, { "epoch": 44.60408163265306, "grad_norm": 23.467220306396484, "learning_rate": 2.7714285714285716e-05, "loss": 2.4502, "step": 10928 }, { "epoch": 44.608163265306125, "grad_norm": 30.87308692932129, "learning_rate": 2.7712244897959183e-05, "loss": 2.0227, "step": 10929 }, { "epoch": 44.61224489795919, "grad_norm": 23.30165672302246, "learning_rate": 2.7710204081632657e-05, "loss": 2.1376, "step": 10930 }, { "epoch": 44.61632653061225, "grad_norm": 22.6496524810791, "learning_rate": 2.7708163265306124e-05, "loss": 2.0473, "step": 10931 }, { "epoch": 44.6204081632653, "grad_norm": 16.9304141998291, "learning_rate": 2.7706122448979594e-05, "loss": 2.6026, "step": 10932 }, { "epoch": 44.624489795918365, "grad_norm": 30.40054702758789, "learning_rate": 2.770408163265306e-05, "loss": 1.7712, "step": 10933 }, { "epoch": 44.628571428571426, "grad_norm": 16.943450927734375, "learning_rate": 2.770204081632653e-05, "loss": 2.4985, "step": 10934 }, { "epoch": 44.63265306122449, "grad_norm": 30.125885009765625, "learning_rate": 2.7700000000000002e-05, "loss": 1.9398, "step": 10935 }, { "epoch": 44.63673469387755, "grad_norm": 26.381839752197266, "learning_rate": 2.7697959183673473e-05, "loss": 1.5696, "step": 10936 }, { "epoch": 44.64081632653061, "grad_norm": 24.646169662475586, "learning_rate": 2.769591836734694e-05, "loss": 2.4191, "step": 10937 }, { "epoch": 44.64489795918367, "grad_norm": 14.810354232788086, "learning_rate": 2.7693877551020407e-05, "loss": 3.0939, "step": 10938 }, { "epoch": 44.648979591836735, "grad_norm": 36.0075798034668, "learning_rate": 2.7691836734693878e-05, "loss": 1.0606, "step": 10939 }, { "epoch": 44.6530612244898, "grad_norm": 31.529714584350586, "learning_rate": 2.768979591836735e-05, "loss": 1.8216, "step": 10940 }, { "epoch": 44.65714285714286, "grad_norm": 26.95096206665039, "learning_rate": 2.768775510204082e-05, "loss": 1.9159, "step": 10941 }, { "epoch": 44.66122448979592, "grad_norm": 27.107913970947266, "learning_rate": 2.7685714285714286e-05, "loss": 1.0794, "step": 10942 }, { "epoch": 44.66530612244898, "grad_norm": 31.614761352539062, "learning_rate": 2.7683673469387756e-05, "loss": 2.1203, "step": 10943 }, { "epoch": 44.66938775510204, "grad_norm": 23.235904693603516, "learning_rate": 2.7681632653061224e-05, "loss": 2.193, "step": 10944 }, { "epoch": 44.673469387755105, "grad_norm": 30.759105682373047, "learning_rate": 2.7679591836734697e-05, "loss": 1.6422, "step": 10945 }, { "epoch": 44.67755102040816, "grad_norm": 23.83780288696289, "learning_rate": 2.7677551020408165e-05, "loss": 2.0062, "step": 10946 }, { "epoch": 44.68163265306122, "grad_norm": 20.590421676635742, "learning_rate": 2.7675510204081635e-05, "loss": 2.383, "step": 10947 }, { "epoch": 44.68571428571428, "grad_norm": 16.155641555786133, "learning_rate": 2.7673469387755102e-05, "loss": 2.5459, "step": 10948 }, { "epoch": 44.689795918367345, "grad_norm": 44.383907318115234, "learning_rate": 2.767142857142857e-05, "loss": 0.7047, "step": 10949 }, { "epoch": 44.69387755102041, "grad_norm": 24.107683181762695, "learning_rate": 2.7669387755102043e-05, "loss": 2.0221, "step": 10950 }, { "epoch": 44.69795918367347, "grad_norm": 40.642513275146484, "learning_rate": 2.7667346938775514e-05, "loss": 1.3377, "step": 10951 }, { "epoch": 44.70204081632653, "grad_norm": 26.653255462646484, "learning_rate": 2.766530612244898e-05, "loss": 2.2496, "step": 10952 }, { "epoch": 44.70612244897959, "grad_norm": 31.623083114624023, "learning_rate": 2.7663265306122448e-05, "loss": 1.2408, "step": 10953 }, { "epoch": 44.710204081632654, "grad_norm": 26.45518684387207, "learning_rate": 2.766122448979592e-05, "loss": 1.8326, "step": 10954 }, { "epoch": 44.714285714285715, "grad_norm": 15.913911819458008, "learning_rate": 2.7659183673469392e-05, "loss": 2.6474, "step": 10955 }, { "epoch": 44.71836734693878, "grad_norm": 27.93069076538086, "learning_rate": 2.765714285714286e-05, "loss": 1.9462, "step": 10956 }, { "epoch": 44.72244897959184, "grad_norm": 31.671045303344727, "learning_rate": 2.7655102040816327e-05, "loss": 1.1507, "step": 10957 }, { "epoch": 44.7265306122449, "grad_norm": 15.591938018798828, "learning_rate": 2.7653061224489797e-05, "loss": 2.4547, "step": 10958 }, { "epoch": 44.73061224489796, "grad_norm": 30.81519889831543, "learning_rate": 2.7651020408163264e-05, "loss": 1.8762, "step": 10959 }, { "epoch": 44.734693877551024, "grad_norm": 33.869117736816406, "learning_rate": 2.7648979591836738e-05, "loss": 1.6109, "step": 10960 }, { "epoch": 44.73877551020408, "grad_norm": 19.071487426757812, "learning_rate": 2.7646938775510205e-05, "loss": 2.2371, "step": 10961 }, { "epoch": 44.74285714285714, "grad_norm": 25.15395736694336, "learning_rate": 2.7644897959183676e-05, "loss": 2.1778, "step": 10962 }, { "epoch": 44.7469387755102, "grad_norm": 17.099853515625, "learning_rate": 2.7642857142857143e-05, "loss": 2.5878, "step": 10963 }, { "epoch": 44.751020408163264, "grad_norm": 28.259063720703125, "learning_rate": 2.764081632653061e-05, "loss": 1.9148, "step": 10964 }, { "epoch": 44.755102040816325, "grad_norm": 43.23379135131836, "learning_rate": 2.7638775510204084e-05, "loss": 1.1158, "step": 10965 }, { "epoch": 44.75918367346939, "grad_norm": 31.654539108276367, "learning_rate": 2.7636734693877554e-05, "loss": 1.689, "step": 10966 }, { "epoch": 44.76326530612245, "grad_norm": 16.517995834350586, "learning_rate": 2.763469387755102e-05, "loss": 2.9182, "step": 10967 }, { "epoch": 44.76734693877551, "grad_norm": 22.602609634399414, "learning_rate": 2.763265306122449e-05, "loss": 2.3428, "step": 10968 }, { "epoch": 44.77142857142857, "grad_norm": 17.321475982666016, "learning_rate": 2.7630612244897963e-05, "loss": 2.5372, "step": 10969 }, { "epoch": 44.775510204081634, "grad_norm": 18.061410903930664, "learning_rate": 2.762857142857143e-05, "loss": 2.6779, "step": 10970 }, { "epoch": 44.779591836734696, "grad_norm": 13.963008880615234, "learning_rate": 2.76265306122449e-05, "loss": 2.8585, "step": 10971 }, { "epoch": 44.78367346938776, "grad_norm": 24.592863082885742, "learning_rate": 2.7624489795918367e-05, "loss": 1.6881, "step": 10972 }, { "epoch": 44.78775510204082, "grad_norm": 20.891963958740234, "learning_rate": 2.7622448979591835e-05, "loss": 2.4778, "step": 10973 }, { "epoch": 44.79183673469388, "grad_norm": 14.846092224121094, "learning_rate": 2.762040816326531e-05, "loss": 2.6508, "step": 10974 }, { "epoch": 44.795918367346935, "grad_norm": 33.027854919433594, "learning_rate": 2.761836734693878e-05, "loss": 0.4998, "step": 10975 }, { "epoch": 44.8, "grad_norm": 22.332313537597656, "learning_rate": 2.7616326530612246e-05, "loss": 2.4439, "step": 10976 }, { "epoch": 44.80408163265306, "grad_norm": 36.030757904052734, "learning_rate": 2.7614285714285713e-05, "loss": 1.4107, "step": 10977 }, { "epoch": 44.80816326530612, "grad_norm": 16.747520446777344, "learning_rate": 2.7612244897959184e-05, "loss": 2.8669, "step": 10978 }, { "epoch": 44.81224489795918, "grad_norm": 22.508298873901367, "learning_rate": 2.7610204081632658e-05, "loss": 2.0434, "step": 10979 }, { "epoch": 44.816326530612244, "grad_norm": 22.061979293823242, "learning_rate": 2.7608163265306125e-05, "loss": 1.7131, "step": 10980 }, { "epoch": 44.820408163265306, "grad_norm": 48.32805633544922, "learning_rate": 2.7606122448979592e-05, "loss": 0.7891, "step": 10981 }, { "epoch": 44.82448979591837, "grad_norm": 60.56144714355469, "learning_rate": 2.7604081632653062e-05, "loss": 1.6583, "step": 10982 }, { "epoch": 44.82857142857143, "grad_norm": 22.808034896850586, "learning_rate": 2.760204081632653e-05, "loss": 2.3759, "step": 10983 }, { "epoch": 44.83265306122449, "grad_norm": 31.16081428527832, "learning_rate": 2.7600000000000003e-05, "loss": 1.4325, "step": 10984 }, { "epoch": 44.83673469387755, "grad_norm": 35.310611724853516, "learning_rate": 2.759795918367347e-05, "loss": 1.5807, "step": 10985 }, { "epoch": 44.840816326530614, "grad_norm": 9.122773170471191, "learning_rate": 2.759591836734694e-05, "loss": 0.1154, "step": 10986 }, { "epoch": 44.844897959183676, "grad_norm": 34.753448486328125, "learning_rate": 2.7593877551020408e-05, "loss": 1.5027, "step": 10987 }, { "epoch": 44.84897959183674, "grad_norm": 16.41878890991211, "learning_rate": 2.7591836734693875e-05, "loss": 2.8588, "step": 10988 }, { "epoch": 44.85306122448979, "grad_norm": 46.976898193359375, "learning_rate": 2.758979591836735e-05, "loss": 1.1525, "step": 10989 }, { "epoch": 44.857142857142854, "grad_norm": 25.13117218017578, "learning_rate": 2.758775510204082e-05, "loss": 1.8688, "step": 10990 }, { "epoch": 44.861224489795916, "grad_norm": 18.90601921081543, "learning_rate": 2.7585714285714287e-05, "loss": 2.3914, "step": 10991 }, { "epoch": 44.86530612244898, "grad_norm": 19.075334548950195, "learning_rate": 2.7583673469387754e-05, "loss": 2.3143, "step": 10992 }, { "epoch": 44.86938775510204, "grad_norm": 26.319740295410156, "learning_rate": 2.7581632653061224e-05, "loss": 1.887, "step": 10993 }, { "epoch": 44.8734693877551, "grad_norm": 23.858022689819336, "learning_rate": 2.75795918367347e-05, "loss": 2.1091, "step": 10994 }, { "epoch": 44.87755102040816, "grad_norm": 10.067554473876953, "learning_rate": 2.7577551020408166e-05, "loss": 3.0238, "step": 10995 }, { "epoch": 44.881632653061224, "grad_norm": 36.54296875, "learning_rate": 2.7575510204081633e-05, "loss": 2.0362, "step": 10996 }, { "epoch": 44.885714285714286, "grad_norm": 13.435591697692871, "learning_rate": 2.7573469387755103e-05, "loss": 2.9533, "step": 10997 }, { "epoch": 44.88979591836735, "grad_norm": 33.99830627441406, "learning_rate": 2.757142857142857e-05, "loss": 1.3157, "step": 10998 }, { "epoch": 44.89387755102041, "grad_norm": 29.02014923095703, "learning_rate": 2.7569387755102044e-05, "loss": 2.1552, "step": 10999 }, { "epoch": 44.89795918367347, "grad_norm": 27.777795791625977, "learning_rate": 2.756734693877551e-05, "loss": 2.2627, "step": 11000 }, { "epoch": 44.90204081632653, "grad_norm": 33.33777618408203, "learning_rate": 2.7565306122448982e-05, "loss": 1.1107, "step": 11001 }, { "epoch": 44.906122448979595, "grad_norm": 30.428022384643555, "learning_rate": 2.756326530612245e-05, "loss": 1.439, "step": 11002 }, { "epoch": 44.910204081632656, "grad_norm": 17.787940979003906, "learning_rate": 2.7561224489795916e-05, "loss": 2.4573, "step": 11003 }, { "epoch": 44.91428571428571, "grad_norm": 33.31197738647461, "learning_rate": 2.755918367346939e-05, "loss": 2.0246, "step": 11004 }, { "epoch": 44.91836734693877, "grad_norm": 36.20452117919922, "learning_rate": 2.755714285714286e-05, "loss": 0.6857, "step": 11005 }, { "epoch": 44.922448979591834, "grad_norm": 26.771705627441406, "learning_rate": 2.7555102040816328e-05, "loss": 2.1844, "step": 11006 }, { "epoch": 44.926530612244896, "grad_norm": 25.67636489868164, "learning_rate": 2.7553061224489795e-05, "loss": 1.9008, "step": 11007 }, { "epoch": 44.93061224489796, "grad_norm": 21.563310623168945, "learning_rate": 2.7551020408163265e-05, "loss": 2.3242, "step": 11008 }, { "epoch": 44.93469387755102, "grad_norm": 48.030277252197266, "learning_rate": 2.754897959183674e-05, "loss": 0.6139, "step": 11009 }, { "epoch": 44.93877551020408, "grad_norm": 34.66609573364258, "learning_rate": 2.7546938775510206e-05, "loss": 1.1285, "step": 11010 }, { "epoch": 44.94285714285714, "grad_norm": 13.931231498718262, "learning_rate": 2.7544897959183673e-05, "loss": 2.9675, "step": 11011 }, { "epoch": 44.946938775510205, "grad_norm": 30.07726287841797, "learning_rate": 2.7542857142857144e-05, "loss": 1.3244, "step": 11012 }, { "epoch": 44.95102040816327, "grad_norm": 48.8741340637207, "learning_rate": 2.7540816326530618e-05, "loss": 1.2506, "step": 11013 }, { "epoch": 44.95510204081633, "grad_norm": 23.242918014526367, "learning_rate": 2.7538775510204085e-05, "loss": 2.5832, "step": 11014 }, { "epoch": 44.95918367346939, "grad_norm": 38.2369384765625, "learning_rate": 2.7536734693877552e-05, "loss": 1.1859, "step": 11015 }, { "epoch": 44.96326530612245, "grad_norm": 48.92034149169922, "learning_rate": 2.7534693877551023e-05, "loss": 0.6995, "step": 11016 }, { "epoch": 44.96734693877551, "grad_norm": 36.28668212890625, "learning_rate": 2.753265306122449e-05, "loss": 1.8288, "step": 11017 }, { "epoch": 44.97142857142857, "grad_norm": 18.824216842651367, "learning_rate": 2.7530612244897964e-05, "loss": 2.5153, "step": 11018 }, { "epoch": 44.97551020408163, "grad_norm": 12.512358665466309, "learning_rate": 2.752857142857143e-05, "loss": 3.0977, "step": 11019 }, { "epoch": 44.97959183673469, "grad_norm": 44.444305419921875, "learning_rate": 2.75265306122449e-05, "loss": 1.1338, "step": 11020 }, { "epoch": 44.98367346938775, "grad_norm": 26.724334716796875, "learning_rate": 2.752448979591837e-05, "loss": 2.1947, "step": 11021 }, { "epoch": 44.987755102040815, "grad_norm": 21.1449031829834, "learning_rate": 2.7522448979591835e-05, "loss": 2.1407, "step": 11022 }, { "epoch": 44.99183673469388, "grad_norm": 21.977657318115234, "learning_rate": 2.752040816326531e-05, "loss": 1.8789, "step": 11023 }, { "epoch": 44.99591836734694, "grad_norm": 27.942426681518555, "learning_rate": 2.7518367346938777e-05, "loss": 0.3276, "step": 11024 }, { "epoch": 45.0, "grad_norm": 18.324649810791016, "learning_rate": 2.7516326530612247e-05, "loss": 2.5385, "step": 11025 }, { "epoch": 45.00408163265306, "grad_norm": 48.755558013916016, "learning_rate": 2.7514285714285714e-05, "loss": 2.2608, "step": 11026 }, { "epoch": 45.00816326530612, "grad_norm": 15.054101943969727, "learning_rate": 2.751224489795918e-05, "loss": 2.3862, "step": 11027 }, { "epoch": 45.012244897959185, "grad_norm": 17.31920623779297, "learning_rate": 2.7510204081632655e-05, "loss": 2.5565, "step": 11028 }, { "epoch": 45.01632653061225, "grad_norm": 25.55077362060547, "learning_rate": 2.7508163265306126e-05, "loss": 2.3381, "step": 11029 }, { "epoch": 45.02040816326531, "grad_norm": 12.83147144317627, "learning_rate": 2.7506122448979593e-05, "loss": 2.7469, "step": 11030 }, { "epoch": 45.02448979591837, "grad_norm": 26.015439987182617, "learning_rate": 2.750408163265306e-05, "loss": 2.1537, "step": 11031 }, { "epoch": 45.02857142857143, "grad_norm": 35.27544021606445, "learning_rate": 2.750204081632653e-05, "loss": 1.7563, "step": 11032 }, { "epoch": 45.03265306122449, "grad_norm": 28.68794822692871, "learning_rate": 2.7500000000000004e-05, "loss": 2.0488, "step": 11033 }, { "epoch": 45.03673469387755, "grad_norm": 38.38536071777344, "learning_rate": 2.749795918367347e-05, "loss": 0.4628, "step": 11034 }, { "epoch": 45.04081632653061, "grad_norm": 16.19515609741211, "learning_rate": 2.749591836734694e-05, "loss": 2.7724, "step": 11035 }, { "epoch": 45.04489795918367, "grad_norm": 13.011828422546387, "learning_rate": 2.749387755102041e-05, "loss": 3.1989, "step": 11036 }, { "epoch": 45.04897959183673, "grad_norm": 27.559663772583008, "learning_rate": 2.7491836734693876e-05, "loss": 1.1911, "step": 11037 }, { "epoch": 45.053061224489795, "grad_norm": 29.15041160583496, "learning_rate": 2.748979591836735e-05, "loss": 1.7155, "step": 11038 }, { "epoch": 45.05714285714286, "grad_norm": 17.602447509765625, "learning_rate": 2.7487755102040817e-05, "loss": 2.8724, "step": 11039 }, { "epoch": 45.06122448979592, "grad_norm": 21.015825271606445, "learning_rate": 2.7485714285714288e-05, "loss": 2.024, "step": 11040 }, { "epoch": 45.06530612244898, "grad_norm": 49.32619857788086, "learning_rate": 2.7483673469387755e-05, "loss": 0.5627, "step": 11041 }, { "epoch": 45.06938775510204, "grad_norm": 40.84563446044922, "learning_rate": 2.7481632653061222e-05, "loss": 1.3944, "step": 11042 }, { "epoch": 45.073469387755104, "grad_norm": 21.71531105041504, "learning_rate": 2.7479591836734696e-05, "loss": 2.6052, "step": 11043 }, { "epoch": 45.077551020408166, "grad_norm": 29.154699325561523, "learning_rate": 2.7477551020408166e-05, "loss": 2.0023, "step": 11044 }, { "epoch": 45.08163265306123, "grad_norm": 20.514646530151367, "learning_rate": 2.7475510204081634e-05, "loss": 2.4024, "step": 11045 }, { "epoch": 45.08571428571429, "grad_norm": 39.29213333129883, "learning_rate": 2.74734693877551e-05, "loss": 1.3806, "step": 11046 }, { "epoch": 45.089795918367344, "grad_norm": 26.276681900024414, "learning_rate": 2.747142857142857e-05, "loss": 1.8566, "step": 11047 }, { "epoch": 45.093877551020405, "grad_norm": 38.63019561767578, "learning_rate": 2.7469387755102045e-05, "loss": 2.0164, "step": 11048 }, { "epoch": 45.09795918367347, "grad_norm": 27.358015060424805, "learning_rate": 2.7467346938775512e-05, "loss": 1.5109, "step": 11049 }, { "epoch": 45.10204081632653, "grad_norm": 33.0787467956543, "learning_rate": 2.746530612244898e-05, "loss": 1.4666, "step": 11050 }, { "epoch": 45.10612244897959, "grad_norm": 26.82578468322754, "learning_rate": 2.746326530612245e-05, "loss": 1.9146, "step": 11051 }, { "epoch": 45.11020408163265, "grad_norm": 28.65071678161621, "learning_rate": 2.7461224489795917e-05, "loss": 1.8238, "step": 11052 }, { "epoch": 45.114285714285714, "grad_norm": 21.546558380126953, "learning_rate": 2.745918367346939e-05, "loss": 1.9443, "step": 11053 }, { "epoch": 45.118367346938776, "grad_norm": 39.45823669433594, "learning_rate": 2.7457142857142858e-05, "loss": 1.3419, "step": 11054 }, { "epoch": 45.12244897959184, "grad_norm": 28.24911880493164, "learning_rate": 2.745510204081633e-05, "loss": 1.5104, "step": 11055 }, { "epoch": 45.1265306122449, "grad_norm": 21.32765769958496, "learning_rate": 2.7453061224489796e-05, "loss": 2.3473, "step": 11056 }, { "epoch": 45.13061224489796, "grad_norm": 28.601144790649414, "learning_rate": 2.745102040816327e-05, "loss": 1.755, "step": 11057 }, { "epoch": 45.13469387755102, "grad_norm": 22.471343994140625, "learning_rate": 2.7448979591836737e-05, "loss": 2.3109, "step": 11058 }, { "epoch": 45.138775510204084, "grad_norm": 26.27092170715332, "learning_rate": 2.7446938775510207e-05, "loss": 1.6028, "step": 11059 }, { "epoch": 45.142857142857146, "grad_norm": 27.273391723632812, "learning_rate": 2.7444897959183674e-05, "loss": 1.9355, "step": 11060 }, { "epoch": 45.14693877551021, "grad_norm": 18.411359786987305, "learning_rate": 2.744285714285714e-05, "loss": 2.4187, "step": 11061 }, { "epoch": 45.15102040816326, "grad_norm": 13.619854927062988, "learning_rate": 2.7440816326530615e-05, "loss": 2.8213, "step": 11062 }, { "epoch": 45.155102040816324, "grad_norm": 14.265833854675293, "learning_rate": 2.7438775510204086e-05, "loss": 2.7971, "step": 11063 }, { "epoch": 45.159183673469386, "grad_norm": 22.693328857421875, "learning_rate": 2.7436734693877553e-05, "loss": 2.376, "step": 11064 }, { "epoch": 45.16326530612245, "grad_norm": 35.560550689697266, "learning_rate": 2.743469387755102e-05, "loss": 1.1878, "step": 11065 }, { "epoch": 45.16734693877551, "grad_norm": 30.978910446166992, "learning_rate": 2.743265306122449e-05, "loss": 1.6608, "step": 11066 }, { "epoch": 45.17142857142857, "grad_norm": 34.01213073730469, "learning_rate": 2.7430612244897965e-05, "loss": 1.0902, "step": 11067 }, { "epoch": 45.17551020408163, "grad_norm": 37.37730026245117, "learning_rate": 2.742857142857143e-05, "loss": 1.2518, "step": 11068 }, { "epoch": 45.179591836734694, "grad_norm": 12.66440486907959, "learning_rate": 2.74265306122449e-05, "loss": 2.965, "step": 11069 }, { "epoch": 45.183673469387756, "grad_norm": 32.742637634277344, "learning_rate": 2.742448979591837e-05, "loss": 1.2917, "step": 11070 }, { "epoch": 45.18775510204082, "grad_norm": 13.621520042419434, "learning_rate": 2.7422448979591836e-05, "loss": 2.5244, "step": 11071 }, { "epoch": 45.19183673469388, "grad_norm": 24.20863151550293, "learning_rate": 2.742040816326531e-05, "loss": 2.1835, "step": 11072 }, { "epoch": 45.19591836734694, "grad_norm": 17.794677734375, "learning_rate": 2.7418367346938777e-05, "loss": 2.5682, "step": 11073 }, { "epoch": 45.2, "grad_norm": 19.11311149597168, "learning_rate": 2.7416326530612248e-05, "loss": 2.3679, "step": 11074 }, { "epoch": 45.204081632653065, "grad_norm": 34.91179656982422, "learning_rate": 2.7414285714285715e-05, "loss": 1.8868, "step": 11075 }, { "epoch": 45.20816326530612, "grad_norm": 42.71150588989258, "learning_rate": 2.7412244897959182e-05, "loss": 1.0562, "step": 11076 }, { "epoch": 45.21224489795918, "grad_norm": 24.101558685302734, "learning_rate": 2.7410204081632656e-05, "loss": 2.0215, "step": 11077 }, { "epoch": 45.21632653061224, "grad_norm": 19.381893157958984, "learning_rate": 2.7408163265306123e-05, "loss": 2.2106, "step": 11078 }, { "epoch": 45.220408163265304, "grad_norm": 33.85148620605469, "learning_rate": 2.7406122448979594e-05, "loss": 1.141, "step": 11079 }, { "epoch": 45.224489795918366, "grad_norm": 18.746719360351562, "learning_rate": 2.740408163265306e-05, "loss": 2.4982, "step": 11080 }, { "epoch": 45.22857142857143, "grad_norm": 20.782756805419922, "learning_rate": 2.7402040816326528e-05, "loss": 1.8771, "step": 11081 }, { "epoch": 45.23265306122449, "grad_norm": 34.86109161376953, "learning_rate": 2.7400000000000002e-05, "loss": 1.0293, "step": 11082 }, { "epoch": 45.23673469387755, "grad_norm": 19.651504516601562, "learning_rate": 2.7397959183673472e-05, "loss": 2.3458, "step": 11083 }, { "epoch": 45.24081632653061, "grad_norm": 26.073610305786133, "learning_rate": 2.739591836734694e-05, "loss": 1.2189, "step": 11084 }, { "epoch": 45.244897959183675, "grad_norm": 39.031044006347656, "learning_rate": 2.7393877551020407e-05, "loss": 1.7269, "step": 11085 }, { "epoch": 45.248979591836736, "grad_norm": 36.58982849121094, "learning_rate": 2.7391836734693877e-05, "loss": 0.4877, "step": 11086 }, { "epoch": 45.2530612244898, "grad_norm": 21.166990280151367, "learning_rate": 2.738979591836735e-05, "loss": 2.366, "step": 11087 }, { "epoch": 45.25714285714286, "grad_norm": 34.907249450683594, "learning_rate": 2.7387755102040818e-05, "loss": 0.9647, "step": 11088 }, { "epoch": 45.26122448979592, "grad_norm": 16.02142333984375, "learning_rate": 2.7385714285714285e-05, "loss": 2.9899, "step": 11089 }, { "epoch": 45.265306122448976, "grad_norm": 37.95112609863281, "learning_rate": 2.7383673469387756e-05, "loss": 0.5311, "step": 11090 }, { "epoch": 45.26938775510204, "grad_norm": 18.549936294555664, "learning_rate": 2.7381632653061223e-05, "loss": 2.4329, "step": 11091 }, { "epoch": 45.2734693877551, "grad_norm": 30.657634735107422, "learning_rate": 2.7379591836734697e-05, "loss": 1.5595, "step": 11092 }, { "epoch": 45.27755102040816, "grad_norm": 33.88160705566406, "learning_rate": 2.7377551020408164e-05, "loss": 1.6253, "step": 11093 }, { "epoch": 45.28163265306122, "grad_norm": 16.269922256469727, "learning_rate": 2.7375510204081635e-05, "loss": 2.5812, "step": 11094 }, { "epoch": 45.285714285714285, "grad_norm": 20.421619415283203, "learning_rate": 2.73734693877551e-05, "loss": 2.2211, "step": 11095 }, { "epoch": 45.289795918367346, "grad_norm": 23.593732833862305, "learning_rate": 2.737142857142857e-05, "loss": 2.1014, "step": 11096 }, { "epoch": 45.29387755102041, "grad_norm": 34.97114944458008, "learning_rate": 2.7369387755102043e-05, "loss": 0.5649, "step": 11097 }, { "epoch": 45.29795918367347, "grad_norm": 22.987028121948242, "learning_rate": 2.7367346938775513e-05, "loss": 2.2836, "step": 11098 }, { "epoch": 45.30204081632653, "grad_norm": 21.776737213134766, "learning_rate": 2.736530612244898e-05, "loss": 2.1374, "step": 11099 }, { "epoch": 45.30612244897959, "grad_norm": 21.4439640045166, "learning_rate": 2.7363265306122447e-05, "loss": 2.1743, "step": 11100 }, { "epoch": 45.310204081632655, "grad_norm": 27.235504150390625, "learning_rate": 2.7361224489795918e-05, "loss": 1.8167, "step": 11101 }, { "epoch": 45.31428571428572, "grad_norm": 11.870694160461426, "learning_rate": 2.7359183673469392e-05, "loss": 2.8583, "step": 11102 }, { "epoch": 45.31836734693878, "grad_norm": 41.345428466796875, "learning_rate": 2.735714285714286e-05, "loss": 1.3722, "step": 11103 }, { "epoch": 45.32244897959184, "grad_norm": 23.415559768676758, "learning_rate": 2.7355102040816326e-05, "loss": 2.203, "step": 11104 }, { "epoch": 45.326530612244895, "grad_norm": 26.040212631225586, "learning_rate": 2.7353061224489797e-05, "loss": 1.7818, "step": 11105 }, { "epoch": 45.33061224489796, "grad_norm": 23.624048233032227, "learning_rate": 2.735102040816327e-05, "loss": 2.0645, "step": 11106 }, { "epoch": 45.33469387755102, "grad_norm": 11.925424575805664, "learning_rate": 2.7348979591836738e-05, "loss": 3.4314, "step": 11107 }, { "epoch": 45.33877551020408, "grad_norm": 29.68572235107422, "learning_rate": 2.7346938775510205e-05, "loss": 1.5641, "step": 11108 }, { "epoch": 45.34285714285714, "grad_norm": 19.01490020751953, "learning_rate": 2.7344897959183675e-05, "loss": 2.2539, "step": 11109 }, { "epoch": 45.3469387755102, "grad_norm": 53.58060073852539, "learning_rate": 2.7342857142857142e-05, "loss": 1.2662, "step": 11110 }, { "epoch": 45.351020408163265, "grad_norm": 47.70278549194336, "learning_rate": 2.7340816326530616e-05, "loss": 0.4478, "step": 11111 }, { "epoch": 45.35510204081633, "grad_norm": 23.7153263092041, "learning_rate": 2.7338775510204083e-05, "loss": 2.3263, "step": 11112 }, { "epoch": 45.35918367346939, "grad_norm": 17.296377182006836, "learning_rate": 2.7336734693877554e-05, "loss": 2.573, "step": 11113 }, { "epoch": 45.36326530612245, "grad_norm": 34.71957778930664, "learning_rate": 2.733469387755102e-05, "loss": 2.1519, "step": 11114 }, { "epoch": 45.36734693877551, "grad_norm": 23.207815170288086, "learning_rate": 2.7332653061224488e-05, "loss": 1.7543, "step": 11115 }, { "epoch": 45.371428571428574, "grad_norm": 23.32079315185547, "learning_rate": 2.7330612244897962e-05, "loss": 2.2925, "step": 11116 }, { "epoch": 45.375510204081635, "grad_norm": 38.73780822753906, "learning_rate": 2.7328571428571433e-05, "loss": 1.3217, "step": 11117 }, { "epoch": 45.3795918367347, "grad_norm": 26.065580368041992, "learning_rate": 2.73265306122449e-05, "loss": 1.2349, "step": 11118 }, { "epoch": 45.38367346938775, "grad_norm": 33.64189529418945, "learning_rate": 2.7324489795918367e-05, "loss": 1.6513, "step": 11119 }, { "epoch": 45.38775510204081, "grad_norm": 32.665523529052734, "learning_rate": 2.7322448979591837e-05, "loss": 1.2611, "step": 11120 }, { "epoch": 45.391836734693875, "grad_norm": 27.68565559387207, "learning_rate": 2.732040816326531e-05, "loss": 2.0843, "step": 11121 }, { "epoch": 45.39591836734694, "grad_norm": 20.239887237548828, "learning_rate": 2.731836734693878e-05, "loss": 2.3421, "step": 11122 }, { "epoch": 45.4, "grad_norm": 37.5910530090332, "learning_rate": 2.7316326530612246e-05, "loss": 1.0325, "step": 11123 }, { "epoch": 45.40408163265306, "grad_norm": 25.585105895996094, "learning_rate": 2.7314285714285716e-05, "loss": 2.1819, "step": 11124 }, { "epoch": 45.40816326530612, "grad_norm": 11.892316818237305, "learning_rate": 2.7312244897959183e-05, "loss": 3.3123, "step": 11125 }, { "epoch": 45.412244897959184, "grad_norm": 24.793485641479492, "learning_rate": 2.7310204081632657e-05, "loss": 2.4657, "step": 11126 }, { "epoch": 45.416326530612245, "grad_norm": 26.080350875854492, "learning_rate": 2.7308163265306124e-05, "loss": 1.8255, "step": 11127 }, { "epoch": 45.42040816326531, "grad_norm": 29.533977508544922, "learning_rate": 2.730612244897959e-05, "loss": 1.0555, "step": 11128 }, { "epoch": 45.42448979591837, "grad_norm": 27.25588607788086, "learning_rate": 2.7304081632653062e-05, "loss": 1.1914, "step": 11129 }, { "epoch": 45.42857142857143, "grad_norm": 10.985626220703125, "learning_rate": 2.730204081632653e-05, "loss": 2.9807, "step": 11130 }, { "epoch": 45.43265306122449, "grad_norm": 41.0985107421875, "learning_rate": 2.7300000000000003e-05, "loss": 0.6783, "step": 11131 }, { "epoch": 45.436734693877554, "grad_norm": 36.584774017333984, "learning_rate": 2.729795918367347e-05, "loss": 1.3424, "step": 11132 }, { "epoch": 45.440816326530616, "grad_norm": 42.71162796020508, "learning_rate": 2.729591836734694e-05, "loss": 1.129, "step": 11133 }, { "epoch": 45.44489795918367, "grad_norm": 16.368417739868164, "learning_rate": 2.7293877551020408e-05, "loss": 2.5564, "step": 11134 }, { "epoch": 45.44897959183673, "grad_norm": 40.72576904296875, "learning_rate": 2.7291836734693875e-05, "loss": 1.4336, "step": 11135 }, { "epoch": 45.453061224489794, "grad_norm": 22.622957229614258, "learning_rate": 2.728979591836735e-05, "loss": 1.8827, "step": 11136 }, { "epoch": 45.457142857142856, "grad_norm": 17.231111526489258, "learning_rate": 2.728775510204082e-05, "loss": 2.5521, "step": 11137 }, { "epoch": 45.46122448979592, "grad_norm": 13.90181827545166, "learning_rate": 2.7285714285714286e-05, "loss": 2.9474, "step": 11138 }, { "epoch": 45.46530612244898, "grad_norm": 14.500115394592285, "learning_rate": 2.7283673469387753e-05, "loss": 2.3138, "step": 11139 }, { "epoch": 45.46938775510204, "grad_norm": 18.752052307128906, "learning_rate": 2.7281632653061224e-05, "loss": 2.349, "step": 11140 }, { "epoch": 45.4734693877551, "grad_norm": 23.62549591064453, "learning_rate": 2.7279591836734698e-05, "loss": 1.6681, "step": 11141 }, { "epoch": 45.477551020408164, "grad_norm": 39.84457015991211, "learning_rate": 2.7277551020408165e-05, "loss": 1.5433, "step": 11142 }, { "epoch": 45.481632653061226, "grad_norm": 22.746923446655273, "learning_rate": 2.7275510204081632e-05, "loss": 1.8232, "step": 11143 }, { "epoch": 45.48571428571429, "grad_norm": 19.00505828857422, "learning_rate": 2.7273469387755103e-05, "loss": 2.5697, "step": 11144 }, { "epoch": 45.48979591836735, "grad_norm": 29.425689697265625, "learning_rate": 2.727142857142857e-05, "loss": 1.6865, "step": 11145 }, { "epoch": 45.49387755102041, "grad_norm": 34.998165130615234, "learning_rate": 2.7269387755102044e-05, "loss": 1.8316, "step": 11146 }, { "epoch": 45.49795918367347, "grad_norm": 16.37656593322754, "learning_rate": 2.726734693877551e-05, "loss": 2.3862, "step": 11147 }, { "epoch": 45.50204081632653, "grad_norm": 28.04903793334961, "learning_rate": 2.726530612244898e-05, "loss": 1.6496, "step": 11148 }, { "epoch": 45.50612244897959, "grad_norm": 17.75676155090332, "learning_rate": 2.726326530612245e-05, "loss": 2.5283, "step": 11149 }, { "epoch": 45.51020408163265, "grad_norm": 12.574808120727539, "learning_rate": 2.7261224489795922e-05, "loss": 3.0057, "step": 11150 }, { "epoch": 45.51428571428571, "grad_norm": 32.64609146118164, "learning_rate": 2.725918367346939e-05, "loss": 1.6978, "step": 11151 }, { "epoch": 45.518367346938774, "grad_norm": 53.927268981933594, "learning_rate": 2.725714285714286e-05, "loss": 1.3296, "step": 11152 }, { "epoch": 45.522448979591836, "grad_norm": 32.877872467041016, "learning_rate": 2.7255102040816327e-05, "loss": 1.6765, "step": 11153 }, { "epoch": 45.5265306122449, "grad_norm": 16.00128936767578, "learning_rate": 2.7253061224489794e-05, "loss": 2.8521, "step": 11154 }, { "epoch": 45.53061224489796, "grad_norm": 35.20121383666992, "learning_rate": 2.7251020408163268e-05, "loss": 1.1392, "step": 11155 }, { "epoch": 45.53469387755102, "grad_norm": 31.074377059936523, "learning_rate": 2.724897959183674e-05, "loss": 2.0959, "step": 11156 }, { "epoch": 45.53877551020408, "grad_norm": 31.284151077270508, "learning_rate": 2.7246938775510206e-05, "loss": 2.0378, "step": 11157 }, { "epoch": 45.542857142857144, "grad_norm": 20.438631057739258, "learning_rate": 2.7244897959183673e-05, "loss": 2.1827, "step": 11158 }, { "epoch": 45.546938775510206, "grad_norm": 27.972509384155273, "learning_rate": 2.7242857142857143e-05, "loss": 1.6343, "step": 11159 }, { "epoch": 45.55102040816327, "grad_norm": 32.69671630859375, "learning_rate": 2.7240816326530617e-05, "loss": 1.5537, "step": 11160 }, { "epoch": 45.55510204081633, "grad_norm": 29.477293014526367, "learning_rate": 2.7238775510204084e-05, "loss": 1.7557, "step": 11161 }, { "epoch": 45.55918367346939, "grad_norm": 18.509450912475586, "learning_rate": 2.723673469387755e-05, "loss": 2.5516, "step": 11162 }, { "epoch": 45.563265306122446, "grad_norm": 23.859663009643555, "learning_rate": 2.7234693877551022e-05, "loss": 2.1799, "step": 11163 }, { "epoch": 45.56734693877551, "grad_norm": 28.24108123779297, "learning_rate": 2.723265306122449e-05, "loss": 1.7912, "step": 11164 }, { "epoch": 45.57142857142857, "grad_norm": 25.472675323486328, "learning_rate": 2.7230612244897963e-05, "loss": 1.894, "step": 11165 }, { "epoch": 45.57551020408163, "grad_norm": 59.65285873413086, "learning_rate": 2.722857142857143e-05, "loss": 1.0733, "step": 11166 }, { "epoch": 45.57959183673469, "grad_norm": 24.381677627563477, "learning_rate": 2.72265306122449e-05, "loss": 2.3229, "step": 11167 }, { "epoch": 45.583673469387755, "grad_norm": 35.63137435913086, "learning_rate": 2.7224489795918368e-05, "loss": 1.9344, "step": 11168 }, { "epoch": 45.587755102040816, "grad_norm": 14.544638633728027, "learning_rate": 2.7222448979591835e-05, "loss": 2.8184, "step": 11169 }, { "epoch": 45.59183673469388, "grad_norm": 12.331223487854004, "learning_rate": 2.722040816326531e-05, "loss": 0.1012, "step": 11170 }, { "epoch": 45.59591836734694, "grad_norm": 16.47783660888672, "learning_rate": 2.721836734693878e-05, "loss": 2.4815, "step": 11171 }, { "epoch": 45.6, "grad_norm": 42.9424934387207, "learning_rate": 2.7216326530612247e-05, "loss": 1.3022, "step": 11172 }, { "epoch": 45.60408163265306, "grad_norm": 16.312517166137695, "learning_rate": 2.7214285714285714e-05, "loss": 2.541, "step": 11173 }, { "epoch": 45.608163265306125, "grad_norm": 24.365415573120117, "learning_rate": 2.7212244897959184e-05, "loss": 2.3853, "step": 11174 }, { "epoch": 45.61224489795919, "grad_norm": 19.873933792114258, "learning_rate": 2.7210204081632658e-05, "loss": 2.2874, "step": 11175 }, { "epoch": 45.61632653061225, "grad_norm": 38.39353561401367, "learning_rate": 2.7208163265306125e-05, "loss": 1.241, "step": 11176 }, { "epoch": 45.6204081632653, "grad_norm": 36.57072067260742, "learning_rate": 2.7206122448979592e-05, "loss": 1.2971, "step": 11177 }, { "epoch": 45.624489795918365, "grad_norm": 24.26961898803711, "learning_rate": 2.7204081632653063e-05, "loss": 2.4035, "step": 11178 }, { "epoch": 45.628571428571426, "grad_norm": 26.807071685791016, "learning_rate": 2.720204081632653e-05, "loss": 1.2877, "step": 11179 }, { "epoch": 45.63265306122449, "grad_norm": 25.018115997314453, "learning_rate": 2.7200000000000004e-05, "loss": 2.1185, "step": 11180 }, { "epoch": 45.63673469387755, "grad_norm": 17.08152198791504, "learning_rate": 2.719795918367347e-05, "loss": 2.4092, "step": 11181 }, { "epoch": 45.64081632653061, "grad_norm": 15.775221824645996, "learning_rate": 2.7195918367346938e-05, "loss": 3.0652, "step": 11182 }, { "epoch": 45.64489795918367, "grad_norm": NaN, "learning_rate": 2.719387755102041e-05, "loss": 0.6639, "step": 11183 }, { "epoch": 45.648979591836735, "grad_norm": 27.335346221923828, "learning_rate": 2.719387755102041e-05, "loss": 2.1484, "step": 11184 }, { "epoch": 45.6530612244898, "grad_norm": 26.964794158935547, "learning_rate": 2.7191836734693876e-05, "loss": 2.2245, "step": 11185 }, { "epoch": 45.65714285714286, "grad_norm": 29.010169982910156, "learning_rate": 2.718979591836735e-05, "loss": 2.0011, "step": 11186 }, { "epoch": 45.66122448979592, "grad_norm": 51.74996566772461, "learning_rate": 2.7187755102040817e-05, "loss": 1.0757, "step": 11187 }, { "epoch": 45.66530612244898, "grad_norm": 18.187692642211914, "learning_rate": 2.7185714285714287e-05, "loss": 2.4581, "step": 11188 }, { "epoch": 45.66938775510204, "grad_norm": 17.348243713378906, "learning_rate": 2.7183673469387754e-05, "loss": 2.8031, "step": 11189 }, { "epoch": 45.673469387755105, "grad_norm": 23.975980758666992, "learning_rate": 2.718163265306122e-05, "loss": 2.3538, "step": 11190 }, { "epoch": 45.67755102040816, "grad_norm": 27.383392333984375, "learning_rate": 2.7179591836734695e-05, "loss": 1.2136, "step": 11191 }, { "epoch": 45.68163265306122, "grad_norm": 57.49296569824219, "learning_rate": 2.7177551020408166e-05, "loss": 1.2135, "step": 11192 }, { "epoch": 45.68571428571428, "grad_norm": 36.371028900146484, "learning_rate": 2.7175510204081633e-05, "loss": 1.1692, "step": 11193 }, { "epoch": 45.689795918367345, "grad_norm": 18.550294876098633, "learning_rate": 2.71734693877551e-05, "loss": 2.1666, "step": 11194 }, { "epoch": 45.69387755102041, "grad_norm": 22.638050079345703, "learning_rate": 2.7171428571428574e-05, "loss": 2.3859, "step": 11195 }, { "epoch": 45.69795918367347, "grad_norm": 24.30902099609375, "learning_rate": 2.7169387755102045e-05, "loss": 1.9528, "step": 11196 }, { "epoch": 45.70204081632653, "grad_norm": 19.693511962890625, "learning_rate": 2.7167346938775512e-05, "loss": 2.3523, "step": 11197 }, { "epoch": 45.70612244897959, "grad_norm": 24.259023666381836, "learning_rate": 2.716530612244898e-05, "loss": 1.9616, "step": 11198 }, { "epoch": 45.710204081632654, "grad_norm": 23.520116806030273, "learning_rate": 2.716326530612245e-05, "loss": 1.9623, "step": 11199 }, { "epoch": 45.714285714285715, "grad_norm": 21.283714294433594, "learning_rate": 2.7161224489795923e-05, "loss": 2.5774, "step": 11200 }, { "epoch": 45.71836734693878, "grad_norm": 30.111648559570312, "learning_rate": 2.715918367346939e-05, "loss": 1.5805, "step": 11201 }, { "epoch": 45.72244897959184, "grad_norm": 17.31949234008789, "learning_rate": 2.7157142857142858e-05, "loss": 2.3629, "step": 11202 }, { "epoch": 45.7265306122449, "grad_norm": 23.29625701904297, "learning_rate": 2.7155102040816328e-05, "loss": 1.8559, "step": 11203 }, { "epoch": 45.73061224489796, "grad_norm": 35.37070846557617, "learning_rate": 2.7153061224489795e-05, "loss": 0.6645, "step": 11204 }, { "epoch": 45.734693877551024, "grad_norm": 25.181238174438477, "learning_rate": 2.715102040816327e-05, "loss": 2.2751, "step": 11205 }, { "epoch": 45.73877551020408, "grad_norm": 31.765207290649414, "learning_rate": 2.7148979591836736e-05, "loss": 0.5247, "step": 11206 }, { "epoch": 45.74285714285714, "grad_norm": 22.6510009765625, "learning_rate": 2.7146938775510207e-05, "loss": 2.6813, "step": 11207 }, { "epoch": 45.7469387755102, "grad_norm": 27.70985984802246, "learning_rate": 2.7144897959183674e-05, "loss": 1.8925, "step": 11208 }, { "epoch": 45.751020408163264, "grad_norm": 27.9593563079834, "learning_rate": 2.714285714285714e-05, "loss": 1.5145, "step": 11209 }, { "epoch": 45.755102040816325, "grad_norm": 26.184297561645508, "learning_rate": 2.7140816326530615e-05, "loss": 1.8662, "step": 11210 }, { "epoch": 45.75918367346939, "grad_norm": 23.503786087036133, "learning_rate": 2.7138775510204085e-05, "loss": 2.5416, "step": 11211 }, { "epoch": 45.76326530612245, "grad_norm": 26.837478637695312, "learning_rate": 2.7136734693877553e-05, "loss": 1.75, "step": 11212 }, { "epoch": 45.76734693877551, "grad_norm": 35.33190155029297, "learning_rate": 2.713469387755102e-05, "loss": 1.0368, "step": 11213 }, { "epoch": 45.77142857142857, "grad_norm": 24.20571517944336, "learning_rate": 2.713265306122449e-05, "loss": 2.1637, "step": 11214 }, { "epoch": 45.775510204081634, "grad_norm": 19.641014099121094, "learning_rate": 2.7130612244897964e-05, "loss": 2.5137, "step": 11215 }, { "epoch": 45.779591836734696, "grad_norm": 33.44704818725586, "learning_rate": 2.712857142857143e-05, "loss": 1.0853, "step": 11216 }, { "epoch": 45.78367346938776, "grad_norm": 29.0522403717041, "learning_rate": 2.7126530612244898e-05, "loss": 1.7205, "step": 11217 }, { "epoch": 45.78775510204082, "grad_norm": 24.272851943969727, "learning_rate": 2.712448979591837e-05, "loss": 2.3186, "step": 11218 }, { "epoch": 45.79183673469388, "grad_norm": 18.39423179626465, "learning_rate": 2.7122448979591836e-05, "loss": 2.6135, "step": 11219 }, { "epoch": 45.795918367346935, "grad_norm": 47.83763122558594, "learning_rate": 2.712040816326531e-05, "loss": 1.6291, "step": 11220 }, { "epoch": 45.8, "grad_norm": 19.734376907348633, "learning_rate": 2.7118367346938777e-05, "loss": 1.9056, "step": 11221 }, { "epoch": 45.80408163265306, "grad_norm": 25.58975601196289, "learning_rate": 2.7116326530612247e-05, "loss": 1.3313, "step": 11222 }, { "epoch": 45.80816326530612, "grad_norm": 28.61115264892578, "learning_rate": 2.7114285714285715e-05, "loss": 2.1553, "step": 11223 }, { "epoch": 45.81224489795918, "grad_norm": 26.921733856201172, "learning_rate": 2.7112244897959182e-05, "loss": 2.0582, "step": 11224 }, { "epoch": 45.816326530612244, "grad_norm": 16.73078155517578, "learning_rate": 2.7110204081632656e-05, "loss": 2.8315, "step": 11225 }, { "epoch": 45.820408163265306, "grad_norm": 33.652957916259766, "learning_rate": 2.7108163265306126e-05, "loss": 1.1321, "step": 11226 }, { "epoch": 45.82448979591837, "grad_norm": 37.471923828125, "learning_rate": 2.7106122448979593e-05, "loss": 0.5052, "step": 11227 }, { "epoch": 45.82857142857143, "grad_norm": 24.597911834716797, "learning_rate": 2.710408163265306e-05, "loss": 1.9879, "step": 11228 }, { "epoch": 45.83265306122449, "grad_norm": 18.534027099609375, "learning_rate": 2.710204081632653e-05, "loss": 2.8482, "step": 11229 }, { "epoch": 45.83673469387755, "grad_norm": 17.81624412536621, "learning_rate": 2.7100000000000005e-05, "loss": 2.5268, "step": 11230 }, { "epoch": 45.840816326530614, "grad_norm": 13.148187637329102, "learning_rate": 2.7097959183673472e-05, "loss": 2.9299, "step": 11231 }, { "epoch": 45.844897959183676, "grad_norm": 25.887451171875, "learning_rate": 2.709591836734694e-05, "loss": 2.0088, "step": 11232 }, { "epoch": 45.84897959183674, "grad_norm": 47.4496955871582, "learning_rate": 2.709387755102041e-05, "loss": 0.5224, "step": 11233 }, { "epoch": 45.85306122448979, "grad_norm": 19.141807556152344, "learning_rate": 2.7091836734693877e-05, "loss": 2.7469, "step": 11234 }, { "epoch": 45.857142857142854, "grad_norm": 18.678333282470703, "learning_rate": 2.708979591836735e-05, "loss": 2.4452, "step": 11235 }, { "epoch": 45.861224489795916, "grad_norm": 25.855066299438477, "learning_rate": 2.7087755102040818e-05, "loss": 0.2874, "step": 11236 }, { "epoch": 45.86530612244898, "grad_norm": 31.860885620117188, "learning_rate": 2.7085714285714285e-05, "loss": 1.2502, "step": 11237 }, { "epoch": 45.86938775510204, "grad_norm": 38.231388092041016, "learning_rate": 2.7083673469387755e-05, "loss": 1.4376, "step": 11238 }, { "epoch": 45.8734693877551, "grad_norm": 34.888099670410156, "learning_rate": 2.708163265306123e-05, "loss": 1.7227, "step": 11239 }, { "epoch": 45.87755102040816, "grad_norm": 34.96502685546875, "learning_rate": 2.7079591836734696e-05, "loss": 0.7939, "step": 11240 }, { "epoch": 45.881632653061224, "grad_norm": 23.461076736450195, "learning_rate": 2.7077551020408164e-05, "loss": 2.3943, "step": 11241 }, { "epoch": 45.885714285714286, "grad_norm": 35.18479537963867, "learning_rate": 2.7075510204081634e-05, "loss": 1.9743, "step": 11242 }, { "epoch": 45.88979591836735, "grad_norm": 18.264572143554688, "learning_rate": 2.70734693877551e-05, "loss": 2.6439, "step": 11243 }, { "epoch": 45.89387755102041, "grad_norm": 18.438148498535156, "learning_rate": 2.7071428571428575e-05, "loss": 2.2129, "step": 11244 }, { "epoch": 45.89795918367347, "grad_norm": 28.776044845581055, "learning_rate": 2.7069387755102042e-05, "loss": 1.6489, "step": 11245 }, { "epoch": 45.90204081632653, "grad_norm": 31.349687576293945, "learning_rate": 2.7067346938775513e-05, "loss": 1.5303, "step": 11246 }, { "epoch": 45.906122448979595, "grad_norm": 31.049699783325195, "learning_rate": 2.706530612244898e-05, "loss": 2.4014, "step": 11247 }, { "epoch": 45.910204081632656, "grad_norm": 37.70650100708008, "learning_rate": 2.7063265306122447e-05, "loss": 1.2705, "step": 11248 }, { "epoch": 45.91428571428571, "grad_norm": 15.461403846740723, "learning_rate": 2.706122448979592e-05, "loss": 2.8679, "step": 11249 }, { "epoch": 45.91836734693877, "grad_norm": 32.78752899169922, "learning_rate": 2.705918367346939e-05, "loss": 2.1223, "step": 11250 }, { "epoch": 45.922448979591834, "grad_norm": 29.90123176574707, "learning_rate": 2.705714285714286e-05, "loss": 1.5665, "step": 11251 }, { "epoch": 45.926530612244896, "grad_norm": 39.82522201538086, "learning_rate": 2.7055102040816326e-05, "loss": 1.921, "step": 11252 }, { "epoch": 45.93061224489796, "grad_norm": 18.3262939453125, "learning_rate": 2.7053061224489796e-05, "loss": 2.5979, "step": 11253 }, { "epoch": 45.93469387755102, "grad_norm": 43.48563003540039, "learning_rate": 2.705102040816327e-05, "loss": 1.1073, "step": 11254 }, { "epoch": 45.93877551020408, "grad_norm": 37.09575271606445, "learning_rate": 2.7048979591836737e-05, "loss": 0.7062, "step": 11255 }, { "epoch": 45.94285714285714, "grad_norm": 36.60930633544922, "learning_rate": 2.7046938775510204e-05, "loss": 0.7062, "step": 11256 }, { "epoch": 45.946938775510205, "grad_norm": 14.978631019592285, "learning_rate": 2.7044897959183675e-05, "loss": 2.4908, "step": 11257 }, { "epoch": 45.95102040816327, "grad_norm": 40.79867935180664, "learning_rate": 2.7042857142857142e-05, "loss": 0.8616, "step": 11258 }, { "epoch": 45.95510204081633, "grad_norm": 27.264902114868164, "learning_rate": 2.7040816326530616e-05, "loss": 1.7717, "step": 11259 }, { "epoch": 45.95918367346939, "grad_norm": 38.88286209106445, "learning_rate": 2.7038775510204083e-05, "loss": 1.1317, "step": 11260 }, { "epoch": 45.96326530612245, "grad_norm": 41.28080368041992, "learning_rate": 2.7036734693877553e-05, "loss": 1.4963, "step": 11261 }, { "epoch": 45.96734693877551, "grad_norm": 30.709379196166992, "learning_rate": 2.703469387755102e-05, "loss": 2.026, "step": 11262 }, { "epoch": 45.97142857142857, "grad_norm": 17.10355567932129, "learning_rate": 2.7032653061224488e-05, "loss": 2.4601, "step": 11263 }, { "epoch": 45.97551020408163, "grad_norm": 27.89201545715332, "learning_rate": 2.703061224489796e-05, "loss": 1.6244, "step": 11264 }, { "epoch": 45.97959183673469, "grad_norm": 27.30716896057129, "learning_rate": 2.7028571428571432e-05, "loss": 2.0501, "step": 11265 }, { "epoch": 45.98367346938775, "grad_norm": 21.02611541748047, "learning_rate": 2.70265306122449e-05, "loss": 2.3147, "step": 11266 }, { "epoch": 45.987755102040815, "grad_norm": 18.835693359375, "learning_rate": 2.7024489795918366e-05, "loss": 2.3239, "step": 11267 }, { "epoch": 45.99183673469388, "grad_norm": 18.787797927856445, "learning_rate": 2.7022448979591837e-05, "loss": 2.5409, "step": 11268 }, { "epoch": 45.99591836734694, "grad_norm": 25.656038284301758, "learning_rate": 2.702040816326531e-05, "loss": 2.1024, "step": 11269 }, { "epoch": 46.0, "grad_norm": 28.966766357421875, "learning_rate": 2.7018367346938778e-05, "loss": 2.2502, "step": 11270 }, { "epoch": 46.00408163265306, "grad_norm": 34.840267181396484, "learning_rate": 2.7016326530612245e-05, "loss": 1.4075, "step": 11271 }, { "epoch": 46.00816326530612, "grad_norm": 18.079021453857422, "learning_rate": 2.7014285714285716e-05, "loss": 2.5187, "step": 11272 }, { "epoch": 46.012244897959185, "grad_norm": 17.49239730834961, "learning_rate": 2.7012244897959183e-05, "loss": 2.7279, "step": 11273 }, { "epoch": 46.01632653061225, "grad_norm": 19.934589385986328, "learning_rate": 2.7010204081632657e-05, "loss": 2.2405, "step": 11274 }, { "epoch": 46.02040816326531, "grad_norm": 35.566009521484375, "learning_rate": 2.7008163265306124e-05, "loss": 0.691, "step": 11275 }, { "epoch": 46.02448979591837, "grad_norm": 36.48664855957031, "learning_rate": 2.7006122448979594e-05, "loss": 1.7204, "step": 11276 }, { "epoch": 46.02857142857143, "grad_norm": 24.795591354370117, "learning_rate": 2.700408163265306e-05, "loss": 1.9967, "step": 11277 }, { "epoch": 46.03265306122449, "grad_norm": 23.917116165161133, "learning_rate": 2.700204081632653e-05, "loss": 1.7709, "step": 11278 }, { "epoch": 46.03673469387755, "grad_norm": 51.24211502075195, "learning_rate": 2.7000000000000002e-05, "loss": 1.0285, "step": 11279 }, { "epoch": 46.04081632653061, "grad_norm": 20.926502227783203, "learning_rate": 2.6997959183673473e-05, "loss": 2.0367, "step": 11280 }, { "epoch": 46.04489795918367, "grad_norm": 19.2484073638916, "learning_rate": 2.699591836734694e-05, "loss": 2.3724, "step": 11281 }, { "epoch": 46.04897959183673, "grad_norm": 49.107086181640625, "learning_rate": 2.6993877551020407e-05, "loss": 0.9968, "step": 11282 }, { "epoch": 46.053061224489795, "grad_norm": 16.082914352416992, "learning_rate": 2.6991836734693878e-05, "loss": 2.3941, "step": 11283 }, { "epoch": 46.05714285714286, "grad_norm": 54.25445556640625, "learning_rate": 2.698979591836735e-05, "loss": 0.6011, "step": 11284 }, { "epoch": 46.06122448979592, "grad_norm": 25.398073196411133, "learning_rate": 2.698775510204082e-05, "loss": 2.1502, "step": 11285 }, { "epoch": 46.06530612244898, "grad_norm": 29.800018310546875, "learning_rate": 2.6985714285714286e-05, "loss": 2.0581, "step": 11286 }, { "epoch": 46.06938775510204, "grad_norm": 43.17192459106445, "learning_rate": 2.6983673469387756e-05, "loss": 0.4549, "step": 11287 }, { "epoch": 46.073469387755104, "grad_norm": 41.21458053588867, "learning_rate": 2.6981632653061227e-05, "loss": 1.2322, "step": 11288 }, { "epoch": 46.077551020408166, "grad_norm": 18.862594604492188, "learning_rate": 2.6979591836734697e-05, "loss": 2.3177, "step": 11289 }, { "epoch": 46.08163265306123, "grad_norm": 30.25166893005371, "learning_rate": 2.6977551020408164e-05, "loss": 0.9941, "step": 11290 }, { "epoch": 46.08571428571429, "grad_norm": 17.145042419433594, "learning_rate": 2.697551020408163e-05, "loss": 2.3918, "step": 11291 }, { "epoch": 46.089795918367344, "grad_norm": 15.915960311889648, "learning_rate": 2.6973469387755102e-05, "loss": 2.4061, "step": 11292 }, { "epoch": 46.093877551020405, "grad_norm": 29.43923568725586, "learning_rate": 2.6971428571428576e-05, "loss": 1.4759, "step": 11293 }, { "epoch": 46.09795918367347, "grad_norm": 25.822063446044922, "learning_rate": 2.6969387755102043e-05, "loss": 2.6527, "step": 11294 }, { "epoch": 46.10204081632653, "grad_norm": 48.355934143066406, "learning_rate": 2.696734693877551e-05, "loss": 1.0186, "step": 11295 }, { "epoch": 46.10612244897959, "grad_norm": 30.484407424926758, "learning_rate": 2.696530612244898e-05, "loss": 2.2993, "step": 11296 }, { "epoch": 46.11020408163265, "grad_norm": 26.91539192199707, "learning_rate": 2.6963265306122448e-05, "loss": 1.4492, "step": 11297 }, { "epoch": 46.114285714285714, "grad_norm": 54.62507247924805, "learning_rate": 2.6961224489795922e-05, "loss": 0.693, "step": 11298 }, { "epoch": 46.118367346938776, "grad_norm": 18.848548889160156, "learning_rate": 2.695918367346939e-05, "loss": 2.5587, "step": 11299 }, { "epoch": 46.12244897959184, "grad_norm": 24.832988739013672, "learning_rate": 2.695714285714286e-05, "loss": 2.4107, "step": 11300 }, { "epoch": 46.1265306122449, "grad_norm": 10.81674575805664, "learning_rate": 2.6955102040816327e-05, "loss": 2.9526, "step": 11301 }, { "epoch": 46.13061224489796, "grad_norm": 26.657594680786133, "learning_rate": 2.6953061224489794e-05, "loss": 2.453, "step": 11302 }, { "epoch": 46.13469387755102, "grad_norm": 18.60356903076172, "learning_rate": 2.6951020408163268e-05, "loss": 2.4111, "step": 11303 }, { "epoch": 46.138775510204084, "grad_norm": 37.001529693603516, "learning_rate": 2.6948979591836738e-05, "loss": 2.0117, "step": 11304 }, { "epoch": 46.142857142857146, "grad_norm": 18.094289779663086, "learning_rate": 2.6946938775510205e-05, "loss": 2.526, "step": 11305 }, { "epoch": 46.14693877551021, "grad_norm": 27.380582809448242, "learning_rate": 2.6944897959183672e-05, "loss": 1.9029, "step": 11306 }, { "epoch": 46.15102040816326, "grad_norm": 17.268095016479492, "learning_rate": 2.6942857142857143e-05, "loss": 2.4122, "step": 11307 }, { "epoch": 46.155102040816324, "grad_norm": 34.657737731933594, "learning_rate": 2.6940816326530617e-05, "loss": 2.0615, "step": 11308 }, { "epoch": 46.159183673469386, "grad_norm": 39.184242248535156, "learning_rate": 2.6938775510204084e-05, "loss": 1.6279, "step": 11309 }, { "epoch": 46.16326530612245, "grad_norm": 42.87044143676758, "learning_rate": 2.693673469387755e-05, "loss": 1.2853, "step": 11310 }, { "epoch": 46.16734693877551, "grad_norm": 21.709091186523438, "learning_rate": 2.693469387755102e-05, "loss": 2.2982, "step": 11311 }, { "epoch": 46.17142857142857, "grad_norm": 44.108642578125, "learning_rate": 2.693265306122449e-05, "loss": 1.1561, "step": 11312 }, { "epoch": 46.17551020408163, "grad_norm": 14.64854907989502, "learning_rate": 2.6930612244897963e-05, "loss": 2.5634, "step": 11313 }, { "epoch": 46.179591836734694, "grad_norm": 41.84084701538086, "learning_rate": 2.692857142857143e-05, "loss": 1.3193, "step": 11314 }, { "epoch": 46.183673469387756, "grad_norm": 19.96953773498535, "learning_rate": 2.69265306122449e-05, "loss": 2.4852, "step": 11315 }, { "epoch": 46.18775510204082, "grad_norm": 17.732624053955078, "learning_rate": 2.6924489795918367e-05, "loss": 2.1695, "step": 11316 }, { "epoch": 46.19183673469388, "grad_norm": 26.65540313720703, "learning_rate": 2.6922448979591834e-05, "loss": 1.8634, "step": 11317 }, { "epoch": 46.19591836734694, "grad_norm": 31.605897903442383, "learning_rate": 2.692040816326531e-05, "loss": 0.4806, "step": 11318 }, { "epoch": 46.2, "grad_norm": 26.988479614257812, "learning_rate": 2.691836734693878e-05, "loss": 1.8187, "step": 11319 }, { "epoch": 46.204081632653065, "grad_norm": 20.416154861450195, "learning_rate": 2.6916326530612246e-05, "loss": 1.7498, "step": 11320 }, { "epoch": 46.20816326530612, "grad_norm": 32.50031280517578, "learning_rate": 2.6914285714285713e-05, "loss": 1.4731, "step": 11321 }, { "epoch": 46.21224489795918, "grad_norm": 18.986644744873047, "learning_rate": 2.6912244897959184e-05, "loss": 2.5322, "step": 11322 }, { "epoch": 46.21632653061224, "grad_norm": 35.286964416503906, "learning_rate": 2.6910204081632658e-05, "loss": 0.6922, "step": 11323 }, { "epoch": 46.220408163265304, "grad_norm": 13.762917518615723, "learning_rate": 2.6908163265306125e-05, "loss": 2.7922, "step": 11324 }, { "epoch": 46.224489795918366, "grad_norm": 15.91734790802002, "learning_rate": 2.6906122448979592e-05, "loss": 2.855, "step": 11325 }, { "epoch": 46.22857142857143, "grad_norm": 36.90888977050781, "learning_rate": 2.6904081632653062e-05, "loss": 1.0736, "step": 11326 }, { "epoch": 46.23265306122449, "grad_norm": 43.17621612548828, "learning_rate": 2.690204081632653e-05, "loss": 1.9374, "step": 11327 }, { "epoch": 46.23673469387755, "grad_norm": 28.678537368774414, "learning_rate": 2.6900000000000003e-05, "loss": 1.445, "step": 11328 }, { "epoch": 46.24081632653061, "grad_norm": 18.10653305053711, "learning_rate": 2.689795918367347e-05, "loss": 2.4526, "step": 11329 }, { "epoch": 46.244897959183675, "grad_norm": 15.482381820678711, "learning_rate": 2.689591836734694e-05, "loss": 2.7376, "step": 11330 }, { "epoch": 46.248979591836736, "grad_norm": 11.788914680480957, "learning_rate": 2.6893877551020408e-05, "loss": 3.1661, "step": 11331 }, { "epoch": 46.2530612244898, "grad_norm": 37.35972213745117, "learning_rate": 2.6891836734693882e-05, "loss": 1.3837, "step": 11332 }, { "epoch": 46.25714285714286, "grad_norm": 19.038782119750977, "learning_rate": 2.688979591836735e-05, "loss": 2.3716, "step": 11333 }, { "epoch": 46.26122448979592, "grad_norm": 24.71091651916504, "learning_rate": 2.688775510204082e-05, "loss": 1.8498, "step": 11334 }, { "epoch": 46.265306122448976, "grad_norm": 20.89400863647461, "learning_rate": 2.6885714285714287e-05, "loss": 1.83, "step": 11335 }, { "epoch": 46.26938775510204, "grad_norm": 14.529926300048828, "learning_rate": 2.6883673469387754e-05, "loss": 2.7967, "step": 11336 }, { "epoch": 46.2734693877551, "grad_norm": 25.248517990112305, "learning_rate": 2.6881632653061228e-05, "loss": 1.6571, "step": 11337 }, { "epoch": 46.27755102040816, "grad_norm": 14.922396659851074, "learning_rate": 2.6879591836734695e-05, "loss": 3.0165, "step": 11338 }, { "epoch": 46.28163265306122, "grad_norm": 24.173242568969727, "learning_rate": 2.6877551020408165e-05, "loss": 2.3676, "step": 11339 }, { "epoch": 46.285714285714285, "grad_norm": 45.03221130371094, "learning_rate": 2.6875510204081633e-05, "loss": 0.5543, "step": 11340 }, { "epoch": 46.289795918367346, "grad_norm": 27.137451171875, "learning_rate": 2.68734693877551e-05, "loss": 2.3146, "step": 11341 }, { "epoch": 46.29387755102041, "grad_norm": 19.673566818237305, "learning_rate": 2.6871428571428574e-05, "loss": 2.4496, "step": 11342 }, { "epoch": 46.29795918367347, "grad_norm": 40.782352447509766, "learning_rate": 2.6869387755102044e-05, "loss": 0.4779, "step": 11343 }, { "epoch": 46.30204081632653, "grad_norm": 36.603614807128906, "learning_rate": 2.686734693877551e-05, "loss": 1.1734, "step": 11344 }, { "epoch": 46.30612244897959, "grad_norm": 37.70683670043945, "learning_rate": 2.686530612244898e-05, "loss": 0.5029, "step": 11345 }, { "epoch": 46.310204081632655, "grad_norm": 36.94087600708008, "learning_rate": 2.686326530612245e-05, "loss": 2.1402, "step": 11346 }, { "epoch": 46.31428571428572, "grad_norm": 33.063594818115234, "learning_rate": 2.6861224489795923e-05, "loss": 1.2647, "step": 11347 }, { "epoch": 46.31836734693878, "grad_norm": 27.187999725341797, "learning_rate": 2.685918367346939e-05, "loss": 1.6217, "step": 11348 }, { "epoch": 46.32244897959184, "grad_norm": 47.98436737060547, "learning_rate": 2.6857142857142857e-05, "loss": 1.0187, "step": 11349 }, { "epoch": 46.326530612244895, "grad_norm": 33.930511474609375, "learning_rate": 2.6855102040816328e-05, "loss": 1.2509, "step": 11350 }, { "epoch": 46.33061224489796, "grad_norm": 32.986968994140625, "learning_rate": 2.6853061224489795e-05, "loss": 1.7245, "step": 11351 }, { "epoch": 46.33469387755102, "grad_norm": 49.798179626464844, "learning_rate": 2.685102040816327e-05, "loss": 1.4886, "step": 11352 }, { "epoch": 46.33877551020408, "grad_norm": 19.675472259521484, "learning_rate": 2.6848979591836736e-05, "loss": 2.3556, "step": 11353 }, { "epoch": 46.34285714285714, "grad_norm": 28.2037353515625, "learning_rate": 2.6846938775510206e-05, "loss": 2.2149, "step": 11354 }, { "epoch": 46.3469387755102, "grad_norm": 24.52973175048828, "learning_rate": 2.6844897959183673e-05, "loss": 1.7622, "step": 11355 }, { "epoch": 46.351020408163265, "grad_norm": 24.734758377075195, "learning_rate": 2.684285714285714e-05, "loss": 2.2407, "step": 11356 }, { "epoch": 46.35510204081633, "grad_norm": 46.93643569946289, "learning_rate": 2.6840816326530614e-05, "loss": 1.0271, "step": 11357 }, { "epoch": 46.35918367346939, "grad_norm": 25.015045166015625, "learning_rate": 2.6838775510204085e-05, "loss": 2.3152, "step": 11358 }, { "epoch": 46.36326530612245, "grad_norm": 23.840106964111328, "learning_rate": 2.6836734693877552e-05, "loss": 2.0848, "step": 11359 }, { "epoch": 46.36734693877551, "grad_norm": 29.565752029418945, "learning_rate": 2.683469387755102e-05, "loss": 1.0796, "step": 11360 }, { "epoch": 46.371428571428574, "grad_norm": 29.06832504272461, "learning_rate": 2.683265306122449e-05, "loss": 2.1182, "step": 11361 }, { "epoch": 46.375510204081635, "grad_norm": 27.435016632080078, "learning_rate": 2.6830612244897964e-05, "loss": 1.502, "step": 11362 }, { "epoch": 46.3795918367347, "grad_norm": 14.935953140258789, "learning_rate": 2.682857142857143e-05, "loss": 2.8284, "step": 11363 }, { "epoch": 46.38367346938775, "grad_norm": 39.32400894165039, "learning_rate": 2.6826530612244898e-05, "loss": 0.4062, "step": 11364 }, { "epoch": 46.38775510204081, "grad_norm": 27.90336799621582, "learning_rate": 2.6824489795918368e-05, "loss": 1.5712, "step": 11365 }, { "epoch": 46.391836734693875, "grad_norm": 9.112431526184082, "learning_rate": 2.6822448979591835e-05, "loss": 3.4071, "step": 11366 }, { "epoch": 46.39591836734694, "grad_norm": 24.13798713684082, "learning_rate": 2.682040816326531e-05, "loss": 1.8448, "step": 11367 }, { "epoch": 46.4, "grad_norm": 25.35022735595703, "learning_rate": 2.6818367346938776e-05, "loss": 2.3352, "step": 11368 }, { "epoch": 46.40408163265306, "grad_norm": 58.865692138671875, "learning_rate": 2.6816326530612247e-05, "loss": 1.3012, "step": 11369 }, { "epoch": 46.40816326530612, "grad_norm": 17.74881362915039, "learning_rate": 2.6814285714285714e-05, "loss": 2.7775, "step": 11370 }, { "epoch": 46.412244897959184, "grad_norm": 25.50129508972168, "learning_rate": 2.681224489795918e-05, "loss": 2.1371, "step": 11371 }, { "epoch": 46.416326530612245, "grad_norm": 29.85834312438965, "learning_rate": 2.6810204081632655e-05, "loss": 2.1533, "step": 11372 }, { "epoch": 46.42040816326531, "grad_norm": 29.297555923461914, "learning_rate": 2.6808163265306126e-05, "loss": 2.0189, "step": 11373 }, { "epoch": 46.42448979591837, "grad_norm": 30.518709182739258, "learning_rate": 2.6806122448979593e-05, "loss": 1.5839, "step": 11374 }, { "epoch": 46.42857142857143, "grad_norm": 18.49831771850586, "learning_rate": 2.680408163265306e-05, "loss": 2.3386, "step": 11375 }, { "epoch": 46.43265306122449, "grad_norm": 29.60710906982422, "learning_rate": 2.6802040816326534e-05, "loss": 1.8306, "step": 11376 }, { "epoch": 46.436734693877554, "grad_norm": 27.206859588623047, "learning_rate": 2.6800000000000004e-05, "loss": 1.8545, "step": 11377 }, { "epoch": 46.440816326530616, "grad_norm": 17.706323623657227, "learning_rate": 2.679795918367347e-05, "loss": 2.1336, "step": 11378 }, { "epoch": 46.44489795918367, "grad_norm": 36.750728607177734, "learning_rate": 2.679591836734694e-05, "loss": 1.1072, "step": 11379 }, { "epoch": 46.44897959183673, "grad_norm": 18.458576202392578, "learning_rate": 2.679387755102041e-05, "loss": 2.2584, "step": 11380 }, { "epoch": 46.453061224489794, "grad_norm": 53.00048065185547, "learning_rate": 2.6791836734693883e-05, "loss": 0.6657, "step": 11381 }, { "epoch": 46.457142857142856, "grad_norm": 20.423442840576172, "learning_rate": 2.678979591836735e-05, "loss": 2.3531, "step": 11382 }, { "epoch": 46.46122448979592, "grad_norm": 32.51708221435547, "learning_rate": 2.6787755102040817e-05, "loss": 0.7401, "step": 11383 }, { "epoch": 46.46530612244898, "grad_norm": 29.690067291259766, "learning_rate": 2.6785714285714288e-05, "loss": 2.0771, "step": 11384 }, { "epoch": 46.46938775510204, "grad_norm": 24.69036865234375, "learning_rate": 2.6783673469387755e-05, "loss": 2.0775, "step": 11385 }, { "epoch": 46.4734693877551, "grad_norm": 24.06089210510254, "learning_rate": 2.678163265306123e-05, "loss": 2.3333, "step": 11386 }, { "epoch": 46.477551020408164, "grad_norm": 42.01222229003906, "learning_rate": 2.6779591836734696e-05, "loss": 1.1973, "step": 11387 }, { "epoch": 46.481632653061226, "grad_norm": 33.44012451171875, "learning_rate": 2.6777551020408166e-05, "loss": 2.0145, "step": 11388 }, { "epoch": 46.48571428571429, "grad_norm": 37.02669906616211, "learning_rate": 2.6775510204081634e-05, "loss": 1.0477, "step": 11389 }, { "epoch": 46.48979591836735, "grad_norm": 32.124916076660156, "learning_rate": 2.67734693877551e-05, "loss": 1.9792, "step": 11390 }, { "epoch": 46.49387755102041, "grad_norm": 25.991439819335938, "learning_rate": 2.6771428571428575e-05, "loss": 1.9229, "step": 11391 }, { "epoch": 46.49795918367347, "grad_norm": 36.191219329833984, "learning_rate": 2.676938775510204e-05, "loss": 1.5125, "step": 11392 }, { "epoch": 46.50204081632653, "grad_norm": 27.405067443847656, "learning_rate": 2.6767346938775512e-05, "loss": 1.6337, "step": 11393 }, { "epoch": 46.50612244897959, "grad_norm": 32.90495681762695, "learning_rate": 2.676530612244898e-05, "loss": 1.1899, "step": 11394 }, { "epoch": 46.51020408163265, "grad_norm": 27.710012435913086, "learning_rate": 2.6763265306122446e-05, "loss": 1.7724, "step": 11395 }, { "epoch": 46.51428571428571, "grad_norm": 30.37972640991211, "learning_rate": 2.676122448979592e-05, "loss": 1.2264, "step": 11396 }, { "epoch": 46.518367346938774, "grad_norm": 31.718921661376953, "learning_rate": 2.675918367346939e-05, "loss": 1.3643, "step": 11397 }, { "epoch": 46.522448979591836, "grad_norm": 32.01045227050781, "learning_rate": 2.6757142857142858e-05, "loss": 2.008, "step": 11398 }, { "epoch": 46.5265306122449, "grad_norm": 26.773427963256836, "learning_rate": 2.6755102040816325e-05, "loss": 1.8191, "step": 11399 }, { "epoch": 46.53061224489796, "grad_norm": 27.717912673950195, "learning_rate": 2.6753061224489796e-05, "loss": 1.7051, "step": 11400 }, { "epoch": 46.53469387755102, "grad_norm": 33.1543083190918, "learning_rate": 2.675102040816327e-05, "loss": 1.1865, "step": 11401 }, { "epoch": 46.53877551020408, "grad_norm": 27.283733367919922, "learning_rate": 2.6748979591836737e-05, "loss": 2.1555, "step": 11402 }, { "epoch": 46.542857142857144, "grad_norm": 23.05213737487793, "learning_rate": 2.6746938775510204e-05, "loss": 2.211, "step": 11403 }, { "epoch": 46.546938775510206, "grad_norm": 34.47644805908203, "learning_rate": 2.6744897959183674e-05, "loss": 0.6394, "step": 11404 }, { "epoch": 46.55102040816327, "grad_norm": 26.63722801208496, "learning_rate": 2.674285714285714e-05, "loss": 1.7765, "step": 11405 }, { "epoch": 46.55510204081633, "grad_norm": 32.51848602294922, "learning_rate": 2.6740816326530615e-05, "loss": 1.4818, "step": 11406 }, { "epoch": 46.55918367346939, "grad_norm": 30.750802993774414, "learning_rate": 2.6738775510204082e-05, "loss": 1.5825, "step": 11407 }, { "epoch": 46.563265306122446, "grad_norm": 20.600997924804688, "learning_rate": 2.6736734693877553e-05, "loss": 2.2145, "step": 11408 }, { "epoch": 46.56734693877551, "grad_norm": 38.61012649536133, "learning_rate": 2.673469387755102e-05, "loss": 1.333, "step": 11409 }, { "epoch": 46.57142857142857, "grad_norm": 13.252924919128418, "learning_rate": 2.6732653061224487e-05, "loss": 0.1094, "step": 11410 }, { "epoch": 46.57551020408163, "grad_norm": 19.22584342956543, "learning_rate": 2.673061224489796e-05, "loss": 2.3979, "step": 11411 }, { "epoch": 46.57959183673469, "grad_norm": 29.938426971435547, "learning_rate": 2.672857142857143e-05, "loss": 1.234, "step": 11412 }, { "epoch": 46.583673469387755, "grad_norm": 20.95356559753418, "learning_rate": 2.67265306122449e-05, "loss": 2.4245, "step": 11413 }, { "epoch": 46.587755102040816, "grad_norm": 21.089515686035156, "learning_rate": 2.6724489795918366e-05, "loss": 2.3983, "step": 11414 }, { "epoch": 46.59183673469388, "grad_norm": 43.902706146240234, "learning_rate": 2.6722448979591836e-05, "loss": 1.0754, "step": 11415 }, { "epoch": 46.59591836734694, "grad_norm": 31.95993423461914, "learning_rate": 2.672040816326531e-05, "loss": 2.103, "step": 11416 }, { "epoch": 46.6, "grad_norm": 32.98745346069336, "learning_rate": 2.6718367346938777e-05, "loss": 1.4332, "step": 11417 }, { "epoch": 46.60408163265306, "grad_norm": 38.43348693847656, "learning_rate": 2.6716326530612245e-05, "loss": 0.9496, "step": 11418 }, { "epoch": 46.608163265306125, "grad_norm": 32.35223388671875, "learning_rate": 2.6714285714285715e-05, "loss": 1.7221, "step": 11419 }, { "epoch": 46.61224489795919, "grad_norm": 15.500972747802734, "learning_rate": 2.671224489795919e-05, "loss": 2.9188, "step": 11420 }, { "epoch": 46.61632653061225, "grad_norm": 33.560211181640625, "learning_rate": 2.6710204081632656e-05, "loss": 1.5969, "step": 11421 }, { "epoch": 46.6204081632653, "grad_norm": 18.48566246032715, "learning_rate": 2.6708163265306123e-05, "loss": 2.5024, "step": 11422 }, { "epoch": 46.624489795918365, "grad_norm": 17.804475784301758, "learning_rate": 2.6706122448979594e-05, "loss": 2.1101, "step": 11423 }, { "epoch": 46.628571428571426, "grad_norm": 25.354711532592773, "learning_rate": 2.670408163265306e-05, "loss": 1.9896, "step": 11424 }, { "epoch": 46.63265306122449, "grad_norm": 20.042495727539062, "learning_rate": 2.6702040816326535e-05, "loss": 2.2833, "step": 11425 }, { "epoch": 46.63673469387755, "grad_norm": 17.136943817138672, "learning_rate": 2.6700000000000002e-05, "loss": 2.5541, "step": 11426 }, { "epoch": 46.64081632653061, "grad_norm": 23.671741485595703, "learning_rate": 2.6697959183673472e-05, "loss": 1.9408, "step": 11427 }, { "epoch": 46.64489795918367, "grad_norm": 27.819276809692383, "learning_rate": 2.669591836734694e-05, "loss": 1.6197, "step": 11428 }, { "epoch": 46.648979591836735, "grad_norm": 30.526870727539062, "learning_rate": 2.6693877551020407e-05, "loss": 1.2776, "step": 11429 }, { "epoch": 46.6530612244898, "grad_norm": 30.41259002685547, "learning_rate": 2.669183673469388e-05, "loss": 1.8859, "step": 11430 }, { "epoch": 46.65714285714286, "grad_norm": 26.637609481811523, "learning_rate": 2.668979591836735e-05, "loss": 2.0425, "step": 11431 }, { "epoch": 46.66122448979592, "grad_norm": 32.7696647644043, "learning_rate": 2.6687755102040818e-05, "loss": 0.9543, "step": 11432 }, { "epoch": 46.66530612244898, "grad_norm": 45.64445495605469, "learning_rate": 2.6685714285714285e-05, "loss": 1.2882, "step": 11433 }, { "epoch": 46.66938775510204, "grad_norm": 26.251237869262695, "learning_rate": 2.6683673469387756e-05, "loss": 1.6262, "step": 11434 }, { "epoch": 46.673469387755105, "grad_norm": 24.367992401123047, "learning_rate": 2.668163265306123e-05, "loss": 2.0845, "step": 11435 }, { "epoch": 46.67755102040816, "grad_norm": 30.5570011138916, "learning_rate": 2.6679591836734697e-05, "loss": 1.9295, "step": 11436 }, { "epoch": 46.68163265306122, "grad_norm": 23.816482543945312, "learning_rate": 2.6677551020408164e-05, "loss": 2.4283, "step": 11437 }, { "epoch": 46.68571428571428, "grad_norm": 29.59957504272461, "learning_rate": 2.6675510204081634e-05, "loss": 2.1945, "step": 11438 }, { "epoch": 46.689795918367345, "grad_norm": 34.9679069519043, "learning_rate": 2.66734693877551e-05, "loss": 1.6883, "step": 11439 }, { "epoch": 46.69387755102041, "grad_norm": 36.36173629760742, "learning_rate": 2.6671428571428576e-05, "loss": 1.0903, "step": 11440 }, { "epoch": 46.69795918367347, "grad_norm": 34.74089050292969, "learning_rate": 2.6669387755102043e-05, "loss": 1.3524, "step": 11441 }, { "epoch": 46.70204081632653, "grad_norm": 42.9686164855957, "learning_rate": 2.6667346938775513e-05, "loss": 1.4314, "step": 11442 }, { "epoch": 46.70612244897959, "grad_norm": 42.09386444091797, "learning_rate": 2.666530612244898e-05, "loss": 0.6535, "step": 11443 }, { "epoch": 46.710204081632654, "grad_norm": 38.54117965698242, "learning_rate": 2.6663265306122447e-05, "loss": 0.5731, "step": 11444 }, { "epoch": 46.714285714285715, "grad_norm": 18.338844299316406, "learning_rate": 2.666122448979592e-05, "loss": 2.2283, "step": 11445 }, { "epoch": 46.71836734693878, "grad_norm": 43.6554069519043, "learning_rate": 2.665918367346939e-05, "loss": 1.1082, "step": 11446 }, { "epoch": 46.72244897959184, "grad_norm": 14.798325538635254, "learning_rate": 2.665714285714286e-05, "loss": 2.912, "step": 11447 }, { "epoch": 46.7265306122449, "grad_norm": 20.109994888305664, "learning_rate": 2.6655102040816326e-05, "loss": 2.5751, "step": 11448 }, { "epoch": 46.73061224489796, "grad_norm": 17.472423553466797, "learning_rate": 2.6653061224489793e-05, "loss": 2.7712, "step": 11449 }, { "epoch": 46.734693877551024, "grad_norm": 24.91427993774414, "learning_rate": 2.6651020408163267e-05, "loss": 2.4037, "step": 11450 }, { "epoch": 46.73877551020408, "grad_norm": 21.866456985473633, "learning_rate": 2.6648979591836738e-05, "loss": 2.3827, "step": 11451 }, { "epoch": 46.74285714285714, "grad_norm": 21.08861541748047, "learning_rate": 2.6646938775510205e-05, "loss": 2.1745, "step": 11452 }, { "epoch": 46.7469387755102, "grad_norm": 22.777862548828125, "learning_rate": 2.6644897959183672e-05, "loss": 1.8428, "step": 11453 }, { "epoch": 46.751020408163264, "grad_norm": 22.446706771850586, "learning_rate": 2.6642857142857142e-05, "loss": 2.413, "step": 11454 }, { "epoch": 46.755102040816325, "grad_norm": 35.51626205444336, "learning_rate": 2.6640816326530616e-05, "loss": 1.8892, "step": 11455 }, { "epoch": 46.75918367346939, "grad_norm": 26.562759399414062, "learning_rate": 2.6638775510204083e-05, "loss": 2.2794, "step": 11456 }, { "epoch": 46.76326530612245, "grad_norm": 18.356170654296875, "learning_rate": 2.663673469387755e-05, "loss": 2.5413, "step": 11457 }, { "epoch": 46.76734693877551, "grad_norm": 50.60405731201172, "learning_rate": 2.663469387755102e-05, "loss": 1.407, "step": 11458 }, { "epoch": 46.77142857142857, "grad_norm": 23.32068634033203, "learning_rate": 2.6632653061224488e-05, "loss": 1.6184, "step": 11459 }, { "epoch": 46.775510204081634, "grad_norm": 24.704051971435547, "learning_rate": 2.6630612244897962e-05, "loss": 1.58, "step": 11460 }, { "epoch": 46.779591836734696, "grad_norm": 24.720596313476562, "learning_rate": 2.662857142857143e-05, "loss": 1.6236, "step": 11461 }, { "epoch": 46.78367346938776, "grad_norm": 27.90550422668457, "learning_rate": 2.66265306122449e-05, "loss": 1.8397, "step": 11462 }, { "epoch": 46.78775510204082, "grad_norm": 22.5570125579834, "learning_rate": 2.6624489795918367e-05, "loss": 2.2663, "step": 11463 }, { "epoch": 46.79183673469388, "grad_norm": 41.24461364746094, "learning_rate": 2.6622448979591834e-05, "loss": 1.2376, "step": 11464 }, { "epoch": 46.795918367346935, "grad_norm": 22.4901123046875, "learning_rate": 2.6620408163265308e-05, "loss": 2.2646, "step": 11465 }, { "epoch": 46.8, "grad_norm": 30.450233459472656, "learning_rate": 2.661836734693878e-05, "loss": 1.3763, "step": 11466 }, { "epoch": 46.80408163265306, "grad_norm": 24.55251693725586, "learning_rate": 2.6616326530612245e-05, "loss": 1.9123, "step": 11467 }, { "epoch": 46.80816326530612, "grad_norm": 15.593560218811035, "learning_rate": 2.6614285714285713e-05, "loss": 2.968, "step": 11468 }, { "epoch": 46.81224489795918, "grad_norm": 42.275020599365234, "learning_rate": 2.6612244897959187e-05, "loss": 1.2031, "step": 11469 }, { "epoch": 46.816326530612244, "grad_norm": 19.309019088745117, "learning_rate": 2.6610204081632657e-05, "loss": 2.2658, "step": 11470 }, { "epoch": 46.820408163265306, "grad_norm": 23.19045066833496, "learning_rate": 2.6608163265306124e-05, "loss": 1.74, "step": 11471 }, { "epoch": 46.82448979591837, "grad_norm": 20.048465728759766, "learning_rate": 2.660612244897959e-05, "loss": 2.5641, "step": 11472 }, { "epoch": 46.82857142857143, "grad_norm": 36.795204162597656, "learning_rate": 2.6604081632653062e-05, "loss": 1.581, "step": 11473 }, { "epoch": 46.83265306122449, "grad_norm": 63.17436981201172, "learning_rate": 2.6602040816326536e-05, "loss": 0.4705, "step": 11474 }, { "epoch": 46.83673469387755, "grad_norm": 12.99245548248291, "learning_rate": 2.6600000000000003e-05, "loss": 2.9848, "step": 11475 }, { "epoch": 46.840816326530614, "grad_norm": 30.83795738220215, "learning_rate": 2.659795918367347e-05, "loss": 1.8, "step": 11476 }, { "epoch": 46.844897959183676, "grad_norm": 18.0651798248291, "learning_rate": 2.659591836734694e-05, "loss": 2.5382, "step": 11477 }, { "epoch": 46.84897959183674, "grad_norm": 18.34878921508789, "learning_rate": 2.6593877551020408e-05, "loss": 2.4557, "step": 11478 }, { "epoch": 46.85306122448979, "grad_norm": 31.39727783203125, "learning_rate": 2.659183673469388e-05, "loss": 1.8623, "step": 11479 }, { "epoch": 46.857142857142854, "grad_norm": 28.920333862304688, "learning_rate": 2.658979591836735e-05, "loss": 2.0085, "step": 11480 }, { "epoch": 46.861224489795916, "grad_norm": 17.459487915039062, "learning_rate": 2.658775510204082e-05, "loss": 2.3927, "step": 11481 }, { "epoch": 46.86530612244898, "grad_norm": 41.190799713134766, "learning_rate": 2.6585714285714286e-05, "loss": 1.7371, "step": 11482 }, { "epoch": 46.86938775510204, "grad_norm": 29.108776092529297, "learning_rate": 2.6583673469387753e-05, "loss": 1.7403, "step": 11483 }, { "epoch": 46.8734693877551, "grad_norm": 18.739551544189453, "learning_rate": 2.6581632653061227e-05, "loss": 2.9063, "step": 11484 }, { "epoch": 46.87755102040816, "grad_norm": 27.067893981933594, "learning_rate": 2.6579591836734698e-05, "loss": 0.268, "step": 11485 }, { "epoch": 46.881632653061224, "grad_norm": 17.7939395904541, "learning_rate": 2.6577551020408165e-05, "loss": 2.6207, "step": 11486 }, { "epoch": 46.885714285714286, "grad_norm": 32.01146697998047, "learning_rate": 2.6575510204081632e-05, "loss": 1.1806, "step": 11487 }, { "epoch": 46.88979591836735, "grad_norm": 20.5291690826416, "learning_rate": 2.6573469387755103e-05, "loss": 2.5775, "step": 11488 }, { "epoch": 46.89387755102041, "grad_norm": 25.51533317565918, "learning_rate": 2.6571428571428576e-05, "loss": 2.0693, "step": 11489 }, { "epoch": 46.89795918367347, "grad_norm": 15.408340454101562, "learning_rate": 2.6569387755102044e-05, "loss": 2.8506, "step": 11490 }, { "epoch": 46.90204081632653, "grad_norm": 22.296167373657227, "learning_rate": 2.656734693877551e-05, "loss": 2.058, "step": 11491 }, { "epoch": 46.906122448979595, "grad_norm": 32.23660659790039, "learning_rate": 2.656530612244898e-05, "loss": 1.6816, "step": 11492 }, { "epoch": 46.910204081632656, "grad_norm": 28.629697799682617, "learning_rate": 2.656326530612245e-05, "loss": 2.3925, "step": 11493 }, { "epoch": 46.91428571428571, "grad_norm": 15.551109313964844, "learning_rate": 2.6561224489795922e-05, "loss": 2.3066, "step": 11494 }, { "epoch": 46.91836734693877, "grad_norm": 34.20454788208008, "learning_rate": 2.655918367346939e-05, "loss": 1.5771, "step": 11495 }, { "epoch": 46.922448979591834, "grad_norm": 11.86854362487793, "learning_rate": 2.655714285714286e-05, "loss": 3.294, "step": 11496 }, { "epoch": 46.926530612244896, "grad_norm": 36.37374496459961, "learning_rate": 2.6555102040816327e-05, "loss": 1.2187, "step": 11497 }, { "epoch": 46.93061224489796, "grad_norm": 27.994544982910156, "learning_rate": 2.6553061224489794e-05, "loss": 1.8628, "step": 11498 }, { "epoch": 46.93469387755102, "grad_norm": 16.175464630126953, "learning_rate": 2.6551020408163268e-05, "loss": 2.8711, "step": 11499 }, { "epoch": 46.93877551020408, "grad_norm": 22.956323623657227, "learning_rate": 2.6548979591836735e-05, "loss": 2.5185, "step": 11500 }, { "epoch": 46.94285714285714, "grad_norm": 25.91429901123047, "learning_rate": 2.6546938775510206e-05, "loss": 1.9647, "step": 11501 }, { "epoch": 46.946938775510205, "grad_norm": 37.03315734863281, "learning_rate": 2.6544897959183673e-05, "loss": 0.9935, "step": 11502 }, { "epoch": 46.95102040816327, "grad_norm": 17.238719940185547, "learning_rate": 2.654285714285714e-05, "loss": 2.4526, "step": 11503 }, { "epoch": 46.95510204081633, "grad_norm": 43.62403106689453, "learning_rate": 2.6540816326530614e-05, "loss": 1.4342, "step": 11504 }, { "epoch": 46.95918367346939, "grad_norm": 13.851353645324707, "learning_rate": 2.6538775510204084e-05, "loss": 2.7561, "step": 11505 }, { "epoch": 46.96326530612245, "grad_norm": 27.897567749023438, "learning_rate": 2.653673469387755e-05, "loss": 1.9858, "step": 11506 }, { "epoch": 46.96734693877551, "grad_norm": 31.963102340698242, "learning_rate": 2.653469387755102e-05, "loss": 1.2499, "step": 11507 }, { "epoch": 46.97142857142857, "grad_norm": 19.859031677246094, "learning_rate": 2.653265306122449e-05, "loss": 2.2358, "step": 11508 }, { "epoch": 46.97551020408163, "grad_norm": 28.799291610717773, "learning_rate": 2.6530612244897963e-05, "loss": 1.2402, "step": 11509 }, { "epoch": 46.97959183673469, "grad_norm": 28.75751495361328, "learning_rate": 2.652857142857143e-05, "loss": 2.0249, "step": 11510 }, { "epoch": 46.98367346938775, "grad_norm": 18.084606170654297, "learning_rate": 2.6526530612244897e-05, "loss": 2.5174, "step": 11511 }, { "epoch": 46.987755102040815, "grad_norm": 18.70381736755371, "learning_rate": 2.6524489795918368e-05, "loss": 2.1142, "step": 11512 }, { "epoch": 46.99183673469388, "grad_norm": 15.146790504455566, "learning_rate": 2.652244897959184e-05, "loss": 2.5696, "step": 11513 }, { "epoch": 46.99591836734694, "grad_norm": 23.765363693237305, "learning_rate": 2.652040816326531e-05, "loss": 2.3451, "step": 11514 }, { "epoch": 47.0, "grad_norm": 17.025798797607422, "learning_rate": 2.6518367346938776e-05, "loss": 3.0263, "step": 11515 }, { "epoch": 47.00408163265306, "grad_norm": 16.721879959106445, "learning_rate": 2.6516326530612246e-05, "loss": 2.7707, "step": 11516 }, { "epoch": 47.00816326530612, "grad_norm": 31.760360717773438, "learning_rate": 2.6514285714285714e-05, "loss": 1.2365, "step": 11517 }, { "epoch": 47.012244897959185, "grad_norm": 36.83088302612305, "learning_rate": 2.6512244897959187e-05, "loss": 1.6382, "step": 11518 }, { "epoch": 47.01632653061225, "grad_norm": 32.782989501953125, "learning_rate": 2.6510204081632655e-05, "loss": 1.3803, "step": 11519 }, { "epoch": 47.02040816326531, "grad_norm": 41.68218231201172, "learning_rate": 2.6508163265306125e-05, "loss": 1.0164, "step": 11520 }, { "epoch": 47.02448979591837, "grad_norm": 33.0341682434082, "learning_rate": 2.6506122448979592e-05, "loss": 2.0658, "step": 11521 }, { "epoch": 47.02857142857143, "grad_norm": 23.991336822509766, "learning_rate": 2.650408163265306e-05, "loss": 2.2659, "step": 11522 }, { "epoch": 47.03265306122449, "grad_norm": 38.30303192138672, "learning_rate": 2.6502040816326533e-05, "loss": 0.9145, "step": 11523 }, { "epoch": 47.03673469387755, "grad_norm": 30.794708251953125, "learning_rate": 2.6500000000000004e-05, "loss": 1.1148, "step": 11524 }, { "epoch": 47.04081632653061, "grad_norm": 37.29240798950195, "learning_rate": 2.649795918367347e-05, "loss": 0.9657, "step": 11525 }, { "epoch": 47.04489795918367, "grad_norm": 21.33531379699707, "learning_rate": 2.6495918367346938e-05, "loss": 2.296, "step": 11526 }, { "epoch": 47.04897959183673, "grad_norm": 29.928701400756836, "learning_rate": 2.649387755102041e-05, "loss": 0.5833, "step": 11527 }, { "epoch": 47.053061224489795, "grad_norm": 39.88233184814453, "learning_rate": 2.6491836734693882e-05, "loss": 1.5243, "step": 11528 }, { "epoch": 47.05714285714286, "grad_norm": 31.176137924194336, "learning_rate": 2.648979591836735e-05, "loss": 2.0409, "step": 11529 }, { "epoch": 47.06122448979592, "grad_norm": 26.24803352355957, "learning_rate": 2.6487755102040817e-05, "loss": 2.2477, "step": 11530 }, { "epoch": 47.06530612244898, "grad_norm": 11.867835998535156, "learning_rate": 2.6485714285714287e-05, "loss": 3.1381, "step": 11531 }, { "epoch": 47.06938775510204, "grad_norm": 27.285478591918945, "learning_rate": 2.6483673469387754e-05, "loss": 1.4029, "step": 11532 }, { "epoch": 47.073469387755104, "grad_norm": 33.17635726928711, "learning_rate": 2.6481632653061228e-05, "loss": 2.035, "step": 11533 }, { "epoch": 47.077551020408166, "grad_norm": 26.372888565063477, "learning_rate": 2.6479591836734695e-05, "loss": 2.262, "step": 11534 }, { "epoch": 47.08163265306123, "grad_norm": 22.946439743041992, "learning_rate": 2.6477551020408166e-05, "loss": 1.616, "step": 11535 }, { "epoch": 47.08571428571429, "grad_norm": 24.10737419128418, "learning_rate": 2.6475510204081633e-05, "loss": 1.7817, "step": 11536 }, { "epoch": 47.089795918367344, "grad_norm": 31.71200942993164, "learning_rate": 2.64734693877551e-05, "loss": 0.9781, "step": 11537 }, { "epoch": 47.093877551020405, "grad_norm": 25.6861629486084, "learning_rate": 2.6471428571428574e-05, "loss": 1.7616, "step": 11538 }, { "epoch": 47.09795918367347, "grad_norm": 26.414424896240234, "learning_rate": 2.6469387755102045e-05, "loss": 1.441, "step": 11539 }, { "epoch": 47.10204081632653, "grad_norm": 19.283475875854492, "learning_rate": 2.646734693877551e-05, "loss": 2.4291, "step": 11540 }, { "epoch": 47.10612244897959, "grad_norm": 21.614253997802734, "learning_rate": 2.646530612244898e-05, "loss": 2.1428, "step": 11541 }, { "epoch": 47.11020408163265, "grad_norm": 28.466548919677734, "learning_rate": 2.646326530612245e-05, "loss": 1.7294, "step": 11542 }, { "epoch": 47.114285714285714, "grad_norm": 36.135215759277344, "learning_rate": 2.6461224489795923e-05, "loss": 1.517, "step": 11543 }, { "epoch": 47.118367346938776, "grad_norm": 29.444664001464844, "learning_rate": 2.645918367346939e-05, "loss": 2.0706, "step": 11544 }, { "epoch": 47.12244897959184, "grad_norm": 35.72019577026367, "learning_rate": 2.6457142857142857e-05, "loss": 1.4837, "step": 11545 }, { "epoch": 47.1265306122449, "grad_norm": 23.56776237487793, "learning_rate": 2.6455102040816328e-05, "loss": 1.6499, "step": 11546 }, { "epoch": 47.13061224489796, "grad_norm": 24.173952102661133, "learning_rate": 2.6453061224489795e-05, "loss": 2.1462, "step": 11547 }, { "epoch": 47.13469387755102, "grad_norm": 40.459049224853516, "learning_rate": 2.645102040816327e-05, "loss": 1.3526, "step": 11548 }, { "epoch": 47.138775510204084, "grad_norm": 31.707260131835938, "learning_rate": 2.6448979591836736e-05, "loss": 1.1267, "step": 11549 }, { "epoch": 47.142857142857146, "grad_norm": 14.252336502075195, "learning_rate": 2.6446938775510203e-05, "loss": 2.9673, "step": 11550 }, { "epoch": 47.14693877551021, "grad_norm": 20.980323791503906, "learning_rate": 2.6444897959183674e-05, "loss": 2.1223, "step": 11551 }, { "epoch": 47.15102040816326, "grad_norm": 36.643123626708984, "learning_rate": 2.644285714285714e-05, "loss": 0.8737, "step": 11552 }, { "epoch": 47.155102040816324, "grad_norm": 25.213380813598633, "learning_rate": 2.6440816326530615e-05, "loss": 1.8189, "step": 11553 }, { "epoch": 47.159183673469386, "grad_norm": 57.04122543334961, "learning_rate": 2.6438775510204082e-05, "loss": 1.3295, "step": 11554 }, { "epoch": 47.16326530612245, "grad_norm": 33.13456726074219, "learning_rate": 2.6436734693877552e-05, "loss": 1.9255, "step": 11555 }, { "epoch": 47.16734693877551, "grad_norm": 25.933290481567383, "learning_rate": 2.643469387755102e-05, "loss": 2.132, "step": 11556 }, { "epoch": 47.17142857142857, "grad_norm": 27.777320861816406, "learning_rate": 2.6432653061224493e-05, "loss": 1.578, "step": 11557 }, { "epoch": 47.17551020408163, "grad_norm": 29.659730911254883, "learning_rate": 2.643061224489796e-05, "loss": 1.8373, "step": 11558 }, { "epoch": 47.179591836734694, "grad_norm": 36.1094856262207, "learning_rate": 2.642857142857143e-05, "loss": 1.1993, "step": 11559 }, { "epoch": 47.183673469387756, "grad_norm": 28.17896270751953, "learning_rate": 2.6426530612244898e-05, "loss": 2.4324, "step": 11560 }, { "epoch": 47.18775510204082, "grad_norm": 18.501800537109375, "learning_rate": 2.6424489795918365e-05, "loss": 2.0224, "step": 11561 }, { "epoch": 47.19183673469388, "grad_norm": 30.46373176574707, "learning_rate": 2.642244897959184e-05, "loss": 1.5742, "step": 11562 }, { "epoch": 47.19591836734694, "grad_norm": 44.2800407409668, "learning_rate": 2.642040816326531e-05, "loss": 1.3806, "step": 11563 }, { "epoch": 47.2, "grad_norm": 31.402381896972656, "learning_rate": 2.6418367346938777e-05, "loss": 1.4806, "step": 11564 }, { "epoch": 47.204081632653065, "grad_norm": 14.322108268737793, "learning_rate": 2.6416326530612244e-05, "loss": 2.848, "step": 11565 }, { "epoch": 47.20816326530612, "grad_norm": 20.394136428833008, "learning_rate": 2.6414285714285715e-05, "loss": 2.4337, "step": 11566 }, { "epoch": 47.21224489795918, "grad_norm": 30.227041244506836, "learning_rate": 2.641224489795919e-05, "loss": 1.0798, "step": 11567 }, { "epoch": 47.21632653061224, "grad_norm": 33.72711944580078, "learning_rate": 2.6410204081632656e-05, "loss": 1.4097, "step": 11568 }, { "epoch": 47.220408163265304, "grad_norm": 22.311073303222656, "learning_rate": 2.6408163265306123e-05, "loss": 2.2184, "step": 11569 }, { "epoch": 47.224489795918366, "grad_norm": 15.535412788391113, "learning_rate": 2.6406122448979593e-05, "loss": 2.5047, "step": 11570 }, { "epoch": 47.22857142857143, "grad_norm": 40.51138687133789, "learning_rate": 2.640408163265306e-05, "loss": 1.0904, "step": 11571 }, { "epoch": 47.23265306122449, "grad_norm": 32.33551025390625, "learning_rate": 2.6402040816326534e-05, "loss": 1.1663, "step": 11572 }, { "epoch": 47.23673469387755, "grad_norm": 18.358074188232422, "learning_rate": 2.64e-05, "loss": 2.4271, "step": 11573 }, { "epoch": 47.24081632653061, "grad_norm": 22.870868682861328, "learning_rate": 2.6397959183673472e-05, "loss": 2.4243, "step": 11574 }, { "epoch": 47.244897959183675, "grad_norm": 15.484997749328613, "learning_rate": 2.639591836734694e-05, "loss": 2.6579, "step": 11575 }, { "epoch": 47.248979591836736, "grad_norm": 63.26972961425781, "learning_rate": 2.6393877551020406e-05, "loss": 0.5719, "step": 11576 }, { "epoch": 47.2530612244898, "grad_norm": 32.69968795776367, "learning_rate": 2.639183673469388e-05, "loss": 1.0607, "step": 11577 }, { "epoch": 47.25714285714286, "grad_norm": 22.305973052978516, "learning_rate": 2.638979591836735e-05, "loss": 2.2055, "step": 11578 }, { "epoch": 47.26122448979592, "grad_norm": 30.310012817382812, "learning_rate": 2.6387755102040818e-05, "loss": 1.9493, "step": 11579 }, { "epoch": 47.265306122448976, "grad_norm": 41.82372283935547, "learning_rate": 2.6385714285714285e-05, "loss": 0.9391, "step": 11580 }, { "epoch": 47.26938775510204, "grad_norm": 31.18968963623047, "learning_rate": 2.6383673469387755e-05, "loss": 0.4218, "step": 11581 }, { "epoch": 47.2734693877551, "grad_norm": 22.032411575317383, "learning_rate": 2.638163265306123e-05, "loss": 2.491, "step": 11582 }, { "epoch": 47.27755102040816, "grad_norm": 21.179895401000977, "learning_rate": 2.6379591836734696e-05, "loss": 1.7255, "step": 11583 }, { "epoch": 47.28163265306122, "grad_norm": 18.096214294433594, "learning_rate": 2.6377551020408163e-05, "loss": 2.3089, "step": 11584 }, { "epoch": 47.285714285714285, "grad_norm": 34.80385208129883, "learning_rate": 2.6375510204081634e-05, "loss": 1.2596, "step": 11585 }, { "epoch": 47.289795918367346, "grad_norm": 44.04484176635742, "learning_rate": 2.63734693877551e-05, "loss": 0.9658, "step": 11586 }, { "epoch": 47.29387755102041, "grad_norm": 32.3803825378418, "learning_rate": 2.6371428571428575e-05, "loss": 1.8911, "step": 11587 }, { "epoch": 47.29795918367347, "grad_norm": 25.010478973388672, "learning_rate": 2.6369387755102042e-05, "loss": 1.9717, "step": 11588 }, { "epoch": 47.30204081632653, "grad_norm": 39.11996078491211, "learning_rate": 2.6367346938775513e-05, "loss": 1.877, "step": 11589 }, { "epoch": 47.30612244897959, "grad_norm": 27.25115394592285, "learning_rate": 2.636530612244898e-05, "loss": 1.5345, "step": 11590 }, { "epoch": 47.310204081632655, "grad_norm": 24.520673751831055, "learning_rate": 2.6363265306122447e-05, "loss": 2.2888, "step": 11591 }, { "epoch": 47.31428571428572, "grad_norm": 23.250154495239258, "learning_rate": 2.636122448979592e-05, "loss": 2.2286, "step": 11592 }, { "epoch": 47.31836734693878, "grad_norm": 26.375350952148438, "learning_rate": 2.635918367346939e-05, "loss": 1.9137, "step": 11593 }, { "epoch": 47.32244897959184, "grad_norm": 29.333520889282227, "learning_rate": 2.635714285714286e-05, "loss": 0.3815, "step": 11594 }, { "epoch": 47.326530612244895, "grad_norm": 36.33827590942383, "learning_rate": 2.6355102040816326e-05, "loss": 1.6205, "step": 11595 }, { "epoch": 47.33061224489796, "grad_norm": 16.789169311523438, "learning_rate": 2.6353061224489796e-05, "loss": 2.3688, "step": 11596 }, { "epoch": 47.33469387755102, "grad_norm": 19.008535385131836, "learning_rate": 2.635102040816327e-05, "loss": 2.6664, "step": 11597 }, { "epoch": 47.33877551020408, "grad_norm": 13.209166526794434, "learning_rate": 2.6348979591836737e-05, "loss": 2.7623, "step": 11598 }, { "epoch": 47.34285714285714, "grad_norm": 16.391151428222656, "learning_rate": 2.6346938775510204e-05, "loss": 2.4252, "step": 11599 }, { "epoch": 47.3469387755102, "grad_norm": 21.238847732543945, "learning_rate": 2.6344897959183675e-05, "loss": 2.3551, "step": 11600 }, { "epoch": 47.351020408163265, "grad_norm": 45.7391242980957, "learning_rate": 2.6342857142857142e-05, "loss": 1.1957, "step": 11601 }, { "epoch": 47.35510204081633, "grad_norm": 27.87144660949707, "learning_rate": 2.6340816326530616e-05, "loss": 1.2233, "step": 11602 }, { "epoch": 47.35918367346939, "grad_norm": 18.66455078125, "learning_rate": 2.6338775510204083e-05, "loss": 2.4617, "step": 11603 }, { "epoch": 47.36326530612245, "grad_norm": 25.83159637451172, "learning_rate": 2.633673469387755e-05, "loss": 1.846, "step": 11604 }, { "epoch": 47.36734693877551, "grad_norm": 28.66438102722168, "learning_rate": 2.633469387755102e-05, "loss": 1.6533, "step": 11605 }, { "epoch": 47.371428571428574, "grad_norm": 26.68973159790039, "learning_rate": 2.6332653061224494e-05, "loss": 1.6011, "step": 11606 }, { "epoch": 47.375510204081635, "grad_norm": 58.717262268066406, "learning_rate": 2.633061224489796e-05, "loss": 1.3931, "step": 11607 }, { "epoch": 47.3795918367347, "grad_norm": 27.82638931274414, "learning_rate": 2.632857142857143e-05, "loss": 1.6099, "step": 11608 }, { "epoch": 47.38367346938775, "grad_norm": 29.404325485229492, "learning_rate": 2.63265306122449e-05, "loss": 1.1152, "step": 11609 }, { "epoch": 47.38775510204081, "grad_norm": 20.339113235473633, "learning_rate": 2.6324489795918366e-05, "loss": 2.3278, "step": 11610 }, { "epoch": 47.391836734693875, "grad_norm": 21.95616340637207, "learning_rate": 2.632244897959184e-05, "loss": 2.1631, "step": 11611 }, { "epoch": 47.39591836734694, "grad_norm": 32.042423248291016, "learning_rate": 2.6320408163265307e-05, "loss": 1.6116, "step": 11612 }, { "epoch": 47.4, "grad_norm": 18.47671890258789, "learning_rate": 2.6318367346938778e-05, "loss": 2.7643, "step": 11613 }, { "epoch": 47.40408163265306, "grad_norm": 18.338972091674805, "learning_rate": 2.6316326530612245e-05, "loss": 2.4206, "step": 11614 }, { "epoch": 47.40816326530612, "grad_norm": 21.25137710571289, "learning_rate": 2.6314285714285712e-05, "loss": 2.064, "step": 11615 }, { "epoch": 47.412244897959184, "grad_norm": 19.78750228881836, "learning_rate": 2.6312244897959186e-05, "loss": 2.273, "step": 11616 }, { "epoch": 47.416326530612245, "grad_norm": 43.10136413574219, "learning_rate": 2.6310204081632657e-05, "loss": 1.2661, "step": 11617 }, { "epoch": 47.42040816326531, "grad_norm": 10.430668830871582, "learning_rate": 2.6308163265306124e-05, "loss": 3.392, "step": 11618 }, { "epoch": 47.42448979591837, "grad_norm": 32.83080291748047, "learning_rate": 2.630612244897959e-05, "loss": 1.0346, "step": 11619 }, { "epoch": 47.42857142857143, "grad_norm": 29.041305541992188, "learning_rate": 2.630408163265306e-05, "loss": 1.5057, "step": 11620 }, { "epoch": 47.43265306122449, "grad_norm": 23.923620223999023, "learning_rate": 2.6302040816326535e-05, "loss": 1.7411, "step": 11621 }, { "epoch": 47.436734693877554, "grad_norm": 26.51308822631836, "learning_rate": 2.6300000000000002e-05, "loss": 1.8692, "step": 11622 }, { "epoch": 47.440816326530616, "grad_norm": 22.086503982543945, "learning_rate": 2.629795918367347e-05, "loss": 2.1852, "step": 11623 }, { "epoch": 47.44489795918367, "grad_norm": 34.307525634765625, "learning_rate": 2.629591836734694e-05, "loss": 0.8854, "step": 11624 }, { "epoch": 47.44897959183673, "grad_norm": 16.628408432006836, "learning_rate": 2.6293877551020407e-05, "loss": 2.9539, "step": 11625 }, { "epoch": 47.453061224489794, "grad_norm": 20.4473819732666, "learning_rate": 2.629183673469388e-05, "loss": 2.0943, "step": 11626 }, { "epoch": 47.457142857142856, "grad_norm": 39.07154083251953, "learning_rate": 2.6289795918367348e-05, "loss": 2.0112, "step": 11627 }, { "epoch": 47.46122448979592, "grad_norm": 17.959388732910156, "learning_rate": 2.628775510204082e-05, "loss": 2.3769, "step": 11628 }, { "epoch": 47.46530612244898, "grad_norm": 33.413047790527344, "learning_rate": 2.6285714285714286e-05, "loss": 0.4293, "step": 11629 }, { "epoch": 47.46938775510204, "grad_norm": 20.848249435424805, "learning_rate": 2.6283673469387753e-05, "loss": 2.511, "step": 11630 }, { "epoch": 47.4734693877551, "grad_norm": 27.89131736755371, "learning_rate": 2.6281632653061227e-05, "loss": 2.0207, "step": 11631 }, { "epoch": 47.477551020408164, "grad_norm": 11.146814346313477, "learning_rate": 2.6279591836734697e-05, "loss": 3.2232, "step": 11632 }, { "epoch": 47.481632653061226, "grad_norm": 19.86501121520996, "learning_rate": 2.6277551020408164e-05, "loss": 2.4998, "step": 11633 }, { "epoch": 47.48571428571429, "grad_norm": 18.483760833740234, "learning_rate": 2.627551020408163e-05, "loss": 2.3243, "step": 11634 }, { "epoch": 47.48979591836735, "grad_norm": 14.192232131958008, "learning_rate": 2.6273469387755102e-05, "loss": 2.5303, "step": 11635 }, { "epoch": 47.49387755102041, "grad_norm": 29.016220092773438, "learning_rate": 2.6271428571428576e-05, "loss": 2.0397, "step": 11636 }, { "epoch": 47.49795918367347, "grad_norm": 22.80389404296875, "learning_rate": 2.6269387755102043e-05, "loss": 2.2935, "step": 11637 }, { "epoch": 47.50204081632653, "grad_norm": 16.94086265563965, "learning_rate": 2.626734693877551e-05, "loss": 2.5204, "step": 11638 }, { "epoch": 47.50612244897959, "grad_norm": 21.70582389831543, "learning_rate": 2.626530612244898e-05, "loss": 2.1797, "step": 11639 }, { "epoch": 47.51020408163265, "grad_norm": 33.809627532958984, "learning_rate": 2.6263265306122448e-05, "loss": 1.056, "step": 11640 }, { "epoch": 47.51428571428571, "grad_norm": 38.52389144897461, "learning_rate": 2.6261224489795922e-05, "loss": 1.7621, "step": 11641 }, { "epoch": 47.518367346938774, "grad_norm": 25.17667007446289, "learning_rate": 2.625918367346939e-05, "loss": 2.2698, "step": 11642 }, { "epoch": 47.522448979591836, "grad_norm": 20.450719833374023, "learning_rate": 2.625714285714286e-05, "loss": 2.1937, "step": 11643 }, { "epoch": 47.5265306122449, "grad_norm": 27.500843048095703, "learning_rate": 2.6255102040816326e-05, "loss": 1.7133, "step": 11644 }, { "epoch": 47.53061224489796, "grad_norm": 49.55854034423828, "learning_rate": 2.6253061224489794e-05, "loss": 1.5511, "step": 11645 }, { "epoch": 47.53469387755102, "grad_norm": 41.219451904296875, "learning_rate": 2.6251020408163268e-05, "loss": 0.5885, "step": 11646 }, { "epoch": 47.53877551020408, "grad_norm": 39.18165588378906, "learning_rate": 2.6248979591836738e-05, "loss": 1.253, "step": 11647 }, { "epoch": 47.542857142857144, "grad_norm": 16.479108810424805, "learning_rate": 2.6246938775510205e-05, "loss": 2.7917, "step": 11648 }, { "epoch": 47.546938775510206, "grad_norm": 40.95152282714844, "learning_rate": 2.6244897959183672e-05, "loss": 1.1328, "step": 11649 }, { "epoch": 47.55102040816327, "grad_norm": 20.868167877197266, "learning_rate": 2.6242857142857146e-05, "loss": 1.7793, "step": 11650 }, { "epoch": 47.55510204081633, "grad_norm": 14.054397583007812, "learning_rate": 2.6240816326530617e-05, "loss": 2.7842, "step": 11651 }, { "epoch": 47.55918367346939, "grad_norm": 45.22804641723633, "learning_rate": 2.6238775510204084e-05, "loss": 0.5535, "step": 11652 }, { "epoch": 47.563265306122446, "grad_norm": 18.713411331176758, "learning_rate": 2.623673469387755e-05, "loss": 2.4119, "step": 11653 }, { "epoch": 47.56734693877551, "grad_norm": 27.244192123413086, "learning_rate": 2.623469387755102e-05, "loss": 2.0559, "step": 11654 }, { "epoch": 47.57142857142857, "grad_norm": 34.2524299621582, "learning_rate": 2.6232653061224492e-05, "loss": 1.605, "step": 11655 }, { "epoch": 47.57551020408163, "grad_norm": 35.1114616394043, "learning_rate": 2.6230612244897962e-05, "loss": 1.5611, "step": 11656 }, { "epoch": 47.57959183673469, "grad_norm": 33.88515090942383, "learning_rate": 2.622857142857143e-05, "loss": 0.5424, "step": 11657 }, { "epoch": 47.583673469387755, "grad_norm": 27.123693466186523, "learning_rate": 2.6226530612244897e-05, "loss": 1.1355, "step": 11658 }, { "epoch": 47.587755102040816, "grad_norm": 29.57598304748535, "learning_rate": 2.6224489795918367e-05, "loss": 1.7603, "step": 11659 }, { "epoch": 47.59183673469388, "grad_norm": 25.057376861572266, "learning_rate": 2.622244897959184e-05, "loss": 1.8019, "step": 11660 }, { "epoch": 47.59591836734694, "grad_norm": 13.223369598388672, "learning_rate": 2.6220408163265308e-05, "loss": 2.9151, "step": 11661 }, { "epoch": 47.6, "grad_norm": 28.698226928710938, "learning_rate": 2.6218367346938775e-05, "loss": 1.768, "step": 11662 }, { "epoch": 47.60408163265306, "grad_norm": 33.38189697265625, "learning_rate": 2.6216326530612246e-05, "loss": 1.5456, "step": 11663 }, { "epoch": 47.608163265306125, "grad_norm": 25.4478702545166, "learning_rate": 2.6214285714285713e-05, "loss": 2.0071, "step": 11664 }, { "epoch": 47.61224489795919, "grad_norm": 36.18218994140625, "learning_rate": 2.6212244897959187e-05, "loss": 1.04, "step": 11665 }, { "epoch": 47.61632653061225, "grad_norm": 19.105690002441406, "learning_rate": 2.6210204081632654e-05, "loss": 2.3335, "step": 11666 }, { "epoch": 47.6204081632653, "grad_norm": 33.654998779296875, "learning_rate": 2.6208163265306125e-05, "loss": 0.6797, "step": 11667 }, { "epoch": 47.624489795918365, "grad_norm": 32.035396575927734, "learning_rate": 2.6206122448979592e-05, "loss": 1.8591, "step": 11668 }, { "epoch": 47.628571428571426, "grad_norm": 16.809974670410156, "learning_rate": 2.620408163265306e-05, "loss": 2.3804, "step": 11669 }, { "epoch": 47.63265306122449, "grad_norm": 37.019615173339844, "learning_rate": 2.6202040816326533e-05, "loss": 0.6189, "step": 11670 }, { "epoch": 47.63673469387755, "grad_norm": 27.945058822631836, "learning_rate": 2.6200000000000003e-05, "loss": 1.9341, "step": 11671 }, { "epoch": 47.64081632653061, "grad_norm": 44.556209564208984, "learning_rate": 2.619795918367347e-05, "loss": 1.8265, "step": 11672 }, { "epoch": 47.64489795918367, "grad_norm": 17.467239379882812, "learning_rate": 2.6195918367346938e-05, "loss": 2.4265, "step": 11673 }, { "epoch": 47.648979591836735, "grad_norm": 15.41315746307373, "learning_rate": 2.6193877551020408e-05, "loss": 2.729, "step": 11674 }, { "epoch": 47.6530612244898, "grad_norm": 29.067182540893555, "learning_rate": 2.6191836734693882e-05, "loss": 2.1459, "step": 11675 }, { "epoch": 47.65714285714286, "grad_norm": 41.04388427734375, "learning_rate": 2.618979591836735e-05, "loss": 1.6393, "step": 11676 }, { "epoch": 47.66122448979592, "grad_norm": 17.529510498046875, "learning_rate": 2.6187755102040816e-05, "loss": 2.3902, "step": 11677 }, { "epoch": 47.66530612244898, "grad_norm": 34.59443664550781, "learning_rate": 2.6185714285714287e-05, "loss": 0.9909, "step": 11678 }, { "epoch": 47.66938775510204, "grad_norm": 31.07933235168457, "learning_rate": 2.6183673469387754e-05, "loss": 1.7252, "step": 11679 }, { "epoch": 47.673469387755105, "grad_norm": 31.389028549194336, "learning_rate": 2.6181632653061228e-05, "loss": 1.4946, "step": 11680 }, { "epoch": 47.67755102040816, "grad_norm": 21.23110580444336, "learning_rate": 2.6179591836734695e-05, "loss": 2.6068, "step": 11681 }, { "epoch": 47.68163265306122, "grad_norm": 39.854068756103516, "learning_rate": 2.6177551020408165e-05, "loss": 1.2205, "step": 11682 }, { "epoch": 47.68571428571428, "grad_norm": 16.511489868164062, "learning_rate": 2.6175510204081632e-05, "loss": 2.2826, "step": 11683 }, { "epoch": 47.689795918367345, "grad_norm": 34.385501861572266, "learning_rate": 2.61734693877551e-05, "loss": 1.1034, "step": 11684 }, { "epoch": 47.69387755102041, "grad_norm": 19.54541778564453, "learning_rate": 2.6171428571428574e-05, "loss": 2.2146, "step": 11685 }, { "epoch": 47.69795918367347, "grad_norm": 44.760955810546875, "learning_rate": 2.6169387755102044e-05, "loss": 0.3348, "step": 11686 }, { "epoch": 47.70204081632653, "grad_norm": 23.3284854888916, "learning_rate": 2.616734693877551e-05, "loss": 2.3064, "step": 11687 }, { "epoch": 47.70612244897959, "grad_norm": 25.97176742553711, "learning_rate": 2.6165306122448978e-05, "loss": 2.1567, "step": 11688 }, { "epoch": 47.710204081632654, "grad_norm": 6.1301798820495605, "learning_rate": 2.616326530612245e-05, "loss": 0.0903, "step": 11689 }, { "epoch": 47.714285714285715, "grad_norm": 28.224050521850586, "learning_rate": 2.6161224489795923e-05, "loss": 0.4602, "step": 11690 }, { "epoch": 47.71836734693878, "grad_norm": 16.678571701049805, "learning_rate": 2.615918367346939e-05, "loss": 2.4441, "step": 11691 }, { "epoch": 47.72244897959184, "grad_norm": 24.307077407836914, "learning_rate": 2.6157142857142857e-05, "loss": 1.9751, "step": 11692 }, { "epoch": 47.7265306122449, "grad_norm": 11.468929290771484, "learning_rate": 2.6155102040816327e-05, "loss": 2.9703, "step": 11693 }, { "epoch": 47.73061224489796, "grad_norm": 27.432964324951172, "learning_rate": 2.61530612244898e-05, "loss": 1.8394, "step": 11694 }, { "epoch": 47.734693877551024, "grad_norm": 26.752046585083008, "learning_rate": 2.615102040816327e-05, "loss": 2.1139, "step": 11695 }, { "epoch": 47.73877551020408, "grad_norm": 32.06715774536133, "learning_rate": 2.6148979591836736e-05, "loss": 0.5806, "step": 11696 }, { "epoch": 47.74285714285714, "grad_norm": 35.278587341308594, "learning_rate": 2.6146938775510206e-05, "loss": 0.433, "step": 11697 }, { "epoch": 47.7469387755102, "grad_norm": 21.517154693603516, "learning_rate": 2.6144897959183673e-05, "loss": 2.2044, "step": 11698 }, { "epoch": 47.751020408163264, "grad_norm": 20.975967407226562, "learning_rate": 2.6142857142857147e-05, "loss": 2.2779, "step": 11699 }, { "epoch": 47.755102040816325, "grad_norm": 17.761079788208008, "learning_rate": 2.6140816326530614e-05, "loss": 2.7303, "step": 11700 }, { "epoch": 47.75918367346939, "grad_norm": 29.88730812072754, "learning_rate": 2.6138775510204085e-05, "loss": 1.9785, "step": 11701 }, { "epoch": 47.76326530612245, "grad_norm": 23.5316162109375, "learning_rate": 2.6136734693877552e-05, "loss": 2.5378, "step": 11702 }, { "epoch": 47.76734693877551, "grad_norm": 18.363422393798828, "learning_rate": 2.613469387755102e-05, "loss": 2.5295, "step": 11703 }, { "epoch": 47.77142857142857, "grad_norm": 20.5449275970459, "learning_rate": 2.6132653061224493e-05, "loss": 2.5575, "step": 11704 }, { "epoch": 47.775510204081634, "grad_norm": 30.1851863861084, "learning_rate": 2.6130612244897963e-05, "loss": 1.2136, "step": 11705 }, { "epoch": 47.779591836734696, "grad_norm": 19.298425674438477, "learning_rate": 2.612857142857143e-05, "loss": 2.3599, "step": 11706 }, { "epoch": 47.78367346938776, "grad_norm": 29.43030548095703, "learning_rate": 2.6126530612244898e-05, "loss": 1.9709, "step": 11707 }, { "epoch": 47.78775510204082, "grad_norm": 43.94382858276367, "learning_rate": 2.6124489795918368e-05, "loss": 1.0321, "step": 11708 }, { "epoch": 47.79183673469388, "grad_norm": 34.68248748779297, "learning_rate": 2.612244897959184e-05, "loss": 1.9843, "step": 11709 }, { "epoch": 47.795918367346935, "grad_norm": 43.73467254638672, "learning_rate": 2.612040816326531e-05, "loss": 1.5211, "step": 11710 }, { "epoch": 47.8, "grad_norm": 13.139867782592773, "learning_rate": 2.6118367346938776e-05, "loss": 2.8652, "step": 11711 }, { "epoch": 47.80408163265306, "grad_norm": 33.96202087402344, "learning_rate": 2.6116326530612243e-05, "loss": 1.1557, "step": 11712 }, { "epoch": 47.80816326530612, "grad_norm": 28.621456146240234, "learning_rate": 2.6114285714285714e-05, "loss": 1.7522, "step": 11713 }, { "epoch": 47.81224489795918, "grad_norm": 18.30660057067871, "learning_rate": 2.6112244897959188e-05, "loss": 2.5859, "step": 11714 }, { "epoch": 47.816326530612244, "grad_norm": 32.24555969238281, "learning_rate": 2.6110204081632655e-05, "loss": 1.8352, "step": 11715 }, { "epoch": 47.820408163265306, "grad_norm": 19.908557891845703, "learning_rate": 2.6108163265306122e-05, "loss": 2.0857, "step": 11716 }, { "epoch": 47.82448979591837, "grad_norm": 15.018255233764648, "learning_rate": 2.6106122448979593e-05, "loss": 2.7902, "step": 11717 }, { "epoch": 47.82857142857143, "grad_norm": 21.15350341796875, "learning_rate": 2.610408163265306e-05, "loss": 2.4692, "step": 11718 }, { "epoch": 47.83265306122449, "grad_norm": 15.586536407470703, "learning_rate": 2.6102040816326534e-05, "loss": 2.9278, "step": 11719 }, { "epoch": 47.83673469387755, "grad_norm": 21.84113883972168, "learning_rate": 2.61e-05, "loss": 2.5048, "step": 11720 }, { "epoch": 47.840816326530614, "grad_norm": 37.2202262878418, "learning_rate": 2.609795918367347e-05, "loss": 1.5876, "step": 11721 }, { "epoch": 47.844897959183676, "grad_norm": 27.899982452392578, "learning_rate": 2.609591836734694e-05, "loss": 1.9363, "step": 11722 }, { "epoch": 47.84897959183674, "grad_norm": 37.431175231933594, "learning_rate": 2.6093877551020406e-05, "loss": 1.438, "step": 11723 }, { "epoch": 47.85306122448979, "grad_norm": 42.849185943603516, "learning_rate": 2.609183673469388e-05, "loss": 2.1573, "step": 11724 }, { "epoch": 47.857142857142854, "grad_norm": 24.797468185424805, "learning_rate": 2.608979591836735e-05, "loss": 2.3133, "step": 11725 }, { "epoch": 47.861224489795916, "grad_norm": 21.810800552368164, "learning_rate": 2.6087755102040817e-05, "loss": 2.3504, "step": 11726 }, { "epoch": 47.86530612244898, "grad_norm": 15.703454971313477, "learning_rate": 2.6085714285714284e-05, "loss": 2.8593, "step": 11727 }, { "epoch": 47.86938775510204, "grad_norm": 33.66082763671875, "learning_rate": 2.6083673469387755e-05, "loss": 1.244, "step": 11728 }, { "epoch": 47.8734693877551, "grad_norm": 41.01436996459961, "learning_rate": 2.608163265306123e-05, "loss": 1.1443, "step": 11729 }, { "epoch": 47.87755102040816, "grad_norm": 26.887908935546875, "learning_rate": 2.6079591836734696e-05, "loss": 1.5839, "step": 11730 }, { "epoch": 47.881632653061224, "grad_norm": 28.126407623291016, "learning_rate": 2.6077551020408163e-05, "loss": 1.6928, "step": 11731 }, { "epoch": 47.885714285714286, "grad_norm": 20.840539932250977, "learning_rate": 2.6075510204081633e-05, "loss": 2.1093, "step": 11732 }, { "epoch": 47.88979591836735, "grad_norm": 51.44794464111328, "learning_rate": 2.60734693877551e-05, "loss": 0.7035, "step": 11733 }, { "epoch": 47.89387755102041, "grad_norm": 19.440881729125977, "learning_rate": 2.6071428571428574e-05, "loss": 2.248, "step": 11734 }, { "epoch": 47.89795918367347, "grad_norm": 25.136211395263672, "learning_rate": 2.606938775510204e-05, "loss": 1.9881, "step": 11735 }, { "epoch": 47.90204081632653, "grad_norm": 35.91343307495117, "learning_rate": 2.6067346938775512e-05, "loss": 0.7055, "step": 11736 }, { "epoch": 47.906122448979595, "grad_norm": 40.156742095947266, "learning_rate": 2.606530612244898e-05, "loss": 1.34, "step": 11737 }, { "epoch": 47.910204081632656, "grad_norm": 22.102746963500977, "learning_rate": 2.6063265306122453e-05, "loss": 1.7547, "step": 11738 }, { "epoch": 47.91428571428571, "grad_norm": 24.996625900268555, "learning_rate": 2.606122448979592e-05, "loss": 1.7357, "step": 11739 }, { "epoch": 47.91836734693877, "grad_norm": 27.389942169189453, "learning_rate": 2.605918367346939e-05, "loss": 1.7686, "step": 11740 }, { "epoch": 47.922448979591834, "grad_norm": 17.267414093017578, "learning_rate": 2.6057142857142858e-05, "loss": 2.4464, "step": 11741 }, { "epoch": 47.926530612244896, "grad_norm": 27.03895378112793, "learning_rate": 2.6055102040816325e-05, "loss": 1.6054, "step": 11742 }, { "epoch": 47.93061224489796, "grad_norm": 29.675079345703125, "learning_rate": 2.60530612244898e-05, "loss": 1.1738, "step": 11743 }, { "epoch": 47.93469387755102, "grad_norm": 48.078182220458984, "learning_rate": 2.605102040816327e-05, "loss": 0.4612, "step": 11744 }, { "epoch": 47.93877551020408, "grad_norm": 32.40558624267578, "learning_rate": 2.6048979591836737e-05, "loss": 1.038, "step": 11745 }, { "epoch": 47.94285714285714, "grad_norm": 24.165367126464844, "learning_rate": 2.6046938775510204e-05, "loss": 1.8347, "step": 11746 }, { "epoch": 47.946938775510205, "grad_norm": 25.777292251586914, "learning_rate": 2.6044897959183674e-05, "loss": 2.1534, "step": 11747 }, { "epoch": 47.95102040816327, "grad_norm": 16.55518341064453, "learning_rate": 2.6042857142857148e-05, "loss": 2.7835, "step": 11748 }, { "epoch": 47.95510204081633, "grad_norm": 55.59221649169922, "learning_rate": 2.6040816326530615e-05, "loss": 1.2182, "step": 11749 }, { "epoch": 47.95918367346939, "grad_norm": 20.65224266052246, "learning_rate": 2.6038775510204082e-05, "loss": 2.1528, "step": 11750 }, { "epoch": 47.96326530612245, "grad_norm": 50.25081253051758, "learning_rate": 2.6036734693877553e-05, "loss": 1.2796, "step": 11751 }, { "epoch": 47.96734693877551, "grad_norm": 22.19970703125, "learning_rate": 2.603469387755102e-05, "loss": 2.5335, "step": 11752 }, { "epoch": 47.97142857142857, "grad_norm": 30.742206573486328, "learning_rate": 2.6032653061224494e-05, "loss": 1.522, "step": 11753 }, { "epoch": 47.97551020408163, "grad_norm": 42.668827056884766, "learning_rate": 2.603061224489796e-05, "loss": 1.0257, "step": 11754 }, { "epoch": 47.97959183673469, "grad_norm": 22.192934036254883, "learning_rate": 2.602857142857143e-05, "loss": 2.3634, "step": 11755 }, { "epoch": 47.98367346938775, "grad_norm": 22.27035903930664, "learning_rate": 2.60265306122449e-05, "loss": 2.0762, "step": 11756 }, { "epoch": 47.987755102040815, "grad_norm": 21.503206253051758, "learning_rate": 2.6024489795918366e-05, "loss": 2.208, "step": 11757 }, { "epoch": 47.99183673469388, "grad_norm": 20.522768020629883, "learning_rate": 2.602244897959184e-05, "loss": 2.2867, "step": 11758 }, { "epoch": 47.99591836734694, "grad_norm": 25.434425354003906, "learning_rate": 2.6020408163265307e-05, "loss": 2.18, "step": 11759 }, { "epoch": 48.0, "grad_norm": 16.061630249023438, "learning_rate": 2.6018367346938777e-05, "loss": 2.8149, "step": 11760 }, { "epoch": 48.00408163265306, "grad_norm": 31.5072021484375, "learning_rate": 2.6016326530612244e-05, "loss": 1.7657, "step": 11761 }, { "epoch": 48.00816326530612, "grad_norm": 28.047409057617188, "learning_rate": 2.601428571428571e-05, "loss": 1.4554, "step": 11762 }, { "epoch": 48.012244897959185, "grad_norm": 18.810941696166992, "learning_rate": 2.6012244897959185e-05, "loss": 2.3618, "step": 11763 }, { "epoch": 48.01632653061225, "grad_norm": 60.15111541748047, "learning_rate": 2.6010204081632656e-05, "loss": 0.6378, "step": 11764 }, { "epoch": 48.02040816326531, "grad_norm": 22.429048538208008, "learning_rate": 2.6008163265306123e-05, "loss": 2.0458, "step": 11765 }, { "epoch": 48.02448979591837, "grad_norm": 25.143056869506836, "learning_rate": 2.600612244897959e-05, "loss": 2.1908, "step": 11766 }, { "epoch": 48.02857142857143, "grad_norm": 34.03899002075195, "learning_rate": 2.600408163265306e-05, "loss": 1.4854, "step": 11767 }, { "epoch": 48.03265306122449, "grad_norm": 44.215877532958984, "learning_rate": 2.6002040816326535e-05, "loss": 1.7877, "step": 11768 }, { "epoch": 48.03673469387755, "grad_norm": 15.448577880859375, "learning_rate": 2.6000000000000002e-05, "loss": 2.6469, "step": 11769 }, { "epoch": 48.04081632653061, "grad_norm": 21.30165672302246, "learning_rate": 2.599795918367347e-05, "loss": 2.2129, "step": 11770 }, { "epoch": 48.04489795918367, "grad_norm": 38.711483001708984, "learning_rate": 2.599591836734694e-05, "loss": 0.9159, "step": 11771 }, { "epoch": 48.04897959183673, "grad_norm": 38.00879669189453, "learning_rate": 2.5993877551020407e-05, "loss": 1.5567, "step": 11772 }, { "epoch": 48.053061224489795, "grad_norm": 30.08124351501465, "learning_rate": 2.599183673469388e-05, "loss": 1.8859, "step": 11773 }, { "epoch": 48.05714285714286, "grad_norm": 31.33572006225586, "learning_rate": 2.5989795918367348e-05, "loss": 1.5417, "step": 11774 }, { "epoch": 48.06122448979592, "grad_norm": 19.321659088134766, "learning_rate": 2.5987755102040818e-05, "loss": 2.402, "step": 11775 }, { "epoch": 48.06530612244898, "grad_norm": 20.760881423950195, "learning_rate": 2.5985714285714285e-05, "loss": 2.3933, "step": 11776 }, { "epoch": 48.06938775510204, "grad_norm": 28.852619171142578, "learning_rate": 2.5983673469387752e-05, "loss": 0.3935, "step": 11777 }, { "epoch": 48.073469387755104, "grad_norm": 17.90786361694336, "learning_rate": 2.5981632653061226e-05, "loss": 2.3096, "step": 11778 }, { "epoch": 48.077551020408166, "grad_norm": 43.07522201538086, "learning_rate": 2.5979591836734697e-05, "loss": 1.6282, "step": 11779 }, { "epoch": 48.08163265306123, "grad_norm": 20.937868118286133, "learning_rate": 2.5977551020408164e-05, "loss": 2.1398, "step": 11780 }, { "epoch": 48.08571428571429, "grad_norm": 29.118377685546875, "learning_rate": 2.597551020408163e-05, "loss": 1.1159, "step": 11781 }, { "epoch": 48.089795918367344, "grad_norm": 35.51335144042969, "learning_rate": 2.59734693877551e-05, "loss": 1.0005, "step": 11782 }, { "epoch": 48.093877551020405, "grad_norm": 30.021198272705078, "learning_rate": 2.5971428571428575e-05, "loss": 1.4579, "step": 11783 }, { "epoch": 48.09795918367347, "grad_norm": 24.3257999420166, "learning_rate": 2.5969387755102043e-05, "loss": 1.7099, "step": 11784 }, { "epoch": 48.10204081632653, "grad_norm": 27.992570877075195, "learning_rate": 2.596734693877551e-05, "loss": 1.6858, "step": 11785 }, { "epoch": 48.10612244897959, "grad_norm": 40.28681182861328, "learning_rate": 2.596530612244898e-05, "loss": 1.9586, "step": 11786 }, { "epoch": 48.11020408163265, "grad_norm": 26.67293930053711, "learning_rate": 2.5963265306122454e-05, "loss": 2.1814, "step": 11787 }, { "epoch": 48.114285714285714, "grad_norm": 27.64919662475586, "learning_rate": 2.596122448979592e-05, "loss": 1.7722, "step": 11788 }, { "epoch": 48.118367346938776, "grad_norm": 38.36780548095703, "learning_rate": 2.595918367346939e-05, "loss": 0.3729, "step": 11789 }, { "epoch": 48.12244897959184, "grad_norm": 31.644407272338867, "learning_rate": 2.595714285714286e-05, "loss": 1.6141, "step": 11790 }, { "epoch": 48.1265306122449, "grad_norm": 28.39496612548828, "learning_rate": 2.5955102040816326e-05, "loss": 2.1655, "step": 11791 }, { "epoch": 48.13061224489796, "grad_norm": 50.33889389038086, "learning_rate": 2.59530612244898e-05, "loss": 1.1605, "step": 11792 }, { "epoch": 48.13469387755102, "grad_norm": 22.406261444091797, "learning_rate": 2.5951020408163267e-05, "loss": 2.419, "step": 11793 }, { "epoch": 48.138775510204084, "grad_norm": 29.097497940063477, "learning_rate": 2.5948979591836738e-05, "loss": 1.6319, "step": 11794 }, { "epoch": 48.142857142857146, "grad_norm": 44.34478759765625, "learning_rate": 2.5946938775510205e-05, "loss": 0.6601, "step": 11795 }, { "epoch": 48.14693877551021, "grad_norm": 39.2951774597168, "learning_rate": 2.5944897959183672e-05, "loss": 1.349, "step": 11796 }, { "epoch": 48.15102040816326, "grad_norm": 17.18296241760254, "learning_rate": 2.5942857142857146e-05, "loss": 2.6473, "step": 11797 }, { "epoch": 48.155102040816324, "grad_norm": 31.25389862060547, "learning_rate": 2.5940816326530616e-05, "loss": 1.327, "step": 11798 }, { "epoch": 48.159183673469386, "grad_norm": 15.823015213012695, "learning_rate": 2.5938775510204083e-05, "loss": 2.4834, "step": 11799 }, { "epoch": 48.16326530612245, "grad_norm": 31.18977928161621, "learning_rate": 2.593673469387755e-05, "loss": 1.7554, "step": 11800 }, { "epoch": 48.16734693877551, "grad_norm": 19.57921028137207, "learning_rate": 2.593469387755102e-05, "loss": 2.3034, "step": 11801 }, { "epoch": 48.17142857142857, "grad_norm": 29.542192459106445, "learning_rate": 2.5932653061224495e-05, "loss": 1.8996, "step": 11802 }, { "epoch": 48.17551020408163, "grad_norm": 26.94140625, "learning_rate": 2.5930612244897962e-05, "loss": 1.5295, "step": 11803 }, { "epoch": 48.179591836734694, "grad_norm": 28.21415138244629, "learning_rate": 2.592857142857143e-05, "loss": 1.5194, "step": 11804 }, { "epoch": 48.183673469387756, "grad_norm": 26.883445739746094, "learning_rate": 2.59265306122449e-05, "loss": 1.6296, "step": 11805 }, { "epoch": 48.18775510204082, "grad_norm": 17.794038772583008, "learning_rate": 2.5924489795918367e-05, "loss": 2.3296, "step": 11806 }, { "epoch": 48.19183673469388, "grad_norm": 14.167082786560059, "learning_rate": 2.592244897959184e-05, "loss": 3.1335, "step": 11807 }, { "epoch": 48.19591836734694, "grad_norm": 24.44808006286621, "learning_rate": 2.5920408163265308e-05, "loss": 2.048, "step": 11808 }, { "epoch": 48.2, "grad_norm": 26.52613639831543, "learning_rate": 2.5918367346938778e-05, "loss": 1.6978, "step": 11809 }, { "epoch": 48.204081632653065, "grad_norm": 35.4778938293457, "learning_rate": 2.5916326530612245e-05, "loss": 0.384, "step": 11810 }, { "epoch": 48.20816326530612, "grad_norm": 23.678295135498047, "learning_rate": 2.5914285714285713e-05, "loss": 1.6165, "step": 11811 }, { "epoch": 48.21224489795918, "grad_norm": 19.540752410888672, "learning_rate": 2.5912244897959186e-05, "loss": 2.3069, "step": 11812 }, { "epoch": 48.21632653061224, "grad_norm": 28.34130096435547, "learning_rate": 2.5910204081632654e-05, "loss": 2.0436, "step": 11813 }, { "epoch": 48.220408163265304, "grad_norm": 27.58266830444336, "learning_rate": 2.5908163265306124e-05, "loss": 1.3875, "step": 11814 }, { "epoch": 48.224489795918366, "grad_norm": 29.528350830078125, "learning_rate": 2.590612244897959e-05, "loss": 0.2439, "step": 11815 }, { "epoch": 48.22857142857143, "grad_norm": 34.558753967285156, "learning_rate": 2.590408163265306e-05, "loss": 1.1517, "step": 11816 }, { "epoch": 48.23265306122449, "grad_norm": 31.651973724365234, "learning_rate": 2.5902040816326532e-05, "loss": 1.138, "step": 11817 }, { "epoch": 48.23673469387755, "grad_norm": 39.56975555419922, "learning_rate": 2.5900000000000003e-05, "loss": 0.5237, "step": 11818 }, { "epoch": 48.24081632653061, "grad_norm": 22.647695541381836, "learning_rate": 2.589795918367347e-05, "loss": 2.1516, "step": 11819 }, { "epoch": 48.244897959183675, "grad_norm": 27.698108673095703, "learning_rate": 2.5895918367346937e-05, "loss": 2.0235, "step": 11820 }, { "epoch": 48.248979591836736, "grad_norm": 49.826908111572266, "learning_rate": 2.5893877551020408e-05, "loss": 0.8766, "step": 11821 }, { "epoch": 48.2530612244898, "grad_norm": 29.6251163482666, "learning_rate": 2.589183673469388e-05, "loss": 1.0758, "step": 11822 }, { "epoch": 48.25714285714286, "grad_norm": 27.937864303588867, "learning_rate": 2.588979591836735e-05, "loss": 1.8747, "step": 11823 }, { "epoch": 48.26122448979592, "grad_norm": 29.766084671020508, "learning_rate": 2.5887755102040816e-05, "loss": 1.7967, "step": 11824 }, { "epoch": 48.265306122448976, "grad_norm": 15.811915397644043, "learning_rate": 2.5885714285714286e-05, "loss": 2.7974, "step": 11825 }, { "epoch": 48.26938775510204, "grad_norm": 37.552284240722656, "learning_rate": 2.5883673469387753e-05, "loss": 1.9433, "step": 11826 }, { "epoch": 48.2734693877551, "grad_norm": 23.222997665405273, "learning_rate": 2.5881632653061227e-05, "loss": 2.2363, "step": 11827 }, { "epoch": 48.27755102040816, "grad_norm": 9.424342155456543, "learning_rate": 2.5879591836734694e-05, "loss": 0.0804, "step": 11828 }, { "epoch": 48.28163265306122, "grad_norm": 19.180280685424805, "learning_rate": 2.5877551020408165e-05, "loss": 2.6959, "step": 11829 }, { "epoch": 48.285714285714285, "grad_norm": 36.01046371459961, "learning_rate": 2.5875510204081632e-05, "loss": 1.5529, "step": 11830 }, { "epoch": 48.289795918367346, "grad_norm": 24.71222496032715, "learning_rate": 2.5873469387755106e-05, "loss": 1.6169, "step": 11831 }, { "epoch": 48.29387755102041, "grad_norm": 25.057252883911133, "learning_rate": 2.5871428571428573e-05, "loss": 2.2146, "step": 11832 }, { "epoch": 48.29795918367347, "grad_norm": 39.210243225097656, "learning_rate": 2.5869387755102044e-05, "loss": 0.3745, "step": 11833 }, { "epoch": 48.30204081632653, "grad_norm": 38.8322868347168, "learning_rate": 2.586734693877551e-05, "loss": 1.2075, "step": 11834 }, { "epoch": 48.30612244897959, "grad_norm": 35.74840545654297, "learning_rate": 2.5865306122448978e-05, "loss": 1.2589, "step": 11835 }, { "epoch": 48.310204081632655, "grad_norm": 43.08373260498047, "learning_rate": 2.586326530612245e-05, "loss": 0.4674, "step": 11836 }, { "epoch": 48.31428571428572, "grad_norm": 21.353384017944336, "learning_rate": 2.5861224489795922e-05, "loss": 2.4449, "step": 11837 }, { "epoch": 48.31836734693878, "grad_norm": 26.804874420166016, "learning_rate": 2.585918367346939e-05, "loss": 1.8896, "step": 11838 }, { "epoch": 48.32244897959184, "grad_norm": 28.79098129272461, "learning_rate": 2.5857142857142856e-05, "loss": 2.0494, "step": 11839 }, { "epoch": 48.326530612244895, "grad_norm": 19.60409164428711, "learning_rate": 2.5855102040816327e-05, "loss": 2.018, "step": 11840 }, { "epoch": 48.33061224489796, "grad_norm": 46.35614013671875, "learning_rate": 2.58530612244898e-05, "loss": 2.1416, "step": 11841 }, { "epoch": 48.33469387755102, "grad_norm": 39.1624641418457, "learning_rate": 2.5851020408163268e-05, "loss": 1.3027, "step": 11842 }, { "epoch": 48.33877551020408, "grad_norm": 17.28057098388672, "learning_rate": 2.5848979591836735e-05, "loss": 2.678, "step": 11843 }, { "epoch": 48.34285714285714, "grad_norm": 23.44398307800293, "learning_rate": 2.5846938775510206e-05, "loss": 2.1789, "step": 11844 }, { "epoch": 48.3469387755102, "grad_norm": 25.928613662719727, "learning_rate": 2.5844897959183673e-05, "loss": 1.8241, "step": 11845 }, { "epoch": 48.351020408163265, "grad_norm": 25.159482955932617, "learning_rate": 2.5842857142857147e-05, "loss": 2.2339, "step": 11846 }, { "epoch": 48.35510204081633, "grad_norm": 33.597076416015625, "learning_rate": 2.5840816326530614e-05, "loss": 0.5646, "step": 11847 }, { "epoch": 48.35918367346939, "grad_norm": 29.55942726135254, "learning_rate": 2.5838775510204084e-05, "loss": 1.5622, "step": 11848 }, { "epoch": 48.36326530612245, "grad_norm": 33.75681686401367, "learning_rate": 2.583673469387755e-05, "loss": 1.9549, "step": 11849 }, { "epoch": 48.36734693877551, "grad_norm": 38.01555252075195, "learning_rate": 2.583469387755102e-05, "loss": 0.555, "step": 11850 }, { "epoch": 48.371428571428574, "grad_norm": 29.714399337768555, "learning_rate": 2.5832653061224492e-05, "loss": 1.1334, "step": 11851 }, { "epoch": 48.375510204081635, "grad_norm": 37.08852005004883, "learning_rate": 2.5830612244897963e-05, "loss": 0.9667, "step": 11852 }, { "epoch": 48.3795918367347, "grad_norm": 20.31848907470703, "learning_rate": 2.582857142857143e-05, "loss": 2.3499, "step": 11853 }, { "epoch": 48.38367346938775, "grad_norm": 30.791011810302734, "learning_rate": 2.5826530612244897e-05, "loss": 0.3834, "step": 11854 }, { "epoch": 48.38775510204081, "grad_norm": 41.947113037109375, "learning_rate": 2.5824489795918368e-05, "loss": 1.3304, "step": 11855 }, { "epoch": 48.391836734693875, "grad_norm": 24.797056198120117, "learning_rate": 2.582244897959184e-05, "loss": 2.198, "step": 11856 }, { "epoch": 48.39591836734694, "grad_norm": 31.742660522460938, "learning_rate": 2.582040816326531e-05, "loss": 1.2394, "step": 11857 }, { "epoch": 48.4, "grad_norm": 28.994626998901367, "learning_rate": 2.5818367346938776e-05, "loss": 2.2497, "step": 11858 }, { "epoch": 48.40408163265306, "grad_norm": 24.55755615234375, "learning_rate": 2.5816326530612246e-05, "loss": 2.4187, "step": 11859 }, { "epoch": 48.40816326530612, "grad_norm": 40.384124755859375, "learning_rate": 2.5814285714285713e-05, "loss": 0.576, "step": 11860 }, { "epoch": 48.412244897959184, "grad_norm": 15.859161376953125, "learning_rate": 2.5812244897959187e-05, "loss": 2.7411, "step": 11861 }, { "epoch": 48.416326530612245, "grad_norm": 30.332059860229492, "learning_rate": 2.5810204081632655e-05, "loss": 0.9977, "step": 11862 }, { "epoch": 48.42040816326531, "grad_norm": 22.89429473876953, "learning_rate": 2.5808163265306125e-05, "loss": 2.2232, "step": 11863 }, { "epoch": 48.42448979591837, "grad_norm": 25.56028938293457, "learning_rate": 2.5806122448979592e-05, "loss": 1.9838, "step": 11864 }, { "epoch": 48.42857142857143, "grad_norm": 19.445478439331055, "learning_rate": 2.580408163265306e-05, "loss": 2.5149, "step": 11865 }, { "epoch": 48.43265306122449, "grad_norm": 29.322256088256836, "learning_rate": 2.5802040816326533e-05, "loss": 1.4383, "step": 11866 }, { "epoch": 48.436734693877554, "grad_norm": 28.358047485351562, "learning_rate": 2.58e-05, "loss": 0.9704, "step": 11867 }, { "epoch": 48.440816326530616, "grad_norm": 31.19618034362793, "learning_rate": 2.579795918367347e-05, "loss": 1.7286, "step": 11868 }, { "epoch": 48.44489795918367, "grad_norm": 41.52330017089844, "learning_rate": 2.5795918367346938e-05, "loss": 0.4479, "step": 11869 }, { "epoch": 48.44897959183673, "grad_norm": 31.17584228515625, "learning_rate": 2.5793877551020405e-05, "loss": 1.0853, "step": 11870 }, { "epoch": 48.453061224489794, "grad_norm": 21.186487197875977, "learning_rate": 2.579183673469388e-05, "loss": 2.2623, "step": 11871 }, { "epoch": 48.457142857142856, "grad_norm": 35.71812057495117, "learning_rate": 2.578979591836735e-05, "loss": 1.0813, "step": 11872 }, { "epoch": 48.46122448979592, "grad_norm": 14.41594409942627, "learning_rate": 2.5787755102040817e-05, "loss": 2.8245, "step": 11873 }, { "epoch": 48.46530612244898, "grad_norm": 14.431798934936523, "learning_rate": 2.5785714285714284e-05, "loss": 2.8705, "step": 11874 }, { "epoch": 48.46938775510204, "grad_norm": 39.465030670166016, "learning_rate": 2.5783673469387758e-05, "loss": 1.2025, "step": 11875 }, { "epoch": 48.4734693877551, "grad_norm": 17.70342445373535, "learning_rate": 2.5781632653061228e-05, "loss": 2.2821, "step": 11876 }, { "epoch": 48.477551020408164, "grad_norm": 28.51483154296875, "learning_rate": 2.5779591836734695e-05, "loss": 2.0188, "step": 11877 }, { "epoch": 48.481632653061226, "grad_norm": 41.06879425048828, "learning_rate": 2.5777551020408162e-05, "loss": 1.2693, "step": 11878 }, { "epoch": 48.48571428571429, "grad_norm": 24.60263442993164, "learning_rate": 2.5775510204081633e-05, "loss": 2.0882, "step": 11879 }, { "epoch": 48.48979591836735, "grad_norm": 21.065725326538086, "learning_rate": 2.5773469387755107e-05, "loss": 2.1687, "step": 11880 }, { "epoch": 48.49387755102041, "grad_norm": 18.60113525390625, "learning_rate": 2.5771428571428574e-05, "loss": 2.0117, "step": 11881 }, { "epoch": 48.49795918367347, "grad_norm": 18.816326141357422, "learning_rate": 2.576938775510204e-05, "loss": 2.8455, "step": 11882 }, { "epoch": 48.50204081632653, "grad_norm": 34.48198318481445, "learning_rate": 2.576734693877551e-05, "loss": 1.5432, "step": 11883 }, { "epoch": 48.50612244897959, "grad_norm": 22.13426971435547, "learning_rate": 2.576530612244898e-05, "loss": 2.4567, "step": 11884 }, { "epoch": 48.51020408163265, "grad_norm": 41.7592887878418, "learning_rate": 2.5763265306122453e-05, "loss": 1.8776, "step": 11885 }, { "epoch": 48.51428571428571, "grad_norm": 27.28692054748535, "learning_rate": 2.576122448979592e-05, "loss": 1.6958, "step": 11886 }, { "epoch": 48.518367346938774, "grad_norm": 19.94688606262207, "learning_rate": 2.575918367346939e-05, "loss": 2.2493, "step": 11887 }, { "epoch": 48.522448979591836, "grad_norm": 22.381132125854492, "learning_rate": 2.5757142857142857e-05, "loss": 1.6807, "step": 11888 }, { "epoch": 48.5265306122449, "grad_norm": 28.47947120666504, "learning_rate": 2.5755102040816325e-05, "loss": 2.1336, "step": 11889 }, { "epoch": 48.53061224489796, "grad_norm": 18.56378173828125, "learning_rate": 2.57530612244898e-05, "loss": 2.3357, "step": 11890 }, { "epoch": 48.53469387755102, "grad_norm": 18.56037712097168, "learning_rate": 2.575102040816327e-05, "loss": 2.4104, "step": 11891 }, { "epoch": 48.53877551020408, "grad_norm": 33.91884231567383, "learning_rate": 2.5748979591836736e-05, "loss": 0.663, "step": 11892 }, { "epoch": 48.542857142857144, "grad_norm": 19.675912857055664, "learning_rate": 2.5746938775510203e-05, "loss": 2.2744, "step": 11893 }, { "epoch": 48.546938775510206, "grad_norm": 25.981307983398438, "learning_rate": 2.5744897959183674e-05, "loss": 2.2658, "step": 11894 }, { "epoch": 48.55102040816327, "grad_norm": 31.236736297607422, "learning_rate": 2.5742857142857148e-05, "loss": 1.0626, "step": 11895 }, { "epoch": 48.55510204081633, "grad_norm": 22.006303787231445, "learning_rate": 2.5740816326530615e-05, "loss": 2.3879, "step": 11896 }, { "epoch": 48.55918367346939, "grad_norm": 21.647401809692383, "learning_rate": 2.5738775510204082e-05, "loss": 2.5224, "step": 11897 }, { "epoch": 48.563265306122446, "grad_norm": 11.557437896728516, "learning_rate": 2.5736734693877552e-05, "loss": 3.2034, "step": 11898 }, { "epoch": 48.56734693877551, "grad_norm": 34.2083625793457, "learning_rate": 2.573469387755102e-05, "loss": 1.4815, "step": 11899 }, { "epoch": 48.57142857142857, "grad_norm": 33.958892822265625, "learning_rate": 2.5732653061224493e-05, "loss": 1.9673, "step": 11900 }, { "epoch": 48.57551020408163, "grad_norm": 21.960922241210938, "learning_rate": 2.573061224489796e-05, "loss": 1.7332, "step": 11901 }, { "epoch": 48.57959183673469, "grad_norm": 17.154605865478516, "learning_rate": 2.572857142857143e-05, "loss": 2.4901, "step": 11902 }, { "epoch": 48.583673469387755, "grad_norm": 25.934730529785156, "learning_rate": 2.5726530612244898e-05, "loss": 1.9725, "step": 11903 }, { "epoch": 48.587755102040816, "grad_norm": 31.26507568359375, "learning_rate": 2.5724489795918365e-05, "loss": 1.0973, "step": 11904 }, { "epoch": 48.59183673469388, "grad_norm": 30.869028091430664, "learning_rate": 2.572244897959184e-05, "loss": 2.0952, "step": 11905 }, { "epoch": 48.59591836734694, "grad_norm": 27.63730239868164, "learning_rate": 2.572040816326531e-05, "loss": 1.872, "step": 11906 }, { "epoch": 48.6, "grad_norm": 44.447994232177734, "learning_rate": 2.5718367346938777e-05, "loss": 1.009, "step": 11907 }, { "epoch": 48.60408163265306, "grad_norm": 32.06016159057617, "learning_rate": 2.5716326530612244e-05, "loss": 1.1024, "step": 11908 }, { "epoch": 48.608163265306125, "grad_norm": 46.12834930419922, "learning_rate": 2.5714285714285714e-05, "loss": 1.0589, "step": 11909 }, { "epoch": 48.61224489795919, "grad_norm": 33.19841003417969, "learning_rate": 2.571224489795919e-05, "loss": 1.4256, "step": 11910 }, { "epoch": 48.61632653061225, "grad_norm": 19.909337997436523, "learning_rate": 2.5710204081632655e-05, "loss": 2.433, "step": 11911 }, { "epoch": 48.6204081632653, "grad_norm": 42.73930358886719, "learning_rate": 2.5708163265306123e-05, "loss": 1.9167, "step": 11912 }, { "epoch": 48.624489795918365, "grad_norm": 36.049964904785156, "learning_rate": 2.5706122448979593e-05, "loss": 2.0713, "step": 11913 }, { "epoch": 48.628571428571426, "grad_norm": 17.056270599365234, "learning_rate": 2.570408163265306e-05, "loss": 2.7139, "step": 11914 }, { "epoch": 48.63265306122449, "grad_norm": 26.0294189453125, "learning_rate": 2.5702040816326534e-05, "loss": 2.0652, "step": 11915 }, { "epoch": 48.63673469387755, "grad_norm": 17.191648483276367, "learning_rate": 2.57e-05, "loss": 2.7052, "step": 11916 }, { "epoch": 48.64081632653061, "grad_norm": 29.19244956970215, "learning_rate": 2.5697959183673472e-05, "loss": 1.9141, "step": 11917 }, { "epoch": 48.64489795918367, "grad_norm": 22.469680786132812, "learning_rate": 2.569591836734694e-05, "loss": 2.3357, "step": 11918 }, { "epoch": 48.648979591836735, "grad_norm": 27.405961990356445, "learning_rate": 2.5693877551020413e-05, "loss": 1.9044, "step": 11919 }, { "epoch": 48.6530612244898, "grad_norm": 30.914691925048828, "learning_rate": 2.569183673469388e-05, "loss": 1.9156, "step": 11920 }, { "epoch": 48.65714285714286, "grad_norm": 26.168643951416016, "learning_rate": 2.5689795918367347e-05, "loss": 2.0178, "step": 11921 }, { "epoch": 48.66122448979592, "grad_norm": 21.150165557861328, "learning_rate": 2.5687755102040818e-05, "loss": 2.3386, "step": 11922 }, { "epoch": 48.66530612244898, "grad_norm": 25.591106414794922, "learning_rate": 2.5685714285714285e-05, "loss": 1.7882, "step": 11923 }, { "epoch": 48.66938775510204, "grad_norm": 34.523380279541016, "learning_rate": 2.568367346938776e-05, "loss": 1.5115, "step": 11924 }, { "epoch": 48.673469387755105, "grad_norm": 40.35716247558594, "learning_rate": 2.5681632653061226e-05, "loss": 0.4822, "step": 11925 }, { "epoch": 48.67755102040816, "grad_norm": 29.417394638061523, "learning_rate": 2.5679591836734696e-05, "loss": 1.7619, "step": 11926 }, { "epoch": 48.68163265306122, "grad_norm": 30.951637268066406, "learning_rate": 2.5677551020408163e-05, "loss": 1.0855, "step": 11927 }, { "epoch": 48.68571428571428, "grad_norm": 25.77061653137207, "learning_rate": 2.567551020408163e-05, "loss": 1.5134, "step": 11928 }, { "epoch": 48.689795918367345, "grad_norm": 25.32591438293457, "learning_rate": 2.5673469387755104e-05, "loss": 1.5298, "step": 11929 }, { "epoch": 48.69387755102041, "grad_norm": 29.737567901611328, "learning_rate": 2.5671428571428575e-05, "loss": 1.6932, "step": 11930 }, { "epoch": 48.69795918367347, "grad_norm": 22.547273635864258, "learning_rate": 2.5669387755102042e-05, "loss": 1.9586, "step": 11931 }, { "epoch": 48.70204081632653, "grad_norm": 44.27330017089844, "learning_rate": 2.566734693877551e-05, "loss": 1.1744, "step": 11932 }, { "epoch": 48.70612244897959, "grad_norm": 40.928321838378906, "learning_rate": 2.566530612244898e-05, "loss": 1.2823, "step": 11933 }, { "epoch": 48.710204081632654, "grad_norm": 41.41983413696289, "learning_rate": 2.5663265306122454e-05, "loss": 1.4445, "step": 11934 }, { "epoch": 48.714285714285715, "grad_norm": 34.83307647705078, "learning_rate": 2.566122448979592e-05, "loss": 0.8931, "step": 11935 }, { "epoch": 48.71836734693878, "grad_norm": 15.721227645874023, "learning_rate": 2.5659183673469388e-05, "loss": 2.7844, "step": 11936 }, { "epoch": 48.72244897959184, "grad_norm": 43.35707092285156, "learning_rate": 2.565714285714286e-05, "loss": 0.9395, "step": 11937 }, { "epoch": 48.7265306122449, "grad_norm": 28.72251319885254, "learning_rate": 2.5655102040816325e-05, "loss": 1.7462, "step": 11938 }, { "epoch": 48.73061224489796, "grad_norm": 48.09782791137695, "learning_rate": 2.56530612244898e-05, "loss": 1.0643, "step": 11939 }, { "epoch": 48.734693877551024, "grad_norm": 33.170597076416016, "learning_rate": 2.5651020408163266e-05, "loss": 2.1199, "step": 11940 }, { "epoch": 48.73877551020408, "grad_norm": 50.557899475097656, "learning_rate": 2.5648979591836737e-05, "loss": 1.4377, "step": 11941 }, { "epoch": 48.74285714285714, "grad_norm": 26.926250457763672, "learning_rate": 2.5646938775510204e-05, "loss": 1.8289, "step": 11942 }, { "epoch": 48.7469387755102, "grad_norm": 42.468570709228516, "learning_rate": 2.564489795918367e-05, "loss": 1.7417, "step": 11943 }, { "epoch": 48.751020408163264, "grad_norm": 28.476865768432617, "learning_rate": 2.5642857142857145e-05, "loss": 1.6571, "step": 11944 }, { "epoch": 48.755102040816325, "grad_norm": 21.3328914642334, "learning_rate": 2.5640816326530616e-05, "loss": 2.5316, "step": 11945 }, { "epoch": 48.75918367346939, "grad_norm": 15.65435791015625, "learning_rate": 2.5638775510204083e-05, "loss": 2.2275, "step": 11946 }, { "epoch": 48.76326530612245, "grad_norm": 31.04616355895996, "learning_rate": 2.563673469387755e-05, "loss": 1.5458, "step": 11947 }, { "epoch": 48.76734693877551, "grad_norm": 19.910945892333984, "learning_rate": 2.563469387755102e-05, "loss": 2.2693, "step": 11948 }, { "epoch": 48.77142857142857, "grad_norm": 26.075143814086914, "learning_rate": 2.5632653061224494e-05, "loss": 1.3943, "step": 11949 }, { "epoch": 48.775510204081634, "grad_norm": 34.65617752075195, "learning_rate": 2.563061224489796e-05, "loss": 0.9033, "step": 11950 }, { "epoch": 48.779591836734696, "grad_norm": 24.834020614624023, "learning_rate": 2.562857142857143e-05, "loss": 1.7585, "step": 11951 }, { "epoch": 48.78367346938776, "grad_norm": 40.39076232910156, "learning_rate": 2.56265306122449e-05, "loss": 1.143, "step": 11952 }, { "epoch": 48.78775510204082, "grad_norm": 25.69778823852539, "learning_rate": 2.5624489795918366e-05, "loss": 1.547, "step": 11953 }, { "epoch": 48.79183673469388, "grad_norm": 29.36965560913086, "learning_rate": 2.562244897959184e-05, "loss": 2.308, "step": 11954 }, { "epoch": 48.795918367346935, "grad_norm": 31.734731674194336, "learning_rate": 2.5620408163265307e-05, "loss": 1.5166, "step": 11955 }, { "epoch": 48.8, "grad_norm": 22.010799407958984, "learning_rate": 2.5618367346938778e-05, "loss": 2.378, "step": 11956 }, { "epoch": 48.80408163265306, "grad_norm": 18.47957420349121, "learning_rate": 2.5616326530612245e-05, "loss": 2.5389, "step": 11957 }, { "epoch": 48.80816326530612, "grad_norm": 53.54231643676758, "learning_rate": 2.5614285714285712e-05, "loss": 1.0345, "step": 11958 }, { "epoch": 48.81224489795918, "grad_norm": 20.414169311523438, "learning_rate": 2.5612244897959186e-05, "loss": 2.5574, "step": 11959 }, { "epoch": 48.816326530612244, "grad_norm": 23.530250549316406, "learning_rate": 2.5610204081632656e-05, "loss": 2.123, "step": 11960 }, { "epoch": 48.820408163265306, "grad_norm": 25.932039260864258, "learning_rate": 2.5608163265306124e-05, "loss": 2.267, "step": 11961 }, { "epoch": 48.82448979591837, "grad_norm": 25.684446334838867, "learning_rate": 2.560612244897959e-05, "loss": 2.0816, "step": 11962 }, { "epoch": 48.82857142857143, "grad_norm": 28.945585250854492, "learning_rate": 2.560408163265306e-05, "loss": 2.2212, "step": 11963 }, { "epoch": 48.83265306122449, "grad_norm": 45.84587478637695, "learning_rate": 2.5602040816326535e-05, "loss": 1.0119, "step": 11964 }, { "epoch": 48.83673469387755, "grad_norm": 25.758161544799805, "learning_rate": 2.5600000000000002e-05, "loss": 2.3329, "step": 11965 }, { "epoch": 48.840816326530614, "grad_norm": 16.382232666015625, "learning_rate": 2.559795918367347e-05, "loss": 2.7763, "step": 11966 }, { "epoch": 48.844897959183676, "grad_norm": 36.94593811035156, "learning_rate": 2.559591836734694e-05, "loss": 1.504, "step": 11967 }, { "epoch": 48.84897959183674, "grad_norm": 29.852802276611328, "learning_rate": 2.559387755102041e-05, "loss": 1.9384, "step": 11968 }, { "epoch": 48.85306122448979, "grad_norm": 43.77751159667969, "learning_rate": 2.559183673469388e-05, "loss": 0.9627, "step": 11969 }, { "epoch": 48.857142857142854, "grad_norm": 30.668987274169922, "learning_rate": 2.5589795918367348e-05, "loss": 1.8637, "step": 11970 }, { "epoch": 48.861224489795916, "grad_norm": 27.69809913635254, "learning_rate": 2.5587755102040815e-05, "loss": 2.0467, "step": 11971 }, { "epoch": 48.86530612244898, "grad_norm": 25.680009841918945, "learning_rate": 2.5585714285714286e-05, "loss": 0.5011, "step": 11972 }, { "epoch": 48.86938775510204, "grad_norm": 20.074317932128906, "learning_rate": 2.558367346938776e-05, "loss": 2.0887, "step": 11973 }, { "epoch": 48.8734693877551, "grad_norm": 26.708694458007812, "learning_rate": 2.5581632653061227e-05, "loss": 2.0024, "step": 11974 }, { "epoch": 48.87755102040816, "grad_norm": 20.709243774414062, "learning_rate": 2.5579591836734694e-05, "loss": 2.1737, "step": 11975 }, { "epoch": 48.881632653061224, "grad_norm": 29.965980529785156, "learning_rate": 2.5577551020408164e-05, "loss": 1.983, "step": 11976 }, { "epoch": 48.885714285714286, "grad_norm": 31.86125946044922, "learning_rate": 2.557551020408163e-05, "loss": 1.6329, "step": 11977 }, { "epoch": 48.88979591836735, "grad_norm": 13.230951309204102, "learning_rate": 2.5573469387755105e-05, "loss": 2.9022, "step": 11978 }, { "epoch": 48.89387755102041, "grad_norm": 38.01396942138672, "learning_rate": 2.5571428571428572e-05, "loss": 1.0304, "step": 11979 }, { "epoch": 48.89795918367347, "grad_norm": 19.408588409423828, "learning_rate": 2.5569387755102043e-05, "loss": 2.3932, "step": 11980 }, { "epoch": 48.90204081632653, "grad_norm": 30.78152084350586, "learning_rate": 2.556734693877551e-05, "loss": 2.4235, "step": 11981 }, { "epoch": 48.906122448979595, "grad_norm": 37.261619567871094, "learning_rate": 2.5565306122448977e-05, "loss": 1.4914, "step": 11982 }, { "epoch": 48.910204081632656, "grad_norm": 19.133140563964844, "learning_rate": 2.556326530612245e-05, "loss": 2.4976, "step": 11983 }, { "epoch": 48.91428571428571, "grad_norm": 28.387258529663086, "learning_rate": 2.556122448979592e-05, "loss": 1.1602, "step": 11984 }, { "epoch": 48.91836734693877, "grad_norm": 36.171974182128906, "learning_rate": 2.555918367346939e-05, "loss": 1.1036, "step": 11985 }, { "epoch": 48.922448979591834, "grad_norm": 21.058874130249023, "learning_rate": 2.5557142857142856e-05, "loss": 2.521, "step": 11986 }, { "epoch": 48.926530612244896, "grad_norm": 18.114927291870117, "learning_rate": 2.5555102040816326e-05, "loss": 2.6337, "step": 11987 }, { "epoch": 48.93061224489796, "grad_norm": 16.980667114257812, "learning_rate": 2.55530612244898e-05, "loss": 2.537, "step": 11988 }, { "epoch": 48.93469387755102, "grad_norm": 9.456572532653809, "learning_rate": 2.5551020408163267e-05, "loss": 3.3952, "step": 11989 }, { "epoch": 48.93877551020408, "grad_norm": 15.714802742004395, "learning_rate": 2.5548979591836735e-05, "loss": 2.9757, "step": 11990 }, { "epoch": 48.94285714285714, "grad_norm": 31.264345169067383, "learning_rate": 2.5546938775510205e-05, "loss": 1.0399, "step": 11991 }, { "epoch": 48.946938775510205, "grad_norm": 38.23259353637695, "learning_rate": 2.5544897959183672e-05, "loss": 1.2409, "step": 11992 }, { "epoch": 48.95102040816327, "grad_norm": 39.578697204589844, "learning_rate": 2.5542857142857146e-05, "loss": 1.5335, "step": 11993 }, { "epoch": 48.95510204081633, "grad_norm": 13.386422157287598, "learning_rate": 2.5540816326530613e-05, "loss": 2.7598, "step": 11994 }, { "epoch": 48.95918367346939, "grad_norm": 43.763885498046875, "learning_rate": 2.5538775510204084e-05, "loss": 1.2732, "step": 11995 }, { "epoch": 48.96326530612245, "grad_norm": 19.194610595703125, "learning_rate": 2.553673469387755e-05, "loss": 2.3743, "step": 11996 }, { "epoch": 48.96734693877551, "grad_norm": 23.277118682861328, "learning_rate": 2.5534693877551018e-05, "loss": 2.3456, "step": 11997 }, { "epoch": 48.97142857142857, "grad_norm": 17.57816505432129, "learning_rate": 2.5532653061224492e-05, "loss": 2.8993, "step": 11998 }, { "epoch": 48.97551020408163, "grad_norm": 13.28782844543457, "learning_rate": 2.5530612244897962e-05, "loss": 2.9567, "step": 11999 }, { "epoch": 48.97959183673469, "grad_norm": 18.2968807220459, "learning_rate": 2.552857142857143e-05, "loss": 2.3753, "step": 12000 }, { "epoch": 48.98367346938775, "grad_norm": 11.014330863952637, "learning_rate": 2.5526530612244897e-05, "loss": 2.9354, "step": 12001 }, { "epoch": 48.987755102040815, "grad_norm": 27.16364860534668, "learning_rate": 2.5524489795918367e-05, "loss": 2.2034, "step": 12002 }, { "epoch": 48.99183673469388, "grad_norm": 18.414569854736328, "learning_rate": 2.552244897959184e-05, "loss": 2.4599, "step": 12003 }, { "epoch": 48.99591836734694, "grad_norm": 19.762126922607422, "learning_rate": 2.5520408163265308e-05, "loss": 2.4267, "step": 12004 }, { "epoch": 49.0, "grad_norm": 50.86320495605469, "learning_rate": 2.5518367346938775e-05, "loss": 1.2695, "step": 12005 }, { "epoch": 49.00408163265306, "grad_norm": 18.291213989257812, "learning_rate": 2.5516326530612246e-05, "loss": 2.3376, "step": 12006 }, { "epoch": 49.00816326530612, "grad_norm": 38.576725006103516, "learning_rate": 2.5514285714285713e-05, "loss": 1.016, "step": 12007 }, { "epoch": 49.012244897959185, "grad_norm": 28.467350006103516, "learning_rate": 2.5512244897959187e-05, "loss": 0.5738, "step": 12008 }, { "epoch": 49.01632653061225, "grad_norm": 31.536287307739258, "learning_rate": 2.5510204081632654e-05, "loss": 1.254, "step": 12009 }, { "epoch": 49.02040816326531, "grad_norm": 16.379608154296875, "learning_rate": 2.5508163265306125e-05, "loss": 2.3554, "step": 12010 }, { "epoch": 49.02448979591837, "grad_norm": 33.31786346435547, "learning_rate": 2.550612244897959e-05, "loss": 1.8141, "step": 12011 }, { "epoch": 49.02857142857143, "grad_norm": 46.28782653808594, "learning_rate": 2.5504081632653066e-05, "loss": 1.3896, "step": 12012 }, { "epoch": 49.03265306122449, "grad_norm": 29.13995933532715, "learning_rate": 2.5502040816326533e-05, "loss": 1.9565, "step": 12013 }, { "epoch": 49.03673469387755, "grad_norm": 15.348918914794922, "learning_rate": 2.5500000000000003e-05, "loss": 2.0958, "step": 12014 }, { "epoch": 49.04081632653061, "grad_norm": 35.862091064453125, "learning_rate": 2.549795918367347e-05, "loss": 0.3912, "step": 12015 }, { "epoch": 49.04489795918367, "grad_norm": 33.02790069580078, "learning_rate": 2.5495918367346937e-05, "loss": 1.8125, "step": 12016 }, { "epoch": 49.04897959183673, "grad_norm": 31.842411041259766, "learning_rate": 2.549387755102041e-05, "loss": 0.4637, "step": 12017 }, { "epoch": 49.053061224489795, "grad_norm": 38.0411376953125, "learning_rate": 2.5491836734693882e-05, "loss": 1.0439, "step": 12018 }, { "epoch": 49.05714285714286, "grad_norm": 36.34341049194336, "learning_rate": 2.548979591836735e-05, "loss": 0.9938, "step": 12019 }, { "epoch": 49.06122448979592, "grad_norm": 38.74586486816406, "learning_rate": 2.5487755102040816e-05, "loss": 1.0644, "step": 12020 }, { "epoch": 49.06530612244898, "grad_norm": 19.639991760253906, "learning_rate": 2.5485714285714287e-05, "loss": 2.2576, "step": 12021 }, { "epoch": 49.06938775510204, "grad_norm": 29.110095977783203, "learning_rate": 2.5483673469387757e-05, "loss": 1.2821, "step": 12022 }, { "epoch": 49.073469387755104, "grad_norm": 28.925199508666992, "learning_rate": 2.5481632653061228e-05, "loss": 2.1644, "step": 12023 }, { "epoch": 49.077551020408166, "grad_norm": 18.525474548339844, "learning_rate": 2.5479591836734695e-05, "loss": 2.6655, "step": 12024 }, { "epoch": 49.08163265306123, "grad_norm": 33.224151611328125, "learning_rate": 2.5477551020408162e-05, "loss": 0.9124, "step": 12025 }, { "epoch": 49.08571428571429, "grad_norm": 24.50819206237793, "learning_rate": 2.5475510204081632e-05, "loss": 1.6635, "step": 12026 }, { "epoch": 49.089795918367344, "grad_norm": 34.405364990234375, "learning_rate": 2.5473469387755106e-05, "loss": 1.3251, "step": 12027 }, { "epoch": 49.093877551020405, "grad_norm": 27.292991638183594, "learning_rate": 2.5471428571428573e-05, "loss": 1.6288, "step": 12028 }, { "epoch": 49.09795918367347, "grad_norm": 33.753116607666016, "learning_rate": 2.546938775510204e-05, "loss": 1.9144, "step": 12029 }, { "epoch": 49.10204081632653, "grad_norm": 19.643674850463867, "learning_rate": 2.546734693877551e-05, "loss": 2.8172, "step": 12030 }, { "epoch": 49.10612244897959, "grad_norm": 25.500579833984375, "learning_rate": 2.5465306122448978e-05, "loss": 1.418, "step": 12031 }, { "epoch": 49.11020408163265, "grad_norm": 36.52537155151367, "learning_rate": 2.5463265306122452e-05, "loss": 1.6941, "step": 12032 }, { "epoch": 49.114285714285714, "grad_norm": 24.831314086914062, "learning_rate": 2.546122448979592e-05, "loss": 1.6119, "step": 12033 }, { "epoch": 49.118367346938776, "grad_norm": 16.7636775970459, "learning_rate": 2.545918367346939e-05, "loss": 2.6267, "step": 12034 }, { "epoch": 49.12244897959184, "grad_norm": 12.945930480957031, "learning_rate": 2.5457142857142857e-05, "loss": 2.8539, "step": 12035 }, { "epoch": 49.1265306122449, "grad_norm": 29.229740142822266, "learning_rate": 2.5455102040816324e-05, "loss": 0.3726, "step": 12036 }, { "epoch": 49.13061224489796, "grad_norm": 18.803375244140625, "learning_rate": 2.5453061224489798e-05, "loss": 2.3644, "step": 12037 }, { "epoch": 49.13469387755102, "grad_norm": 44.22589874267578, "learning_rate": 2.545102040816327e-05, "loss": 1.1636, "step": 12038 }, { "epoch": 49.138775510204084, "grad_norm": 21.351713180541992, "learning_rate": 2.5448979591836736e-05, "loss": 2.5288, "step": 12039 }, { "epoch": 49.142857142857146, "grad_norm": 23.67475700378418, "learning_rate": 2.5446938775510203e-05, "loss": 1.8863, "step": 12040 }, { "epoch": 49.14693877551021, "grad_norm": 26.341461181640625, "learning_rate": 2.5444897959183673e-05, "loss": 1.1001, "step": 12041 }, { "epoch": 49.15102040816326, "grad_norm": 33.10464859008789, "learning_rate": 2.5442857142857147e-05, "loss": 0.5448, "step": 12042 }, { "epoch": 49.155102040816324, "grad_norm": 29.924577713012695, "learning_rate": 2.5440816326530614e-05, "loss": 1.8153, "step": 12043 }, { "epoch": 49.159183673469386, "grad_norm": 22.92445182800293, "learning_rate": 2.543877551020408e-05, "loss": 2.3437, "step": 12044 }, { "epoch": 49.16326530612245, "grad_norm": 34.2797966003418, "learning_rate": 2.5436734693877552e-05, "loss": 1.9096, "step": 12045 }, { "epoch": 49.16734693877551, "grad_norm": 25.590087890625, "learning_rate": 2.543469387755102e-05, "loss": 2.177, "step": 12046 }, { "epoch": 49.17142857142857, "grad_norm": 37.950592041015625, "learning_rate": 2.5432653061224493e-05, "loss": 1.1554, "step": 12047 }, { "epoch": 49.17551020408163, "grad_norm": 30.05475616455078, "learning_rate": 2.543061224489796e-05, "loss": 2.0528, "step": 12048 }, { "epoch": 49.179591836734694, "grad_norm": 28.703859329223633, "learning_rate": 2.542857142857143e-05, "loss": 1.4354, "step": 12049 }, { "epoch": 49.183673469387756, "grad_norm": 27.681625366210938, "learning_rate": 2.5426530612244898e-05, "loss": 1.5636, "step": 12050 }, { "epoch": 49.18775510204082, "grad_norm": 38.26332473754883, "learning_rate": 2.5424489795918365e-05, "loss": 0.8813, "step": 12051 }, { "epoch": 49.19183673469388, "grad_norm": 27.477567672729492, "learning_rate": 2.542244897959184e-05, "loss": 0.8714, "step": 12052 }, { "epoch": 49.19591836734694, "grad_norm": 15.272797584533691, "learning_rate": 2.542040816326531e-05, "loss": 2.8079, "step": 12053 }, { "epoch": 49.2, "grad_norm": 40.59021759033203, "learning_rate": 2.5418367346938776e-05, "loss": 0.5068, "step": 12054 }, { "epoch": 49.204081632653065, "grad_norm": 23.84683609008789, "learning_rate": 2.5416326530612243e-05, "loss": 2.0776, "step": 12055 }, { "epoch": 49.20816326530612, "grad_norm": 48.15247344970703, "learning_rate": 2.5414285714285717e-05, "loss": 1.4033, "step": 12056 }, { "epoch": 49.21224489795918, "grad_norm": 34.23541259765625, "learning_rate": 2.5412244897959188e-05, "loss": 1.9895, "step": 12057 }, { "epoch": 49.21632653061224, "grad_norm": 33.85038757324219, "learning_rate": 2.5410204081632655e-05, "loss": 1.607, "step": 12058 }, { "epoch": 49.220408163265304, "grad_norm": 28.355825424194336, "learning_rate": 2.5408163265306122e-05, "loss": 1.6393, "step": 12059 }, { "epoch": 49.224489795918366, "grad_norm": 23.05033302307129, "learning_rate": 2.5406122448979593e-05, "loss": 2.1526, "step": 12060 }, { "epoch": 49.22857142857143, "grad_norm": 25.675447463989258, "learning_rate": 2.5404081632653067e-05, "loss": 1.7329, "step": 12061 }, { "epoch": 49.23265306122449, "grad_norm": 21.407995223999023, "learning_rate": 2.5402040816326534e-05, "loss": 2.0722, "step": 12062 }, { "epoch": 49.23673469387755, "grad_norm": 43.52883529663086, "learning_rate": 2.54e-05, "loss": 0.8525, "step": 12063 }, { "epoch": 49.24081632653061, "grad_norm": 33.993568420410156, "learning_rate": 2.539795918367347e-05, "loss": 1.7861, "step": 12064 }, { "epoch": 49.244897959183675, "grad_norm": 18.206462860107422, "learning_rate": 2.539591836734694e-05, "loss": 2.2695, "step": 12065 }, { "epoch": 49.248979591836736, "grad_norm": 31.62704086303711, "learning_rate": 2.5393877551020412e-05, "loss": 1.4467, "step": 12066 }, { "epoch": 49.2530612244898, "grad_norm": 25.993642807006836, "learning_rate": 2.539183673469388e-05, "loss": 1.7071, "step": 12067 }, { "epoch": 49.25714285714286, "grad_norm": 28.41001319885254, "learning_rate": 2.538979591836735e-05, "loss": 1.6062, "step": 12068 }, { "epoch": 49.26122448979592, "grad_norm": 34.2423210144043, "learning_rate": 2.5387755102040817e-05, "loss": 0.8797, "step": 12069 }, { "epoch": 49.265306122448976, "grad_norm": 36.97732162475586, "learning_rate": 2.5385714285714284e-05, "loss": 0.9858, "step": 12070 }, { "epoch": 49.26938775510204, "grad_norm": 31.465280532836914, "learning_rate": 2.5383673469387758e-05, "loss": 1.7614, "step": 12071 }, { "epoch": 49.2734693877551, "grad_norm": 20.292367935180664, "learning_rate": 2.538163265306123e-05, "loss": 2.3362, "step": 12072 }, { "epoch": 49.27755102040816, "grad_norm": 23.093551635742188, "learning_rate": 2.5379591836734696e-05, "loss": 2.1209, "step": 12073 }, { "epoch": 49.28163265306122, "grad_norm": 25.357534408569336, "learning_rate": 2.5377551020408163e-05, "loss": 2.0337, "step": 12074 }, { "epoch": 49.285714285714285, "grad_norm": 19.16932487487793, "learning_rate": 2.5375510204081633e-05, "loss": 2.9234, "step": 12075 }, { "epoch": 49.289795918367346, "grad_norm": 31.71647071838379, "learning_rate": 2.5373469387755104e-05, "loss": 1.9771, "step": 12076 }, { "epoch": 49.29387755102041, "grad_norm": 28.895017623901367, "learning_rate": 2.5371428571428574e-05, "loss": 1.7084, "step": 12077 }, { "epoch": 49.29795918367347, "grad_norm": 25.584108352661133, "learning_rate": 2.536938775510204e-05, "loss": 2.2311, "step": 12078 }, { "epoch": 49.30204081632653, "grad_norm": 14.421993255615234, "learning_rate": 2.536734693877551e-05, "loss": 2.8504, "step": 12079 }, { "epoch": 49.30612244897959, "grad_norm": 44.95354080200195, "learning_rate": 2.536530612244898e-05, "loss": 0.4166, "step": 12080 }, { "epoch": 49.310204081632655, "grad_norm": 22.996923446655273, "learning_rate": 2.5363265306122453e-05, "loss": 2.4725, "step": 12081 }, { "epoch": 49.31428571428572, "grad_norm": 17.364402770996094, "learning_rate": 2.536122448979592e-05, "loss": 2.6701, "step": 12082 }, { "epoch": 49.31836734693878, "grad_norm": 49.517608642578125, "learning_rate": 2.5359183673469387e-05, "loss": 0.5753, "step": 12083 }, { "epoch": 49.32244897959184, "grad_norm": 16.49779510498047, "learning_rate": 2.5357142857142858e-05, "loss": 2.3431, "step": 12084 }, { "epoch": 49.326530612244895, "grad_norm": 31.325698852539062, "learning_rate": 2.5355102040816325e-05, "loss": 1.9301, "step": 12085 }, { "epoch": 49.33061224489796, "grad_norm": 33.52225875854492, "learning_rate": 2.53530612244898e-05, "loss": 0.9616, "step": 12086 }, { "epoch": 49.33469387755102, "grad_norm": 21.047420501708984, "learning_rate": 2.5351020408163266e-05, "loss": 2.1375, "step": 12087 }, { "epoch": 49.33877551020408, "grad_norm": 34.977943420410156, "learning_rate": 2.5348979591836736e-05, "loss": 0.8395, "step": 12088 }, { "epoch": 49.34285714285714, "grad_norm": 33.070796966552734, "learning_rate": 2.5346938775510204e-05, "loss": 1.7739, "step": 12089 }, { "epoch": 49.3469387755102, "grad_norm": 29.34747314453125, "learning_rate": 2.534489795918367e-05, "loss": 1.712, "step": 12090 }, { "epoch": 49.351020408163265, "grad_norm": 37.02986526489258, "learning_rate": 2.5342857142857145e-05, "loss": 1.0754, "step": 12091 }, { "epoch": 49.35510204081633, "grad_norm": 35.62356948852539, "learning_rate": 2.5340816326530615e-05, "loss": 1.5571, "step": 12092 }, { "epoch": 49.35918367346939, "grad_norm": 31.529376983642578, "learning_rate": 2.5338775510204082e-05, "loss": 1.5517, "step": 12093 }, { "epoch": 49.36326530612245, "grad_norm": 47.856544494628906, "learning_rate": 2.533673469387755e-05, "loss": 2.1013, "step": 12094 }, { "epoch": 49.36734693877551, "grad_norm": 22.649221420288086, "learning_rate": 2.533469387755102e-05, "loss": 2.5095, "step": 12095 }, { "epoch": 49.371428571428574, "grad_norm": 41.91426467895508, "learning_rate": 2.5332653061224494e-05, "loss": 1.17, "step": 12096 }, { "epoch": 49.375510204081635, "grad_norm": 33.120784759521484, "learning_rate": 2.533061224489796e-05, "loss": 0.3775, "step": 12097 }, { "epoch": 49.3795918367347, "grad_norm": 26.139095306396484, "learning_rate": 2.5328571428571428e-05, "loss": 2.0746, "step": 12098 }, { "epoch": 49.38367346938775, "grad_norm": 37.830718994140625, "learning_rate": 2.53265306122449e-05, "loss": 1.5343, "step": 12099 }, { "epoch": 49.38775510204081, "grad_norm": 21.973344802856445, "learning_rate": 2.5324489795918366e-05, "loss": 1.63, "step": 12100 }, { "epoch": 49.391836734693875, "grad_norm": 28.991039276123047, "learning_rate": 2.532244897959184e-05, "loss": 1.0385, "step": 12101 }, { "epoch": 49.39591836734694, "grad_norm": 16.592323303222656, "learning_rate": 2.5320408163265307e-05, "loss": 2.3007, "step": 12102 }, { "epoch": 49.4, "grad_norm": 29.048898696899414, "learning_rate": 2.5318367346938777e-05, "loss": 1.8521, "step": 12103 }, { "epoch": 49.40408163265306, "grad_norm": 41.10543441772461, "learning_rate": 2.5316326530612244e-05, "loss": 1.0861, "step": 12104 }, { "epoch": 49.40816326530612, "grad_norm": 29.02200698852539, "learning_rate": 2.5314285714285718e-05, "loss": 2.2054, "step": 12105 }, { "epoch": 49.412244897959184, "grad_norm": 20.059181213378906, "learning_rate": 2.5312244897959185e-05, "loss": 2.395, "step": 12106 }, { "epoch": 49.416326530612245, "grad_norm": 30.815507888793945, "learning_rate": 2.5310204081632656e-05, "loss": 1.097, "step": 12107 }, { "epoch": 49.42040816326531, "grad_norm": 19.40152359008789, "learning_rate": 2.5308163265306123e-05, "loss": 2.4064, "step": 12108 }, { "epoch": 49.42448979591837, "grad_norm": 21.979978561401367, "learning_rate": 2.530612244897959e-05, "loss": 2.0401, "step": 12109 }, { "epoch": 49.42857142857143, "grad_norm": 40.145450592041016, "learning_rate": 2.5304081632653064e-05, "loss": 1.2127, "step": 12110 }, { "epoch": 49.43265306122449, "grad_norm": 24.848270416259766, "learning_rate": 2.5302040816326535e-05, "loss": 1.5948, "step": 12111 }, { "epoch": 49.436734693877554, "grad_norm": 22.94451904296875, "learning_rate": 2.5300000000000002e-05, "loss": 2.0607, "step": 12112 }, { "epoch": 49.440816326530616, "grad_norm": 28.441173553466797, "learning_rate": 2.529795918367347e-05, "loss": 2.23, "step": 12113 }, { "epoch": 49.44489795918367, "grad_norm": 15.549722671508789, "learning_rate": 2.529591836734694e-05, "loss": 2.6664, "step": 12114 }, { "epoch": 49.44897959183673, "grad_norm": 56.3056526184082, "learning_rate": 2.5293877551020413e-05, "loss": 0.903, "step": 12115 }, { "epoch": 49.453061224489794, "grad_norm": 31.213088989257812, "learning_rate": 2.529183673469388e-05, "loss": 1.8697, "step": 12116 }, { "epoch": 49.457142857142856, "grad_norm": 18.828237533569336, "learning_rate": 2.5289795918367348e-05, "loss": 2.3758, "step": 12117 }, { "epoch": 49.46122448979592, "grad_norm": 24.963247299194336, "learning_rate": 2.5287755102040818e-05, "loss": 2.3431, "step": 12118 }, { "epoch": 49.46530612244898, "grad_norm": 32.12339401245117, "learning_rate": 2.5285714285714285e-05, "loss": 1.4148, "step": 12119 }, { "epoch": 49.46938775510204, "grad_norm": 48.59380340576172, "learning_rate": 2.528367346938776e-05, "loss": 1.3524, "step": 12120 }, { "epoch": 49.4734693877551, "grad_norm": 36.990604400634766, "learning_rate": 2.5281632653061226e-05, "loss": 1.4478, "step": 12121 }, { "epoch": 49.477551020408164, "grad_norm": 16.277524948120117, "learning_rate": 2.5279591836734697e-05, "loss": 2.7253, "step": 12122 }, { "epoch": 49.481632653061226, "grad_norm": 22.676549911499023, "learning_rate": 2.5277551020408164e-05, "loss": 2.5113, "step": 12123 }, { "epoch": 49.48571428571429, "grad_norm": 26.04831314086914, "learning_rate": 2.527551020408163e-05, "loss": 2.3518, "step": 12124 }, { "epoch": 49.48979591836735, "grad_norm": 19.73044204711914, "learning_rate": 2.5273469387755105e-05, "loss": 2.1435, "step": 12125 }, { "epoch": 49.49387755102041, "grad_norm": 17.514698028564453, "learning_rate": 2.5271428571428575e-05, "loss": 2.4334, "step": 12126 }, { "epoch": 49.49795918367347, "grad_norm": 29.5969181060791, "learning_rate": 2.5269387755102042e-05, "loss": 1.8107, "step": 12127 }, { "epoch": 49.50204081632653, "grad_norm": 32.648651123046875, "learning_rate": 2.526734693877551e-05, "loss": 1.2632, "step": 12128 }, { "epoch": 49.50612244897959, "grad_norm": 34.455387115478516, "learning_rate": 2.526530612244898e-05, "loss": 0.9443, "step": 12129 }, { "epoch": 49.51020408163265, "grad_norm": 34.07890319824219, "learning_rate": 2.526326530612245e-05, "loss": 1.7142, "step": 12130 }, { "epoch": 49.51428571428571, "grad_norm": 30.739381790161133, "learning_rate": 2.526122448979592e-05, "loss": 2.0163, "step": 12131 }, { "epoch": 49.518367346938774, "grad_norm": 39.61579895019531, "learning_rate": 2.5259183673469388e-05, "loss": 1.1274, "step": 12132 }, { "epoch": 49.522448979591836, "grad_norm": 38.610408782958984, "learning_rate": 2.5257142857142855e-05, "loss": 1.0097, "step": 12133 }, { "epoch": 49.5265306122449, "grad_norm": 30.359922409057617, "learning_rate": 2.5255102040816326e-05, "loss": 1.8093, "step": 12134 }, { "epoch": 49.53061224489796, "grad_norm": 38.248199462890625, "learning_rate": 2.52530612244898e-05, "loss": 0.9246, "step": 12135 }, { "epoch": 49.53469387755102, "grad_norm": 18.658105850219727, "learning_rate": 2.5251020408163267e-05, "loss": 2.3698, "step": 12136 }, { "epoch": 49.53877551020408, "grad_norm": 41.51542282104492, "learning_rate": 2.5248979591836734e-05, "loss": 0.7226, "step": 12137 }, { "epoch": 49.542857142857144, "grad_norm": 44.103233337402344, "learning_rate": 2.5246938775510205e-05, "loss": 0.9141, "step": 12138 }, { "epoch": 49.546938775510206, "grad_norm": 26.32498550415039, "learning_rate": 2.5244897959183672e-05, "loss": 1.4674, "step": 12139 }, { "epoch": 49.55102040816327, "grad_norm": 42.6370735168457, "learning_rate": 2.5242857142857146e-05, "loss": 1.2679, "step": 12140 }, { "epoch": 49.55510204081633, "grad_norm": 35.85696792602539, "learning_rate": 2.5240816326530613e-05, "loss": 1.1443, "step": 12141 }, { "epoch": 49.55918367346939, "grad_norm": 24.712993621826172, "learning_rate": 2.5238775510204083e-05, "loss": 2.1592, "step": 12142 }, { "epoch": 49.563265306122446, "grad_norm": 18.979305267333984, "learning_rate": 2.523673469387755e-05, "loss": 2.6864, "step": 12143 }, { "epoch": 49.56734693877551, "grad_norm": 27.007596969604492, "learning_rate": 2.5234693877551017e-05, "loss": 1.4957, "step": 12144 }, { "epoch": 49.57142857142857, "grad_norm": 44.854881286621094, "learning_rate": 2.523265306122449e-05, "loss": 1.7768, "step": 12145 }, { "epoch": 49.57551020408163, "grad_norm": 15.010540008544922, "learning_rate": 2.5230612244897962e-05, "loss": 2.7379, "step": 12146 }, { "epoch": 49.57959183673469, "grad_norm": 44.806396484375, "learning_rate": 2.522857142857143e-05, "loss": 1.0151, "step": 12147 }, { "epoch": 49.583673469387755, "grad_norm": 45.707157135009766, "learning_rate": 2.5226530612244896e-05, "loss": 1.4032, "step": 12148 }, { "epoch": 49.587755102040816, "grad_norm": 19.011808395385742, "learning_rate": 2.522448979591837e-05, "loss": 2.2932, "step": 12149 }, { "epoch": 49.59183673469388, "grad_norm": 18.042539596557617, "learning_rate": 2.522244897959184e-05, "loss": 2.7917, "step": 12150 }, { "epoch": 49.59591836734694, "grad_norm": 29.66738510131836, "learning_rate": 2.5220408163265308e-05, "loss": 1.0744, "step": 12151 }, { "epoch": 49.6, "grad_norm": 19.207063674926758, "learning_rate": 2.5218367346938775e-05, "loss": 2.463, "step": 12152 }, { "epoch": 49.60408163265306, "grad_norm": 27.827791213989258, "learning_rate": 2.5216326530612245e-05, "loss": 1.9978, "step": 12153 }, { "epoch": 49.608163265306125, "grad_norm": 20.783830642700195, "learning_rate": 2.521428571428572e-05, "loss": 2.0019, "step": 12154 }, { "epoch": 49.61224489795919, "grad_norm": 24.00598907470703, "learning_rate": 2.5212244897959186e-05, "loss": 1.9115, "step": 12155 }, { "epoch": 49.61632653061225, "grad_norm": 29.082300186157227, "learning_rate": 2.5210204081632653e-05, "loss": 2.0306, "step": 12156 }, { "epoch": 49.6204081632653, "grad_norm": 19.823802947998047, "learning_rate": 2.5208163265306124e-05, "loss": 2.4326, "step": 12157 }, { "epoch": 49.624489795918365, "grad_norm": 32.359474182128906, "learning_rate": 2.520612244897959e-05, "loss": 1.4555, "step": 12158 }, { "epoch": 49.628571428571426, "grad_norm": 14.808121681213379, "learning_rate": 2.5204081632653065e-05, "loss": 2.788, "step": 12159 }, { "epoch": 49.63265306122449, "grad_norm": 13.877535820007324, "learning_rate": 2.5202040816326532e-05, "loss": 2.6464, "step": 12160 }, { "epoch": 49.63673469387755, "grad_norm": 36.286705017089844, "learning_rate": 2.5200000000000003e-05, "loss": 0.3434, "step": 12161 }, { "epoch": 49.64081632653061, "grad_norm": 13.19391918182373, "learning_rate": 2.519795918367347e-05, "loss": 3.1211, "step": 12162 }, { "epoch": 49.64489795918367, "grad_norm": 21.108173370361328, "learning_rate": 2.5195918367346937e-05, "loss": 2.3093, "step": 12163 }, { "epoch": 49.648979591836735, "grad_norm": 39.836917877197266, "learning_rate": 2.519387755102041e-05, "loss": 1.0187, "step": 12164 }, { "epoch": 49.6530612244898, "grad_norm": 13.41064739227295, "learning_rate": 2.519183673469388e-05, "loss": 2.8006, "step": 12165 }, { "epoch": 49.65714285714286, "grad_norm": 15.537734031677246, "learning_rate": 2.518979591836735e-05, "loss": 2.7471, "step": 12166 }, { "epoch": 49.66122448979592, "grad_norm": 17.887313842773438, "learning_rate": 2.5187755102040816e-05, "loss": 2.6824, "step": 12167 }, { "epoch": 49.66530612244898, "grad_norm": 24.31829261779785, "learning_rate": 2.5185714285714286e-05, "loss": 2.3362, "step": 12168 }, { "epoch": 49.66938775510204, "grad_norm": 32.666847229003906, "learning_rate": 2.518367346938776e-05, "loss": 1.2462, "step": 12169 }, { "epoch": 49.673469387755105, "grad_norm": 47.90999984741211, "learning_rate": 2.5181632653061227e-05, "loss": 1.3959, "step": 12170 }, { "epoch": 49.67755102040816, "grad_norm": 24.7998104095459, "learning_rate": 2.5179591836734694e-05, "loss": 2.1518, "step": 12171 }, { "epoch": 49.68163265306122, "grad_norm": 32.57954025268555, "learning_rate": 2.5177551020408165e-05, "loss": 1.5116, "step": 12172 }, { "epoch": 49.68571428571428, "grad_norm": 32.1123161315918, "learning_rate": 2.5175510204081632e-05, "loss": 1.9841, "step": 12173 }, { "epoch": 49.689795918367345, "grad_norm": 29.187644958496094, "learning_rate": 2.5173469387755106e-05, "loss": 1.7245, "step": 12174 }, { "epoch": 49.69387755102041, "grad_norm": 20.344417572021484, "learning_rate": 2.5171428571428573e-05, "loss": 2.2237, "step": 12175 }, { "epoch": 49.69795918367347, "grad_norm": 25.123977661132812, "learning_rate": 2.5169387755102043e-05, "loss": 2.2509, "step": 12176 }, { "epoch": 49.70204081632653, "grad_norm": 18.298675537109375, "learning_rate": 2.516734693877551e-05, "loss": 2.2825, "step": 12177 }, { "epoch": 49.70612244897959, "grad_norm": 26.23302459716797, "learning_rate": 2.5165306122448978e-05, "loss": 1.9068, "step": 12178 }, { "epoch": 49.710204081632654, "grad_norm": 19.321455001831055, "learning_rate": 2.516326530612245e-05, "loss": 1.9589, "step": 12179 }, { "epoch": 49.714285714285715, "grad_norm": 27.004901885986328, "learning_rate": 2.516122448979592e-05, "loss": 1.6414, "step": 12180 }, { "epoch": 49.71836734693878, "grad_norm": 26.390483856201172, "learning_rate": 2.515918367346939e-05, "loss": 0.4439, "step": 12181 }, { "epoch": 49.72244897959184, "grad_norm": 47.28950500488281, "learning_rate": 2.5157142857142856e-05, "loss": 0.9183, "step": 12182 }, { "epoch": 49.7265306122449, "grad_norm": 29.528661727905273, "learning_rate": 2.5155102040816323e-05, "loss": 1.4003, "step": 12183 }, { "epoch": 49.73061224489796, "grad_norm": 35.28582763671875, "learning_rate": 2.5153061224489797e-05, "loss": 0.943, "step": 12184 }, { "epoch": 49.734693877551024, "grad_norm": 23.31931495666504, "learning_rate": 2.5151020408163268e-05, "loss": 2.056, "step": 12185 }, { "epoch": 49.73877551020408, "grad_norm": 24.652206420898438, "learning_rate": 2.5148979591836735e-05, "loss": 2.2161, "step": 12186 }, { "epoch": 49.74285714285714, "grad_norm": 40.8835334777832, "learning_rate": 2.5146938775510202e-05, "loss": 1.1296, "step": 12187 }, { "epoch": 49.7469387755102, "grad_norm": 31.499122619628906, "learning_rate": 2.5144897959183673e-05, "loss": 1.8736, "step": 12188 }, { "epoch": 49.751020408163264, "grad_norm": 22.265562057495117, "learning_rate": 2.5142857142857147e-05, "loss": 2.3703, "step": 12189 }, { "epoch": 49.755102040816325, "grad_norm": 49.48470687866211, "learning_rate": 2.5140816326530614e-05, "loss": 1.027, "step": 12190 }, { "epoch": 49.75918367346939, "grad_norm": 30.58663558959961, "learning_rate": 2.513877551020408e-05, "loss": 1.6303, "step": 12191 }, { "epoch": 49.76326530612245, "grad_norm": 33.50017166137695, "learning_rate": 2.513673469387755e-05, "loss": 1.4057, "step": 12192 }, { "epoch": 49.76734693877551, "grad_norm": 27.0832462310791, "learning_rate": 2.5134693877551025e-05, "loss": 1.7316, "step": 12193 }, { "epoch": 49.77142857142857, "grad_norm": 35.090301513671875, "learning_rate": 2.5132653061224492e-05, "loss": 1.5086, "step": 12194 }, { "epoch": 49.775510204081634, "grad_norm": 33.802730560302734, "learning_rate": 2.513061224489796e-05, "loss": 1.8902, "step": 12195 }, { "epoch": 49.779591836734696, "grad_norm": 20.735231399536133, "learning_rate": 2.512857142857143e-05, "loss": 2.2721, "step": 12196 }, { "epoch": 49.78367346938776, "grad_norm": 23.434810638427734, "learning_rate": 2.5126530612244897e-05, "loss": 2.4024, "step": 12197 }, { "epoch": 49.78775510204082, "grad_norm": 30.62485694885254, "learning_rate": 2.512448979591837e-05, "loss": 1.572, "step": 12198 }, { "epoch": 49.79183673469388, "grad_norm": 11.197305679321289, "learning_rate": 2.5122448979591838e-05, "loss": 3.3646, "step": 12199 }, { "epoch": 49.795918367346935, "grad_norm": 33.55308532714844, "learning_rate": 2.512040816326531e-05, "loss": 1.8505, "step": 12200 }, { "epoch": 49.8, "grad_norm": 27.981916427612305, "learning_rate": 2.5118367346938776e-05, "loss": 1.3527, "step": 12201 }, { "epoch": 49.80408163265306, "grad_norm": 21.76568603515625, "learning_rate": 2.5116326530612243e-05, "loss": 2.2578, "step": 12202 }, { "epoch": 49.80816326530612, "grad_norm": 31.61469268798828, "learning_rate": 2.5114285714285717e-05, "loss": 1.665, "step": 12203 }, { "epoch": 49.81224489795918, "grad_norm": 18.419076919555664, "learning_rate": 2.5112244897959187e-05, "loss": 2.4707, "step": 12204 }, { "epoch": 49.816326530612244, "grad_norm": 27.00212287902832, "learning_rate": 2.5110204081632654e-05, "loss": 1.9418, "step": 12205 }, { "epoch": 49.820408163265306, "grad_norm": 43.57789993286133, "learning_rate": 2.510816326530612e-05, "loss": 1.0035, "step": 12206 }, { "epoch": 49.82448979591837, "grad_norm": 22.110553741455078, "learning_rate": 2.5106122448979592e-05, "loss": 2.2625, "step": 12207 }, { "epoch": 49.82857142857143, "grad_norm": 21.423093795776367, "learning_rate": 2.5104081632653066e-05, "loss": 2.4892, "step": 12208 }, { "epoch": 49.83265306122449, "grad_norm": 56.366050720214844, "learning_rate": 2.5102040816326533e-05, "loss": 0.5287, "step": 12209 }, { "epoch": 49.83673469387755, "grad_norm": 24.56589698791504, "learning_rate": 2.51e-05, "loss": 1.5451, "step": 12210 }, { "epoch": 49.840816326530614, "grad_norm": 22.870786666870117, "learning_rate": 2.509795918367347e-05, "loss": 2.1588, "step": 12211 }, { "epoch": 49.844897959183676, "grad_norm": 22.3750057220459, "learning_rate": 2.5095918367346938e-05, "loss": 2.231, "step": 12212 }, { "epoch": 49.84897959183674, "grad_norm": 16.387121200561523, "learning_rate": 2.5093877551020412e-05, "loss": 2.4416, "step": 12213 }, { "epoch": 49.85306122448979, "grad_norm": 30.49010467529297, "learning_rate": 2.509183673469388e-05, "loss": 1.611, "step": 12214 }, { "epoch": 49.857142857142854, "grad_norm": 32.56044387817383, "learning_rate": 2.508979591836735e-05, "loss": 0.26, "step": 12215 }, { "epoch": 49.861224489795916, "grad_norm": 11.714421272277832, "learning_rate": 2.5087755102040817e-05, "loss": 3.1995, "step": 12216 }, { "epoch": 49.86530612244898, "grad_norm": 49.92628479003906, "learning_rate": 2.5085714285714284e-05, "loss": 0.3977, "step": 12217 }, { "epoch": 49.86938775510204, "grad_norm": 36.667213439941406, "learning_rate": 2.5083673469387758e-05, "loss": 1.2242, "step": 12218 }, { "epoch": 49.8734693877551, "grad_norm": 28.265705108642578, "learning_rate": 2.5081632653061228e-05, "loss": 1.9887, "step": 12219 }, { "epoch": 49.87755102040816, "grad_norm": 21.112401962280273, "learning_rate": 2.5079591836734695e-05, "loss": 2.1691, "step": 12220 }, { "epoch": 49.881632653061224, "grad_norm": 29.64778709411621, "learning_rate": 2.5077551020408162e-05, "loss": 1.5185, "step": 12221 }, { "epoch": 49.885714285714286, "grad_norm": 39.59107208251953, "learning_rate": 2.5075510204081633e-05, "loss": 1.4865, "step": 12222 }, { "epoch": 49.88979591836735, "grad_norm": 17.08677864074707, "learning_rate": 2.5073469387755107e-05, "loss": 2.3477, "step": 12223 }, { "epoch": 49.89387755102041, "grad_norm": 18.28711700439453, "learning_rate": 2.5071428571428574e-05, "loss": 2.3777, "step": 12224 }, { "epoch": 49.89795918367347, "grad_norm": 26.563169479370117, "learning_rate": 2.506938775510204e-05, "loss": 2.2313, "step": 12225 }, { "epoch": 49.90204081632653, "grad_norm": 36.98859786987305, "learning_rate": 2.506734693877551e-05, "loss": 0.5504, "step": 12226 }, { "epoch": 49.906122448979595, "grad_norm": 24.752178192138672, "learning_rate": 2.506530612244898e-05, "loss": 1.4698, "step": 12227 }, { "epoch": 49.910204081632656, "grad_norm": 21.572555541992188, "learning_rate": 2.5063265306122453e-05, "loss": 2.3175, "step": 12228 }, { "epoch": 49.91428571428571, "grad_norm": 29.81448745727539, "learning_rate": 2.506122448979592e-05, "loss": 2.0139, "step": 12229 }, { "epoch": 49.91836734693877, "grad_norm": 34.780174255371094, "learning_rate": 2.505918367346939e-05, "loss": 1.0891, "step": 12230 }, { "epoch": 49.922448979591834, "grad_norm": 54.86786651611328, "learning_rate": 2.5057142857142857e-05, "loss": 1.985, "step": 12231 }, { "epoch": 49.926530612244896, "grad_norm": 44.53764343261719, "learning_rate": 2.5055102040816324e-05, "loss": 1.0454, "step": 12232 }, { "epoch": 49.93061224489796, "grad_norm": 41.549713134765625, "learning_rate": 2.50530612244898e-05, "loss": 1.8405, "step": 12233 }, { "epoch": 49.93469387755102, "grad_norm": 18.40684700012207, "learning_rate": 2.5051020408163265e-05, "loss": 0.1056, "step": 12234 }, { "epoch": 49.93877551020408, "grad_norm": 25.89434051513672, "learning_rate": 2.5048979591836736e-05, "loss": 2.0698, "step": 12235 }, { "epoch": 49.94285714285714, "grad_norm": 19.55883026123047, "learning_rate": 2.5046938775510203e-05, "loss": 2.2452, "step": 12236 }, { "epoch": 49.946938775510205, "grad_norm": 18.70148468017578, "learning_rate": 2.5044897959183677e-05, "loss": 2.3657, "step": 12237 }, { "epoch": 49.95102040816327, "grad_norm": 24.916250228881836, "learning_rate": 2.5042857142857144e-05, "loss": 1.9203, "step": 12238 }, { "epoch": 49.95510204081633, "grad_norm": 16.700742721557617, "learning_rate": 2.5040816326530615e-05, "loss": 2.5009, "step": 12239 }, { "epoch": 49.95918367346939, "grad_norm": 13.929333686828613, "learning_rate": 2.5038775510204082e-05, "loss": 2.9358, "step": 12240 }, { "epoch": 49.96326530612245, "grad_norm": 46.982696533203125, "learning_rate": 2.503673469387755e-05, "loss": 1.1799, "step": 12241 }, { "epoch": 49.96734693877551, "grad_norm": 19.241600036621094, "learning_rate": 2.5034693877551023e-05, "loss": 2.0409, "step": 12242 }, { "epoch": 49.97142857142857, "grad_norm": 19.562118530273438, "learning_rate": 2.5032653061224493e-05, "loss": 2.2104, "step": 12243 }, { "epoch": 49.97551020408163, "grad_norm": 18.527841567993164, "learning_rate": 2.503061224489796e-05, "loss": 2.4192, "step": 12244 }, { "epoch": 49.97959183673469, "grad_norm": 36.65669631958008, "learning_rate": 2.5028571428571428e-05, "loss": 1.6899, "step": 12245 }, { "epoch": 49.98367346938775, "grad_norm": 23.785640716552734, "learning_rate": 2.5026530612244898e-05, "loss": 2.01, "step": 12246 }, { "epoch": 49.987755102040815, "grad_norm": 49.46861267089844, "learning_rate": 2.5024489795918372e-05, "loss": 0.5048, "step": 12247 }, { "epoch": 49.99183673469388, "grad_norm": 31.97735595703125, "learning_rate": 2.502244897959184e-05, "loss": 1.3338, "step": 12248 }, { "epoch": 49.99591836734694, "grad_norm": 29.363025665283203, "learning_rate": 2.5020408163265306e-05, "loss": 2.2007, "step": 12249 }, { "epoch": 50.0, "grad_norm": 35.63878631591797, "learning_rate": 2.5018367346938777e-05, "loss": 1.9913, "step": 12250 }, { "epoch": 50.00408163265306, "grad_norm": 42.26740646362305, "learning_rate": 2.5016326530612244e-05, "loss": 2.0037, "step": 12251 }, { "epoch": 50.00816326530612, "grad_norm": 22.20455551147461, "learning_rate": 2.5014285714285718e-05, "loss": 1.9774, "step": 12252 }, { "epoch": 50.012244897959185, "grad_norm": 19.55082893371582, "learning_rate": 2.5012244897959185e-05, "loss": 2.3726, "step": 12253 }, { "epoch": 50.01632653061225, "grad_norm": 23.93754005432129, "learning_rate": 2.5010204081632655e-05, "loss": 2.6548, "step": 12254 }, { "epoch": 50.02040816326531, "grad_norm": 16.206207275390625, "learning_rate": 2.5008163265306123e-05, "loss": 2.6127, "step": 12255 }, { "epoch": 50.02448979591837, "grad_norm": 18.538679122924805, "learning_rate": 2.500612244897959e-05, "loss": 1.8752, "step": 12256 }, { "epoch": 50.02857142857143, "grad_norm": 35.15460968017578, "learning_rate": 2.5004081632653064e-05, "loss": 1.0683, "step": 12257 }, { "epoch": 50.03265306122449, "grad_norm": 28.567838668823242, "learning_rate": 2.5002040816326534e-05, "loss": 0.9254, "step": 12258 }, { "epoch": 50.03673469387755, "grad_norm": 13.678407669067383, "learning_rate": 2.5e-05, "loss": 2.7054, "step": 12259 }, { "epoch": 50.04081632653061, "grad_norm": 38.31747817993164, "learning_rate": 2.499795918367347e-05, "loss": 1.0569, "step": 12260 }, { "epoch": 50.04489795918367, "grad_norm": 19.954652786254883, "learning_rate": 2.499591836734694e-05, "loss": 1.9947, "step": 12261 }, { "epoch": 50.04897959183673, "grad_norm": 27.961477279663086, "learning_rate": 2.499387755102041e-05, "loss": 2.5031, "step": 12262 }, { "epoch": 50.053061224489795, "grad_norm": 8.574658393859863, "learning_rate": 2.499183673469388e-05, "loss": 3.3161, "step": 12263 }, { "epoch": 50.05714285714286, "grad_norm": 27.42633628845215, "learning_rate": 2.4989795918367347e-05, "loss": 1.9991, "step": 12264 }, { "epoch": 50.06122448979592, "grad_norm": 34.475074768066406, "learning_rate": 2.4987755102040818e-05, "loss": 1.8602, "step": 12265 }, { "epoch": 50.06530612244898, "grad_norm": 57.52443313598633, "learning_rate": 2.4985714285714288e-05, "loss": 0.9182, "step": 12266 }, { "epoch": 50.06938775510204, "grad_norm": 21.08243751525879, "learning_rate": 2.4983673469387755e-05, "loss": 2.2894, "step": 12267 }, { "epoch": 50.073469387755104, "grad_norm": 20.4781436920166, "learning_rate": 2.4981632653061226e-05, "loss": 2.3354, "step": 12268 }, { "epoch": 50.077551020408166, "grad_norm": 28.479949951171875, "learning_rate": 2.4979591836734696e-05, "loss": 0.8814, "step": 12269 }, { "epoch": 50.08163265306123, "grad_norm": 93.57479095458984, "learning_rate": 2.4977551020408163e-05, "loss": 0.5556, "step": 12270 }, { "epoch": 50.08571428571429, "grad_norm": 25.8251895904541, "learning_rate": 2.4975510204081634e-05, "loss": 1.936, "step": 12271 }, { "epoch": 50.089795918367344, "grad_norm": 25.177139282226562, "learning_rate": 2.49734693877551e-05, "loss": 1.9893, "step": 12272 }, { "epoch": 50.093877551020405, "grad_norm": 40.6710205078125, "learning_rate": 2.4971428571428575e-05, "loss": 1.5207, "step": 12273 }, { "epoch": 50.09795918367347, "grad_norm": 29.551977157592773, "learning_rate": 2.4969387755102042e-05, "loss": 1.7585, "step": 12274 }, { "epoch": 50.10204081632653, "grad_norm": 20.577177047729492, "learning_rate": 2.4967346938775512e-05, "loss": 2.2466, "step": 12275 }, { "epoch": 50.10612244897959, "grad_norm": 29.83232307434082, "learning_rate": 2.496530612244898e-05, "loss": 1.6409, "step": 12276 }, { "epoch": 50.11020408163265, "grad_norm": 26.341611862182617, "learning_rate": 2.496326530612245e-05, "loss": 2.1401, "step": 12277 }, { "epoch": 50.114285714285714, "grad_norm": 37.89066696166992, "learning_rate": 2.496122448979592e-05, "loss": 0.6517, "step": 12278 }, { "epoch": 50.118367346938776, "grad_norm": 27.823944091796875, "learning_rate": 2.4959183673469388e-05, "loss": 1.9018, "step": 12279 }, { "epoch": 50.12244897959184, "grad_norm": 26.620861053466797, "learning_rate": 2.4957142857142858e-05, "loss": 2.1575, "step": 12280 }, { "epoch": 50.1265306122449, "grad_norm": 31.70968246459961, "learning_rate": 2.495510204081633e-05, "loss": 1.8008, "step": 12281 }, { "epoch": 50.13061224489796, "grad_norm": 22.362470626831055, "learning_rate": 2.4953061224489796e-05, "loss": 2.0761, "step": 12282 }, { "epoch": 50.13469387755102, "grad_norm": 35.05876541137695, "learning_rate": 2.4951020408163266e-05, "loss": 0.9327, "step": 12283 }, { "epoch": 50.138775510204084, "grad_norm": 29.450275421142578, "learning_rate": 2.4948979591836737e-05, "loss": 1.5728, "step": 12284 }, { "epoch": 50.142857142857146, "grad_norm": 24.160377502441406, "learning_rate": 2.4946938775510207e-05, "loss": 1.5446, "step": 12285 }, { "epoch": 50.14693877551021, "grad_norm": 25.876914978027344, "learning_rate": 2.4944897959183675e-05, "loss": 1.8081, "step": 12286 }, { "epoch": 50.15102040816326, "grad_norm": 27.162277221679688, "learning_rate": 2.4942857142857142e-05, "loss": 2.1275, "step": 12287 }, { "epoch": 50.155102040816324, "grad_norm": 44.08452224731445, "learning_rate": 2.4940816326530612e-05, "loss": 0.3347, "step": 12288 }, { "epoch": 50.159183673469386, "grad_norm": 25.26351547241211, "learning_rate": 2.4938775510204083e-05, "loss": 2.2122, "step": 12289 }, { "epoch": 50.16326530612245, "grad_norm": 31.432357788085938, "learning_rate": 2.4936734693877553e-05, "loss": 1.7872, "step": 12290 }, { "epoch": 50.16734693877551, "grad_norm": 18.029586791992188, "learning_rate": 2.493469387755102e-05, "loss": 2.2196, "step": 12291 }, { "epoch": 50.17142857142857, "grad_norm": 21.721120834350586, "learning_rate": 2.493265306122449e-05, "loss": 2.2942, "step": 12292 }, { "epoch": 50.17551020408163, "grad_norm": 25.813228607177734, "learning_rate": 2.493061224489796e-05, "loss": 2.1296, "step": 12293 }, { "epoch": 50.179591836734694, "grad_norm": 30.62514305114746, "learning_rate": 2.492857142857143e-05, "loss": 1.0328, "step": 12294 }, { "epoch": 50.183673469387756, "grad_norm": 30.777746200561523, "learning_rate": 2.49265306122449e-05, "loss": 1.6434, "step": 12295 }, { "epoch": 50.18775510204082, "grad_norm": 25.708799362182617, "learning_rate": 2.492448979591837e-05, "loss": 2.1453, "step": 12296 }, { "epoch": 50.19183673469388, "grad_norm": 23.390968322753906, "learning_rate": 2.492244897959184e-05, "loss": 2.0627, "step": 12297 }, { "epoch": 50.19591836734694, "grad_norm": 31.44742774963379, "learning_rate": 2.4920408163265307e-05, "loss": 0.3663, "step": 12298 }, { "epoch": 50.2, "grad_norm": 34.981380462646484, "learning_rate": 2.4918367346938774e-05, "loss": 0.4785, "step": 12299 }, { "epoch": 50.204081632653065, "grad_norm": 36.91166305541992, "learning_rate": 2.4916326530612248e-05, "loss": 0.5038, "step": 12300 }, { "epoch": 50.20816326530612, "grad_norm": 38.669158935546875, "learning_rate": 2.4914285714285715e-05, "loss": 0.4296, "step": 12301 }, { "epoch": 50.21224489795918, "grad_norm": 28.76450538635254, "learning_rate": 2.4912244897959186e-05, "loss": 1.5555, "step": 12302 }, { "epoch": 50.21632653061224, "grad_norm": 55.84162521362305, "learning_rate": 2.4910204081632653e-05, "loss": 0.517, "step": 12303 }, { "epoch": 50.220408163265304, "grad_norm": 23.037851333618164, "learning_rate": 2.4908163265306123e-05, "loss": 2.4388, "step": 12304 }, { "epoch": 50.224489795918366, "grad_norm": 27.241491317749023, "learning_rate": 2.4906122448979594e-05, "loss": 2.0504, "step": 12305 }, { "epoch": 50.22857142857143, "grad_norm": 18.508502960205078, "learning_rate": 2.490408163265306e-05, "loss": 2.651, "step": 12306 }, { "epoch": 50.23265306122449, "grad_norm": 20.13570213317871, "learning_rate": 2.490204081632653e-05, "loss": 2.3934, "step": 12307 }, { "epoch": 50.23673469387755, "grad_norm": 22.925325393676758, "learning_rate": 2.4900000000000002e-05, "loss": 1.904, "step": 12308 }, { "epoch": 50.24081632653061, "grad_norm": 19.060998916625977, "learning_rate": 2.489795918367347e-05, "loss": 2.3662, "step": 12309 }, { "epoch": 50.244897959183675, "grad_norm": 24.5339412689209, "learning_rate": 2.489591836734694e-05, "loss": 2.2776, "step": 12310 }, { "epoch": 50.248979591836736, "grad_norm": 26.249027252197266, "learning_rate": 2.489387755102041e-05, "loss": 1.6236, "step": 12311 }, { "epoch": 50.2530612244898, "grad_norm": 39.82902908325195, "learning_rate": 2.489183673469388e-05, "loss": 0.8419, "step": 12312 }, { "epoch": 50.25714285714286, "grad_norm": 45.19742202758789, "learning_rate": 2.4889795918367348e-05, "loss": 1.1641, "step": 12313 }, { "epoch": 50.26122448979592, "grad_norm": 24.03653907775879, "learning_rate": 2.4887755102040815e-05, "loss": 1.6449, "step": 12314 }, { "epoch": 50.265306122448976, "grad_norm": 19.92395782470703, "learning_rate": 2.4885714285714286e-05, "loss": 1.9491, "step": 12315 }, { "epoch": 50.26938775510204, "grad_norm": 33.2017936706543, "learning_rate": 2.4883673469387756e-05, "loss": 1.8144, "step": 12316 }, { "epoch": 50.2734693877551, "grad_norm": 45.16471481323242, "learning_rate": 2.4881632653061227e-05, "loss": 0.8657, "step": 12317 }, { "epoch": 50.27755102040816, "grad_norm": 39.80154037475586, "learning_rate": 2.4879591836734694e-05, "loss": 1.5081, "step": 12318 }, { "epoch": 50.28163265306122, "grad_norm": 28.19224739074707, "learning_rate": 2.4877551020408164e-05, "loss": 1.9469, "step": 12319 }, { "epoch": 50.285714285714285, "grad_norm": 21.16813087463379, "learning_rate": 2.4875510204081635e-05, "loss": 2.2626, "step": 12320 }, { "epoch": 50.289795918367346, "grad_norm": 27.701316833496094, "learning_rate": 2.4873469387755102e-05, "loss": 2.1962, "step": 12321 }, { "epoch": 50.29387755102041, "grad_norm": 31.61176109313965, "learning_rate": 2.4871428571428572e-05, "loss": 1.6188, "step": 12322 }, { "epoch": 50.29795918367347, "grad_norm": 31.397018432617188, "learning_rate": 2.4869387755102043e-05, "loss": 1.4643, "step": 12323 }, { "epoch": 50.30204081632653, "grad_norm": 28.43572425842285, "learning_rate": 2.4867346938775513e-05, "loss": 2.3375, "step": 12324 }, { "epoch": 50.30612244897959, "grad_norm": 15.746256828308105, "learning_rate": 2.486530612244898e-05, "loss": 2.2391, "step": 12325 }, { "epoch": 50.310204081632655, "grad_norm": 29.334564208984375, "learning_rate": 2.4863265306122448e-05, "loss": 1.0449, "step": 12326 }, { "epoch": 50.31428571428572, "grad_norm": 20.870019912719727, "learning_rate": 2.486122448979592e-05, "loss": 2.3581, "step": 12327 }, { "epoch": 50.31836734693878, "grad_norm": 47.41016387939453, "learning_rate": 2.485918367346939e-05, "loss": 1.1479, "step": 12328 }, { "epoch": 50.32244897959184, "grad_norm": 40.18269729614258, "learning_rate": 2.485714285714286e-05, "loss": 1.3904, "step": 12329 }, { "epoch": 50.326530612244895, "grad_norm": 17.348567962646484, "learning_rate": 2.4855102040816326e-05, "loss": 2.1271, "step": 12330 }, { "epoch": 50.33061224489796, "grad_norm": 39.78608322143555, "learning_rate": 2.4853061224489797e-05, "loss": 1.0207, "step": 12331 }, { "epoch": 50.33469387755102, "grad_norm": 21.277551651000977, "learning_rate": 2.4851020408163267e-05, "loss": 2.3889, "step": 12332 }, { "epoch": 50.33877551020408, "grad_norm": 48.8680534362793, "learning_rate": 2.4848979591836735e-05, "loss": 0.9091, "step": 12333 }, { "epoch": 50.34285714285714, "grad_norm": 32.88680648803711, "learning_rate": 2.4846938775510205e-05, "loss": 1.7004, "step": 12334 }, { "epoch": 50.3469387755102, "grad_norm": 30.17449378967285, "learning_rate": 2.4844897959183676e-05, "loss": 1.0191, "step": 12335 }, { "epoch": 50.351020408163265, "grad_norm": 28.877830505371094, "learning_rate": 2.4842857142857143e-05, "loss": 1.3101, "step": 12336 }, { "epoch": 50.35510204081633, "grad_norm": 22.46480369567871, "learning_rate": 2.4840816326530613e-05, "loss": 2.0833, "step": 12337 }, { "epoch": 50.35918367346939, "grad_norm": 26.151779174804688, "learning_rate": 2.4838775510204084e-05, "loss": 2.201, "step": 12338 }, { "epoch": 50.36326530612245, "grad_norm": 37.992855072021484, "learning_rate": 2.4836734693877554e-05, "loss": 0.9758, "step": 12339 }, { "epoch": 50.36734693877551, "grad_norm": 41.577510833740234, "learning_rate": 2.483469387755102e-05, "loss": 1.2407, "step": 12340 }, { "epoch": 50.371428571428574, "grad_norm": 24.204978942871094, "learning_rate": 2.4832653061224492e-05, "loss": 1.6649, "step": 12341 }, { "epoch": 50.375510204081635, "grad_norm": 45.19241714477539, "learning_rate": 2.483061224489796e-05, "loss": 1.0642, "step": 12342 }, { "epoch": 50.3795918367347, "grad_norm": 27.03313446044922, "learning_rate": 2.482857142857143e-05, "loss": 2.1179, "step": 12343 }, { "epoch": 50.38367346938775, "grad_norm": 25.184152603149414, "learning_rate": 2.48265306122449e-05, "loss": 1.5599, "step": 12344 }, { "epoch": 50.38775510204081, "grad_norm": 37.78645706176758, "learning_rate": 2.4824489795918367e-05, "loss": 1.1911, "step": 12345 }, { "epoch": 50.391836734693875, "grad_norm": 20.206100463867188, "learning_rate": 2.4822448979591838e-05, "loss": 2.7585, "step": 12346 }, { "epoch": 50.39591836734694, "grad_norm": 16.716781616210938, "learning_rate": 2.4820408163265308e-05, "loss": 2.4184, "step": 12347 }, { "epoch": 50.4, "grad_norm": 37.853904724121094, "learning_rate": 2.4818367346938775e-05, "loss": 1.0467, "step": 12348 }, { "epoch": 50.40408163265306, "grad_norm": 43.422061920166016, "learning_rate": 2.4816326530612246e-05, "loss": 0.9832, "step": 12349 }, { "epoch": 50.40816326530612, "grad_norm": 22.267250061035156, "learning_rate": 2.4814285714285716e-05, "loss": 1.8944, "step": 12350 }, { "epoch": 50.412244897959184, "grad_norm": 17.198232650756836, "learning_rate": 2.4812244897959187e-05, "loss": 2.2595, "step": 12351 }, { "epoch": 50.416326530612245, "grad_norm": 31.595396041870117, "learning_rate": 2.4810204081632654e-05, "loss": 1.9669, "step": 12352 }, { "epoch": 50.42040816326531, "grad_norm": 46.99681854248047, "learning_rate": 2.480816326530612e-05, "loss": 1.0697, "step": 12353 }, { "epoch": 50.42448979591837, "grad_norm": 24.94377899169922, "learning_rate": 2.4806122448979595e-05, "loss": 1.4401, "step": 12354 }, { "epoch": 50.42857142857143, "grad_norm": 24.099084854125977, "learning_rate": 2.4804081632653062e-05, "loss": 0.3296, "step": 12355 }, { "epoch": 50.43265306122449, "grad_norm": 32.79203414916992, "learning_rate": 2.4802040816326533e-05, "loss": 0.7663, "step": 12356 }, { "epoch": 50.436734693877554, "grad_norm": 26.249786376953125, "learning_rate": 2.48e-05, "loss": 2.1569, "step": 12357 }, { "epoch": 50.440816326530616, "grad_norm": 34.37892532348633, "learning_rate": 2.479795918367347e-05, "loss": 1.4107, "step": 12358 }, { "epoch": 50.44489795918367, "grad_norm": 32.910728454589844, "learning_rate": 2.479591836734694e-05, "loss": 1.3947, "step": 12359 }, { "epoch": 50.44897959183673, "grad_norm": 42.4287109375, "learning_rate": 2.4793877551020408e-05, "loss": 0.504, "step": 12360 }, { "epoch": 50.453061224489794, "grad_norm": 17.205116271972656, "learning_rate": 2.479183673469388e-05, "loss": 2.8181, "step": 12361 }, { "epoch": 50.457142857142856, "grad_norm": 32.50160217285156, "learning_rate": 2.478979591836735e-05, "loss": 2.0389, "step": 12362 }, { "epoch": 50.46122448979592, "grad_norm": 21.719196319580078, "learning_rate": 2.478775510204082e-05, "loss": 2.2621, "step": 12363 }, { "epoch": 50.46530612244898, "grad_norm": 33.49744415283203, "learning_rate": 2.4785714285714287e-05, "loss": 1.3891, "step": 12364 }, { "epoch": 50.46938775510204, "grad_norm": 19.476787567138672, "learning_rate": 2.4783673469387754e-05, "loss": 2.2598, "step": 12365 }, { "epoch": 50.4734693877551, "grad_norm": 38.81309127807617, "learning_rate": 2.4781632653061228e-05, "loss": 1.1334, "step": 12366 }, { "epoch": 50.477551020408164, "grad_norm": 32.69929122924805, "learning_rate": 2.4779591836734695e-05, "loss": 1.9702, "step": 12367 }, { "epoch": 50.481632653061226, "grad_norm": 25.92131233215332, "learning_rate": 2.4777551020408165e-05, "loss": 1.795, "step": 12368 }, { "epoch": 50.48571428571429, "grad_norm": 17.35979461669922, "learning_rate": 2.4775510204081632e-05, "loss": 2.8968, "step": 12369 }, { "epoch": 50.48979591836735, "grad_norm": 44.54062271118164, "learning_rate": 2.4773469387755103e-05, "loss": 1.3097, "step": 12370 }, { "epoch": 50.49387755102041, "grad_norm": 37.79124069213867, "learning_rate": 2.4771428571428573e-05, "loss": 0.5465, "step": 12371 }, { "epoch": 50.49795918367347, "grad_norm": 27.49083137512207, "learning_rate": 2.476938775510204e-05, "loss": 1.4966, "step": 12372 }, { "epoch": 50.50204081632653, "grad_norm": 56.114749908447266, "learning_rate": 2.476734693877551e-05, "loss": 0.9376, "step": 12373 }, { "epoch": 50.50612244897959, "grad_norm": 16.73893928527832, "learning_rate": 2.476530612244898e-05, "loss": 2.6325, "step": 12374 }, { "epoch": 50.51020408163265, "grad_norm": 40.42280960083008, "learning_rate": 2.476326530612245e-05, "loss": 1.8459, "step": 12375 }, { "epoch": 50.51428571428571, "grad_norm": 21.78295135498047, "learning_rate": 2.476122448979592e-05, "loss": 2.1959, "step": 12376 }, { "epoch": 50.518367346938774, "grad_norm": 49.558597564697266, "learning_rate": 2.475918367346939e-05, "loss": 1.1476, "step": 12377 }, { "epoch": 50.522448979591836, "grad_norm": 20.80927085876465, "learning_rate": 2.475714285714286e-05, "loss": 2.361, "step": 12378 }, { "epoch": 50.5265306122449, "grad_norm": 15.775773048400879, "learning_rate": 2.4755102040816327e-05, "loss": 2.4436, "step": 12379 }, { "epoch": 50.53061224489796, "grad_norm": 36.152198791503906, "learning_rate": 2.4753061224489794e-05, "loss": 1.8103, "step": 12380 }, { "epoch": 50.53469387755102, "grad_norm": 24.037450790405273, "learning_rate": 2.475102040816327e-05, "loss": 1.8453, "step": 12381 }, { "epoch": 50.53877551020408, "grad_norm": 35.06730270385742, "learning_rate": 2.4748979591836735e-05, "loss": 1.6278, "step": 12382 }, { "epoch": 50.542857142857144, "grad_norm": 22.17036247253418, "learning_rate": 2.4746938775510206e-05, "loss": 2.4453, "step": 12383 }, { "epoch": 50.546938775510206, "grad_norm": 47.39198684692383, "learning_rate": 2.4744897959183673e-05, "loss": 1.4183, "step": 12384 }, { "epoch": 50.55102040816327, "grad_norm": 37.54650115966797, "learning_rate": 2.4742857142857147e-05, "loss": 0.9359, "step": 12385 }, { "epoch": 50.55510204081633, "grad_norm": 26.50070571899414, "learning_rate": 2.4740816326530614e-05, "loss": 1.7229, "step": 12386 }, { "epoch": 50.55918367346939, "grad_norm": 27.14856719970703, "learning_rate": 2.473877551020408e-05, "loss": 1.5863, "step": 12387 }, { "epoch": 50.563265306122446, "grad_norm": 25.117259979248047, "learning_rate": 2.4736734693877552e-05, "loss": 2.2253, "step": 12388 }, { "epoch": 50.56734693877551, "grad_norm": 35.096832275390625, "learning_rate": 2.4734693877551022e-05, "loss": 0.905, "step": 12389 }, { "epoch": 50.57142857142857, "grad_norm": 46.86671829223633, "learning_rate": 2.4732653061224493e-05, "loss": 1.0492, "step": 12390 }, { "epoch": 50.57551020408163, "grad_norm": 36.568294525146484, "learning_rate": 2.473061224489796e-05, "loss": 1.504, "step": 12391 }, { "epoch": 50.57959183673469, "grad_norm": 33.05762481689453, "learning_rate": 2.4728571428571427e-05, "loss": 1.6778, "step": 12392 }, { "epoch": 50.583673469387755, "grad_norm": 25.3111572265625, "learning_rate": 2.47265306122449e-05, "loss": 2.1777, "step": 12393 }, { "epoch": 50.587755102040816, "grad_norm": 33.301334381103516, "learning_rate": 2.4724489795918368e-05, "loss": 0.8123, "step": 12394 }, { "epoch": 50.59183673469388, "grad_norm": 41.985145568847656, "learning_rate": 2.472244897959184e-05, "loss": 1.1318, "step": 12395 }, { "epoch": 50.59591836734694, "grad_norm": 35.6640625, "learning_rate": 2.4720408163265306e-05, "loss": 0.3891, "step": 12396 }, { "epoch": 50.6, "grad_norm": 20.23262596130371, "learning_rate": 2.4718367346938776e-05, "loss": 2.3935, "step": 12397 }, { "epoch": 50.60408163265306, "grad_norm": 24.07195472717285, "learning_rate": 2.4716326530612247e-05, "loss": 2.0605, "step": 12398 }, { "epoch": 50.608163265306125, "grad_norm": 25.801130294799805, "learning_rate": 2.4714285714285714e-05, "loss": 2.374, "step": 12399 }, { "epoch": 50.61224489795919, "grad_norm": 33.91402816772461, "learning_rate": 2.4712244897959184e-05, "loss": 1.4602, "step": 12400 }, { "epoch": 50.61632653061225, "grad_norm": 28.662429809570312, "learning_rate": 2.4710204081632655e-05, "loss": 1.9384, "step": 12401 }, { "epoch": 50.6204081632653, "grad_norm": 23.20586585998535, "learning_rate": 2.4708163265306122e-05, "loss": 2.6066, "step": 12402 }, { "epoch": 50.624489795918365, "grad_norm": 22.621248245239258, "learning_rate": 2.4706122448979593e-05, "loss": 2.2633, "step": 12403 }, { "epoch": 50.628571428571426, "grad_norm": 16.422657012939453, "learning_rate": 2.4704081632653063e-05, "loss": 2.3702, "step": 12404 }, { "epoch": 50.63265306122449, "grad_norm": 29.19683074951172, "learning_rate": 2.4702040816326534e-05, "loss": 1.8512, "step": 12405 }, { "epoch": 50.63673469387755, "grad_norm": 14.52769660949707, "learning_rate": 2.47e-05, "loss": 2.6719, "step": 12406 }, { "epoch": 50.64081632653061, "grad_norm": 24.31104850769043, "learning_rate": 2.469795918367347e-05, "loss": 1.8964, "step": 12407 }, { "epoch": 50.64489795918367, "grad_norm": 25.56125831604004, "learning_rate": 2.4695918367346942e-05, "loss": 0.2124, "step": 12408 }, { "epoch": 50.648979591836735, "grad_norm": 24.395910263061523, "learning_rate": 2.469387755102041e-05, "loss": 1.9429, "step": 12409 }, { "epoch": 50.6530612244898, "grad_norm": 44.95832824707031, "learning_rate": 2.469183673469388e-05, "loss": 1.2707, "step": 12410 }, { "epoch": 50.65714285714286, "grad_norm": 39.50606918334961, "learning_rate": 2.4689795918367346e-05, "loss": 0.8822, "step": 12411 }, { "epoch": 50.66122448979592, "grad_norm": 14.926072120666504, "learning_rate": 2.468775510204082e-05, "loss": 2.6051, "step": 12412 }, { "epoch": 50.66530612244898, "grad_norm": 29.390302658081055, "learning_rate": 2.4685714285714288e-05, "loss": 1.9426, "step": 12413 }, { "epoch": 50.66938775510204, "grad_norm": 25.68099021911621, "learning_rate": 2.4683673469387755e-05, "loss": 1.4417, "step": 12414 }, { "epoch": 50.673469387755105, "grad_norm": 32.93894577026367, "learning_rate": 2.4681632653061225e-05, "loss": 1.0953, "step": 12415 }, { "epoch": 50.67755102040816, "grad_norm": 27.05628776550293, "learning_rate": 2.4679591836734696e-05, "loss": 1.7355, "step": 12416 }, { "epoch": 50.68163265306122, "grad_norm": 50.44426345825195, "learning_rate": 2.4677551020408166e-05, "loss": 0.994, "step": 12417 }, { "epoch": 50.68571428571428, "grad_norm": 32.445556640625, "learning_rate": 2.4675510204081633e-05, "loss": 1.4087, "step": 12418 }, { "epoch": 50.689795918367345, "grad_norm": 35.31446838378906, "learning_rate": 2.46734693877551e-05, "loss": 1.6166, "step": 12419 }, { "epoch": 50.69387755102041, "grad_norm": 31.344636917114258, "learning_rate": 2.4671428571428574e-05, "loss": 1.5764, "step": 12420 }, { "epoch": 50.69795918367347, "grad_norm": 17.63339614868164, "learning_rate": 2.466938775510204e-05, "loss": 2.2668, "step": 12421 }, { "epoch": 50.70204081632653, "grad_norm": 28.42855453491211, "learning_rate": 2.4667346938775512e-05, "loss": 1.8662, "step": 12422 }, { "epoch": 50.70612244897959, "grad_norm": 14.253167152404785, "learning_rate": 2.466530612244898e-05, "loss": 2.8957, "step": 12423 }, { "epoch": 50.710204081632654, "grad_norm": 20.531036376953125, "learning_rate": 2.466326530612245e-05, "loss": 2.0461, "step": 12424 }, { "epoch": 50.714285714285715, "grad_norm": 29.926929473876953, "learning_rate": 2.466122448979592e-05, "loss": 1.9711, "step": 12425 }, { "epoch": 50.71836734693878, "grad_norm": 20.307971954345703, "learning_rate": 2.4659183673469387e-05, "loss": 2.2418, "step": 12426 }, { "epoch": 50.72244897959184, "grad_norm": 24.02713394165039, "learning_rate": 2.4657142857142858e-05, "loss": 2.1555, "step": 12427 }, { "epoch": 50.7265306122449, "grad_norm": 33.80012130737305, "learning_rate": 2.4655102040816328e-05, "loss": 0.5964, "step": 12428 }, { "epoch": 50.73061224489796, "grad_norm": 43.316776275634766, "learning_rate": 2.4653061224489795e-05, "loss": 0.9175, "step": 12429 }, { "epoch": 50.734693877551024, "grad_norm": 26.89654541015625, "learning_rate": 2.4651020408163266e-05, "loss": 1.9937, "step": 12430 }, { "epoch": 50.73877551020408, "grad_norm": 30.986122131347656, "learning_rate": 2.4648979591836736e-05, "loss": 1.6091, "step": 12431 }, { "epoch": 50.74285714285714, "grad_norm": 16.001354217529297, "learning_rate": 2.4646938775510207e-05, "loss": 2.7187, "step": 12432 }, { "epoch": 50.7469387755102, "grad_norm": 31.19564437866211, "learning_rate": 2.4644897959183674e-05, "loss": 1.5978, "step": 12433 }, { "epoch": 50.751020408163264, "grad_norm": 23.815998077392578, "learning_rate": 2.4642857142857145e-05, "loss": 1.6189, "step": 12434 }, { "epoch": 50.755102040816325, "grad_norm": 31.068517684936523, "learning_rate": 2.4640816326530615e-05, "loss": 0.9816, "step": 12435 }, { "epoch": 50.75918367346939, "grad_norm": 10.995731353759766, "learning_rate": 2.4638775510204082e-05, "loss": 2.8789, "step": 12436 }, { "epoch": 50.76326530612245, "grad_norm": 18.406131744384766, "learning_rate": 2.4636734693877553e-05, "loss": 2.2539, "step": 12437 }, { "epoch": 50.76734693877551, "grad_norm": 29.829954147338867, "learning_rate": 2.463469387755102e-05, "loss": 1.7316, "step": 12438 }, { "epoch": 50.77142857142857, "grad_norm": 32.768592834472656, "learning_rate": 2.4632653061224494e-05, "loss": 1.3523, "step": 12439 }, { "epoch": 50.775510204081634, "grad_norm": 32.404598236083984, "learning_rate": 2.463061224489796e-05, "loss": 1.103, "step": 12440 }, { "epoch": 50.779591836734696, "grad_norm": 21.406248092651367, "learning_rate": 2.4628571428571428e-05, "loss": 2.3664, "step": 12441 }, { "epoch": 50.78367346938776, "grad_norm": 40.60348129272461, "learning_rate": 2.46265306122449e-05, "loss": 1.298, "step": 12442 }, { "epoch": 50.78775510204082, "grad_norm": 27.086315155029297, "learning_rate": 2.462448979591837e-05, "loss": 1.3022, "step": 12443 }, { "epoch": 50.79183673469388, "grad_norm": 12.801846504211426, "learning_rate": 2.462244897959184e-05, "loss": 3.065, "step": 12444 }, { "epoch": 50.795918367346935, "grad_norm": 21.504430770874023, "learning_rate": 2.4620408163265307e-05, "loss": 2.4986, "step": 12445 }, { "epoch": 50.8, "grad_norm": 38.87082290649414, "learning_rate": 2.4618367346938774e-05, "loss": 1.0723, "step": 12446 }, { "epoch": 50.80408163265306, "grad_norm": 32.1743278503418, "learning_rate": 2.4616326530612248e-05, "loss": 1.8847, "step": 12447 }, { "epoch": 50.80816326530612, "grad_norm": 7.219913959503174, "learning_rate": 2.4614285714285715e-05, "loss": 0.0828, "step": 12448 }, { "epoch": 50.81224489795918, "grad_norm": 29.485933303833008, "learning_rate": 2.4612244897959185e-05, "loss": 1.6744, "step": 12449 }, { "epoch": 50.816326530612244, "grad_norm": 32.13814926147461, "learning_rate": 2.4610204081632652e-05, "loss": 2.1017, "step": 12450 }, { "epoch": 50.820408163265306, "grad_norm": 21.44816780090332, "learning_rate": 2.4608163265306123e-05, "loss": 1.6512, "step": 12451 }, { "epoch": 50.82448979591837, "grad_norm": 55.91215133666992, "learning_rate": 2.4606122448979593e-05, "loss": 0.6558, "step": 12452 }, { "epoch": 50.82857142857143, "grad_norm": 19.273109436035156, "learning_rate": 2.460408163265306e-05, "loss": 2.314, "step": 12453 }, { "epoch": 50.83265306122449, "grad_norm": 36.44337844848633, "learning_rate": 2.460204081632653e-05, "loss": 1.4484, "step": 12454 }, { "epoch": 50.83673469387755, "grad_norm": 33.43140411376953, "learning_rate": 2.46e-05, "loss": 1.4662, "step": 12455 }, { "epoch": 50.840816326530614, "grad_norm": 25.237930297851562, "learning_rate": 2.4597959183673472e-05, "loss": 2.1734, "step": 12456 }, { "epoch": 50.844897959183676, "grad_norm": 13.592853546142578, "learning_rate": 2.459591836734694e-05, "loss": 2.8153, "step": 12457 }, { "epoch": 50.84897959183674, "grad_norm": 32.65550994873047, "learning_rate": 2.459387755102041e-05, "loss": 1.441, "step": 12458 }, { "epoch": 50.85306122448979, "grad_norm": 36.403541564941406, "learning_rate": 2.459183673469388e-05, "loss": 1.3977, "step": 12459 }, { "epoch": 50.857142857142854, "grad_norm": 17.361764907836914, "learning_rate": 2.4589795918367347e-05, "loss": 2.3203, "step": 12460 }, { "epoch": 50.861224489795916, "grad_norm": 51.06618118286133, "learning_rate": 2.4587755102040818e-05, "loss": 0.9664, "step": 12461 }, { "epoch": 50.86530612244898, "grad_norm": 38.24631881713867, "learning_rate": 2.458571428571429e-05, "loss": 1.8074, "step": 12462 }, { "epoch": 50.86938775510204, "grad_norm": 32.526668548583984, "learning_rate": 2.4583673469387756e-05, "loss": 1.2824, "step": 12463 }, { "epoch": 50.8734693877551, "grad_norm": 30.474536895751953, "learning_rate": 2.4581632653061226e-05, "loss": 1.8609, "step": 12464 }, { "epoch": 50.87755102040816, "grad_norm": 26.66985511779785, "learning_rate": 2.4579591836734693e-05, "loss": 2.1398, "step": 12465 }, { "epoch": 50.881632653061224, "grad_norm": 20.893953323364258, "learning_rate": 2.4577551020408167e-05, "loss": 2.1866, "step": 12466 }, { "epoch": 50.885714285714286, "grad_norm": 20.750377655029297, "learning_rate": 2.4575510204081634e-05, "loss": 2.3933, "step": 12467 }, { "epoch": 50.88979591836735, "grad_norm": 14.720654487609863, "learning_rate": 2.45734693877551e-05, "loss": 2.7109, "step": 12468 }, { "epoch": 50.89387755102041, "grad_norm": 17.570466995239258, "learning_rate": 2.4571428571428572e-05, "loss": 2.8205, "step": 12469 }, { "epoch": 50.89795918367347, "grad_norm": 18.595291137695312, "learning_rate": 2.4569387755102042e-05, "loss": 2.6458, "step": 12470 }, { "epoch": 50.90204081632653, "grad_norm": 34.64619064331055, "learning_rate": 2.4567346938775513e-05, "loss": 0.847, "step": 12471 }, { "epoch": 50.906122448979595, "grad_norm": 16.557897567749023, "learning_rate": 2.456530612244898e-05, "loss": 2.4627, "step": 12472 }, { "epoch": 50.910204081632656, "grad_norm": 34.4266471862793, "learning_rate": 2.4563265306122447e-05, "loss": 1.3767, "step": 12473 }, { "epoch": 50.91428571428571, "grad_norm": 32.475154876708984, "learning_rate": 2.456122448979592e-05, "loss": 1.5438, "step": 12474 }, { "epoch": 50.91836734693877, "grad_norm": 31.1547908782959, "learning_rate": 2.4559183673469388e-05, "loss": 1.4739, "step": 12475 }, { "epoch": 50.922448979591834, "grad_norm": 28.121749877929688, "learning_rate": 2.455714285714286e-05, "loss": 1.5828, "step": 12476 }, { "epoch": 50.926530612244896, "grad_norm": 27.092714309692383, "learning_rate": 2.4555102040816326e-05, "loss": 1.8798, "step": 12477 }, { "epoch": 50.93061224489796, "grad_norm": 23.472793579101562, "learning_rate": 2.45530612244898e-05, "loss": 1.7311, "step": 12478 }, { "epoch": 50.93469387755102, "grad_norm": 30.693941116333008, "learning_rate": 2.4551020408163267e-05, "loss": 0.3881, "step": 12479 }, { "epoch": 50.93877551020408, "grad_norm": 32.91140365600586, "learning_rate": 2.4548979591836734e-05, "loss": 0.4634, "step": 12480 }, { "epoch": 50.94285714285714, "grad_norm": 23.449613571166992, "learning_rate": 2.4546938775510205e-05, "loss": 2.1343, "step": 12481 }, { "epoch": 50.946938775510205, "grad_norm": 31.120080947875977, "learning_rate": 2.4544897959183675e-05, "loss": 1.8051, "step": 12482 }, { "epoch": 50.95102040816327, "grad_norm": 19.77718162536621, "learning_rate": 2.4542857142857146e-05, "loss": 2.3276, "step": 12483 }, { "epoch": 50.95510204081633, "grad_norm": 26.047321319580078, "learning_rate": 2.4540816326530613e-05, "loss": 2.2149, "step": 12484 }, { "epoch": 50.95918367346939, "grad_norm": 24.78006362915039, "learning_rate": 2.4538775510204083e-05, "loss": 2.0365, "step": 12485 }, { "epoch": 50.96326530612245, "grad_norm": 15.74695873260498, "learning_rate": 2.4536734693877554e-05, "loss": 2.8093, "step": 12486 }, { "epoch": 50.96734693877551, "grad_norm": 33.695777893066406, "learning_rate": 2.453469387755102e-05, "loss": 1.4793, "step": 12487 }, { "epoch": 50.97142857142857, "grad_norm": 46.90105056762695, "learning_rate": 2.453265306122449e-05, "loss": 1.7328, "step": 12488 }, { "epoch": 50.97551020408163, "grad_norm": 36.54801559448242, "learning_rate": 2.4530612244897962e-05, "loss": 1.2117, "step": 12489 }, { "epoch": 50.97959183673469, "grad_norm": 35.170310974121094, "learning_rate": 2.452857142857143e-05, "loss": 1.9017, "step": 12490 }, { "epoch": 50.98367346938775, "grad_norm": 16.354331970214844, "learning_rate": 2.45265306122449e-05, "loss": 2.745, "step": 12491 }, { "epoch": 50.987755102040815, "grad_norm": 12.6414213180542, "learning_rate": 2.4524489795918367e-05, "loss": 3.2012, "step": 12492 }, { "epoch": 50.99183673469388, "grad_norm": 37.92845916748047, "learning_rate": 2.452244897959184e-05, "loss": 1.5939, "step": 12493 }, { "epoch": 50.99591836734694, "grad_norm": 35.74653625488281, "learning_rate": 2.4520408163265308e-05, "loss": 1.1292, "step": 12494 }, { "epoch": 51.0, "grad_norm": 34.22439193725586, "learning_rate": 2.4518367346938775e-05, "loss": 1.2393, "step": 12495 }, { "epoch": 51.00408163265306, "grad_norm": 23.212963104248047, "learning_rate": 2.4516326530612245e-05, "loss": 1.9904, "step": 12496 }, { "epoch": 51.00816326530612, "grad_norm": 20.286205291748047, "learning_rate": 2.4514285714285716e-05, "loss": 1.9045, "step": 12497 }, { "epoch": 51.012244897959185, "grad_norm": 27.142601013183594, "learning_rate": 2.4512244897959186e-05, "loss": 1.2038, "step": 12498 }, { "epoch": 51.01632653061225, "grad_norm": 17.051393508911133, "learning_rate": 2.4510204081632653e-05, "loss": 2.7526, "step": 12499 }, { "epoch": 51.02040816326531, "grad_norm": 30.959043502807617, "learning_rate": 2.4508163265306124e-05, "loss": 1.8473, "step": 12500 }, { "epoch": 51.02448979591837, "grad_norm": 33.27735900878906, "learning_rate": 2.4506122448979594e-05, "loss": 0.289, "step": 12501 }, { "epoch": 51.02857142857143, "grad_norm": 49.171180725097656, "learning_rate": 2.450408163265306e-05, "loss": 1.0046, "step": 12502 }, { "epoch": 51.03265306122449, "grad_norm": 26.088096618652344, "learning_rate": 2.4502040816326532e-05, "loss": 1.7862, "step": 12503 }, { "epoch": 51.03673469387755, "grad_norm": 29.773515701293945, "learning_rate": 2.45e-05, "loss": 1.5264, "step": 12504 }, { "epoch": 51.04081632653061, "grad_norm": 34.51960754394531, "learning_rate": 2.4497959183673473e-05, "loss": 0.5894, "step": 12505 }, { "epoch": 51.04489795918367, "grad_norm": 21.193038940429688, "learning_rate": 2.449591836734694e-05, "loss": 2.1722, "step": 12506 }, { "epoch": 51.04897959183673, "grad_norm": 31.910200119018555, "learning_rate": 2.4493877551020407e-05, "loss": 1.4502, "step": 12507 }, { "epoch": 51.053061224489795, "grad_norm": 26.11545753479004, "learning_rate": 2.4491836734693878e-05, "loss": 1.6101, "step": 12508 }, { "epoch": 51.05714285714286, "grad_norm": 32.285099029541016, "learning_rate": 2.448979591836735e-05, "loss": 1.8848, "step": 12509 }, { "epoch": 51.06122448979592, "grad_norm": 29.909114837646484, "learning_rate": 2.448775510204082e-05, "loss": 1.6798, "step": 12510 }, { "epoch": 51.06530612244898, "grad_norm": 13.487142562866211, "learning_rate": 2.4485714285714286e-05, "loss": 3.0831, "step": 12511 }, { "epoch": 51.06938775510204, "grad_norm": 42.22232437133789, "learning_rate": 2.4483673469387757e-05, "loss": 1.777, "step": 12512 }, { "epoch": 51.073469387755104, "grad_norm": 16.938623428344727, "learning_rate": 2.4481632653061227e-05, "loss": 2.528, "step": 12513 }, { "epoch": 51.077551020408166, "grad_norm": 23.374094009399414, "learning_rate": 2.4479591836734694e-05, "loss": 1.4687, "step": 12514 }, { "epoch": 51.08163265306123, "grad_norm": 19.72340965270996, "learning_rate": 2.4477551020408165e-05, "loss": 2.2603, "step": 12515 }, { "epoch": 51.08571428571429, "grad_norm": 14.281204223632812, "learning_rate": 2.4475510204081635e-05, "loss": 3.0026, "step": 12516 }, { "epoch": 51.089795918367344, "grad_norm": 45.80622100830078, "learning_rate": 2.4473469387755102e-05, "loss": 1.3823, "step": 12517 }, { "epoch": 51.093877551020405, "grad_norm": 23.890966415405273, "learning_rate": 2.4471428571428573e-05, "loss": 1.8755, "step": 12518 }, { "epoch": 51.09795918367347, "grad_norm": 21.98676872253418, "learning_rate": 2.446938775510204e-05, "loss": 2.2358, "step": 12519 }, { "epoch": 51.10204081632653, "grad_norm": 27.731647491455078, "learning_rate": 2.4467346938775514e-05, "loss": 1.5086, "step": 12520 }, { "epoch": 51.10612244897959, "grad_norm": 20.528575897216797, "learning_rate": 2.446530612244898e-05, "loss": 2.0098, "step": 12521 }, { "epoch": 51.11020408163265, "grad_norm": 29.28443145751953, "learning_rate": 2.446326530612245e-05, "loss": 2.1465, "step": 12522 }, { "epoch": 51.114285714285714, "grad_norm": 31.042461395263672, "learning_rate": 2.446122448979592e-05, "loss": 1.5186, "step": 12523 }, { "epoch": 51.118367346938776, "grad_norm": 37.137123107910156, "learning_rate": 2.445918367346939e-05, "loss": 1.4249, "step": 12524 }, { "epoch": 51.12244897959184, "grad_norm": 30.92110824584961, "learning_rate": 2.445714285714286e-05, "loss": 1.3456, "step": 12525 }, { "epoch": 51.1265306122449, "grad_norm": 17.642982482910156, "learning_rate": 2.4455102040816327e-05, "loss": 2.1909, "step": 12526 }, { "epoch": 51.13061224489796, "grad_norm": 24.827957153320312, "learning_rate": 2.4453061224489797e-05, "loss": 2.1036, "step": 12527 }, { "epoch": 51.13469387755102, "grad_norm": 30.229408264160156, "learning_rate": 2.4451020408163268e-05, "loss": 1.9086, "step": 12528 }, { "epoch": 51.138775510204084, "grad_norm": 26.77941131591797, "learning_rate": 2.4448979591836735e-05, "loss": 1.7269, "step": 12529 }, { "epoch": 51.142857142857146, "grad_norm": 19.55738639831543, "learning_rate": 2.4446938775510205e-05, "loss": 2.3711, "step": 12530 }, { "epoch": 51.14693877551021, "grad_norm": 31.32136344909668, "learning_rate": 2.4444897959183673e-05, "loss": 1.4503, "step": 12531 }, { "epoch": 51.15102040816326, "grad_norm": 16.821130752563477, "learning_rate": 2.4442857142857146e-05, "loss": 2.3171, "step": 12532 }, { "epoch": 51.155102040816324, "grad_norm": 33.62259292602539, "learning_rate": 2.4440816326530614e-05, "loss": 1.9201, "step": 12533 }, { "epoch": 51.159183673469386, "grad_norm": 9.989377975463867, "learning_rate": 2.443877551020408e-05, "loss": 3.34, "step": 12534 }, { "epoch": 51.16326530612245, "grad_norm": 40.833961486816406, "learning_rate": 2.443673469387755e-05, "loss": 1.9904, "step": 12535 }, { "epoch": 51.16734693877551, "grad_norm": 31.398048400878906, "learning_rate": 2.4434693877551022e-05, "loss": 0.9503, "step": 12536 }, { "epoch": 51.17142857142857, "grad_norm": 17.690427780151367, "learning_rate": 2.4432653061224492e-05, "loss": 2.2197, "step": 12537 }, { "epoch": 51.17551020408163, "grad_norm": 18.365114212036133, "learning_rate": 2.443061224489796e-05, "loss": 2.0376, "step": 12538 }, { "epoch": 51.179591836734694, "grad_norm": 36.74075698852539, "learning_rate": 2.442857142857143e-05, "loss": 1.2701, "step": 12539 }, { "epoch": 51.183673469387756, "grad_norm": 23.17672348022461, "learning_rate": 2.44265306122449e-05, "loss": 2.1145, "step": 12540 }, { "epoch": 51.18775510204082, "grad_norm": 26.539701461791992, "learning_rate": 2.4424489795918368e-05, "loss": 1.418, "step": 12541 }, { "epoch": 51.19183673469388, "grad_norm": 41.14529037475586, "learning_rate": 2.4422448979591838e-05, "loss": 1.1951, "step": 12542 }, { "epoch": 51.19591836734694, "grad_norm": 31.525678634643555, "learning_rate": 2.442040816326531e-05, "loss": 1.7372, "step": 12543 }, { "epoch": 51.2, "grad_norm": 49.97328567504883, "learning_rate": 2.441836734693878e-05, "loss": 0.941, "step": 12544 }, { "epoch": 51.204081632653065, "grad_norm": 13.957311630249023, "learning_rate": 2.4416326530612246e-05, "loss": 2.8135, "step": 12545 }, { "epoch": 51.20816326530612, "grad_norm": 32.55849075317383, "learning_rate": 2.4414285714285713e-05, "loss": 1.5732, "step": 12546 }, { "epoch": 51.21224489795918, "grad_norm": 30.885299682617188, "learning_rate": 2.4412244897959187e-05, "loss": 1.9417, "step": 12547 }, { "epoch": 51.21632653061224, "grad_norm": 27.92962646484375, "learning_rate": 2.4410204081632654e-05, "loss": 2.2008, "step": 12548 }, { "epoch": 51.220408163265304, "grad_norm": 29.42051124572754, "learning_rate": 2.4408163265306125e-05, "loss": 1.7947, "step": 12549 }, { "epoch": 51.224489795918366, "grad_norm": 20.023347854614258, "learning_rate": 2.4406122448979592e-05, "loss": 2.359, "step": 12550 }, { "epoch": 51.22857142857143, "grad_norm": 43.566871643066406, "learning_rate": 2.4404081632653063e-05, "loss": 1.7826, "step": 12551 }, { "epoch": 51.23265306122449, "grad_norm": 38.21821594238281, "learning_rate": 2.4402040816326533e-05, "loss": 1.1155, "step": 12552 }, { "epoch": 51.23673469387755, "grad_norm": 32.58299255371094, "learning_rate": 2.44e-05, "loss": 1.1193, "step": 12553 }, { "epoch": 51.24081632653061, "grad_norm": 30.174753189086914, "learning_rate": 2.439795918367347e-05, "loss": 1.7665, "step": 12554 }, { "epoch": 51.244897959183675, "grad_norm": 26.800315856933594, "learning_rate": 2.439591836734694e-05, "loss": 1.5111, "step": 12555 }, { "epoch": 51.248979591836736, "grad_norm": 27.176809310913086, "learning_rate": 2.439387755102041e-05, "loss": 1.8169, "step": 12556 }, { "epoch": 51.2530612244898, "grad_norm": 30.74805450439453, "learning_rate": 2.439183673469388e-05, "loss": 1.6585, "step": 12557 }, { "epoch": 51.25714285714286, "grad_norm": 28.643680572509766, "learning_rate": 2.4389795918367346e-05, "loss": 1.8487, "step": 12558 }, { "epoch": 51.26122448979592, "grad_norm": 29.97673797607422, "learning_rate": 2.438775510204082e-05, "loss": 1.3874, "step": 12559 }, { "epoch": 51.265306122448976, "grad_norm": 32.558265686035156, "learning_rate": 2.4385714285714287e-05, "loss": 0.3316, "step": 12560 }, { "epoch": 51.26938775510204, "grad_norm": 34.27940368652344, "learning_rate": 2.4383673469387754e-05, "loss": 1.5916, "step": 12561 }, { "epoch": 51.2734693877551, "grad_norm": 40.100730895996094, "learning_rate": 2.4381632653061225e-05, "loss": 1.7021, "step": 12562 }, { "epoch": 51.27755102040816, "grad_norm": 45.210182189941406, "learning_rate": 2.4379591836734695e-05, "loss": 1.0696, "step": 12563 }, { "epoch": 51.28163265306122, "grad_norm": 45.202598571777344, "learning_rate": 2.4377551020408166e-05, "loss": 1.1554, "step": 12564 }, { "epoch": 51.285714285714285, "grad_norm": 18.934314727783203, "learning_rate": 2.4375510204081633e-05, "loss": 2.3156, "step": 12565 }, { "epoch": 51.289795918367346, "grad_norm": 21.435588836669922, "learning_rate": 2.4373469387755103e-05, "loss": 2.0322, "step": 12566 }, { "epoch": 51.29387755102041, "grad_norm": 35.05289840698242, "learning_rate": 2.4371428571428574e-05, "loss": 1.6457, "step": 12567 }, { "epoch": 51.29795918367347, "grad_norm": 22.923595428466797, "learning_rate": 2.436938775510204e-05, "loss": 0.1071, "step": 12568 }, { "epoch": 51.30204081632653, "grad_norm": 31.564916610717773, "learning_rate": 2.436734693877551e-05, "loss": 1.1806, "step": 12569 }, { "epoch": 51.30612244897959, "grad_norm": 28.467607498168945, "learning_rate": 2.4365306122448982e-05, "loss": 2.2861, "step": 12570 }, { "epoch": 51.310204081632655, "grad_norm": 26.379518508911133, "learning_rate": 2.4363265306122452e-05, "loss": 2.3133, "step": 12571 }, { "epoch": 51.31428571428572, "grad_norm": 33.944400787353516, "learning_rate": 2.436122448979592e-05, "loss": 1.5281, "step": 12572 }, { "epoch": 51.31836734693878, "grad_norm": 32.15677261352539, "learning_rate": 2.4359183673469387e-05, "loss": 1.3756, "step": 12573 }, { "epoch": 51.32244897959184, "grad_norm": 20.091938018798828, "learning_rate": 2.4357142857142857e-05, "loss": 2.3278, "step": 12574 }, { "epoch": 51.326530612244895, "grad_norm": 38.781864166259766, "learning_rate": 2.4355102040816328e-05, "loss": 0.8256, "step": 12575 }, { "epoch": 51.33061224489796, "grad_norm": 36.732879638671875, "learning_rate": 2.4353061224489798e-05, "loss": 1.1967, "step": 12576 }, { "epoch": 51.33469387755102, "grad_norm": 38.6946907043457, "learning_rate": 2.4351020408163265e-05, "loss": 0.8829, "step": 12577 }, { "epoch": 51.33877551020408, "grad_norm": 36.324771881103516, "learning_rate": 2.4348979591836736e-05, "loss": 1.6467, "step": 12578 }, { "epoch": 51.34285714285714, "grad_norm": 54.81427001953125, "learning_rate": 2.4346938775510206e-05, "loss": 1.3223, "step": 12579 }, { "epoch": 51.3469387755102, "grad_norm": 15.007489204406738, "learning_rate": 2.4344897959183674e-05, "loss": 2.6361, "step": 12580 }, { "epoch": 51.351020408163265, "grad_norm": 26.132909774780273, "learning_rate": 2.4342857142857144e-05, "loss": 1.9592, "step": 12581 }, { "epoch": 51.35510204081633, "grad_norm": 22.204524993896484, "learning_rate": 2.4340816326530615e-05, "loss": 1.9944, "step": 12582 }, { "epoch": 51.35918367346939, "grad_norm": 40.43775177001953, "learning_rate": 2.433877551020408e-05, "loss": 1.4006, "step": 12583 }, { "epoch": 51.36326530612245, "grad_norm": 27.185157775878906, "learning_rate": 2.4336734693877552e-05, "loss": 1.7905, "step": 12584 }, { "epoch": 51.36734693877551, "grad_norm": 18.385164260864258, "learning_rate": 2.433469387755102e-05, "loss": 2.4028, "step": 12585 }, { "epoch": 51.371428571428574, "grad_norm": 35.966217041015625, "learning_rate": 2.4332653061224493e-05, "loss": 0.473, "step": 12586 }, { "epoch": 51.375510204081635, "grad_norm": 29.202550888061523, "learning_rate": 2.433061224489796e-05, "loss": 1.9273, "step": 12587 }, { "epoch": 51.3795918367347, "grad_norm": 40.58371353149414, "learning_rate": 2.432857142857143e-05, "loss": 1.7163, "step": 12588 }, { "epoch": 51.38367346938775, "grad_norm": 33.8425178527832, "learning_rate": 2.4326530612244898e-05, "loss": 1.3811, "step": 12589 }, { "epoch": 51.38775510204081, "grad_norm": 29.208560943603516, "learning_rate": 2.432448979591837e-05, "loss": 1.9355, "step": 12590 }, { "epoch": 51.391836734693875, "grad_norm": 36.95700454711914, "learning_rate": 2.432244897959184e-05, "loss": 0.7038, "step": 12591 }, { "epoch": 51.39591836734694, "grad_norm": 41.11300277709961, "learning_rate": 2.4320408163265306e-05, "loss": 1.4121, "step": 12592 }, { "epoch": 51.4, "grad_norm": 17.058134078979492, "learning_rate": 2.4318367346938777e-05, "loss": 2.2274, "step": 12593 }, { "epoch": 51.40408163265306, "grad_norm": 36.405364990234375, "learning_rate": 2.4316326530612247e-05, "loss": 0.7921, "step": 12594 }, { "epoch": 51.40816326530612, "grad_norm": 41.82438278198242, "learning_rate": 2.4314285714285714e-05, "loss": 1.1025, "step": 12595 }, { "epoch": 51.412244897959184, "grad_norm": 26.34482192993164, "learning_rate": 2.4312244897959185e-05, "loss": 1.9979, "step": 12596 }, { "epoch": 51.416326530612245, "grad_norm": 40.21012496948242, "learning_rate": 2.4310204081632655e-05, "loss": 0.8555, "step": 12597 }, { "epoch": 51.42040816326531, "grad_norm": 42.38385772705078, "learning_rate": 2.4308163265306126e-05, "loss": 1.7379, "step": 12598 }, { "epoch": 51.42448979591837, "grad_norm": 31.322965621948242, "learning_rate": 2.4306122448979593e-05, "loss": 2.0983, "step": 12599 }, { "epoch": 51.42857142857143, "grad_norm": 22.707033157348633, "learning_rate": 2.430408163265306e-05, "loss": 2.2508, "step": 12600 }, { "epoch": 51.43265306122449, "grad_norm": 51.29861831665039, "learning_rate": 2.430204081632653e-05, "loss": 0.881, "step": 12601 }, { "epoch": 51.436734693877554, "grad_norm": 36.40692138671875, "learning_rate": 2.43e-05, "loss": 0.5671, "step": 12602 }, { "epoch": 51.440816326530616, "grad_norm": 20.583818435668945, "learning_rate": 2.429795918367347e-05, "loss": 2.3249, "step": 12603 }, { "epoch": 51.44489795918367, "grad_norm": 39.91659164428711, "learning_rate": 2.429591836734694e-05, "loss": 1.099, "step": 12604 }, { "epoch": 51.44897959183673, "grad_norm": 19.48331642150879, "learning_rate": 2.429387755102041e-05, "loss": 2.3943, "step": 12605 }, { "epoch": 51.453061224489794, "grad_norm": 30.5135555267334, "learning_rate": 2.429183673469388e-05, "loss": 1.761, "step": 12606 }, { "epoch": 51.457142857142856, "grad_norm": 25.214046478271484, "learning_rate": 2.4289795918367347e-05, "loss": 2.2306, "step": 12607 }, { "epoch": 51.46122448979592, "grad_norm": 21.056041717529297, "learning_rate": 2.4287755102040817e-05, "loss": 2.2091, "step": 12608 }, { "epoch": 51.46530612244898, "grad_norm": 24.897558212280273, "learning_rate": 2.4285714285714288e-05, "loss": 1.987, "step": 12609 }, { "epoch": 51.46938775510204, "grad_norm": 26.001867294311523, "learning_rate": 2.4283673469387755e-05, "loss": 1.6831, "step": 12610 }, { "epoch": 51.4734693877551, "grad_norm": 18.469459533691406, "learning_rate": 2.4281632653061226e-05, "loss": 1.9136, "step": 12611 }, { "epoch": 51.477551020408164, "grad_norm": 18.510189056396484, "learning_rate": 2.4279591836734693e-05, "loss": 2.7093, "step": 12612 }, { "epoch": 51.481632653061226, "grad_norm": 36.6469841003418, "learning_rate": 2.4277551020408167e-05, "loss": 0.998, "step": 12613 }, { "epoch": 51.48571428571429, "grad_norm": 23.259008407592773, "learning_rate": 2.4275510204081634e-05, "loss": 1.8516, "step": 12614 }, { "epoch": 51.48979591836735, "grad_norm": 42.75292205810547, "learning_rate": 2.4273469387755104e-05, "loss": 1.0397, "step": 12615 }, { "epoch": 51.49387755102041, "grad_norm": 39.30171585083008, "learning_rate": 2.427142857142857e-05, "loss": 0.4942, "step": 12616 }, { "epoch": 51.49795918367347, "grad_norm": 22.46100616455078, "learning_rate": 2.4269387755102042e-05, "loss": 2.317, "step": 12617 }, { "epoch": 51.50204081632653, "grad_norm": 43.655208587646484, "learning_rate": 2.4267346938775512e-05, "loss": 1.8999, "step": 12618 }, { "epoch": 51.50612244897959, "grad_norm": 37.101200103759766, "learning_rate": 2.426530612244898e-05, "loss": 1.8124, "step": 12619 }, { "epoch": 51.51020408163265, "grad_norm": 24.997203826904297, "learning_rate": 2.426326530612245e-05, "loss": 1.7716, "step": 12620 }, { "epoch": 51.51428571428571, "grad_norm": 18.22732162475586, "learning_rate": 2.426122448979592e-05, "loss": 2.3102, "step": 12621 }, { "epoch": 51.518367346938774, "grad_norm": 23.466623306274414, "learning_rate": 2.4259183673469388e-05, "loss": 1.9343, "step": 12622 }, { "epoch": 51.522448979591836, "grad_norm": 22.6610164642334, "learning_rate": 2.4257142857142858e-05, "loss": 2.1314, "step": 12623 }, { "epoch": 51.5265306122449, "grad_norm": 18.246204376220703, "learning_rate": 2.425510204081633e-05, "loss": 2.3979, "step": 12624 }, { "epoch": 51.53061224489796, "grad_norm": 22.184873580932617, "learning_rate": 2.42530612244898e-05, "loss": 2.0726, "step": 12625 }, { "epoch": 51.53469387755102, "grad_norm": 21.933460235595703, "learning_rate": 2.4251020408163266e-05, "loss": 2.6535, "step": 12626 }, { "epoch": 51.53877551020408, "grad_norm": 18.193376541137695, "learning_rate": 2.4248979591836733e-05, "loss": 2.6105, "step": 12627 }, { "epoch": 51.542857142857144, "grad_norm": 31.58795928955078, "learning_rate": 2.4246938775510204e-05, "loss": 0.4439, "step": 12628 }, { "epoch": 51.546938775510206, "grad_norm": 29.41973114013672, "learning_rate": 2.4244897959183674e-05, "loss": 2.1299, "step": 12629 }, { "epoch": 51.55102040816327, "grad_norm": 32.48984146118164, "learning_rate": 2.4242857142857145e-05, "loss": 0.8877, "step": 12630 }, { "epoch": 51.55510204081633, "grad_norm": 22.930774688720703, "learning_rate": 2.4240816326530612e-05, "loss": 2.1258, "step": 12631 }, { "epoch": 51.55918367346939, "grad_norm": 31.776792526245117, "learning_rate": 2.4238775510204083e-05, "loss": 2.1402, "step": 12632 }, { "epoch": 51.563265306122446, "grad_norm": 14.823325157165527, "learning_rate": 2.4236734693877553e-05, "loss": 2.7343, "step": 12633 }, { "epoch": 51.56734693877551, "grad_norm": 15.213013648986816, "learning_rate": 2.423469387755102e-05, "loss": 2.786, "step": 12634 }, { "epoch": 51.57142857142857, "grad_norm": 25.945283889770508, "learning_rate": 2.423265306122449e-05, "loss": 2.1667, "step": 12635 }, { "epoch": 51.57551020408163, "grad_norm": 36.108402252197266, "learning_rate": 2.423061224489796e-05, "loss": 1.153, "step": 12636 }, { "epoch": 51.57959183673469, "grad_norm": 44.43846130371094, "learning_rate": 2.4228571428571432e-05, "loss": 0.49, "step": 12637 }, { "epoch": 51.583673469387755, "grad_norm": 39.67333984375, "learning_rate": 2.42265306122449e-05, "loss": 1.8761, "step": 12638 }, { "epoch": 51.587755102040816, "grad_norm": 31.31005859375, "learning_rate": 2.4224489795918366e-05, "loss": 1.6362, "step": 12639 }, { "epoch": 51.59183673469388, "grad_norm": 24.4930477142334, "learning_rate": 2.422244897959184e-05, "loss": 1.8042, "step": 12640 }, { "epoch": 51.59591836734694, "grad_norm": 24.080839157104492, "learning_rate": 2.4220408163265307e-05, "loss": 2.0642, "step": 12641 }, { "epoch": 51.6, "grad_norm": 17.513090133666992, "learning_rate": 2.4218367346938778e-05, "loss": 2.6101, "step": 12642 }, { "epoch": 51.60408163265306, "grad_norm": 38.376522064208984, "learning_rate": 2.4216326530612245e-05, "loss": 1.5934, "step": 12643 }, { "epoch": 51.608163265306125, "grad_norm": 41.12969970703125, "learning_rate": 2.4214285714285715e-05, "loss": 1.6068, "step": 12644 }, { "epoch": 51.61224489795919, "grad_norm": 34.630008697509766, "learning_rate": 2.4212244897959186e-05, "loss": 1.0434, "step": 12645 }, { "epoch": 51.61632653061225, "grad_norm": 27.44120216369629, "learning_rate": 2.4210204081632653e-05, "loss": 1.3522, "step": 12646 }, { "epoch": 51.6204081632653, "grad_norm": 19.02496337890625, "learning_rate": 2.4208163265306123e-05, "loss": 2.9014, "step": 12647 }, { "epoch": 51.624489795918365, "grad_norm": 32.271087646484375, "learning_rate": 2.4206122448979594e-05, "loss": 0.3867, "step": 12648 }, { "epoch": 51.628571428571426, "grad_norm": 37.348751068115234, "learning_rate": 2.420408163265306e-05, "loss": 1.0626, "step": 12649 }, { "epoch": 51.63265306122449, "grad_norm": 35.80209732055664, "learning_rate": 2.420204081632653e-05, "loss": 1.4108, "step": 12650 }, { "epoch": 51.63673469387755, "grad_norm": 23.12119483947754, "learning_rate": 2.4200000000000002e-05, "loss": 2.2191, "step": 12651 }, { "epoch": 51.64081632653061, "grad_norm": 21.328311920166016, "learning_rate": 2.4197959183673473e-05, "loss": 2.2214, "step": 12652 }, { "epoch": 51.64489795918367, "grad_norm": 15.001317977905273, "learning_rate": 2.419591836734694e-05, "loss": 2.5847, "step": 12653 }, { "epoch": 51.648979591836735, "grad_norm": 30.629940032958984, "learning_rate": 2.4193877551020407e-05, "loss": 1.0384, "step": 12654 }, { "epoch": 51.6530612244898, "grad_norm": 39.643157958984375, "learning_rate": 2.4191836734693877e-05, "loss": 1.0778, "step": 12655 }, { "epoch": 51.65714285714286, "grad_norm": 34.09581756591797, "learning_rate": 2.4189795918367348e-05, "loss": 0.3521, "step": 12656 }, { "epoch": 51.66122448979592, "grad_norm": 22.286832809448242, "learning_rate": 2.418775510204082e-05, "loss": 1.9447, "step": 12657 }, { "epoch": 51.66530612244898, "grad_norm": 26.573833465576172, "learning_rate": 2.4185714285714286e-05, "loss": 2.2341, "step": 12658 }, { "epoch": 51.66938775510204, "grad_norm": 24.03777313232422, "learning_rate": 2.4183673469387756e-05, "loss": 2.0655, "step": 12659 }, { "epoch": 51.673469387755105, "grad_norm": 18.01676368713379, "learning_rate": 2.4181632653061227e-05, "loss": 2.2551, "step": 12660 }, { "epoch": 51.67755102040816, "grad_norm": 25.18538475036621, "learning_rate": 2.4179591836734694e-05, "loss": 2.1544, "step": 12661 }, { "epoch": 51.68163265306122, "grad_norm": 39.88300323486328, "learning_rate": 2.4177551020408164e-05, "loss": 1.2043, "step": 12662 }, { "epoch": 51.68571428571428, "grad_norm": 30.58399772644043, "learning_rate": 2.4175510204081635e-05, "loss": 2.243, "step": 12663 }, { "epoch": 51.689795918367345, "grad_norm": 23.49547576904297, "learning_rate": 2.4173469387755105e-05, "loss": 2.0019, "step": 12664 }, { "epoch": 51.69387755102041, "grad_norm": 21.976451873779297, "learning_rate": 2.4171428571428572e-05, "loss": 2.3806, "step": 12665 }, { "epoch": 51.69795918367347, "grad_norm": 27.504257202148438, "learning_rate": 2.416938775510204e-05, "loss": 1.8358, "step": 12666 }, { "epoch": 51.70204081632653, "grad_norm": 33.2814826965332, "learning_rate": 2.4167346938775513e-05, "loss": 1.3685, "step": 12667 }, { "epoch": 51.70612244897959, "grad_norm": 28.063825607299805, "learning_rate": 2.416530612244898e-05, "loss": 1.3896, "step": 12668 }, { "epoch": 51.710204081632654, "grad_norm": 27.29123306274414, "learning_rate": 2.416326530612245e-05, "loss": 1.5898, "step": 12669 }, { "epoch": 51.714285714285715, "grad_norm": 19.169954299926758, "learning_rate": 2.4161224489795918e-05, "loss": 2.2421, "step": 12670 }, { "epoch": 51.71836734693878, "grad_norm": 49.03839874267578, "learning_rate": 2.415918367346939e-05, "loss": 1.0243, "step": 12671 }, { "epoch": 51.72244897959184, "grad_norm": 21.574277877807617, "learning_rate": 2.415714285714286e-05, "loss": 2.2485, "step": 12672 }, { "epoch": 51.7265306122449, "grad_norm": 40.66181182861328, "learning_rate": 2.4155102040816326e-05, "loss": 0.8269, "step": 12673 }, { "epoch": 51.73061224489796, "grad_norm": 46.02187728881836, "learning_rate": 2.4153061224489797e-05, "loss": 0.8307, "step": 12674 }, { "epoch": 51.734693877551024, "grad_norm": 32.46552658081055, "learning_rate": 2.4151020408163267e-05, "loss": 1.6192, "step": 12675 }, { "epoch": 51.73877551020408, "grad_norm": 18.253501892089844, "learning_rate": 2.4148979591836734e-05, "loss": 2.0531, "step": 12676 }, { "epoch": 51.74285714285714, "grad_norm": 38.64822769165039, "learning_rate": 2.4146938775510205e-05, "loss": 0.3401, "step": 12677 }, { "epoch": 51.7469387755102, "grad_norm": 29.051456451416016, "learning_rate": 2.4144897959183675e-05, "loss": 1.6823, "step": 12678 }, { "epoch": 51.751020408163264, "grad_norm": 19.46415901184082, "learning_rate": 2.4142857142857146e-05, "loss": 2.7569, "step": 12679 }, { "epoch": 51.755102040816325, "grad_norm": 23.05681800842285, "learning_rate": 2.4140816326530613e-05, "loss": 2.0599, "step": 12680 }, { "epoch": 51.75918367346939, "grad_norm": 30.483566284179688, "learning_rate": 2.4138775510204084e-05, "loss": 0.3691, "step": 12681 }, { "epoch": 51.76326530612245, "grad_norm": 27.570327758789062, "learning_rate": 2.413673469387755e-05, "loss": 2.4934, "step": 12682 }, { "epoch": 51.76734693877551, "grad_norm": 23.25141143798828, "learning_rate": 2.413469387755102e-05, "loss": 1.6135, "step": 12683 }, { "epoch": 51.77142857142857, "grad_norm": 15.643209457397461, "learning_rate": 2.4132653061224492e-05, "loss": 2.6515, "step": 12684 }, { "epoch": 51.775510204081634, "grad_norm": 40.653255462646484, "learning_rate": 2.413061224489796e-05, "loss": 0.9688, "step": 12685 }, { "epoch": 51.779591836734696, "grad_norm": 21.33269691467285, "learning_rate": 2.412857142857143e-05, "loss": 2.2253, "step": 12686 }, { "epoch": 51.78367346938776, "grad_norm": 20.19331932067871, "learning_rate": 2.41265306122449e-05, "loss": 2.3963, "step": 12687 }, { "epoch": 51.78775510204082, "grad_norm": 15.401605606079102, "learning_rate": 2.4124489795918367e-05, "loss": 2.6654, "step": 12688 }, { "epoch": 51.79183673469388, "grad_norm": 35.267181396484375, "learning_rate": 2.4122448979591838e-05, "loss": 1.3962, "step": 12689 }, { "epoch": 51.795918367346935, "grad_norm": 59.644683837890625, "learning_rate": 2.4120408163265308e-05, "loss": 0.501, "step": 12690 }, { "epoch": 51.8, "grad_norm": 33.49960708618164, "learning_rate": 2.411836734693878e-05, "loss": 0.8809, "step": 12691 }, { "epoch": 51.80408163265306, "grad_norm": 31.33344268798828, "learning_rate": 2.4116326530612246e-05, "loss": 1.5147, "step": 12692 }, { "epoch": 51.80816326530612, "grad_norm": 28.756484985351562, "learning_rate": 2.4114285714285713e-05, "loss": 1.4222, "step": 12693 }, { "epoch": 51.81224489795918, "grad_norm": 40.015045166015625, "learning_rate": 2.4112244897959187e-05, "loss": 0.86, "step": 12694 }, { "epoch": 51.816326530612244, "grad_norm": 22.693397521972656, "learning_rate": 2.4110204081632654e-05, "loss": 2.3939, "step": 12695 }, { "epoch": 51.820408163265306, "grad_norm": 37.467010498046875, "learning_rate": 2.4108163265306124e-05, "loss": 1.3995, "step": 12696 }, { "epoch": 51.82448979591837, "grad_norm": 46.24060821533203, "learning_rate": 2.410612244897959e-05, "loss": 1.5486, "step": 12697 }, { "epoch": 51.82857142857143, "grad_norm": 20.801734924316406, "learning_rate": 2.4104081632653062e-05, "loss": 2.3543, "step": 12698 }, { "epoch": 51.83265306122449, "grad_norm": 34.80397033691406, "learning_rate": 2.4102040816326533e-05, "loss": 0.9927, "step": 12699 }, { "epoch": 51.83673469387755, "grad_norm": 30.60862159729004, "learning_rate": 2.41e-05, "loss": 1.9915, "step": 12700 }, { "epoch": 51.840816326530614, "grad_norm": 37.984649658203125, "learning_rate": 2.409795918367347e-05, "loss": 0.9278, "step": 12701 }, { "epoch": 51.844897959183676, "grad_norm": 31.907751083374023, "learning_rate": 2.409591836734694e-05, "loss": 0.4757, "step": 12702 }, { "epoch": 51.84897959183674, "grad_norm": 12.038379669189453, "learning_rate": 2.409387755102041e-05, "loss": 2.8646, "step": 12703 }, { "epoch": 51.85306122448979, "grad_norm": 30.62118148803711, "learning_rate": 2.409183673469388e-05, "loss": 1.9453, "step": 12704 }, { "epoch": 51.857142857142854, "grad_norm": 38.68586349487305, "learning_rate": 2.408979591836735e-05, "loss": 1.079, "step": 12705 }, { "epoch": 51.861224489795916, "grad_norm": 16.79351234436035, "learning_rate": 2.408775510204082e-05, "loss": 2.437, "step": 12706 }, { "epoch": 51.86530612244898, "grad_norm": 15.42440128326416, "learning_rate": 2.4085714285714286e-05, "loss": 2.6736, "step": 12707 }, { "epoch": 51.86938775510204, "grad_norm": 15.096057891845703, "learning_rate": 2.4083673469387757e-05, "loss": 2.8076, "step": 12708 }, { "epoch": 51.8734693877551, "grad_norm": 26.401395797729492, "learning_rate": 2.4081632653061224e-05, "loss": 1.4477, "step": 12709 }, { "epoch": 51.87755102040816, "grad_norm": 49.4881706237793, "learning_rate": 2.4079591836734695e-05, "loss": 1.0859, "step": 12710 }, { "epoch": 51.881632653061224, "grad_norm": 26.900230407714844, "learning_rate": 2.4077551020408165e-05, "loss": 1.2494, "step": 12711 }, { "epoch": 51.885714285714286, "grad_norm": 33.778499603271484, "learning_rate": 2.4075510204081632e-05, "loss": 1.1407, "step": 12712 }, { "epoch": 51.88979591836735, "grad_norm": 32.790016174316406, "learning_rate": 2.4073469387755103e-05, "loss": 1.08, "step": 12713 }, { "epoch": 51.89387755102041, "grad_norm": 33.99293899536133, "learning_rate": 2.4071428571428573e-05, "loss": 0.9428, "step": 12714 }, { "epoch": 51.89795918367347, "grad_norm": 29.1763973236084, "learning_rate": 2.406938775510204e-05, "loss": 1.0635, "step": 12715 }, { "epoch": 51.90204081632653, "grad_norm": 44.361026763916016, "learning_rate": 2.406734693877551e-05, "loss": 1.566, "step": 12716 }, { "epoch": 51.906122448979595, "grad_norm": 32.12364196777344, "learning_rate": 2.406530612244898e-05, "loss": 0.9762, "step": 12717 }, { "epoch": 51.910204081632656, "grad_norm": 35.51154708862305, "learning_rate": 2.4063265306122452e-05, "loss": 1.3846, "step": 12718 }, { "epoch": 51.91428571428571, "grad_norm": 17.336572647094727, "learning_rate": 2.406122448979592e-05, "loss": 2.1203, "step": 12719 }, { "epoch": 51.91836734693877, "grad_norm": 27.1590633392334, "learning_rate": 2.4059183673469386e-05, "loss": 2.1163, "step": 12720 }, { "epoch": 51.922448979591834, "grad_norm": 38.80427932739258, "learning_rate": 2.405714285714286e-05, "loss": 1.0789, "step": 12721 }, { "epoch": 51.926530612244896, "grad_norm": 19.222270965576172, "learning_rate": 2.4055102040816327e-05, "loss": 2.2951, "step": 12722 }, { "epoch": 51.93061224489796, "grad_norm": 36.08755874633789, "learning_rate": 2.4053061224489798e-05, "loss": 0.3414, "step": 12723 }, { "epoch": 51.93469387755102, "grad_norm": 41.65735626220703, "learning_rate": 2.4051020408163265e-05, "loss": 0.2896, "step": 12724 }, { "epoch": 51.93877551020408, "grad_norm": 24.852005004882812, "learning_rate": 2.404897959183674e-05, "loss": 2.4026, "step": 12725 }, { "epoch": 51.94285714285714, "grad_norm": 22.237049102783203, "learning_rate": 2.4046938775510206e-05, "loss": 2.3792, "step": 12726 }, { "epoch": 51.946938775510205, "grad_norm": 23.45689582824707, "learning_rate": 2.4044897959183673e-05, "loss": 2.2207, "step": 12727 }, { "epoch": 51.95102040816327, "grad_norm": 35.576744079589844, "learning_rate": 2.4042857142857144e-05, "loss": 0.7896, "step": 12728 }, { "epoch": 51.95510204081633, "grad_norm": 21.374006271362305, "learning_rate": 2.4040816326530614e-05, "loss": 2.6028, "step": 12729 }, { "epoch": 51.95918367346939, "grad_norm": 23.428756713867188, "learning_rate": 2.4038775510204085e-05, "loss": 2.154, "step": 12730 }, { "epoch": 51.96326530612245, "grad_norm": 17.65506935119629, "learning_rate": 2.403673469387755e-05, "loss": 2.658, "step": 12731 }, { "epoch": 51.96734693877551, "grad_norm": 33.26648712158203, "learning_rate": 2.4034693877551022e-05, "loss": 0.9051, "step": 12732 }, { "epoch": 51.97142857142857, "grad_norm": 44.62031936645508, "learning_rate": 2.4032653061224493e-05, "loss": 1.0648, "step": 12733 }, { "epoch": 51.97551020408163, "grad_norm": 31.49582862854004, "learning_rate": 2.403061224489796e-05, "loss": 1.8679, "step": 12734 }, { "epoch": 51.97959183673469, "grad_norm": 29.853567123413086, "learning_rate": 2.402857142857143e-05, "loss": 1.5267, "step": 12735 }, { "epoch": 51.98367346938775, "grad_norm": 29.3587646484375, "learning_rate": 2.4026530612244897e-05, "loss": 1.6356, "step": 12736 }, { "epoch": 51.987755102040815, "grad_norm": 36.838375091552734, "learning_rate": 2.4024489795918368e-05, "loss": 1.423, "step": 12737 }, { "epoch": 51.99183673469388, "grad_norm": 29.724153518676758, "learning_rate": 2.402244897959184e-05, "loss": 1.7888, "step": 12738 }, { "epoch": 51.99591836734694, "grad_norm": 31.66221809387207, "learning_rate": 2.4020408163265306e-05, "loss": 0.5326, "step": 12739 }, { "epoch": 52.0, "grad_norm": 36.6678352355957, "learning_rate": 2.4018367346938776e-05, "loss": 0.9238, "step": 12740 }, { "epoch": 52.00408163265306, "grad_norm": 14.291210174560547, "learning_rate": 2.4016326530612247e-05, "loss": 3.1234, "step": 12741 }, { "epoch": 52.00816326530612, "grad_norm": 19.100492477416992, "learning_rate": 2.4014285714285714e-05, "loss": 1.8281, "step": 12742 }, { "epoch": 52.012244897959185, "grad_norm": 38.871219635009766, "learning_rate": 2.4012244897959184e-05, "loss": 1.2892, "step": 12743 }, { "epoch": 52.01632653061225, "grad_norm": 19.21677017211914, "learning_rate": 2.4010204081632655e-05, "loss": 2.1722, "step": 12744 }, { "epoch": 52.02040816326531, "grad_norm": 18.455270767211914, "learning_rate": 2.4008163265306125e-05, "loss": 2.087, "step": 12745 }, { "epoch": 52.02448979591837, "grad_norm": 72.87509155273438, "learning_rate": 2.4006122448979592e-05, "loss": 1.3771, "step": 12746 }, { "epoch": 52.02857142857143, "grad_norm": 35.90410232543945, "learning_rate": 2.4004081632653063e-05, "loss": 1.5024, "step": 12747 }, { "epoch": 52.03265306122449, "grad_norm": 34.48478698730469, "learning_rate": 2.4002040816326533e-05, "loss": 1.3132, "step": 12748 }, { "epoch": 52.03673469387755, "grad_norm": 32.25859832763672, "learning_rate": 2.4e-05, "loss": 1.7837, "step": 12749 }, { "epoch": 52.04081632653061, "grad_norm": 60.19091796875, "learning_rate": 2.399795918367347e-05, "loss": 1.7878, "step": 12750 }, { "epoch": 52.04489795918367, "grad_norm": 28.98198127746582, "learning_rate": 2.3995918367346938e-05, "loss": 1.8311, "step": 12751 }, { "epoch": 52.04897959183673, "grad_norm": 44.04347229003906, "learning_rate": 2.3993877551020412e-05, "loss": 0.4399, "step": 12752 }, { "epoch": 52.053061224489795, "grad_norm": 36.26688766479492, "learning_rate": 2.399183673469388e-05, "loss": 1.6092, "step": 12753 }, { "epoch": 52.05714285714286, "grad_norm": 25.302383422851562, "learning_rate": 2.3989795918367346e-05, "loss": 1.6216, "step": 12754 }, { "epoch": 52.06122448979592, "grad_norm": 17.669090270996094, "learning_rate": 2.3987755102040817e-05, "loss": 2.7017, "step": 12755 }, { "epoch": 52.06530612244898, "grad_norm": 54.1716194152832, "learning_rate": 2.3985714285714287e-05, "loss": 0.9559, "step": 12756 }, { "epoch": 52.06938775510204, "grad_norm": 26.36326026916504, "learning_rate": 2.3983673469387758e-05, "loss": 2.1761, "step": 12757 }, { "epoch": 52.073469387755104, "grad_norm": 35.67194747924805, "learning_rate": 2.3981632653061225e-05, "loss": 1.4417, "step": 12758 }, { "epoch": 52.077551020408166, "grad_norm": 51.983436584472656, "learning_rate": 2.3979591836734696e-05, "loss": 0.8983, "step": 12759 }, { "epoch": 52.08163265306123, "grad_norm": 33.573638916015625, "learning_rate": 2.3977551020408166e-05, "loss": 1.748, "step": 12760 }, { "epoch": 52.08571428571429, "grad_norm": 37.397090911865234, "learning_rate": 2.3975510204081633e-05, "loss": 1.436, "step": 12761 }, { "epoch": 52.089795918367344, "grad_norm": 22.7191104888916, "learning_rate": 2.3973469387755104e-05, "loss": 2.1767, "step": 12762 }, { "epoch": 52.093877551020405, "grad_norm": 50.76248550415039, "learning_rate": 2.397142857142857e-05, "loss": 0.8318, "step": 12763 }, { "epoch": 52.09795918367347, "grad_norm": 32.06888961791992, "learning_rate": 2.396938775510204e-05, "loss": 1.5943, "step": 12764 }, { "epoch": 52.10204081632653, "grad_norm": 21.510021209716797, "learning_rate": 2.3967346938775512e-05, "loss": 2.2048, "step": 12765 }, { "epoch": 52.10612244897959, "grad_norm": 22.826614379882812, "learning_rate": 2.396530612244898e-05, "loss": 2.0087, "step": 12766 }, { "epoch": 52.11020408163265, "grad_norm": 36.328147888183594, "learning_rate": 2.396326530612245e-05, "loss": 1.2747, "step": 12767 }, { "epoch": 52.114285714285714, "grad_norm": 26.468156814575195, "learning_rate": 2.396122448979592e-05, "loss": 1.9379, "step": 12768 }, { "epoch": 52.118367346938776, "grad_norm": 54.443782806396484, "learning_rate": 2.3959183673469387e-05, "loss": 0.7725, "step": 12769 }, { "epoch": 52.12244897959184, "grad_norm": 15.252174377441406, "learning_rate": 2.3957142857142858e-05, "loss": 2.7348, "step": 12770 }, { "epoch": 52.1265306122449, "grad_norm": 22.230051040649414, "learning_rate": 2.3955102040816328e-05, "loss": 2.3551, "step": 12771 }, { "epoch": 52.13061224489796, "grad_norm": 17.248416900634766, "learning_rate": 2.39530612244898e-05, "loss": 2.5785, "step": 12772 }, { "epoch": 52.13469387755102, "grad_norm": 35.51095199584961, "learning_rate": 2.3951020408163266e-05, "loss": 0.9809, "step": 12773 }, { "epoch": 52.138775510204084, "grad_norm": 21.430776596069336, "learning_rate": 2.3948979591836736e-05, "loss": 1.9753, "step": 12774 }, { "epoch": 52.142857142857146, "grad_norm": 20.051816940307617, "learning_rate": 2.3946938775510207e-05, "loss": 2.4309, "step": 12775 }, { "epoch": 52.14693877551021, "grad_norm": 26.19577407836914, "learning_rate": 2.3944897959183674e-05, "loss": 1.84, "step": 12776 }, { "epoch": 52.15102040816326, "grad_norm": 22.269580841064453, "learning_rate": 2.3942857142857144e-05, "loss": 1.993, "step": 12777 }, { "epoch": 52.155102040816324, "grad_norm": 22.91253662109375, "learning_rate": 2.394081632653061e-05, "loss": 1.8307, "step": 12778 }, { "epoch": 52.159183673469386, "grad_norm": 30.65831756591797, "learning_rate": 2.3938775510204086e-05, "loss": 1.4005, "step": 12779 }, { "epoch": 52.16326530612245, "grad_norm": 20.366445541381836, "learning_rate": 2.3936734693877553e-05, "loss": 2.1383, "step": 12780 }, { "epoch": 52.16734693877551, "grad_norm": 20.138774871826172, "learning_rate": 2.393469387755102e-05, "loss": 2.2893, "step": 12781 }, { "epoch": 52.17142857142857, "grad_norm": 33.873924255371094, "learning_rate": 2.393265306122449e-05, "loss": 1.5217, "step": 12782 }, { "epoch": 52.17551020408163, "grad_norm": 21.30438804626465, "learning_rate": 2.393061224489796e-05, "loss": 2.2186, "step": 12783 }, { "epoch": 52.179591836734694, "grad_norm": 55.09892272949219, "learning_rate": 2.392857142857143e-05, "loss": 2.0019, "step": 12784 }, { "epoch": 52.183673469387756, "grad_norm": 29.078500747680664, "learning_rate": 2.39265306122449e-05, "loss": 0.3359, "step": 12785 }, { "epoch": 52.18775510204082, "grad_norm": 41.68220901489258, "learning_rate": 2.3924489795918366e-05, "loss": 0.993, "step": 12786 }, { "epoch": 52.19183673469388, "grad_norm": 27.953866958618164, "learning_rate": 2.392244897959184e-05, "loss": 1.748, "step": 12787 }, { "epoch": 52.19591836734694, "grad_norm": 34.363380432128906, "learning_rate": 2.3920408163265307e-05, "loss": 1.2763, "step": 12788 }, { "epoch": 52.2, "grad_norm": 47.54857635498047, "learning_rate": 2.3918367346938777e-05, "loss": 0.9895, "step": 12789 }, { "epoch": 52.204081632653065, "grad_norm": 28.19446563720703, "learning_rate": 2.3916326530612244e-05, "loss": 1.48, "step": 12790 }, { "epoch": 52.20816326530612, "grad_norm": 26.964174270629883, "learning_rate": 2.3914285714285715e-05, "loss": 2.0027, "step": 12791 }, { "epoch": 52.21224489795918, "grad_norm": 21.280231475830078, "learning_rate": 2.3912244897959185e-05, "loss": 2.2895, "step": 12792 }, { "epoch": 52.21632653061224, "grad_norm": 30.652551651000977, "learning_rate": 2.3910204081632652e-05, "loss": 1.8867, "step": 12793 }, { "epoch": 52.220408163265304, "grad_norm": 39.891361236572266, "learning_rate": 2.3908163265306123e-05, "loss": 0.307, "step": 12794 }, { "epoch": 52.224489795918366, "grad_norm": 28.13930320739746, "learning_rate": 2.3906122448979593e-05, "loss": 1.3322, "step": 12795 }, { "epoch": 52.22857142857143, "grad_norm": 29.298542022705078, "learning_rate": 2.3904081632653064e-05, "loss": 1.8466, "step": 12796 }, { "epoch": 52.23265306122449, "grad_norm": 15.880105972290039, "learning_rate": 2.390204081632653e-05, "loss": 2.5098, "step": 12797 }, { "epoch": 52.23673469387755, "grad_norm": 30.192590713500977, "learning_rate": 2.39e-05, "loss": 1.2472, "step": 12798 }, { "epoch": 52.24081632653061, "grad_norm": 20.31968879699707, "learning_rate": 2.3897959183673472e-05, "loss": 2.0865, "step": 12799 }, { "epoch": 52.244897959183675, "grad_norm": 46.486751556396484, "learning_rate": 2.389591836734694e-05, "loss": 0.9858, "step": 12800 }, { "epoch": 52.248979591836736, "grad_norm": 41.77982711791992, "learning_rate": 2.389387755102041e-05, "loss": 1.1038, "step": 12801 }, { "epoch": 52.2530612244898, "grad_norm": 16.26019859313965, "learning_rate": 2.389183673469388e-05, "loss": 2.737, "step": 12802 }, { "epoch": 52.25714285714286, "grad_norm": 23.106529235839844, "learning_rate": 2.3889795918367347e-05, "loss": 2.1676, "step": 12803 }, { "epoch": 52.26122448979592, "grad_norm": 28.79117774963379, "learning_rate": 2.3887755102040818e-05, "loss": 0.8025, "step": 12804 }, { "epoch": 52.265306122448976, "grad_norm": 22.985185623168945, "learning_rate": 2.3885714285714285e-05, "loss": 1.9127, "step": 12805 }, { "epoch": 52.26938775510204, "grad_norm": 16.040239334106445, "learning_rate": 2.388367346938776e-05, "loss": 2.6655, "step": 12806 }, { "epoch": 52.2734693877551, "grad_norm": 30.60906982421875, "learning_rate": 2.3881632653061226e-05, "loss": 1.6296, "step": 12807 }, { "epoch": 52.27755102040816, "grad_norm": 13.600629806518555, "learning_rate": 2.3879591836734693e-05, "loss": 2.9946, "step": 12808 }, { "epoch": 52.28163265306122, "grad_norm": 31.133930206298828, "learning_rate": 2.3877551020408164e-05, "loss": 1.1107, "step": 12809 }, { "epoch": 52.285714285714285, "grad_norm": 41.197147369384766, "learning_rate": 2.3875510204081634e-05, "loss": 1.9995, "step": 12810 }, { "epoch": 52.289795918367346, "grad_norm": 32.66989517211914, "learning_rate": 2.3873469387755105e-05, "loss": 1.5266, "step": 12811 }, { "epoch": 52.29387755102041, "grad_norm": 32.3698844909668, "learning_rate": 2.3871428571428572e-05, "loss": 1.7575, "step": 12812 }, { "epoch": 52.29795918367347, "grad_norm": 19.302566528320312, "learning_rate": 2.386938775510204e-05, "loss": 2.545, "step": 12813 }, { "epoch": 52.30204081632653, "grad_norm": 19.862884521484375, "learning_rate": 2.3867346938775513e-05, "loss": 2.1662, "step": 12814 }, { "epoch": 52.30612244897959, "grad_norm": 22.44745445251465, "learning_rate": 2.386530612244898e-05, "loss": 2.0409, "step": 12815 }, { "epoch": 52.310204081632655, "grad_norm": 32.933841705322266, "learning_rate": 2.386326530612245e-05, "loss": 1.7614, "step": 12816 }, { "epoch": 52.31428571428572, "grad_norm": 54.283382415771484, "learning_rate": 2.3861224489795918e-05, "loss": 0.4692, "step": 12817 }, { "epoch": 52.31836734693878, "grad_norm": 18.613990783691406, "learning_rate": 2.385918367346939e-05, "loss": 2.5855, "step": 12818 }, { "epoch": 52.32244897959184, "grad_norm": 28.604564666748047, "learning_rate": 2.385714285714286e-05, "loss": 2.6627, "step": 12819 }, { "epoch": 52.326530612244895, "grad_norm": 47.37261199951172, "learning_rate": 2.3855102040816326e-05, "loss": 1.0436, "step": 12820 }, { "epoch": 52.33061224489796, "grad_norm": 22.863521575927734, "learning_rate": 2.3853061224489796e-05, "loss": 2.0531, "step": 12821 }, { "epoch": 52.33469387755102, "grad_norm": 47.71017074584961, "learning_rate": 2.3851020408163267e-05, "loss": 1.5943, "step": 12822 }, { "epoch": 52.33877551020408, "grad_norm": 19.28301239013672, "learning_rate": 2.3848979591836737e-05, "loss": 2.2641, "step": 12823 }, { "epoch": 52.34285714285714, "grad_norm": 42.94385528564453, "learning_rate": 2.3846938775510204e-05, "loss": 1.0115, "step": 12824 }, { "epoch": 52.3469387755102, "grad_norm": 42.73767852783203, "learning_rate": 2.3844897959183675e-05, "loss": 1.3465, "step": 12825 }, { "epoch": 52.351020408163265, "grad_norm": 34.70033264160156, "learning_rate": 2.3842857142857145e-05, "loss": 2.1285, "step": 12826 }, { "epoch": 52.35510204081633, "grad_norm": 29.602331161499023, "learning_rate": 2.3840816326530613e-05, "loss": 1.8775, "step": 12827 }, { "epoch": 52.35918367346939, "grad_norm": 38.38645553588867, "learning_rate": 2.3838775510204083e-05, "loss": 0.3113, "step": 12828 }, { "epoch": 52.36326530612245, "grad_norm": 34.80698013305664, "learning_rate": 2.3836734693877554e-05, "loss": 0.7728, "step": 12829 }, { "epoch": 52.36734693877551, "grad_norm": 20.59244155883789, "learning_rate": 2.383469387755102e-05, "loss": 2.2044, "step": 12830 }, { "epoch": 52.371428571428574, "grad_norm": 19.128725051879883, "learning_rate": 2.383265306122449e-05, "loss": 2.215, "step": 12831 }, { "epoch": 52.375510204081635, "grad_norm": 29.426734924316406, "learning_rate": 2.383061224489796e-05, "loss": 1.6483, "step": 12832 }, { "epoch": 52.3795918367347, "grad_norm": 40.076454162597656, "learning_rate": 2.3828571428571432e-05, "loss": 0.3888, "step": 12833 }, { "epoch": 52.38367346938775, "grad_norm": 37.20435333251953, "learning_rate": 2.38265306122449e-05, "loss": 0.836, "step": 12834 }, { "epoch": 52.38775510204081, "grad_norm": 27.303123474121094, "learning_rate": 2.3824489795918367e-05, "loss": 2.0913, "step": 12835 }, { "epoch": 52.391836734693875, "grad_norm": 31.792919158935547, "learning_rate": 2.3822448979591837e-05, "loss": 1.3757, "step": 12836 }, { "epoch": 52.39591836734694, "grad_norm": 9.967469215393066, "learning_rate": 2.3820408163265308e-05, "loss": 3.3329, "step": 12837 }, { "epoch": 52.4, "grad_norm": 30.277542114257812, "learning_rate": 2.3818367346938778e-05, "loss": 1.906, "step": 12838 }, { "epoch": 52.40408163265306, "grad_norm": 24.52106285095215, "learning_rate": 2.3816326530612245e-05, "loss": 1.984, "step": 12839 }, { "epoch": 52.40816326530612, "grad_norm": 20.45278549194336, "learning_rate": 2.3814285714285716e-05, "loss": 2.3413, "step": 12840 }, { "epoch": 52.412244897959184, "grad_norm": 22.047515869140625, "learning_rate": 2.3812244897959186e-05, "loss": 2.28, "step": 12841 }, { "epoch": 52.416326530612245, "grad_norm": 32.09046173095703, "learning_rate": 2.3810204081632653e-05, "loss": 1.7419, "step": 12842 }, { "epoch": 52.42040816326531, "grad_norm": 21.63228416442871, "learning_rate": 2.3808163265306124e-05, "loss": 1.9841, "step": 12843 }, { "epoch": 52.42448979591837, "grad_norm": 36.4893684387207, "learning_rate": 2.380612244897959e-05, "loss": 0.5775, "step": 12844 }, { "epoch": 52.42857142857143, "grad_norm": 21.28431510925293, "learning_rate": 2.3804081632653065e-05, "loss": 2.0697, "step": 12845 }, { "epoch": 52.43265306122449, "grad_norm": 32.09196472167969, "learning_rate": 2.3802040816326532e-05, "loss": 1.0175, "step": 12846 }, { "epoch": 52.436734693877554, "grad_norm": 19.711767196655273, "learning_rate": 2.38e-05, "loss": 2.6785, "step": 12847 }, { "epoch": 52.440816326530616, "grad_norm": 33.7541618347168, "learning_rate": 2.379795918367347e-05, "loss": 0.4433, "step": 12848 }, { "epoch": 52.44489795918367, "grad_norm": 29.791536331176758, "learning_rate": 2.379591836734694e-05, "loss": 1.8374, "step": 12849 }, { "epoch": 52.44897959183673, "grad_norm": 23.391780853271484, "learning_rate": 2.379387755102041e-05, "loss": 2.0538, "step": 12850 }, { "epoch": 52.453061224489794, "grad_norm": 21.759666442871094, "learning_rate": 2.3791836734693878e-05, "loss": 2.29, "step": 12851 }, { "epoch": 52.457142857142856, "grad_norm": 32.58419418334961, "learning_rate": 2.378979591836735e-05, "loss": 1.8037, "step": 12852 }, { "epoch": 52.46122448979592, "grad_norm": 30.892187118530273, "learning_rate": 2.378775510204082e-05, "loss": 1.0261, "step": 12853 }, { "epoch": 52.46530612244898, "grad_norm": 33.364444732666016, "learning_rate": 2.3785714285714286e-05, "loss": 1.3236, "step": 12854 }, { "epoch": 52.46938775510204, "grad_norm": 21.500301361083984, "learning_rate": 2.3783673469387756e-05, "loss": 2.2703, "step": 12855 }, { "epoch": 52.4734693877551, "grad_norm": 43.25475311279297, "learning_rate": 2.3781632653061227e-05, "loss": 1.7208, "step": 12856 }, { "epoch": 52.477551020408164, "grad_norm": 22.532785415649414, "learning_rate": 2.3779591836734694e-05, "loss": 2.6284, "step": 12857 }, { "epoch": 52.481632653061226, "grad_norm": 27.555252075195312, "learning_rate": 2.3777551020408165e-05, "loss": 1.6616, "step": 12858 }, { "epoch": 52.48571428571429, "grad_norm": 42.839210510253906, "learning_rate": 2.3775510204081632e-05, "loss": 1.2388, "step": 12859 }, { "epoch": 52.48979591836735, "grad_norm": 29.503538131713867, "learning_rate": 2.3773469387755106e-05, "loss": 1.7812, "step": 12860 }, { "epoch": 52.49387755102041, "grad_norm": 35.415496826171875, "learning_rate": 2.3771428571428573e-05, "loss": 2.3086, "step": 12861 }, { "epoch": 52.49795918367347, "grad_norm": 27.880022048950195, "learning_rate": 2.3769387755102043e-05, "loss": 2.1396, "step": 12862 }, { "epoch": 52.50204081632653, "grad_norm": 20.121713638305664, "learning_rate": 2.376734693877551e-05, "loss": 2.2691, "step": 12863 }, { "epoch": 52.50612244897959, "grad_norm": 41.36777114868164, "learning_rate": 2.376530612244898e-05, "loss": 0.9897, "step": 12864 }, { "epoch": 52.51020408163265, "grad_norm": 23.96086311340332, "learning_rate": 2.376326530612245e-05, "loss": 1.5901, "step": 12865 }, { "epoch": 52.51428571428571, "grad_norm": 34.76362991333008, "learning_rate": 2.376122448979592e-05, "loss": 0.9564, "step": 12866 }, { "epoch": 52.518367346938774, "grad_norm": 30.184770584106445, "learning_rate": 2.375918367346939e-05, "loss": 1.8556, "step": 12867 }, { "epoch": 52.522448979591836, "grad_norm": 42.9018669128418, "learning_rate": 2.375714285714286e-05, "loss": 0.9851, "step": 12868 }, { "epoch": 52.5265306122449, "grad_norm": 36.96063232421875, "learning_rate": 2.3755102040816327e-05, "loss": 0.9053, "step": 12869 }, { "epoch": 52.53061224489796, "grad_norm": 41.86830520629883, "learning_rate": 2.3753061224489797e-05, "loss": 0.623, "step": 12870 }, { "epoch": 52.53469387755102, "grad_norm": 18.403017044067383, "learning_rate": 2.3751020408163264e-05, "loss": 2.7758, "step": 12871 }, { "epoch": 52.53877551020408, "grad_norm": 32.20552444458008, "learning_rate": 2.3748979591836738e-05, "loss": 1.9856, "step": 12872 }, { "epoch": 52.542857142857144, "grad_norm": 30.614688873291016, "learning_rate": 2.3746938775510205e-05, "loss": 2.0034, "step": 12873 }, { "epoch": 52.546938775510206, "grad_norm": 23.00775718688965, "learning_rate": 2.3744897959183673e-05, "loss": 1.9192, "step": 12874 }, { "epoch": 52.55102040816327, "grad_norm": 43.31493377685547, "learning_rate": 2.3742857142857143e-05, "loss": 0.967, "step": 12875 }, { "epoch": 52.55510204081633, "grad_norm": 33.7063102722168, "learning_rate": 2.3740816326530614e-05, "loss": 0.9268, "step": 12876 }, { "epoch": 52.55918367346939, "grad_norm": 35.29884719848633, "learning_rate": 2.3738775510204084e-05, "loss": 1.4157, "step": 12877 }, { "epoch": 52.563265306122446, "grad_norm": 32.07984161376953, "learning_rate": 2.373673469387755e-05, "loss": 0.8437, "step": 12878 }, { "epoch": 52.56734693877551, "grad_norm": 29.524890899658203, "learning_rate": 2.373469387755102e-05, "loss": 1.6606, "step": 12879 }, { "epoch": 52.57142857142857, "grad_norm": 24.326004028320312, "learning_rate": 2.3732653061224492e-05, "loss": 2.4081, "step": 12880 }, { "epoch": 52.57551020408163, "grad_norm": 47.84695053100586, "learning_rate": 2.373061224489796e-05, "loss": 0.8066, "step": 12881 }, { "epoch": 52.57959183673469, "grad_norm": 31.56798553466797, "learning_rate": 2.372857142857143e-05, "loss": 0.4754, "step": 12882 }, { "epoch": 52.583673469387755, "grad_norm": 38.49405288696289, "learning_rate": 2.37265306122449e-05, "loss": 1.2135, "step": 12883 }, { "epoch": 52.587755102040816, "grad_norm": 27.823965072631836, "learning_rate": 2.372448979591837e-05, "loss": 2.4202, "step": 12884 }, { "epoch": 52.59183673469388, "grad_norm": 39.82567596435547, "learning_rate": 2.3722448979591838e-05, "loss": 1.0249, "step": 12885 }, { "epoch": 52.59591836734694, "grad_norm": 36.67573928833008, "learning_rate": 2.3720408163265305e-05, "loss": 1.5997, "step": 12886 }, { "epoch": 52.6, "grad_norm": 26.86486053466797, "learning_rate": 2.371836734693878e-05, "loss": 1.6352, "step": 12887 }, { "epoch": 52.60408163265306, "grad_norm": 49.90410614013672, "learning_rate": 2.3716326530612246e-05, "loss": 0.5127, "step": 12888 }, { "epoch": 52.608163265306125, "grad_norm": 36.06394958496094, "learning_rate": 2.3714285714285717e-05, "loss": 1.8462, "step": 12889 }, { "epoch": 52.61224489795919, "grad_norm": 24.24672508239746, "learning_rate": 2.3712244897959184e-05, "loss": 2.249, "step": 12890 }, { "epoch": 52.61632653061225, "grad_norm": 38.599952697753906, "learning_rate": 2.3710204081632654e-05, "loss": 1.4792, "step": 12891 }, { "epoch": 52.6204081632653, "grad_norm": 36.06313705444336, "learning_rate": 2.3708163265306125e-05, "loss": 1.2684, "step": 12892 }, { "epoch": 52.624489795918365, "grad_norm": 30.582427978515625, "learning_rate": 2.3706122448979592e-05, "loss": 1.422, "step": 12893 }, { "epoch": 52.628571428571426, "grad_norm": 30.016569137573242, "learning_rate": 2.3704081632653062e-05, "loss": 1.5051, "step": 12894 }, { "epoch": 52.63265306122449, "grad_norm": 31.937705993652344, "learning_rate": 2.3702040816326533e-05, "loss": 1.7179, "step": 12895 }, { "epoch": 52.63673469387755, "grad_norm": 43.40457534790039, "learning_rate": 2.37e-05, "loss": 1.1314, "step": 12896 }, { "epoch": 52.64081632653061, "grad_norm": 31.62162971496582, "learning_rate": 2.369795918367347e-05, "loss": 1.7885, "step": 12897 }, { "epoch": 52.64489795918367, "grad_norm": 32.69917678833008, "learning_rate": 2.3695918367346938e-05, "loss": 1.5153, "step": 12898 }, { "epoch": 52.648979591836735, "grad_norm": 46.64911651611328, "learning_rate": 2.369387755102041e-05, "loss": 1.2438, "step": 12899 }, { "epoch": 52.6530612244898, "grad_norm": 29.033130645751953, "learning_rate": 2.369183673469388e-05, "loss": 2.052, "step": 12900 }, { "epoch": 52.65714285714286, "grad_norm": 27.4384822845459, "learning_rate": 2.3689795918367346e-05, "loss": 1.6114, "step": 12901 }, { "epoch": 52.66122448979592, "grad_norm": 29.618886947631836, "learning_rate": 2.3687755102040816e-05, "loss": 2.1539, "step": 12902 }, { "epoch": 52.66530612244898, "grad_norm": 32.14781951904297, "learning_rate": 2.3685714285714287e-05, "loss": 1.7512, "step": 12903 }, { "epoch": 52.66938775510204, "grad_norm": 16.53158950805664, "learning_rate": 2.3683673469387757e-05, "loss": 2.064, "step": 12904 }, { "epoch": 52.673469387755105, "grad_norm": 37.59307098388672, "learning_rate": 2.3681632653061225e-05, "loss": 1.0524, "step": 12905 }, { "epoch": 52.67755102040816, "grad_norm": 32.317684173583984, "learning_rate": 2.3679591836734695e-05, "loss": 2.1508, "step": 12906 }, { "epoch": 52.68163265306122, "grad_norm": 21.3685302734375, "learning_rate": 2.3677551020408166e-05, "loss": 2.401, "step": 12907 }, { "epoch": 52.68571428571428, "grad_norm": 15.30116081237793, "learning_rate": 2.3675510204081633e-05, "loss": 2.866, "step": 12908 }, { "epoch": 52.689795918367345, "grad_norm": 31.621339797973633, "learning_rate": 2.3673469387755103e-05, "loss": 1.3845, "step": 12909 }, { "epoch": 52.69387755102041, "grad_norm": 46.887611389160156, "learning_rate": 2.3671428571428574e-05, "loss": 1.1425, "step": 12910 }, { "epoch": 52.69795918367347, "grad_norm": 43.19254684448242, "learning_rate": 2.3669387755102044e-05, "loss": 0.8445, "step": 12911 }, { "epoch": 52.70204081632653, "grad_norm": 28.931922912597656, "learning_rate": 2.366734693877551e-05, "loss": 0.2143, "step": 12912 }, { "epoch": 52.70612244897959, "grad_norm": 33.36891174316406, "learning_rate": 2.366530612244898e-05, "loss": 1.5516, "step": 12913 }, { "epoch": 52.710204081632654, "grad_norm": 35.972496032714844, "learning_rate": 2.3663265306122452e-05, "loss": 1.9257, "step": 12914 }, { "epoch": 52.714285714285715, "grad_norm": 20.238981246948242, "learning_rate": 2.366122448979592e-05, "loss": 2.365, "step": 12915 }, { "epoch": 52.71836734693878, "grad_norm": 34.990238189697266, "learning_rate": 2.365918367346939e-05, "loss": 0.7401, "step": 12916 }, { "epoch": 52.72244897959184, "grad_norm": 22.39406394958496, "learning_rate": 2.3657142857142857e-05, "loss": 2.1275, "step": 12917 }, { "epoch": 52.7265306122449, "grad_norm": 15.534229278564453, "learning_rate": 2.3655102040816328e-05, "loss": 2.6034, "step": 12918 }, { "epoch": 52.73061224489796, "grad_norm": 31.715511322021484, "learning_rate": 2.3653061224489798e-05, "loss": 1.5138, "step": 12919 }, { "epoch": 52.734693877551024, "grad_norm": 24.906034469604492, "learning_rate": 2.3651020408163265e-05, "loss": 2.0217, "step": 12920 }, { "epoch": 52.73877551020408, "grad_norm": 35.07241439819336, "learning_rate": 2.3648979591836736e-05, "loss": 1.4964, "step": 12921 }, { "epoch": 52.74285714285714, "grad_norm": 31.60633087158203, "learning_rate": 2.3646938775510206e-05, "loss": 1.1756, "step": 12922 }, { "epoch": 52.7469387755102, "grad_norm": 35.95296096801758, "learning_rate": 2.3644897959183673e-05, "loss": 1.3981, "step": 12923 }, { "epoch": 52.751020408163264, "grad_norm": 35.69467544555664, "learning_rate": 2.3642857142857144e-05, "loss": 0.9899, "step": 12924 }, { "epoch": 52.755102040816325, "grad_norm": 39.94266891479492, "learning_rate": 2.364081632653061e-05, "loss": 0.8332, "step": 12925 }, { "epoch": 52.75918367346939, "grad_norm": 32.30085754394531, "learning_rate": 2.3638775510204085e-05, "loss": 1.3904, "step": 12926 }, { "epoch": 52.76326530612245, "grad_norm": 16.741792678833008, "learning_rate": 2.3636734693877552e-05, "loss": 2.6441, "step": 12927 }, { "epoch": 52.76734693877551, "grad_norm": 29.30547332763672, "learning_rate": 2.363469387755102e-05, "loss": 1.6022, "step": 12928 }, { "epoch": 52.77142857142857, "grad_norm": 49.045265197753906, "learning_rate": 2.363265306122449e-05, "loss": 0.8175, "step": 12929 }, { "epoch": 52.775510204081634, "grad_norm": 25.701784133911133, "learning_rate": 2.363061224489796e-05, "loss": 2.2058, "step": 12930 }, { "epoch": 52.779591836734696, "grad_norm": 27.873374938964844, "learning_rate": 2.362857142857143e-05, "loss": 1.6494, "step": 12931 }, { "epoch": 52.78367346938776, "grad_norm": 31.971113204956055, "learning_rate": 2.3626530612244898e-05, "loss": 0.838, "step": 12932 }, { "epoch": 52.78775510204082, "grad_norm": 27.870010375976562, "learning_rate": 2.362448979591837e-05, "loss": 0.393, "step": 12933 }, { "epoch": 52.79183673469388, "grad_norm": 29.481094360351562, "learning_rate": 2.362244897959184e-05, "loss": 2.0885, "step": 12934 }, { "epoch": 52.795918367346935, "grad_norm": 24.112207412719727, "learning_rate": 2.3620408163265306e-05, "loss": 1.9791, "step": 12935 }, { "epoch": 52.8, "grad_norm": 26.506982803344727, "learning_rate": 2.3618367346938777e-05, "loss": 1.5573, "step": 12936 }, { "epoch": 52.80408163265306, "grad_norm": 20.4199275970459, "learning_rate": 2.3616326530612247e-05, "loss": 2.2674, "step": 12937 }, { "epoch": 52.80816326530612, "grad_norm": 18.791343688964844, "learning_rate": 2.3614285714285718e-05, "loss": 2.5801, "step": 12938 }, { "epoch": 52.81224489795918, "grad_norm": 10.825396537780762, "learning_rate": 2.3612244897959185e-05, "loss": 0.0803, "step": 12939 }, { "epoch": 52.816326530612244, "grad_norm": 31.631235122680664, "learning_rate": 2.3610204081632652e-05, "loss": 1.209, "step": 12940 }, { "epoch": 52.820408163265306, "grad_norm": 17.246225357055664, "learning_rate": 2.3608163265306126e-05, "loss": 2.8809, "step": 12941 }, { "epoch": 52.82448979591837, "grad_norm": 28.716524124145508, "learning_rate": 2.3606122448979593e-05, "loss": 1.4784, "step": 12942 }, { "epoch": 52.82857142857143, "grad_norm": 24.459096908569336, "learning_rate": 2.3604081632653063e-05, "loss": 2.0539, "step": 12943 }, { "epoch": 52.83265306122449, "grad_norm": 31.166362762451172, "learning_rate": 2.360204081632653e-05, "loss": 1.0196, "step": 12944 }, { "epoch": 52.83673469387755, "grad_norm": 29.905139923095703, "learning_rate": 2.36e-05, "loss": 1.6272, "step": 12945 }, { "epoch": 52.840816326530614, "grad_norm": 31.476356506347656, "learning_rate": 2.359795918367347e-05, "loss": 1.525, "step": 12946 }, { "epoch": 52.844897959183676, "grad_norm": 17.000577926635742, "learning_rate": 2.359591836734694e-05, "loss": 2.2485, "step": 12947 }, { "epoch": 52.84897959183674, "grad_norm": 24.13064956665039, "learning_rate": 2.359387755102041e-05, "loss": 2.4835, "step": 12948 }, { "epoch": 52.85306122448979, "grad_norm": 26.417823791503906, "learning_rate": 2.359183673469388e-05, "loss": 1.9647, "step": 12949 }, { "epoch": 52.857142857142854, "grad_norm": 54.39780044555664, "learning_rate": 2.3589795918367347e-05, "loss": 0.863, "step": 12950 }, { "epoch": 52.861224489795916, "grad_norm": 27.108774185180664, "learning_rate": 2.3587755102040817e-05, "loss": 1.8562, "step": 12951 }, { "epoch": 52.86530612244898, "grad_norm": 32.09406280517578, "learning_rate": 2.3585714285714284e-05, "loss": 0.4862, "step": 12952 }, { "epoch": 52.86938775510204, "grad_norm": 36.731422424316406, "learning_rate": 2.358367346938776e-05, "loss": 0.3771, "step": 12953 }, { "epoch": 52.8734693877551, "grad_norm": 26.92649269104004, "learning_rate": 2.3581632653061226e-05, "loss": 2.1568, "step": 12954 }, { "epoch": 52.87755102040816, "grad_norm": 35.7008171081543, "learning_rate": 2.3579591836734696e-05, "loss": 1.3782, "step": 12955 }, { "epoch": 52.881632653061224, "grad_norm": 30.652671813964844, "learning_rate": 2.3577551020408163e-05, "loss": 1.6019, "step": 12956 }, { "epoch": 52.885714285714286, "grad_norm": 33.128055572509766, "learning_rate": 2.3575510204081634e-05, "loss": 1.3318, "step": 12957 }, { "epoch": 52.88979591836735, "grad_norm": 45.21607208251953, "learning_rate": 2.3573469387755104e-05, "loss": 0.8601, "step": 12958 }, { "epoch": 52.89387755102041, "grad_norm": 29.932035446166992, "learning_rate": 2.357142857142857e-05, "loss": 1.9439, "step": 12959 }, { "epoch": 52.89795918367347, "grad_norm": 31.307004928588867, "learning_rate": 2.3569387755102042e-05, "loss": 1.6152, "step": 12960 }, { "epoch": 52.90204081632653, "grad_norm": 32.203880310058594, "learning_rate": 2.3567346938775512e-05, "loss": 1.343, "step": 12961 }, { "epoch": 52.906122448979595, "grad_norm": 11.957737922668457, "learning_rate": 2.356530612244898e-05, "loss": 2.8342, "step": 12962 }, { "epoch": 52.910204081632656, "grad_norm": 32.19731521606445, "learning_rate": 2.356326530612245e-05, "loss": 1.836, "step": 12963 }, { "epoch": 52.91428571428571, "grad_norm": 18.854894638061523, "learning_rate": 2.356122448979592e-05, "loss": 2.2576, "step": 12964 }, { "epoch": 52.91836734693877, "grad_norm": 15.65337085723877, "learning_rate": 2.355918367346939e-05, "loss": 2.3755, "step": 12965 }, { "epoch": 52.922448979591834, "grad_norm": 23.3660831451416, "learning_rate": 2.3557142857142858e-05, "loss": 1.9754, "step": 12966 }, { "epoch": 52.926530612244896, "grad_norm": 36.871360778808594, "learning_rate": 2.3555102040816325e-05, "loss": 1.0221, "step": 12967 }, { "epoch": 52.93061224489796, "grad_norm": 34.32353973388672, "learning_rate": 2.35530612244898e-05, "loss": 1.9528, "step": 12968 }, { "epoch": 52.93469387755102, "grad_norm": 18.25253677368164, "learning_rate": 2.3551020408163266e-05, "loss": 2.3768, "step": 12969 }, { "epoch": 52.93877551020408, "grad_norm": 15.227557182312012, "learning_rate": 2.3548979591836737e-05, "loss": 2.6248, "step": 12970 }, { "epoch": 52.94285714285714, "grad_norm": 31.064462661743164, "learning_rate": 2.3546938775510204e-05, "loss": 0.9593, "step": 12971 }, { "epoch": 52.946938775510205, "grad_norm": 21.535303115844727, "learning_rate": 2.3544897959183674e-05, "loss": 2.3466, "step": 12972 }, { "epoch": 52.95102040816327, "grad_norm": 35.821388244628906, "learning_rate": 2.3542857142857145e-05, "loss": 1.906, "step": 12973 }, { "epoch": 52.95510204081633, "grad_norm": 50.55832290649414, "learning_rate": 2.3540816326530612e-05, "loss": 0.3991, "step": 12974 }, { "epoch": 52.95918367346939, "grad_norm": 41.80999755859375, "learning_rate": 2.3538775510204083e-05, "loss": 0.3819, "step": 12975 }, { "epoch": 52.96326530612245, "grad_norm": 20.131349563598633, "learning_rate": 2.3536734693877553e-05, "loss": 2.3517, "step": 12976 }, { "epoch": 52.96734693877551, "grad_norm": 23.672100067138672, "learning_rate": 2.3534693877551024e-05, "loss": 2.1722, "step": 12977 }, { "epoch": 52.97142857142857, "grad_norm": 21.375343322753906, "learning_rate": 2.353265306122449e-05, "loss": 2.3812, "step": 12978 }, { "epoch": 52.97551020408163, "grad_norm": 36.46274948120117, "learning_rate": 2.3530612244897958e-05, "loss": 1.1141, "step": 12979 }, { "epoch": 52.97959183673469, "grad_norm": 26.177104949951172, "learning_rate": 2.3528571428571432e-05, "loss": 2.1421, "step": 12980 }, { "epoch": 52.98367346938775, "grad_norm": 37.787261962890625, "learning_rate": 2.35265306122449e-05, "loss": 1.4276, "step": 12981 }, { "epoch": 52.987755102040815, "grad_norm": 39.55784225463867, "learning_rate": 2.352448979591837e-05, "loss": 1.1888, "step": 12982 }, { "epoch": 52.99183673469388, "grad_norm": 19.633337020874023, "learning_rate": 2.3522448979591837e-05, "loss": 2.2668, "step": 12983 }, { "epoch": 52.99591836734694, "grad_norm": 25.020544052124023, "learning_rate": 2.3520408163265307e-05, "loss": 2.3187, "step": 12984 }, { "epoch": 53.0, "grad_norm": 46.81976318359375, "learning_rate": 2.3518367346938778e-05, "loss": 0.9579, "step": 12985 }, { "epoch": 53.00408163265306, "grad_norm": 21.349143981933594, "learning_rate": 2.3516326530612245e-05, "loss": 1.8793, "step": 12986 }, { "epoch": 53.00816326530612, "grad_norm": 11.832388877868652, "learning_rate": 2.3514285714285715e-05, "loss": 3.0863, "step": 12987 }, { "epoch": 53.012244897959185, "grad_norm": 20.77559471130371, "learning_rate": 2.3512244897959186e-05, "loss": 0.1364, "step": 12988 }, { "epoch": 53.01632653061225, "grad_norm": 37.1572265625, "learning_rate": 2.3510204081632653e-05, "loss": 1.7082, "step": 12989 }, { "epoch": 53.02040816326531, "grad_norm": 27.613245010375977, "learning_rate": 2.3508163265306123e-05, "loss": 1.5301, "step": 12990 }, { "epoch": 53.02448979591837, "grad_norm": 42.87786102294922, "learning_rate": 2.3506122448979594e-05, "loss": 0.9362, "step": 12991 }, { "epoch": 53.02857142857143, "grad_norm": 22.6224422454834, "learning_rate": 2.3504081632653064e-05, "loss": 1.6802, "step": 12992 }, { "epoch": 53.03265306122449, "grad_norm": 22.68052864074707, "learning_rate": 2.350204081632653e-05, "loss": 1.8991, "step": 12993 }, { "epoch": 53.03673469387755, "grad_norm": 22.486225128173828, "learning_rate": 2.35e-05, "loss": 2.0058, "step": 12994 }, { "epoch": 53.04081632653061, "grad_norm": 18.278053283691406, "learning_rate": 2.349795918367347e-05, "loss": 2.5312, "step": 12995 }, { "epoch": 53.04489795918367, "grad_norm": 47.28244400024414, "learning_rate": 2.349591836734694e-05, "loss": 1.9577, "step": 12996 }, { "epoch": 53.04897959183673, "grad_norm": 20.062326431274414, "learning_rate": 2.349387755102041e-05, "loss": 2.1563, "step": 12997 }, { "epoch": 53.053061224489795, "grad_norm": 43.2871208190918, "learning_rate": 2.3491836734693877e-05, "loss": 0.9354, "step": 12998 }, { "epoch": 53.05714285714286, "grad_norm": 22.865367889404297, "learning_rate": 2.3489795918367348e-05, "loss": 2.306, "step": 12999 }, { "epoch": 53.06122448979592, "grad_norm": 34.02870178222656, "learning_rate": 2.348775510204082e-05, "loss": 1.3118, "step": 13000 }, { "epoch": 53.06530612244898, "grad_norm": 19.940237045288086, "learning_rate": 2.3485714285714285e-05, "loss": 2.2112, "step": 13001 }, { "epoch": 53.06938775510204, "grad_norm": 18.277904510498047, "learning_rate": 2.3483673469387756e-05, "loss": 2.5102, "step": 13002 }, { "epoch": 53.073469387755104, "grad_norm": 23.455698013305664, "learning_rate": 2.3481632653061226e-05, "loss": 1.9951, "step": 13003 }, { "epoch": 53.077551020408166, "grad_norm": 19.65097999572754, "learning_rate": 2.3479591836734697e-05, "loss": 2.2793, "step": 13004 }, { "epoch": 53.08163265306123, "grad_norm": 37.033451080322266, "learning_rate": 2.3477551020408164e-05, "loss": 0.3109, "step": 13005 }, { "epoch": 53.08571428571429, "grad_norm": 35.784027099609375, "learning_rate": 2.347551020408163e-05, "loss": 0.924, "step": 13006 }, { "epoch": 53.089795918367344, "grad_norm": 32.93811798095703, "learning_rate": 2.3473469387755105e-05, "loss": 1.281, "step": 13007 }, { "epoch": 53.093877551020405, "grad_norm": 29.006933212280273, "learning_rate": 2.3471428571428572e-05, "loss": 1.7381, "step": 13008 }, { "epoch": 53.09795918367347, "grad_norm": 26.747404098510742, "learning_rate": 2.3469387755102043e-05, "loss": 2.0888, "step": 13009 }, { "epoch": 53.10204081632653, "grad_norm": 19.072473526000977, "learning_rate": 2.346734693877551e-05, "loss": 2.1072, "step": 13010 }, { "epoch": 53.10612244897959, "grad_norm": 39.274784088134766, "learning_rate": 2.346530612244898e-05, "loss": 0.7895, "step": 13011 }, { "epoch": 53.11020408163265, "grad_norm": 34.614959716796875, "learning_rate": 2.346326530612245e-05, "loss": 0.3256, "step": 13012 }, { "epoch": 53.114285714285714, "grad_norm": 34.62581253051758, "learning_rate": 2.3461224489795918e-05, "loss": 1.787, "step": 13013 }, { "epoch": 53.118367346938776, "grad_norm": 25.086631774902344, "learning_rate": 2.345918367346939e-05, "loss": 2.0108, "step": 13014 }, { "epoch": 53.12244897959184, "grad_norm": 37.96104431152344, "learning_rate": 2.345714285714286e-05, "loss": 0.2822, "step": 13015 }, { "epoch": 53.1265306122449, "grad_norm": 41.08367156982422, "learning_rate": 2.3455102040816326e-05, "loss": 0.9981, "step": 13016 }, { "epoch": 53.13061224489796, "grad_norm": 38.90422058105469, "learning_rate": 2.3453061224489797e-05, "loss": 1.3062, "step": 13017 }, { "epoch": 53.13469387755102, "grad_norm": 23.224224090576172, "learning_rate": 2.3451020408163267e-05, "loss": 2.3875, "step": 13018 }, { "epoch": 53.138775510204084, "grad_norm": 29.85540008544922, "learning_rate": 2.3448979591836738e-05, "loss": 0.7536, "step": 13019 }, { "epoch": 53.142857142857146, "grad_norm": 36.981483459472656, "learning_rate": 2.3446938775510205e-05, "loss": 0.3503, "step": 13020 }, { "epoch": 53.14693877551021, "grad_norm": 29.423534393310547, "learning_rate": 2.3444897959183675e-05, "loss": 1.8366, "step": 13021 }, { "epoch": 53.15102040816326, "grad_norm": 21.656282424926758, "learning_rate": 2.3442857142857143e-05, "loss": 2.2516, "step": 13022 }, { "epoch": 53.155102040816324, "grad_norm": 33.44819641113281, "learning_rate": 2.3440816326530613e-05, "loss": 1.6825, "step": 13023 }, { "epoch": 53.159183673469386, "grad_norm": 27.84954071044922, "learning_rate": 2.3438775510204084e-05, "loss": 1.826, "step": 13024 }, { "epoch": 53.16326530612245, "grad_norm": 16.171417236328125, "learning_rate": 2.343673469387755e-05, "loss": 2.6911, "step": 13025 }, { "epoch": 53.16734693877551, "grad_norm": 21.250761032104492, "learning_rate": 2.343469387755102e-05, "loss": 2.2331, "step": 13026 }, { "epoch": 53.17142857142857, "grad_norm": 28.984113693237305, "learning_rate": 2.343265306122449e-05, "loss": 1.6153, "step": 13027 }, { "epoch": 53.17551020408163, "grad_norm": 33.73373794555664, "learning_rate": 2.343061224489796e-05, "loss": 1.2529, "step": 13028 }, { "epoch": 53.179591836734694, "grad_norm": 20.51825714111328, "learning_rate": 2.342857142857143e-05, "loss": 2.1229, "step": 13029 }, { "epoch": 53.183673469387756, "grad_norm": 47.49230194091797, "learning_rate": 2.34265306122449e-05, "loss": 1.4998, "step": 13030 }, { "epoch": 53.18775510204082, "grad_norm": 34.399085998535156, "learning_rate": 2.342448979591837e-05, "loss": 1.3273, "step": 13031 }, { "epoch": 53.19183673469388, "grad_norm": 27.989377975463867, "learning_rate": 2.3422448979591837e-05, "loss": 1.7324, "step": 13032 }, { "epoch": 53.19591836734694, "grad_norm": 44.70745849609375, "learning_rate": 2.3420408163265305e-05, "loss": 1.042, "step": 13033 }, { "epoch": 53.2, "grad_norm": 33.45557403564453, "learning_rate": 2.341836734693878e-05, "loss": 1.8398, "step": 13034 }, { "epoch": 53.204081632653065, "grad_norm": 21.1072998046875, "learning_rate": 2.3416326530612246e-05, "loss": 2.1996, "step": 13035 }, { "epoch": 53.20816326530612, "grad_norm": 22.756671905517578, "learning_rate": 2.3414285714285716e-05, "loss": 2.1742, "step": 13036 }, { "epoch": 53.21224489795918, "grad_norm": 22.78536605834961, "learning_rate": 2.3412244897959183e-05, "loss": 2.1537, "step": 13037 }, { "epoch": 53.21632653061224, "grad_norm": 30.1861515045166, "learning_rate": 2.3410204081632654e-05, "loss": 1.6025, "step": 13038 }, { "epoch": 53.220408163265304, "grad_norm": 31.57135772705078, "learning_rate": 2.3408163265306124e-05, "loss": 0.9612, "step": 13039 }, { "epoch": 53.224489795918366, "grad_norm": 20.598262786865234, "learning_rate": 2.340612244897959e-05, "loss": 2.2273, "step": 13040 }, { "epoch": 53.22857142857143, "grad_norm": 24.647319793701172, "learning_rate": 2.3404081632653062e-05, "loss": 2.0891, "step": 13041 }, { "epoch": 53.23265306122449, "grad_norm": 40.41111373901367, "learning_rate": 2.3402040816326532e-05, "loss": 0.8308, "step": 13042 }, { "epoch": 53.23673469387755, "grad_norm": 16.87162971496582, "learning_rate": 2.3400000000000003e-05, "loss": 2.2627, "step": 13043 }, { "epoch": 53.24081632653061, "grad_norm": 13.235932350158691, "learning_rate": 2.339795918367347e-05, "loss": 2.7549, "step": 13044 }, { "epoch": 53.244897959183675, "grad_norm": 35.21994400024414, "learning_rate": 2.339591836734694e-05, "loss": 1.1118, "step": 13045 }, { "epoch": 53.248979591836736, "grad_norm": 26.319738388061523, "learning_rate": 2.339387755102041e-05, "loss": 2.0495, "step": 13046 }, { "epoch": 53.2530612244898, "grad_norm": 19.16963005065918, "learning_rate": 2.3391836734693878e-05, "loss": 2.1685, "step": 13047 }, { "epoch": 53.25714285714286, "grad_norm": 33.11424255371094, "learning_rate": 2.338979591836735e-05, "loss": 1.7175, "step": 13048 }, { "epoch": 53.26122448979592, "grad_norm": 14.308999061584473, "learning_rate": 2.3387755102040816e-05, "loss": 2.7244, "step": 13049 }, { "epoch": 53.265306122448976, "grad_norm": 31.289335250854492, "learning_rate": 2.3385714285714286e-05, "loss": 1.0195, "step": 13050 }, { "epoch": 53.26938775510204, "grad_norm": 30.45907974243164, "learning_rate": 2.3383673469387757e-05, "loss": 1.4614, "step": 13051 }, { "epoch": 53.2734693877551, "grad_norm": 28.85680389404297, "learning_rate": 2.3381632653061224e-05, "loss": 2.0921, "step": 13052 }, { "epoch": 53.27755102040816, "grad_norm": 43.01996612548828, "learning_rate": 2.3379591836734695e-05, "loss": 0.8716, "step": 13053 }, { "epoch": 53.28163265306122, "grad_norm": 35.772430419921875, "learning_rate": 2.3377551020408165e-05, "loss": 0.8417, "step": 13054 }, { "epoch": 53.285714285714285, "grad_norm": 31.77609634399414, "learning_rate": 2.3375510204081632e-05, "loss": 1.3662, "step": 13055 }, { "epoch": 53.289795918367346, "grad_norm": 27.26201820373535, "learning_rate": 2.3373469387755103e-05, "loss": 0.3984, "step": 13056 }, { "epoch": 53.29387755102041, "grad_norm": 37.516422271728516, "learning_rate": 2.3371428571428573e-05, "loss": 1.9214, "step": 13057 }, { "epoch": 53.29795918367347, "grad_norm": 32.63703155517578, "learning_rate": 2.3369387755102044e-05, "loss": 1.3759, "step": 13058 }, { "epoch": 53.30204081632653, "grad_norm": 12.268138885498047, "learning_rate": 2.336734693877551e-05, "loss": 2.749, "step": 13059 }, { "epoch": 53.30612244897959, "grad_norm": 25.507831573486328, "learning_rate": 2.3365306122448978e-05, "loss": 1.3293, "step": 13060 }, { "epoch": 53.310204081632655, "grad_norm": 27.915950775146484, "learning_rate": 2.3363265306122452e-05, "loss": 2.1578, "step": 13061 }, { "epoch": 53.31428571428572, "grad_norm": 33.95121383666992, "learning_rate": 2.336122448979592e-05, "loss": 1.1867, "step": 13062 }, { "epoch": 53.31836734693878, "grad_norm": 17.05011558532715, "learning_rate": 2.335918367346939e-05, "loss": 1.9998, "step": 13063 }, { "epoch": 53.32244897959184, "grad_norm": 32.37203598022461, "learning_rate": 2.3357142857142857e-05, "loss": 1.5245, "step": 13064 }, { "epoch": 53.326530612244895, "grad_norm": 61.842838287353516, "learning_rate": 2.335510204081633e-05, "loss": 0.5431, "step": 13065 }, { "epoch": 53.33061224489796, "grad_norm": 34.64113998413086, "learning_rate": 2.3353061224489798e-05, "loss": 1.7556, "step": 13066 }, { "epoch": 53.33469387755102, "grad_norm": 38.71465301513672, "learning_rate": 2.3351020408163265e-05, "loss": 1.2941, "step": 13067 }, { "epoch": 53.33877551020408, "grad_norm": 26.041820526123047, "learning_rate": 2.3348979591836735e-05, "loss": 1.5204, "step": 13068 }, { "epoch": 53.34285714285714, "grad_norm": 35.95103073120117, "learning_rate": 2.3346938775510206e-05, "loss": 1.3231, "step": 13069 }, { "epoch": 53.3469387755102, "grad_norm": 41.321781158447266, "learning_rate": 2.3344897959183676e-05, "loss": 0.9816, "step": 13070 }, { "epoch": 53.351020408163265, "grad_norm": 58.5338249206543, "learning_rate": 2.3342857142857143e-05, "loss": 1.093, "step": 13071 }, { "epoch": 53.35510204081633, "grad_norm": 21.132699966430664, "learning_rate": 2.3340816326530614e-05, "loss": 2.1277, "step": 13072 }, { "epoch": 53.35918367346939, "grad_norm": 36.35358428955078, "learning_rate": 2.3338775510204084e-05, "loss": 2.0724, "step": 13073 }, { "epoch": 53.36326530612245, "grad_norm": 23.089109420776367, "learning_rate": 2.333673469387755e-05, "loss": 1.8096, "step": 13074 }, { "epoch": 53.36734693877551, "grad_norm": 29.03253746032715, "learning_rate": 2.3334693877551022e-05, "loss": 2.0147, "step": 13075 }, { "epoch": 53.371428571428574, "grad_norm": 34.124603271484375, "learning_rate": 2.333265306122449e-05, "loss": 1.7, "step": 13076 }, { "epoch": 53.375510204081635, "grad_norm": 23.099679946899414, "learning_rate": 2.333061224489796e-05, "loss": 1.9594, "step": 13077 }, { "epoch": 53.3795918367347, "grad_norm": 33.07539749145508, "learning_rate": 2.332857142857143e-05, "loss": 1.5265, "step": 13078 }, { "epoch": 53.38367346938775, "grad_norm": 36.9084587097168, "learning_rate": 2.3326530612244897e-05, "loss": 0.8252, "step": 13079 }, { "epoch": 53.38775510204081, "grad_norm": 44.15182113647461, "learning_rate": 2.3324489795918368e-05, "loss": 1.1161, "step": 13080 }, { "epoch": 53.391836734693875, "grad_norm": 59.274986267089844, "learning_rate": 2.332244897959184e-05, "loss": 0.4157, "step": 13081 }, { "epoch": 53.39591836734694, "grad_norm": 24.137861251831055, "learning_rate": 2.3320408163265306e-05, "loss": 2.3093, "step": 13082 }, { "epoch": 53.4, "grad_norm": 47.436004638671875, "learning_rate": 2.3318367346938776e-05, "loss": 1.7429, "step": 13083 }, { "epoch": 53.40408163265306, "grad_norm": 25.02629280090332, "learning_rate": 2.3316326530612247e-05, "loss": 2.0748, "step": 13084 }, { "epoch": 53.40816326530612, "grad_norm": 21.581249237060547, "learning_rate": 2.3314285714285717e-05, "loss": 2.218, "step": 13085 }, { "epoch": 53.412244897959184, "grad_norm": 39.86996841430664, "learning_rate": 2.3312244897959184e-05, "loss": 0.9226, "step": 13086 }, { "epoch": 53.416326530612245, "grad_norm": 28.770919799804688, "learning_rate": 2.331020408163265e-05, "loss": 1.7105, "step": 13087 }, { "epoch": 53.42040816326531, "grad_norm": 31.53717613220215, "learning_rate": 2.3308163265306125e-05, "loss": 1.5411, "step": 13088 }, { "epoch": 53.42448979591837, "grad_norm": 37.84454345703125, "learning_rate": 2.3306122448979592e-05, "loss": 1.547, "step": 13089 }, { "epoch": 53.42857142857143, "grad_norm": 18.468360900878906, "learning_rate": 2.3304081632653063e-05, "loss": 2.7442, "step": 13090 }, { "epoch": 53.43265306122449, "grad_norm": 48.00198745727539, "learning_rate": 2.330204081632653e-05, "loss": 1.6174, "step": 13091 }, { "epoch": 53.436734693877554, "grad_norm": 35.810977935791016, "learning_rate": 2.3300000000000004e-05, "loss": 1.4167, "step": 13092 }, { "epoch": 53.440816326530616, "grad_norm": 14.817729949951172, "learning_rate": 2.329795918367347e-05, "loss": 2.6623, "step": 13093 }, { "epoch": 53.44489795918367, "grad_norm": 32.42366409301758, "learning_rate": 2.3295918367346938e-05, "loss": 1.7019, "step": 13094 }, { "epoch": 53.44897959183673, "grad_norm": 24.71291732788086, "learning_rate": 2.329387755102041e-05, "loss": 2.1885, "step": 13095 }, { "epoch": 53.453061224489794, "grad_norm": 29.68729019165039, "learning_rate": 2.329183673469388e-05, "loss": 1.1959, "step": 13096 }, { "epoch": 53.457142857142856, "grad_norm": 24.2142391204834, "learning_rate": 2.328979591836735e-05, "loss": 1.9226, "step": 13097 }, { "epoch": 53.46122448979592, "grad_norm": 30.61794090270996, "learning_rate": 2.3287755102040817e-05, "loss": 1.4797, "step": 13098 }, { "epoch": 53.46530612244898, "grad_norm": 35.220638275146484, "learning_rate": 2.3285714285714287e-05, "loss": 1.2652, "step": 13099 }, { "epoch": 53.46938775510204, "grad_norm": 19.887895584106445, "learning_rate": 2.3283673469387758e-05, "loss": 2.4371, "step": 13100 }, { "epoch": 53.4734693877551, "grad_norm": 35.67900848388672, "learning_rate": 2.3281632653061225e-05, "loss": 0.7364, "step": 13101 }, { "epoch": 53.477551020408164, "grad_norm": 23.000110626220703, "learning_rate": 2.3279591836734696e-05, "loss": 1.4951, "step": 13102 }, { "epoch": 53.481632653061226, "grad_norm": 62.75932312011719, "learning_rate": 2.3277551020408163e-05, "loss": 1.3054, "step": 13103 }, { "epoch": 53.48571428571429, "grad_norm": 25.777978897094727, "learning_rate": 2.3275510204081633e-05, "loss": 1.8511, "step": 13104 }, { "epoch": 53.48979591836735, "grad_norm": 22.040775299072266, "learning_rate": 2.3273469387755104e-05, "loss": 2.2841, "step": 13105 }, { "epoch": 53.49387755102041, "grad_norm": 27.0233211517334, "learning_rate": 2.327142857142857e-05, "loss": 1.7682, "step": 13106 }, { "epoch": 53.49795918367347, "grad_norm": 34.528785705566406, "learning_rate": 2.326938775510204e-05, "loss": 1.6567, "step": 13107 }, { "epoch": 53.50204081632653, "grad_norm": 21.85715675354004, "learning_rate": 2.3267346938775512e-05, "loss": 2.3251, "step": 13108 }, { "epoch": 53.50612244897959, "grad_norm": 18.156993865966797, "learning_rate": 2.326530612244898e-05, "loss": 2.5456, "step": 13109 }, { "epoch": 53.51020408163265, "grad_norm": 23.21980094909668, "learning_rate": 2.326326530612245e-05, "loss": 2.049, "step": 13110 }, { "epoch": 53.51428571428571, "grad_norm": 21.08514404296875, "learning_rate": 2.326122448979592e-05, "loss": 2.316, "step": 13111 }, { "epoch": 53.518367346938774, "grad_norm": 38.69868850708008, "learning_rate": 2.325918367346939e-05, "loss": 0.4277, "step": 13112 }, { "epoch": 53.522448979591836, "grad_norm": 19.039920806884766, "learning_rate": 2.3257142857142858e-05, "loss": 2.1561, "step": 13113 }, { "epoch": 53.5265306122449, "grad_norm": 41.994384765625, "learning_rate": 2.3255102040816328e-05, "loss": 0.8312, "step": 13114 }, { "epoch": 53.53061224489796, "grad_norm": 51.02459716796875, "learning_rate": 2.32530612244898e-05, "loss": 1.0317, "step": 13115 }, { "epoch": 53.53469387755102, "grad_norm": 40.05015563964844, "learning_rate": 2.3251020408163266e-05, "loss": 1.668, "step": 13116 }, { "epoch": 53.53877551020408, "grad_norm": 22.137510299682617, "learning_rate": 2.3248979591836736e-05, "loss": 2.2741, "step": 13117 }, { "epoch": 53.542857142857144, "grad_norm": 23.84425926208496, "learning_rate": 2.3246938775510203e-05, "loss": 1.9096, "step": 13118 }, { "epoch": 53.546938775510206, "grad_norm": 25.47188377380371, "learning_rate": 2.3244897959183677e-05, "loss": 2.086, "step": 13119 }, { "epoch": 53.55102040816327, "grad_norm": 31.150352478027344, "learning_rate": 2.3242857142857144e-05, "loss": 1.237, "step": 13120 }, { "epoch": 53.55510204081633, "grad_norm": 29.611881256103516, "learning_rate": 2.324081632653061e-05, "loss": 1.623, "step": 13121 }, { "epoch": 53.55918367346939, "grad_norm": 35.0950927734375, "learning_rate": 2.3238775510204082e-05, "loss": 1.8965, "step": 13122 }, { "epoch": 53.563265306122446, "grad_norm": 19.684049606323242, "learning_rate": 2.3236734693877553e-05, "loss": 2.2387, "step": 13123 }, { "epoch": 53.56734693877551, "grad_norm": 18.824127197265625, "learning_rate": 2.3234693877551023e-05, "loss": 2.3487, "step": 13124 }, { "epoch": 53.57142857142857, "grad_norm": 34.35315704345703, "learning_rate": 2.323265306122449e-05, "loss": 0.9818, "step": 13125 }, { "epoch": 53.57551020408163, "grad_norm": 25.4700927734375, "learning_rate": 2.323061224489796e-05, "loss": 1.8373, "step": 13126 }, { "epoch": 53.57959183673469, "grad_norm": 33.8058967590332, "learning_rate": 2.322857142857143e-05, "loss": 1.2701, "step": 13127 }, { "epoch": 53.583673469387755, "grad_norm": 31.984174728393555, "learning_rate": 2.32265306122449e-05, "loss": 1.9443, "step": 13128 }, { "epoch": 53.587755102040816, "grad_norm": 29.887855529785156, "learning_rate": 2.322448979591837e-05, "loss": 1.826, "step": 13129 }, { "epoch": 53.59183673469388, "grad_norm": 35.1995849609375, "learning_rate": 2.3222448979591836e-05, "loss": 1.3045, "step": 13130 }, { "epoch": 53.59591836734694, "grad_norm": 26.87677764892578, "learning_rate": 2.3220408163265307e-05, "loss": 0.3425, "step": 13131 }, { "epoch": 53.6, "grad_norm": 39.19707489013672, "learning_rate": 2.3218367346938777e-05, "loss": 0.8458, "step": 13132 }, { "epoch": 53.60408163265306, "grad_norm": 34.50432205200195, "learning_rate": 2.3216326530612244e-05, "loss": 0.4526, "step": 13133 }, { "epoch": 53.608163265306125, "grad_norm": 22.24582290649414, "learning_rate": 2.3214285714285715e-05, "loss": 2.2231, "step": 13134 }, { "epoch": 53.61224489795919, "grad_norm": 33.607032775878906, "learning_rate": 2.3212244897959185e-05, "loss": 0.9897, "step": 13135 }, { "epoch": 53.61632653061225, "grad_norm": 23.478559494018555, "learning_rate": 2.3210204081632656e-05, "loss": 1.9134, "step": 13136 }, { "epoch": 53.6204081632653, "grad_norm": 20.812763214111328, "learning_rate": 2.3208163265306123e-05, "loss": 2.1565, "step": 13137 }, { "epoch": 53.624489795918365, "grad_norm": 28.473247528076172, "learning_rate": 2.3206122448979593e-05, "loss": 1.4024, "step": 13138 }, { "epoch": 53.628571428571426, "grad_norm": 22.41459846496582, "learning_rate": 2.3204081632653064e-05, "loss": 2.6056, "step": 13139 }, { "epoch": 53.63265306122449, "grad_norm": 50.53401565551758, "learning_rate": 2.320204081632653e-05, "loss": 1.25, "step": 13140 }, { "epoch": 53.63673469387755, "grad_norm": 39.737361907958984, "learning_rate": 2.32e-05, "loss": 0.7197, "step": 13141 }, { "epoch": 53.64081632653061, "grad_norm": 26.234907150268555, "learning_rate": 2.3197959183673472e-05, "loss": 1.5086, "step": 13142 }, { "epoch": 53.64489795918367, "grad_norm": 37.475826263427734, "learning_rate": 2.319591836734694e-05, "loss": 1.3529, "step": 13143 }, { "epoch": 53.648979591836735, "grad_norm": 36.00629806518555, "learning_rate": 2.319387755102041e-05, "loss": 1.5972, "step": 13144 }, { "epoch": 53.6530612244898, "grad_norm": 28.770483016967773, "learning_rate": 2.3191836734693877e-05, "loss": 1.6421, "step": 13145 }, { "epoch": 53.65714285714286, "grad_norm": 32.46793746948242, "learning_rate": 2.318979591836735e-05, "loss": 1.2441, "step": 13146 }, { "epoch": 53.66122448979592, "grad_norm": 37.505226135253906, "learning_rate": 2.3187755102040818e-05, "loss": 1.5173, "step": 13147 }, { "epoch": 53.66530612244898, "grad_norm": 37.6314697265625, "learning_rate": 2.3185714285714285e-05, "loss": 1.1082, "step": 13148 }, { "epoch": 53.66938775510204, "grad_norm": 30.50848388671875, "learning_rate": 2.3183673469387755e-05, "loss": 1.3302, "step": 13149 }, { "epoch": 53.673469387755105, "grad_norm": 34.00498962402344, "learning_rate": 2.3181632653061226e-05, "loss": 0.7122, "step": 13150 }, { "epoch": 53.67755102040816, "grad_norm": 40.26850891113281, "learning_rate": 2.3179591836734696e-05, "loss": 0.9079, "step": 13151 }, { "epoch": 53.68163265306122, "grad_norm": 41.82345199584961, "learning_rate": 2.3177551020408164e-05, "loss": 0.7678, "step": 13152 }, { "epoch": 53.68571428571428, "grad_norm": 42.880210876464844, "learning_rate": 2.3175510204081634e-05, "loss": 1.1153, "step": 13153 }, { "epoch": 53.689795918367345, "grad_norm": 21.255382537841797, "learning_rate": 2.3173469387755105e-05, "loss": 2.5738, "step": 13154 }, { "epoch": 53.69387755102041, "grad_norm": 31.489152908325195, "learning_rate": 2.3171428571428572e-05, "loss": 1.5398, "step": 13155 }, { "epoch": 53.69795918367347, "grad_norm": 32.486385345458984, "learning_rate": 2.3169387755102042e-05, "loss": 0.8001, "step": 13156 }, { "epoch": 53.70204081632653, "grad_norm": 31.219789505004883, "learning_rate": 2.316734693877551e-05, "loss": 2.1445, "step": 13157 }, { "epoch": 53.70612244897959, "grad_norm": 21.553091049194336, "learning_rate": 2.3165306122448983e-05, "loss": 1.8734, "step": 13158 }, { "epoch": 53.710204081632654, "grad_norm": 55.55403137207031, "learning_rate": 2.316326530612245e-05, "loss": 0.478, "step": 13159 }, { "epoch": 53.714285714285715, "grad_norm": 26.35631561279297, "learning_rate": 2.3161224489795918e-05, "loss": 2.0395, "step": 13160 }, { "epoch": 53.71836734693878, "grad_norm": 27.358442306518555, "learning_rate": 2.3159183673469388e-05, "loss": 1.9981, "step": 13161 }, { "epoch": 53.72244897959184, "grad_norm": 27.631502151489258, "learning_rate": 2.315714285714286e-05, "loss": 1.5813, "step": 13162 }, { "epoch": 53.7265306122449, "grad_norm": 24.583433151245117, "learning_rate": 2.315510204081633e-05, "loss": 1.4926, "step": 13163 }, { "epoch": 53.73061224489796, "grad_norm": 27.780561447143555, "learning_rate": 2.3153061224489796e-05, "loss": 1.2594, "step": 13164 }, { "epoch": 53.734693877551024, "grad_norm": 33.45469284057617, "learning_rate": 2.3151020408163267e-05, "loss": 2.2964, "step": 13165 }, { "epoch": 53.73877551020408, "grad_norm": 31.7752628326416, "learning_rate": 2.3148979591836737e-05, "loss": 1.3636, "step": 13166 }, { "epoch": 53.74285714285714, "grad_norm": 24.688379287719727, "learning_rate": 2.3146938775510204e-05, "loss": 1.8936, "step": 13167 }, { "epoch": 53.7469387755102, "grad_norm": 42.20689010620117, "learning_rate": 2.3144897959183675e-05, "loss": 0.7388, "step": 13168 }, { "epoch": 53.751020408163264, "grad_norm": 36.45371627807617, "learning_rate": 2.3142857142857145e-05, "loss": 1.1014, "step": 13169 }, { "epoch": 53.755102040816325, "grad_norm": 19.58833122253418, "learning_rate": 2.3140816326530613e-05, "loss": 2.6169, "step": 13170 }, { "epoch": 53.75918367346939, "grad_norm": 15.593280792236328, "learning_rate": 2.3138775510204083e-05, "loss": 3.0206, "step": 13171 }, { "epoch": 53.76326530612245, "grad_norm": 30.806766510009766, "learning_rate": 2.313673469387755e-05, "loss": 1.4006, "step": 13172 }, { "epoch": 53.76734693877551, "grad_norm": 28.37501335144043, "learning_rate": 2.3134693877551024e-05, "loss": 2.0936, "step": 13173 }, { "epoch": 53.77142857142857, "grad_norm": 19.95246696472168, "learning_rate": 2.313265306122449e-05, "loss": 2.2376, "step": 13174 }, { "epoch": 53.775510204081634, "grad_norm": 48.35932540893555, "learning_rate": 2.3130612244897958e-05, "loss": 0.7619, "step": 13175 }, { "epoch": 53.779591836734696, "grad_norm": 37.406524658203125, "learning_rate": 2.312857142857143e-05, "loss": 1.1086, "step": 13176 }, { "epoch": 53.78367346938776, "grad_norm": 34.848297119140625, "learning_rate": 2.31265306122449e-05, "loss": 1.8171, "step": 13177 }, { "epoch": 53.78775510204082, "grad_norm": 47.109840393066406, "learning_rate": 2.312448979591837e-05, "loss": 0.9318, "step": 13178 }, { "epoch": 53.79183673469388, "grad_norm": 17.93512725830078, "learning_rate": 2.3122448979591837e-05, "loss": 2.7332, "step": 13179 }, { "epoch": 53.795918367346935, "grad_norm": 16.078733444213867, "learning_rate": 2.3120408163265307e-05, "loss": 2.6036, "step": 13180 }, { "epoch": 53.8, "grad_norm": 27.617475509643555, "learning_rate": 2.3118367346938778e-05, "loss": 2.0327, "step": 13181 }, { "epoch": 53.80408163265306, "grad_norm": 18.545686721801758, "learning_rate": 2.3116326530612245e-05, "loss": 2.3783, "step": 13182 }, { "epoch": 53.80816326530612, "grad_norm": 28.496158599853516, "learning_rate": 2.3114285714285716e-05, "loss": 2.0614, "step": 13183 }, { "epoch": 53.81224489795918, "grad_norm": 20.038284301757812, "learning_rate": 2.3112244897959183e-05, "loss": 2.8674, "step": 13184 }, { "epoch": 53.816326530612244, "grad_norm": 46.23197937011719, "learning_rate": 2.3110204081632657e-05, "loss": 1.019, "step": 13185 }, { "epoch": 53.820408163265306, "grad_norm": 33.571754455566406, "learning_rate": 2.3108163265306124e-05, "loss": 1.8774, "step": 13186 }, { "epoch": 53.82448979591837, "grad_norm": 27.415555953979492, "learning_rate": 2.310612244897959e-05, "loss": 1.4798, "step": 13187 }, { "epoch": 53.82857142857143, "grad_norm": 18.191274642944336, "learning_rate": 2.310408163265306e-05, "loss": 2.6505, "step": 13188 }, { "epoch": 53.83265306122449, "grad_norm": 31.186689376831055, "learning_rate": 2.3102040816326532e-05, "loss": 0.9178, "step": 13189 }, { "epoch": 53.83673469387755, "grad_norm": 20.120397567749023, "learning_rate": 2.3100000000000002e-05, "loss": 2.7022, "step": 13190 }, { "epoch": 53.840816326530614, "grad_norm": 32.843505859375, "learning_rate": 2.309795918367347e-05, "loss": 0.4366, "step": 13191 }, { "epoch": 53.844897959183676, "grad_norm": 22.002525329589844, "learning_rate": 2.309591836734694e-05, "loss": 2.1982, "step": 13192 }, { "epoch": 53.84897959183674, "grad_norm": 15.849696159362793, "learning_rate": 2.309387755102041e-05, "loss": 2.5581, "step": 13193 }, { "epoch": 53.85306122448979, "grad_norm": 37.38627624511719, "learning_rate": 2.3091836734693878e-05, "loss": 1.8175, "step": 13194 }, { "epoch": 53.857142857142854, "grad_norm": 20.989004135131836, "learning_rate": 2.3089795918367348e-05, "loss": 2.0997, "step": 13195 }, { "epoch": 53.861224489795916, "grad_norm": 32.1439094543457, "learning_rate": 2.308775510204082e-05, "loss": 1.7816, "step": 13196 }, { "epoch": 53.86530612244898, "grad_norm": 26.7736759185791, "learning_rate": 2.3085714285714286e-05, "loss": 2.0771, "step": 13197 }, { "epoch": 53.86938775510204, "grad_norm": 47.03301239013672, "learning_rate": 2.3083673469387756e-05, "loss": 0.7927, "step": 13198 }, { "epoch": 53.8734693877551, "grad_norm": 51.40766143798828, "learning_rate": 2.3081632653061224e-05, "loss": 0.4184, "step": 13199 }, { "epoch": 53.87755102040816, "grad_norm": 38.29161834716797, "learning_rate": 2.3079591836734697e-05, "loss": 0.9529, "step": 13200 }, { "epoch": 53.881632653061224, "grad_norm": 23.042442321777344, "learning_rate": 2.3077551020408165e-05, "loss": 2.2862, "step": 13201 }, { "epoch": 53.885714285714286, "grad_norm": 22.457679748535156, "learning_rate": 2.3075510204081635e-05, "loss": 2.2434, "step": 13202 }, { "epoch": 53.88979591836735, "grad_norm": 33.807743072509766, "learning_rate": 2.3073469387755102e-05, "loss": 1.421, "step": 13203 }, { "epoch": 53.89387755102041, "grad_norm": 17.849443435668945, "learning_rate": 2.3071428571428573e-05, "loss": 2.2142, "step": 13204 }, { "epoch": 53.89795918367347, "grad_norm": 41.84092712402344, "learning_rate": 2.3069387755102043e-05, "loss": 0.9826, "step": 13205 }, { "epoch": 53.90204081632653, "grad_norm": 39.093292236328125, "learning_rate": 2.306734693877551e-05, "loss": 1.0524, "step": 13206 }, { "epoch": 53.906122448979595, "grad_norm": 34.786766052246094, "learning_rate": 2.306530612244898e-05, "loss": 1.3448, "step": 13207 }, { "epoch": 53.910204081632656, "grad_norm": 32.357994079589844, "learning_rate": 2.306326530612245e-05, "loss": 0.8993, "step": 13208 }, { "epoch": 53.91428571428571, "grad_norm": 26.139219284057617, "learning_rate": 2.306122448979592e-05, "loss": 1.5869, "step": 13209 }, { "epoch": 53.91836734693877, "grad_norm": 29.964771270751953, "learning_rate": 2.305918367346939e-05, "loss": 1.9394, "step": 13210 }, { "epoch": 53.922448979591834, "grad_norm": 23.612947463989258, "learning_rate": 2.3057142857142856e-05, "loss": 2.1129, "step": 13211 }, { "epoch": 53.926530612244896, "grad_norm": 31.174631118774414, "learning_rate": 2.305510204081633e-05, "loss": 0.9904, "step": 13212 }, { "epoch": 53.93061224489796, "grad_norm": 30.531522750854492, "learning_rate": 2.3053061224489797e-05, "loss": 1.7321, "step": 13213 }, { "epoch": 53.93469387755102, "grad_norm": 9.031083106994629, "learning_rate": 2.3051020408163264e-05, "loss": 3.299, "step": 13214 }, { "epoch": 53.93877551020408, "grad_norm": 29.305667877197266, "learning_rate": 2.3048979591836735e-05, "loss": 2.0826, "step": 13215 }, { "epoch": 53.94285714285714, "grad_norm": 21.033170700073242, "learning_rate": 2.3046938775510205e-05, "loss": 2.3508, "step": 13216 }, { "epoch": 53.946938775510205, "grad_norm": 28.609012603759766, "learning_rate": 2.3044897959183676e-05, "loss": 1.9212, "step": 13217 }, { "epoch": 53.95102040816327, "grad_norm": 32.3511962890625, "learning_rate": 2.3042857142857143e-05, "loss": 1.9316, "step": 13218 }, { "epoch": 53.95510204081633, "grad_norm": 16.37841033935547, "learning_rate": 2.3040816326530613e-05, "loss": 2.6386, "step": 13219 }, { "epoch": 53.95918367346939, "grad_norm": 54.55397415161133, "learning_rate": 2.3038775510204084e-05, "loss": 0.7599, "step": 13220 }, { "epoch": 53.96326530612245, "grad_norm": 31.043460845947266, "learning_rate": 2.303673469387755e-05, "loss": 1.2039, "step": 13221 }, { "epoch": 53.96734693877551, "grad_norm": 32.575401306152344, "learning_rate": 2.303469387755102e-05, "loss": 0.4191, "step": 13222 }, { "epoch": 53.97142857142857, "grad_norm": 29.407001495361328, "learning_rate": 2.3032653061224492e-05, "loss": 1.4326, "step": 13223 }, { "epoch": 53.97551020408163, "grad_norm": 35.56320571899414, "learning_rate": 2.3030612244897963e-05, "loss": 0.5541, "step": 13224 }, { "epoch": 53.97959183673469, "grad_norm": 38.056396484375, "learning_rate": 2.302857142857143e-05, "loss": 1.9265, "step": 13225 }, { "epoch": 53.98367346938775, "grad_norm": 34.35179138183594, "learning_rate": 2.3026530612244897e-05, "loss": 1.7117, "step": 13226 }, { "epoch": 53.987755102040815, "grad_norm": 33.500205993652344, "learning_rate": 2.302448979591837e-05, "loss": 0.9365, "step": 13227 }, { "epoch": 53.99183673469388, "grad_norm": 41.36466979980469, "learning_rate": 2.3022448979591838e-05, "loss": 0.6059, "step": 13228 }, { "epoch": 53.99591836734694, "grad_norm": 22.1790828704834, "learning_rate": 2.302040816326531e-05, "loss": 0.1948, "step": 13229 }, { "epoch": 54.0, "grad_norm": 21.699310302734375, "learning_rate": 2.3018367346938776e-05, "loss": 2.3724, "step": 13230 }, { "epoch": 54.00408163265306, "grad_norm": 24.944583892822266, "learning_rate": 2.3016326530612246e-05, "loss": 2.2275, "step": 13231 }, { "epoch": 54.00816326530612, "grad_norm": 36.37656021118164, "learning_rate": 2.3014285714285717e-05, "loss": 1.3532, "step": 13232 }, { "epoch": 54.012244897959185, "grad_norm": 44.798675537109375, "learning_rate": 2.3012244897959184e-05, "loss": 0.8757, "step": 13233 }, { "epoch": 54.01632653061225, "grad_norm": 31.248722076416016, "learning_rate": 2.3010204081632654e-05, "loss": 1.5865, "step": 13234 }, { "epoch": 54.02040816326531, "grad_norm": 39.850189208984375, "learning_rate": 2.3008163265306125e-05, "loss": 1.0209, "step": 13235 }, { "epoch": 54.02448979591837, "grad_norm": 23.25941276550293, "learning_rate": 2.3006122448979592e-05, "loss": 2.0206, "step": 13236 }, { "epoch": 54.02857142857143, "grad_norm": 35.23107147216797, "learning_rate": 2.3004081632653062e-05, "loss": 0.3528, "step": 13237 }, { "epoch": 54.03265306122449, "grad_norm": 23.28282928466797, "learning_rate": 2.300204081632653e-05, "loss": 1.9951, "step": 13238 }, { "epoch": 54.03673469387755, "grad_norm": 21.444292068481445, "learning_rate": 2.3000000000000003e-05, "loss": 2.2334, "step": 13239 }, { "epoch": 54.04081632653061, "grad_norm": 37.43394088745117, "learning_rate": 2.299795918367347e-05, "loss": 1.2247, "step": 13240 }, { "epoch": 54.04489795918367, "grad_norm": 19.849088668823242, "learning_rate": 2.2995918367346938e-05, "loss": 2.1844, "step": 13241 }, { "epoch": 54.04897959183673, "grad_norm": NaN, "learning_rate": 2.2993877551020408e-05, "loss": 1.7616, "step": 13242 }, { "epoch": 54.053061224489795, "grad_norm": 18.82788848876953, "learning_rate": 2.2993877551020408e-05, "loss": 2.4759, "step": 13243 }, { "epoch": 54.05714285714286, "grad_norm": 52.098514556884766, "learning_rate": 2.299183673469388e-05, "loss": 1.1822, "step": 13244 }, { "epoch": 54.06122448979592, "grad_norm": 20.04787826538086, "learning_rate": 2.298979591836735e-05, "loss": 2.3001, "step": 13245 }, { "epoch": 54.06530612244898, "grad_norm": 28.7744140625, "learning_rate": 2.2987755102040816e-05, "loss": 1.3271, "step": 13246 }, { "epoch": 54.06938775510204, "grad_norm": 36.75437545776367, "learning_rate": 2.2985714285714287e-05, "loss": 1.0867, "step": 13247 }, { "epoch": 54.073469387755104, "grad_norm": 20.088655471801758, "learning_rate": 2.2983673469387757e-05, "loss": 2.0828, "step": 13248 }, { "epoch": 54.077551020408166, "grad_norm": 23.95477294921875, "learning_rate": 2.2981632653061224e-05, "loss": 2.1256, "step": 13249 }, { "epoch": 54.08163265306123, "grad_norm": 26.61357307434082, "learning_rate": 2.2979591836734695e-05, "loss": 1.902, "step": 13250 }, { "epoch": 54.08571428571429, "grad_norm": 39.20069122314453, "learning_rate": 2.2977551020408166e-05, "loss": 1.2901, "step": 13251 }, { "epoch": 54.089795918367344, "grad_norm": 32.337440490722656, "learning_rate": 2.2975510204081636e-05, "loss": 0.4089, "step": 13252 }, { "epoch": 54.093877551020405, "grad_norm": 15.98338508605957, "learning_rate": 2.2973469387755103e-05, "loss": 2.5888, "step": 13253 }, { "epoch": 54.09795918367347, "grad_norm": 23.034841537475586, "learning_rate": 2.297142857142857e-05, "loss": 2.2888, "step": 13254 }, { "epoch": 54.10204081632653, "grad_norm": 41.53562927246094, "learning_rate": 2.2969387755102044e-05, "loss": 0.7817, "step": 13255 }, { "epoch": 54.10612244897959, "grad_norm": 30.729686737060547, "learning_rate": 2.296734693877551e-05, "loss": 1.8029, "step": 13256 }, { "epoch": 54.11020408163265, "grad_norm": 32.15019989013672, "learning_rate": 2.2965306122448982e-05, "loss": 1.0751, "step": 13257 }, { "epoch": 54.114285714285714, "grad_norm": 42.48331069946289, "learning_rate": 2.296326530612245e-05, "loss": 0.7638, "step": 13258 }, { "epoch": 54.118367346938776, "grad_norm": 45.429351806640625, "learning_rate": 2.296122448979592e-05, "loss": 1.6693, "step": 13259 }, { "epoch": 54.12244897959184, "grad_norm": 23.785165786743164, "learning_rate": 2.295918367346939e-05, "loss": 2.3992, "step": 13260 }, { "epoch": 54.1265306122449, "grad_norm": 21.19223403930664, "learning_rate": 2.2957142857142857e-05, "loss": 2.2357, "step": 13261 }, { "epoch": 54.13061224489796, "grad_norm": 31.605348587036133, "learning_rate": 2.2955102040816328e-05, "loss": 1.0059, "step": 13262 }, { "epoch": 54.13469387755102, "grad_norm": 30.075136184692383, "learning_rate": 2.2953061224489798e-05, "loss": 0.2417, "step": 13263 }, { "epoch": 54.138775510204084, "grad_norm": 30.67237091064453, "learning_rate": 2.2951020408163265e-05, "loss": 1.3954, "step": 13264 }, { "epoch": 54.142857142857146, "grad_norm": 33.034969329833984, "learning_rate": 2.2948979591836736e-05, "loss": 0.7389, "step": 13265 }, { "epoch": 54.14693877551021, "grad_norm": 25.25896453857422, "learning_rate": 2.2946938775510203e-05, "loss": 1.4439, "step": 13266 }, { "epoch": 54.15102040816326, "grad_norm": 39.919273376464844, "learning_rate": 2.2944897959183677e-05, "loss": 0.9471, "step": 13267 }, { "epoch": 54.155102040816324, "grad_norm": 11.719239234924316, "learning_rate": 2.2942857142857144e-05, "loss": 3.286, "step": 13268 }, { "epoch": 54.159183673469386, "grad_norm": 44.168128967285156, "learning_rate": 2.294081632653061e-05, "loss": 0.9451, "step": 13269 }, { "epoch": 54.16326530612245, "grad_norm": 22.650253295898438, "learning_rate": 2.293877551020408e-05, "loss": 1.9466, "step": 13270 }, { "epoch": 54.16734693877551, "grad_norm": 34.41545104980469, "learning_rate": 2.2936734693877552e-05, "loss": 0.3044, "step": 13271 }, { "epoch": 54.17142857142857, "grad_norm": 20.26910972595215, "learning_rate": 2.2934693877551023e-05, "loss": 2.5065, "step": 13272 }, { "epoch": 54.17551020408163, "grad_norm": 18.835067749023438, "learning_rate": 2.293265306122449e-05, "loss": 2.4918, "step": 13273 }, { "epoch": 54.179591836734694, "grad_norm": 42.10474395751953, "learning_rate": 2.293061224489796e-05, "loss": 0.9973, "step": 13274 }, { "epoch": 54.183673469387756, "grad_norm": 23.462905883789062, "learning_rate": 2.292857142857143e-05, "loss": 2.5671, "step": 13275 }, { "epoch": 54.18775510204082, "grad_norm": 28.615032196044922, "learning_rate": 2.2926530612244898e-05, "loss": 1.5621, "step": 13276 }, { "epoch": 54.19183673469388, "grad_norm": 24.269453048706055, "learning_rate": 2.292448979591837e-05, "loss": 2.0542, "step": 13277 }, { "epoch": 54.19591836734694, "grad_norm": 28.32721710205078, "learning_rate": 2.292244897959184e-05, "loss": 1.5405, "step": 13278 }, { "epoch": 54.2, "grad_norm": 49.137290954589844, "learning_rate": 2.292040816326531e-05, "loss": 0.7008, "step": 13279 }, { "epoch": 54.204081632653065, "grad_norm": 35.3151969909668, "learning_rate": 2.2918367346938777e-05, "loss": 1.2896, "step": 13280 }, { "epoch": 54.20816326530612, "grad_norm": 25.797269821166992, "learning_rate": 2.2916326530612244e-05, "loss": 1.3673, "step": 13281 }, { "epoch": 54.21224489795918, "grad_norm": 20.770051956176758, "learning_rate": 2.2914285714285718e-05, "loss": 2.1614, "step": 13282 }, { "epoch": 54.21632653061224, "grad_norm": 18.944538116455078, "learning_rate": 2.2912244897959185e-05, "loss": 2.1795, "step": 13283 }, { "epoch": 54.220408163265304, "grad_norm": 21.32257652282715, "learning_rate": 2.2910204081632655e-05, "loss": 2.1388, "step": 13284 }, { "epoch": 54.224489795918366, "grad_norm": 43.97911834716797, "learning_rate": 2.2908163265306122e-05, "loss": 1.1748, "step": 13285 }, { "epoch": 54.22857142857143, "grad_norm": 28.85614013671875, "learning_rate": 2.2906122448979593e-05, "loss": 2.0417, "step": 13286 }, { "epoch": 54.23265306122449, "grad_norm": 29.500438690185547, "learning_rate": 2.2904081632653063e-05, "loss": 1.8123, "step": 13287 }, { "epoch": 54.23673469387755, "grad_norm": 35.9882926940918, "learning_rate": 2.290204081632653e-05, "loss": 0.7207, "step": 13288 }, { "epoch": 54.24081632653061, "grad_norm": 29.081602096557617, "learning_rate": 2.29e-05, "loss": 0.9309, "step": 13289 }, { "epoch": 54.244897959183675, "grad_norm": 21.268386840820312, "learning_rate": 2.289795918367347e-05, "loss": 1.8136, "step": 13290 }, { "epoch": 54.248979591836736, "grad_norm": 36.41891860961914, "learning_rate": 2.289591836734694e-05, "loss": 1.7116, "step": 13291 }, { "epoch": 54.2530612244898, "grad_norm": 22.78620719909668, "learning_rate": 2.289387755102041e-05, "loss": 2.268, "step": 13292 }, { "epoch": 54.25714285714286, "grad_norm": 37.68777084350586, "learning_rate": 2.2891836734693876e-05, "loss": 1.7516, "step": 13293 }, { "epoch": 54.26122448979592, "grad_norm": 28.682636260986328, "learning_rate": 2.288979591836735e-05, "loss": 1.6102, "step": 13294 }, { "epoch": 54.265306122448976, "grad_norm": 25.79014015197754, "learning_rate": 2.2887755102040817e-05, "loss": 1.4876, "step": 13295 }, { "epoch": 54.26938775510204, "grad_norm": 27.744516372680664, "learning_rate": 2.2885714285714288e-05, "loss": 1.766, "step": 13296 }, { "epoch": 54.2734693877551, "grad_norm": 37.6811637878418, "learning_rate": 2.2883673469387755e-05, "loss": 0.8448, "step": 13297 }, { "epoch": 54.27755102040816, "grad_norm": 31.00942039489746, "learning_rate": 2.2881632653061225e-05, "loss": 1.3504, "step": 13298 }, { "epoch": 54.28163265306122, "grad_norm": 29.561742782592773, "learning_rate": 2.2879591836734696e-05, "loss": 1.7589, "step": 13299 }, { "epoch": 54.285714285714285, "grad_norm": 35.85302734375, "learning_rate": 2.2877551020408163e-05, "loss": 1.5282, "step": 13300 }, { "epoch": 54.289795918367346, "grad_norm": 33.007938385009766, "learning_rate": 2.2875510204081634e-05, "loss": 0.9346, "step": 13301 }, { "epoch": 54.29387755102041, "grad_norm": 17.56024169921875, "learning_rate": 2.2873469387755104e-05, "loss": 2.5295, "step": 13302 }, { "epoch": 54.29795918367347, "grad_norm": 24.795391082763672, "learning_rate": 2.287142857142857e-05, "loss": 1.8671, "step": 13303 }, { "epoch": 54.30204081632653, "grad_norm": 32.57169723510742, "learning_rate": 2.2869387755102042e-05, "loss": 1.2702, "step": 13304 }, { "epoch": 54.30612244897959, "grad_norm": 21.68442153930664, "learning_rate": 2.2867346938775512e-05, "loss": 2.1924, "step": 13305 }, { "epoch": 54.310204081632655, "grad_norm": 36.71907043457031, "learning_rate": 2.2865306122448983e-05, "loss": 1.2049, "step": 13306 }, { "epoch": 54.31428571428572, "grad_norm": 26.72464370727539, "learning_rate": 2.286326530612245e-05, "loss": 2.0029, "step": 13307 }, { "epoch": 54.31836734693878, "grad_norm": 37.37247085571289, "learning_rate": 2.2861224489795917e-05, "loss": 1.2065, "step": 13308 }, { "epoch": 54.32244897959184, "grad_norm": 21.777467727661133, "learning_rate": 2.285918367346939e-05, "loss": 2.1957, "step": 13309 }, { "epoch": 54.326530612244895, "grad_norm": 22.146907806396484, "learning_rate": 2.2857142857142858e-05, "loss": 2.2449, "step": 13310 }, { "epoch": 54.33061224489796, "grad_norm": 26.830575942993164, "learning_rate": 2.285510204081633e-05, "loss": 2.0063, "step": 13311 }, { "epoch": 54.33469387755102, "grad_norm": 35.16023635864258, "learning_rate": 2.2853061224489796e-05, "loss": 1.4175, "step": 13312 }, { "epoch": 54.33877551020408, "grad_norm": 19.237802505493164, "learning_rate": 2.2851020408163266e-05, "loss": 2.1622, "step": 13313 }, { "epoch": 54.34285714285714, "grad_norm": 16.501501083374023, "learning_rate": 2.2848979591836737e-05, "loss": 2.6864, "step": 13314 }, { "epoch": 54.3469387755102, "grad_norm": 35.727020263671875, "learning_rate": 2.2846938775510204e-05, "loss": 0.8627, "step": 13315 }, { "epoch": 54.351020408163265, "grad_norm": 26.754135131835938, "learning_rate": 2.2844897959183674e-05, "loss": 1.5268, "step": 13316 }, { "epoch": 54.35510204081633, "grad_norm": 33.50705337524414, "learning_rate": 2.2842857142857145e-05, "loss": 0.2783, "step": 13317 }, { "epoch": 54.35918367346939, "grad_norm": 29.07453727722168, "learning_rate": 2.2840816326530615e-05, "loss": 2.0407, "step": 13318 }, { "epoch": 54.36326530612245, "grad_norm": 30.382936477661133, "learning_rate": 2.2838775510204083e-05, "loss": 1.9965, "step": 13319 }, { "epoch": 54.36734693877551, "grad_norm": 47.379276275634766, "learning_rate": 2.283673469387755e-05, "loss": 1.8393, "step": 13320 }, { "epoch": 54.371428571428574, "grad_norm": 31.906707763671875, "learning_rate": 2.2834693877551024e-05, "loss": 1.2514, "step": 13321 }, { "epoch": 54.375510204081635, "grad_norm": 35.307716369628906, "learning_rate": 2.283265306122449e-05, "loss": 1.8125, "step": 13322 }, { "epoch": 54.3795918367347, "grad_norm": 24.378379821777344, "learning_rate": 2.283061224489796e-05, "loss": 2.1021, "step": 13323 }, { "epoch": 54.38367346938775, "grad_norm": 25.586854934692383, "learning_rate": 2.2828571428571428e-05, "loss": 1.772, "step": 13324 }, { "epoch": 54.38775510204081, "grad_norm": 48.50950241088867, "learning_rate": 2.28265306122449e-05, "loss": 1.0299, "step": 13325 }, { "epoch": 54.391836734693875, "grad_norm": 26.603559494018555, "learning_rate": 2.282448979591837e-05, "loss": 2.0278, "step": 13326 }, { "epoch": 54.39591836734694, "grad_norm": 35.85689926147461, "learning_rate": 2.2822448979591836e-05, "loss": 0.813, "step": 13327 }, { "epoch": 54.4, "grad_norm": 30.955860137939453, "learning_rate": 2.2820408163265307e-05, "loss": 1.2319, "step": 13328 }, { "epoch": 54.40408163265306, "grad_norm": 29.60579490661621, "learning_rate": 2.2818367346938777e-05, "loss": 0.2934, "step": 13329 }, { "epoch": 54.40816326530612, "grad_norm": 15.479788780212402, "learning_rate": 2.2816326530612245e-05, "loss": 2.7773, "step": 13330 }, { "epoch": 54.412244897959184, "grad_norm": 38.9307746887207, "learning_rate": 2.2814285714285715e-05, "loss": 0.8524, "step": 13331 }, { "epoch": 54.416326530612245, "grad_norm": 42.60039138793945, "learning_rate": 2.2812244897959186e-05, "loss": 0.7725, "step": 13332 }, { "epoch": 54.42040816326531, "grad_norm": 49.029781341552734, "learning_rate": 2.2810204081632656e-05, "loss": 0.732, "step": 13333 }, { "epoch": 54.42448979591837, "grad_norm": 35.979515075683594, "learning_rate": 2.2808163265306123e-05, "loss": 1.8381, "step": 13334 }, { "epoch": 54.42857142857143, "grad_norm": 33.10790252685547, "learning_rate": 2.280612244897959e-05, "loss": 1.337, "step": 13335 }, { "epoch": 54.43265306122449, "grad_norm": 29.533226013183594, "learning_rate": 2.2804081632653064e-05, "loss": 1.5074, "step": 13336 }, { "epoch": 54.436734693877554, "grad_norm": 18.76946258544922, "learning_rate": 2.280204081632653e-05, "loss": 2.2751, "step": 13337 }, { "epoch": 54.440816326530616, "grad_norm": 23.974849700927734, "learning_rate": 2.2800000000000002e-05, "loss": 1.4138, "step": 13338 }, { "epoch": 54.44489795918367, "grad_norm": 17.658140182495117, "learning_rate": 2.279795918367347e-05, "loss": 2.5619, "step": 13339 }, { "epoch": 54.44897959183673, "grad_norm": 52.267765045166016, "learning_rate": 2.279591836734694e-05, "loss": 2.0277, "step": 13340 }, { "epoch": 54.453061224489794, "grad_norm": 53.916053771972656, "learning_rate": 2.279387755102041e-05, "loss": 0.7614, "step": 13341 }, { "epoch": 54.457142857142856, "grad_norm": 29.789730072021484, "learning_rate": 2.2791836734693877e-05, "loss": 2.1547, "step": 13342 }, { "epoch": 54.46122448979592, "grad_norm": 28.16664695739746, "learning_rate": 2.2789795918367348e-05, "loss": 1.958, "step": 13343 }, { "epoch": 54.46530612244898, "grad_norm": 51.687477111816406, "learning_rate": 2.2787755102040818e-05, "loss": 1.6862, "step": 13344 }, { "epoch": 54.46938775510204, "grad_norm": 46.97542190551758, "learning_rate": 2.278571428571429e-05, "loss": 0.9923, "step": 13345 }, { "epoch": 54.4734693877551, "grad_norm": 36.4897575378418, "learning_rate": 2.2783673469387756e-05, "loss": 1.3691, "step": 13346 }, { "epoch": 54.477551020408164, "grad_norm": 31.7996883392334, "learning_rate": 2.2781632653061223e-05, "loss": 1.6921, "step": 13347 }, { "epoch": 54.481632653061226, "grad_norm": 27.806455612182617, "learning_rate": 2.2779591836734697e-05, "loss": 2.0057, "step": 13348 }, { "epoch": 54.48571428571429, "grad_norm": 38.910865783691406, "learning_rate": 2.2777551020408164e-05, "loss": 1.4814, "step": 13349 }, { "epoch": 54.48979591836735, "grad_norm": 20.426204681396484, "learning_rate": 2.2775510204081635e-05, "loss": 2.8372, "step": 13350 }, { "epoch": 54.49387755102041, "grad_norm": 24.16925811767578, "learning_rate": 2.27734693877551e-05, "loss": 2.1304, "step": 13351 }, { "epoch": 54.49795918367347, "grad_norm": 39.94611740112305, "learning_rate": 2.2771428571428572e-05, "loss": 1.5067, "step": 13352 }, { "epoch": 54.50204081632653, "grad_norm": 23.147140502929688, "learning_rate": 2.2769387755102043e-05, "loss": 2.5906, "step": 13353 }, { "epoch": 54.50612244897959, "grad_norm": 35.49665069580078, "learning_rate": 2.276734693877551e-05, "loss": 0.8685, "step": 13354 }, { "epoch": 54.51020408163265, "grad_norm": 27.456281661987305, "learning_rate": 2.276530612244898e-05, "loss": 0.3488, "step": 13355 }, { "epoch": 54.51428571428571, "grad_norm": 28.208106994628906, "learning_rate": 2.276326530612245e-05, "loss": 1.6922, "step": 13356 }, { "epoch": 54.518367346938774, "grad_norm": 26.12883186340332, "learning_rate": 2.2761224489795918e-05, "loss": 1.977, "step": 13357 }, { "epoch": 54.522448979591836, "grad_norm": 27.271766662597656, "learning_rate": 2.275918367346939e-05, "loss": 1.6196, "step": 13358 }, { "epoch": 54.5265306122449, "grad_norm": 30.62306022644043, "learning_rate": 2.275714285714286e-05, "loss": 1.2137, "step": 13359 }, { "epoch": 54.53061224489796, "grad_norm": 23.483280181884766, "learning_rate": 2.275510204081633e-05, "loss": 1.8484, "step": 13360 }, { "epoch": 54.53469387755102, "grad_norm": 21.959651947021484, "learning_rate": 2.2753061224489797e-05, "loss": 2.0502, "step": 13361 }, { "epoch": 54.53877551020408, "grad_norm": 22.605104446411133, "learning_rate": 2.2751020408163267e-05, "loss": 0.1615, "step": 13362 }, { "epoch": 54.542857142857144, "grad_norm": 29.47739601135254, "learning_rate": 2.2748979591836738e-05, "loss": 1.6693, "step": 13363 }, { "epoch": 54.546938775510206, "grad_norm": 46.83498764038086, "learning_rate": 2.2746938775510205e-05, "loss": 0.8846, "step": 13364 }, { "epoch": 54.55102040816327, "grad_norm": 46.96902847290039, "learning_rate": 2.2744897959183675e-05, "loss": 1.8116, "step": 13365 }, { "epoch": 54.55510204081633, "grad_norm": 27.457674026489258, "learning_rate": 2.2742857142857142e-05, "loss": 1.7152, "step": 13366 }, { "epoch": 54.55918367346939, "grad_norm": 36.75078582763672, "learning_rate": 2.2740816326530613e-05, "loss": 0.9456, "step": 13367 }, { "epoch": 54.563265306122446, "grad_norm": 19.17906951904297, "learning_rate": 2.2738775510204083e-05, "loss": 2.6067, "step": 13368 }, { "epoch": 54.56734693877551, "grad_norm": 26.88538360595703, "learning_rate": 2.273673469387755e-05, "loss": 1.8293, "step": 13369 }, { "epoch": 54.57142857142857, "grad_norm": 15.068901062011719, "learning_rate": 2.273469387755102e-05, "loss": 2.7117, "step": 13370 }, { "epoch": 54.57551020408163, "grad_norm": 41.68354797363281, "learning_rate": 2.273265306122449e-05, "loss": 1.2976, "step": 13371 }, { "epoch": 54.57959183673469, "grad_norm": 22.667400360107422, "learning_rate": 2.2730612244897962e-05, "loss": 2.0057, "step": 13372 }, { "epoch": 54.583673469387755, "grad_norm": 27.690305709838867, "learning_rate": 2.272857142857143e-05, "loss": 1.9441, "step": 13373 }, { "epoch": 54.587755102040816, "grad_norm": 31.118898391723633, "learning_rate": 2.2726530612244896e-05, "loss": 1.4779, "step": 13374 }, { "epoch": 54.59183673469388, "grad_norm": 44.269691467285156, "learning_rate": 2.272448979591837e-05, "loss": 0.7433, "step": 13375 }, { "epoch": 54.59591836734694, "grad_norm": 42.74041748046875, "learning_rate": 2.2722448979591837e-05, "loss": 0.5901, "step": 13376 }, { "epoch": 54.6, "grad_norm": 17.32093048095703, "learning_rate": 2.2720408163265308e-05, "loss": 2.1071, "step": 13377 }, { "epoch": 54.60408163265306, "grad_norm": 28.12687110900879, "learning_rate": 2.2718367346938775e-05, "loss": 1.855, "step": 13378 }, { "epoch": 54.608163265306125, "grad_norm": 30.561525344848633, "learning_rate": 2.2716326530612246e-05, "loss": 1.7723, "step": 13379 }, { "epoch": 54.61224489795919, "grad_norm": 22.16321563720703, "learning_rate": 2.2714285714285716e-05, "loss": 2.2154, "step": 13380 }, { "epoch": 54.61632653061225, "grad_norm": 52.450950622558594, "learning_rate": 2.2712244897959183e-05, "loss": 0.9304, "step": 13381 }, { "epoch": 54.6204081632653, "grad_norm": 38.8537712097168, "learning_rate": 2.2710204081632654e-05, "loss": 0.7062, "step": 13382 }, { "epoch": 54.624489795918365, "grad_norm": 39.92131805419922, "learning_rate": 2.2708163265306124e-05, "loss": 1.6232, "step": 13383 }, { "epoch": 54.628571428571426, "grad_norm": 15.640237808227539, "learning_rate": 2.2706122448979595e-05, "loss": 2.7139, "step": 13384 }, { "epoch": 54.63265306122449, "grad_norm": 36.59608840942383, "learning_rate": 2.2704081632653062e-05, "loss": 0.8917, "step": 13385 }, { "epoch": 54.63673469387755, "grad_norm": 24.055965423583984, "learning_rate": 2.2702040816326532e-05, "loss": 1.9032, "step": 13386 }, { "epoch": 54.64081632653061, "grad_norm": 18.438467025756836, "learning_rate": 2.2700000000000003e-05, "loss": 2.6018, "step": 13387 }, { "epoch": 54.64489795918367, "grad_norm": 30.134302139282227, "learning_rate": 2.269795918367347e-05, "loss": 2.1247, "step": 13388 }, { "epoch": 54.648979591836735, "grad_norm": 32.087154388427734, "learning_rate": 2.269591836734694e-05, "loss": 1.519, "step": 13389 }, { "epoch": 54.6530612244898, "grad_norm": 22.46283721923828, "learning_rate": 2.269387755102041e-05, "loss": 2.1606, "step": 13390 }, { "epoch": 54.65714285714286, "grad_norm": 36.727535247802734, "learning_rate": 2.2691836734693878e-05, "loss": 0.6445, "step": 13391 }, { "epoch": 54.66122448979592, "grad_norm": 36.15314865112305, "learning_rate": 2.268979591836735e-05, "loss": 1.2311, "step": 13392 }, { "epoch": 54.66530612244898, "grad_norm": 38.780738830566406, "learning_rate": 2.2687755102040816e-05, "loss": 1.0127, "step": 13393 }, { "epoch": 54.66938775510204, "grad_norm": 9.511486053466797, "learning_rate": 2.2685714285714286e-05, "loss": 0.0848, "step": 13394 }, { "epoch": 54.673469387755105, "grad_norm": 35.31633377075195, "learning_rate": 2.2683673469387757e-05, "loss": 0.7955, "step": 13395 }, { "epoch": 54.67755102040816, "grad_norm": 40.481319427490234, "learning_rate": 2.2681632653061224e-05, "loss": 1.7597, "step": 13396 }, { "epoch": 54.68163265306122, "grad_norm": 37.22364807128906, "learning_rate": 2.2679591836734694e-05, "loss": 1.5111, "step": 13397 }, { "epoch": 54.68571428571428, "grad_norm": 35.02897262573242, "learning_rate": 2.2677551020408165e-05, "loss": 1.6255, "step": 13398 }, { "epoch": 54.689795918367345, "grad_norm": 46.89751052856445, "learning_rate": 2.2675510204081636e-05, "loss": 1.5084, "step": 13399 }, { "epoch": 54.69387755102041, "grad_norm": 27.0170841217041, "learning_rate": 2.2673469387755103e-05, "loss": 1.2934, "step": 13400 }, { "epoch": 54.69795918367347, "grad_norm": 24.245880126953125, "learning_rate": 2.267142857142857e-05, "loss": 1.8903, "step": 13401 }, { "epoch": 54.70204081632653, "grad_norm": 23.319656372070312, "learning_rate": 2.2669387755102044e-05, "loss": 2.2715, "step": 13402 }, { "epoch": 54.70612244897959, "grad_norm": 18.346214294433594, "learning_rate": 2.266734693877551e-05, "loss": 2.2219, "step": 13403 }, { "epoch": 54.710204081632654, "grad_norm": 37.4643440246582, "learning_rate": 2.266530612244898e-05, "loss": 1.0797, "step": 13404 }, { "epoch": 54.714285714285715, "grad_norm": 17.869699478149414, "learning_rate": 2.266326530612245e-05, "loss": 2.3425, "step": 13405 }, { "epoch": 54.71836734693878, "grad_norm": 32.659759521484375, "learning_rate": 2.2661224489795922e-05, "loss": 1.527, "step": 13406 }, { "epoch": 54.72244897959184, "grad_norm": 42.02513885498047, "learning_rate": 2.265918367346939e-05, "loss": 1.1676, "step": 13407 }, { "epoch": 54.7265306122449, "grad_norm": 31.150663375854492, "learning_rate": 2.2657142857142857e-05, "loss": 0.5016, "step": 13408 }, { "epoch": 54.73061224489796, "grad_norm": 23.40703773498535, "learning_rate": 2.2655102040816327e-05, "loss": 2.1763, "step": 13409 }, { "epoch": 54.734693877551024, "grad_norm": 52.217201232910156, "learning_rate": 2.2653061224489798e-05, "loss": 0.905, "step": 13410 }, { "epoch": 54.73877551020408, "grad_norm": 51.29417037963867, "learning_rate": 2.2651020408163268e-05, "loss": 1.2031, "step": 13411 }, { "epoch": 54.74285714285714, "grad_norm": 28.839763641357422, "learning_rate": 2.2648979591836735e-05, "loss": 1.3298, "step": 13412 }, { "epoch": 54.7469387755102, "grad_norm": 50.28594970703125, "learning_rate": 2.2646938775510206e-05, "loss": 0.773, "step": 13413 }, { "epoch": 54.751020408163264, "grad_norm": 16.334945678710938, "learning_rate": 2.2644897959183676e-05, "loss": 2.5088, "step": 13414 }, { "epoch": 54.755102040816325, "grad_norm": 22.771820068359375, "learning_rate": 2.2642857142857143e-05, "loss": 2.1229, "step": 13415 }, { "epoch": 54.75918367346939, "grad_norm": 36.73178482055664, "learning_rate": 2.2640816326530614e-05, "loss": 0.435, "step": 13416 }, { "epoch": 54.76326530612245, "grad_norm": 35.757747650146484, "learning_rate": 2.263877551020408e-05, "loss": 1.7058, "step": 13417 }, { "epoch": 54.76734693877551, "grad_norm": 36.52874755859375, "learning_rate": 2.263673469387755e-05, "loss": 0.98, "step": 13418 }, { "epoch": 54.77142857142857, "grad_norm": 17.63629722595215, "learning_rate": 2.2634693877551022e-05, "loss": 2.0103, "step": 13419 }, { "epoch": 54.775510204081634, "grad_norm": 21.05577850341797, "learning_rate": 2.263265306122449e-05, "loss": 1.9528, "step": 13420 }, { "epoch": 54.779591836734696, "grad_norm": 25.682239532470703, "learning_rate": 2.263061224489796e-05, "loss": 0.3136, "step": 13421 }, { "epoch": 54.78367346938776, "grad_norm": 37.93801498413086, "learning_rate": 2.262857142857143e-05, "loss": 0.841, "step": 13422 }, { "epoch": 54.78775510204082, "grad_norm": 22.340503692626953, "learning_rate": 2.2626530612244897e-05, "loss": 1.9029, "step": 13423 }, { "epoch": 54.79183673469388, "grad_norm": 19.007160186767578, "learning_rate": 2.2624489795918368e-05, "loss": 2.3155, "step": 13424 }, { "epoch": 54.795918367346935, "grad_norm": 37.06536865234375, "learning_rate": 2.262244897959184e-05, "loss": 1.8647, "step": 13425 }, { "epoch": 54.8, "grad_norm": 23.662118911743164, "learning_rate": 2.262040816326531e-05, "loss": 1.4671, "step": 13426 }, { "epoch": 54.80408163265306, "grad_norm": 28.862638473510742, "learning_rate": 2.2618367346938776e-05, "loss": 2.0855, "step": 13427 }, { "epoch": 54.80816326530612, "grad_norm": 27.334030151367188, "learning_rate": 2.2616326530612243e-05, "loss": 1.8017, "step": 13428 }, { "epoch": 54.81224489795918, "grad_norm": 40.7979850769043, "learning_rate": 2.2614285714285717e-05, "loss": 1.0265, "step": 13429 }, { "epoch": 54.816326530612244, "grad_norm": 53.21461486816406, "learning_rate": 2.2612244897959184e-05, "loss": 0.3279, "step": 13430 }, { "epoch": 54.820408163265306, "grad_norm": 13.63286018371582, "learning_rate": 2.2610204081632655e-05, "loss": 3.1329, "step": 13431 }, { "epoch": 54.82448979591837, "grad_norm": 37.76504898071289, "learning_rate": 2.2608163265306122e-05, "loss": 1.4219, "step": 13432 }, { "epoch": 54.82857142857143, "grad_norm": 28.451147079467773, "learning_rate": 2.2606122448979596e-05, "loss": 1.8699, "step": 13433 }, { "epoch": 54.83265306122449, "grad_norm": 32.39577865600586, "learning_rate": 2.2604081632653063e-05, "loss": 1.9474, "step": 13434 }, { "epoch": 54.83673469387755, "grad_norm": 18.772480010986328, "learning_rate": 2.260204081632653e-05, "loss": 2.1884, "step": 13435 }, { "epoch": 54.840816326530614, "grad_norm": 34.76791763305664, "learning_rate": 2.26e-05, "loss": 0.9771, "step": 13436 }, { "epoch": 54.844897959183676, "grad_norm": 18.157520294189453, "learning_rate": 2.259795918367347e-05, "loss": 2.3303, "step": 13437 }, { "epoch": 54.84897959183674, "grad_norm": 21.264575958251953, "learning_rate": 2.259591836734694e-05, "loss": 2.6709, "step": 13438 }, { "epoch": 54.85306122448979, "grad_norm": 32.6221923828125, "learning_rate": 2.259387755102041e-05, "loss": 2.06, "step": 13439 }, { "epoch": 54.857142857142854, "grad_norm": 51.04875183105469, "learning_rate": 2.259183673469388e-05, "loss": 0.8286, "step": 13440 }, { "epoch": 54.861224489795916, "grad_norm": 31.30988311767578, "learning_rate": 2.258979591836735e-05, "loss": 1.9438, "step": 13441 }, { "epoch": 54.86530612244898, "grad_norm": 29.817440032958984, "learning_rate": 2.2587755102040817e-05, "loss": 1.5068, "step": 13442 }, { "epoch": 54.86938775510204, "grad_norm": 38.75910949707031, "learning_rate": 2.2585714285714287e-05, "loss": 1.5367, "step": 13443 }, { "epoch": 54.8734693877551, "grad_norm": 24.833415985107422, "learning_rate": 2.2583673469387754e-05, "loss": 2.4187, "step": 13444 }, { "epoch": 54.87755102040816, "grad_norm": 13.102283477783203, "learning_rate": 2.2581632653061225e-05, "loss": 2.8002, "step": 13445 }, { "epoch": 54.881632653061224, "grad_norm": 33.62612533569336, "learning_rate": 2.2579591836734695e-05, "loss": 1.5987, "step": 13446 }, { "epoch": 54.885714285714286, "grad_norm": 22.591960906982422, "learning_rate": 2.2577551020408163e-05, "loss": 2.0245, "step": 13447 }, { "epoch": 54.88979591836735, "grad_norm": 38.15311813354492, "learning_rate": 2.2575510204081633e-05, "loss": 1.2616, "step": 13448 }, { "epoch": 54.89387755102041, "grad_norm": 37.00862121582031, "learning_rate": 2.2573469387755104e-05, "loss": 1.3145, "step": 13449 }, { "epoch": 54.89795918367347, "grad_norm": 30.20297622680664, "learning_rate": 2.257142857142857e-05, "loss": 1.6586, "step": 13450 }, { "epoch": 54.90204081632653, "grad_norm": 37.49203872680664, "learning_rate": 2.256938775510204e-05, "loss": 0.9439, "step": 13451 }, { "epoch": 54.906122448979595, "grad_norm": 21.2027530670166, "learning_rate": 2.2567346938775512e-05, "loss": 2.11, "step": 13452 }, { "epoch": 54.910204081632656, "grad_norm": 22.574195861816406, "learning_rate": 2.2565306122448982e-05, "loss": 2.2682, "step": 13453 }, { "epoch": 54.91428571428571, "grad_norm": 29.532285690307617, "learning_rate": 2.256326530612245e-05, "loss": 0.8982, "step": 13454 }, { "epoch": 54.91836734693877, "grad_norm": 21.679506301879883, "learning_rate": 2.256122448979592e-05, "loss": 1.9789, "step": 13455 }, { "epoch": 54.922448979591834, "grad_norm": 20.92911148071289, "learning_rate": 2.255918367346939e-05, "loss": 2.2287, "step": 13456 }, { "epoch": 54.926530612244896, "grad_norm": 24.763099670410156, "learning_rate": 2.2557142857142858e-05, "loss": 2.233, "step": 13457 }, { "epoch": 54.93061224489796, "grad_norm": 32.44569396972656, "learning_rate": 2.2555102040816328e-05, "loss": 1.2749, "step": 13458 }, { "epoch": 54.93469387755102, "grad_norm": 31.9318904876709, "learning_rate": 2.2553061224489795e-05, "loss": 2.2349, "step": 13459 }, { "epoch": 54.93877551020408, "grad_norm": 28.538183212280273, "learning_rate": 2.255102040816327e-05, "loss": 1.5937, "step": 13460 }, { "epoch": 54.94285714285714, "grad_norm": 30.33179473876953, "learning_rate": 2.2548979591836736e-05, "loss": 1.1521, "step": 13461 }, { "epoch": 54.946938775510205, "grad_norm": 35.96000289916992, "learning_rate": 2.2546938775510203e-05, "loss": 1.3461, "step": 13462 }, { "epoch": 54.95102040816327, "grad_norm": 30.990880966186523, "learning_rate": 2.2544897959183674e-05, "loss": 1.5168, "step": 13463 }, { "epoch": 54.95510204081633, "grad_norm": 14.463417053222656, "learning_rate": 2.2542857142857144e-05, "loss": 2.9689, "step": 13464 }, { "epoch": 54.95918367346939, "grad_norm": 22.04278564453125, "learning_rate": 2.2540816326530615e-05, "loss": 2.3089, "step": 13465 }, { "epoch": 54.96326530612245, "grad_norm": 31.60565185546875, "learning_rate": 2.2538775510204082e-05, "loss": 1.3642, "step": 13466 }, { "epoch": 54.96734693877551, "grad_norm": 29.532352447509766, "learning_rate": 2.2536734693877553e-05, "loss": 1.8395, "step": 13467 }, { "epoch": 54.97142857142857, "grad_norm": 36.793487548828125, "learning_rate": 2.2534693877551023e-05, "loss": 0.312, "step": 13468 }, { "epoch": 54.97551020408163, "grad_norm": 36.23655700683594, "learning_rate": 2.253265306122449e-05, "loss": 0.4355, "step": 13469 }, { "epoch": 54.97959183673469, "grad_norm": 28.802392959594727, "learning_rate": 2.253061224489796e-05, "loss": 1.7795, "step": 13470 }, { "epoch": 54.98367346938775, "grad_norm": 37.01490783691406, "learning_rate": 2.2528571428571428e-05, "loss": 0.4296, "step": 13471 }, { "epoch": 54.987755102040815, "grad_norm": 57.3743782043457, "learning_rate": 2.2526530612244898e-05, "loss": 0.4407, "step": 13472 }, { "epoch": 54.99183673469388, "grad_norm": 31.22369384765625, "learning_rate": 2.252448979591837e-05, "loss": 1.9996, "step": 13473 }, { "epoch": 54.99591836734694, "grad_norm": 17.907730102539062, "learning_rate": 2.2522448979591836e-05, "loss": 2.718, "step": 13474 }, { "epoch": 55.0, "grad_norm": 39.75284957885742, "learning_rate": 2.2520408163265306e-05, "loss": 0.9618, "step": 13475 }, { "epoch": 55.00408163265306, "grad_norm": 36.707763671875, "learning_rate": 2.2518367346938777e-05, "loss": 0.6121, "step": 13476 }, { "epoch": 55.00816326530612, "grad_norm": 23.551502227783203, "learning_rate": 2.2516326530612247e-05, "loss": 1.8803, "step": 13477 }, { "epoch": 55.012244897959185, "grad_norm": 27.213092803955078, "learning_rate": 2.2514285714285715e-05, "loss": 1.8953, "step": 13478 }, { "epoch": 55.01632653061225, "grad_norm": 30.17224884033203, "learning_rate": 2.2512244897959185e-05, "loss": 1.5952, "step": 13479 }, { "epoch": 55.02040816326531, "grad_norm": 35.5622673034668, "learning_rate": 2.2510204081632656e-05, "loss": 1.7015, "step": 13480 }, { "epoch": 55.02448979591837, "grad_norm": 37.980098724365234, "learning_rate": 2.2508163265306123e-05, "loss": 1.5724, "step": 13481 }, { "epoch": 55.02857142857143, "grad_norm": 25.8831844329834, "learning_rate": 2.2506122448979593e-05, "loss": 1.7799, "step": 13482 }, { "epoch": 55.03265306122449, "grad_norm": 24.028907775878906, "learning_rate": 2.2504081632653064e-05, "loss": 2.0733, "step": 13483 }, { "epoch": 55.03673469387755, "grad_norm": 38.110130310058594, "learning_rate": 2.250204081632653e-05, "loss": 0.3318, "step": 13484 }, { "epoch": 55.04081632653061, "grad_norm": 33.19646072387695, "learning_rate": 2.25e-05, "loss": 0.8112, "step": 13485 }, { "epoch": 55.04489795918367, "grad_norm": 59.7669563293457, "learning_rate": 2.249795918367347e-05, "loss": 0.9485, "step": 13486 }, { "epoch": 55.04897959183673, "grad_norm": 34.515079498291016, "learning_rate": 2.2495918367346942e-05, "loss": 1.2285, "step": 13487 }, { "epoch": 55.053061224489795, "grad_norm": 37.280860900878906, "learning_rate": 2.249387755102041e-05, "loss": 0.466, "step": 13488 }, { "epoch": 55.05714285714286, "grad_norm": 21.048919677734375, "learning_rate": 2.2491836734693877e-05, "loss": 2.2369, "step": 13489 }, { "epoch": 55.06122448979592, "grad_norm": 33.83894348144531, "learning_rate": 2.2489795918367347e-05, "loss": 1.8278, "step": 13490 }, { "epoch": 55.06530612244898, "grad_norm": 19.327800750732422, "learning_rate": 2.2487755102040818e-05, "loss": 2.2127, "step": 13491 }, { "epoch": 55.06938775510204, "grad_norm": 30.339330673217773, "learning_rate": 2.2485714285714288e-05, "loss": 1.6122, "step": 13492 }, { "epoch": 55.073469387755104, "grad_norm": 36.2990837097168, "learning_rate": 2.2483673469387755e-05, "loss": 1.2161, "step": 13493 }, { "epoch": 55.077551020408166, "grad_norm": 23.173656463623047, "learning_rate": 2.2481632653061226e-05, "loss": 1.927, "step": 13494 }, { "epoch": 55.08163265306123, "grad_norm": 24.95634651184082, "learning_rate": 2.2479591836734696e-05, "loss": 1.6282, "step": 13495 }, { "epoch": 55.08571428571429, "grad_norm": 23.242481231689453, "learning_rate": 2.2477551020408164e-05, "loss": 2.0123, "step": 13496 }, { "epoch": 55.089795918367344, "grad_norm": 53.19754409790039, "learning_rate": 2.2475510204081634e-05, "loss": 1.6462, "step": 13497 }, { "epoch": 55.093877551020405, "grad_norm": 29.29584312438965, "learning_rate": 2.24734693877551e-05, "loss": 2.01, "step": 13498 }, { "epoch": 55.09795918367347, "grad_norm": 29.843921661376953, "learning_rate": 2.2471428571428575e-05, "loss": 1.2481, "step": 13499 }, { "epoch": 55.10204081632653, "grad_norm": 50.3065299987793, "learning_rate": 2.2469387755102042e-05, "loss": 1.7058, "step": 13500 }, { "epoch": 55.10612244897959, "grad_norm": 26.67194938659668, "learning_rate": 2.246734693877551e-05, "loss": 2.1984, "step": 13501 }, { "epoch": 55.11020408163265, "grad_norm": 30.144695281982422, "learning_rate": 2.246530612244898e-05, "loss": 1.4449, "step": 13502 }, { "epoch": 55.114285714285714, "grad_norm": 30.64687728881836, "learning_rate": 2.246326530612245e-05, "loss": 1.6454, "step": 13503 }, { "epoch": 55.118367346938776, "grad_norm": 21.357065200805664, "learning_rate": 2.246122448979592e-05, "loss": 2.1076, "step": 13504 }, { "epoch": 55.12244897959184, "grad_norm": 54.91592025756836, "learning_rate": 2.2459183673469388e-05, "loss": 0.7216, "step": 13505 }, { "epoch": 55.1265306122449, "grad_norm": 29.01153564453125, "learning_rate": 2.245714285714286e-05, "loss": 0.3304, "step": 13506 }, { "epoch": 55.13061224489796, "grad_norm": 37.11408996582031, "learning_rate": 2.245510204081633e-05, "loss": 0.6613, "step": 13507 }, { "epoch": 55.13469387755102, "grad_norm": 36.40271759033203, "learning_rate": 2.2453061224489796e-05, "loss": 0.4246, "step": 13508 }, { "epoch": 55.138775510204084, "grad_norm": 37.26994323730469, "learning_rate": 2.2451020408163267e-05, "loss": 0.8218, "step": 13509 }, { "epoch": 55.142857142857146, "grad_norm": 38.34728240966797, "learning_rate": 2.2448979591836737e-05, "loss": 0.8176, "step": 13510 }, { "epoch": 55.14693877551021, "grad_norm": 30.440977096557617, "learning_rate": 2.2446938775510204e-05, "loss": 1.9771, "step": 13511 }, { "epoch": 55.15102040816326, "grad_norm": 45.95676803588867, "learning_rate": 2.2444897959183675e-05, "loss": 0.7158, "step": 13512 }, { "epoch": 55.155102040816324, "grad_norm": 21.475473403930664, "learning_rate": 2.2442857142857142e-05, "loss": 1.7781, "step": 13513 }, { "epoch": 55.159183673469386, "grad_norm": 29.8353328704834, "learning_rate": 2.2440816326530616e-05, "loss": 1.4873, "step": 13514 }, { "epoch": 55.16326530612245, "grad_norm": 20.51982879638672, "learning_rate": 2.2438775510204083e-05, "loss": 2.1464, "step": 13515 }, { "epoch": 55.16734693877551, "grad_norm": 23.259033203125, "learning_rate": 2.243673469387755e-05, "loss": 1.9991, "step": 13516 }, { "epoch": 55.17142857142857, "grad_norm": 26.379854202270508, "learning_rate": 2.243469387755102e-05, "loss": 2.0458, "step": 13517 }, { "epoch": 55.17551020408163, "grad_norm": 32.144649505615234, "learning_rate": 2.243265306122449e-05, "loss": 1.1386, "step": 13518 }, { "epoch": 55.179591836734694, "grad_norm": 17.9051570892334, "learning_rate": 2.243061224489796e-05, "loss": 2.6046, "step": 13519 }, { "epoch": 55.183673469387756, "grad_norm": 23.583303451538086, "learning_rate": 2.242857142857143e-05, "loss": 2.1438, "step": 13520 }, { "epoch": 55.18775510204082, "grad_norm": 18.92987632751465, "learning_rate": 2.24265306122449e-05, "loss": 2.2254, "step": 13521 }, { "epoch": 55.19183673469388, "grad_norm": 47.697505950927734, "learning_rate": 2.242448979591837e-05, "loss": 1.1361, "step": 13522 }, { "epoch": 55.19591836734694, "grad_norm": 22.715076446533203, "learning_rate": 2.2422448979591837e-05, "loss": 0.3349, "step": 13523 }, { "epoch": 55.2, "grad_norm": 38.68123245239258, "learning_rate": 2.2420408163265307e-05, "loss": 1.5483, "step": 13524 }, { "epoch": 55.204081632653065, "grad_norm": 51.07947540283203, "learning_rate": 2.2418367346938775e-05, "loss": 0.2806, "step": 13525 }, { "epoch": 55.20816326530612, "grad_norm": 31.709131240844727, "learning_rate": 2.241632653061225e-05, "loss": 0.3002, "step": 13526 }, { "epoch": 55.21224489795918, "grad_norm": 51.546295166015625, "learning_rate": 2.2414285714285716e-05, "loss": 0.8306, "step": 13527 }, { "epoch": 55.21632653061224, "grad_norm": 21.061532974243164, "learning_rate": 2.2412244897959183e-05, "loss": 2.2033, "step": 13528 }, { "epoch": 55.220408163265304, "grad_norm": 25.13351058959961, "learning_rate": 2.2410204081632653e-05, "loss": 1.463, "step": 13529 }, { "epoch": 55.224489795918366, "grad_norm": 43.27712631225586, "learning_rate": 2.2408163265306124e-05, "loss": 0.9369, "step": 13530 }, { "epoch": 55.22857142857143, "grad_norm": 24.48769760131836, "learning_rate": 2.2406122448979594e-05, "loss": 2.1761, "step": 13531 }, { "epoch": 55.23265306122449, "grad_norm": 34.45308303833008, "learning_rate": 2.240408163265306e-05, "loss": 1.0741, "step": 13532 }, { "epoch": 55.23673469387755, "grad_norm": 24.314844131469727, "learning_rate": 2.2402040816326532e-05, "loss": 1.9746, "step": 13533 }, { "epoch": 55.24081632653061, "grad_norm": 43.21940231323242, "learning_rate": 2.2400000000000002e-05, "loss": 0.9845, "step": 13534 }, { "epoch": 55.244897959183675, "grad_norm": 24.102008819580078, "learning_rate": 2.239795918367347e-05, "loss": 2.0889, "step": 13535 }, { "epoch": 55.248979591836736, "grad_norm": 23.68805694580078, "learning_rate": 2.239591836734694e-05, "loss": 2.1038, "step": 13536 }, { "epoch": 55.2530612244898, "grad_norm": 40.97171401977539, "learning_rate": 2.239387755102041e-05, "loss": 0.9682, "step": 13537 }, { "epoch": 55.25714285714286, "grad_norm": 30.529659271240234, "learning_rate": 2.2391836734693878e-05, "loss": 1.1411, "step": 13538 }, { "epoch": 55.26122448979592, "grad_norm": 34.24930191040039, "learning_rate": 2.2389795918367348e-05, "loss": 0.3605, "step": 13539 }, { "epoch": 55.265306122448976, "grad_norm": 31.225318908691406, "learning_rate": 2.2387755102040815e-05, "loss": 1.2545, "step": 13540 }, { "epoch": 55.26938775510204, "grad_norm": 31.67044448852539, "learning_rate": 2.238571428571429e-05, "loss": 1.2842, "step": 13541 }, { "epoch": 55.2734693877551, "grad_norm": 34.298179626464844, "learning_rate": 2.2383673469387756e-05, "loss": 1.2116, "step": 13542 }, { "epoch": 55.27755102040816, "grad_norm": 28.688512802124023, "learning_rate": 2.2381632653061227e-05, "loss": 1.1653, "step": 13543 }, { "epoch": 55.28163265306122, "grad_norm": 40.5574951171875, "learning_rate": 2.2379591836734694e-05, "loss": 1.7601, "step": 13544 }, { "epoch": 55.285714285714285, "grad_norm": 32.21697998046875, "learning_rate": 2.2377551020408164e-05, "loss": 1.7738, "step": 13545 }, { "epoch": 55.289795918367346, "grad_norm": 39.831336975097656, "learning_rate": 2.2375510204081635e-05, "loss": 2.0025, "step": 13546 }, { "epoch": 55.29387755102041, "grad_norm": 22.704227447509766, "learning_rate": 2.2373469387755102e-05, "loss": 2.1636, "step": 13547 }, { "epoch": 55.29795918367347, "grad_norm": 26.046985626220703, "learning_rate": 2.2371428571428573e-05, "loss": 1.8657, "step": 13548 }, { "epoch": 55.30204081632653, "grad_norm": 28.981945037841797, "learning_rate": 2.2369387755102043e-05, "loss": 1.4035, "step": 13549 }, { "epoch": 55.30612244897959, "grad_norm": 18.581138610839844, "learning_rate": 2.236734693877551e-05, "loss": 2.5731, "step": 13550 }, { "epoch": 55.310204081632655, "grad_norm": 21.466388702392578, "learning_rate": 2.236530612244898e-05, "loss": 2.2381, "step": 13551 }, { "epoch": 55.31428571428572, "grad_norm": 38.019378662109375, "learning_rate": 2.2363265306122448e-05, "loss": 1.6828, "step": 13552 }, { "epoch": 55.31836734693878, "grad_norm": 29.076942443847656, "learning_rate": 2.2361224489795922e-05, "loss": 1.919, "step": 13553 }, { "epoch": 55.32244897959184, "grad_norm": 44.07747268676758, "learning_rate": 2.235918367346939e-05, "loss": 0.7964, "step": 13554 }, { "epoch": 55.326530612244895, "grad_norm": 41.28087615966797, "learning_rate": 2.2357142857142856e-05, "loss": 1.3652, "step": 13555 }, { "epoch": 55.33061224489796, "grad_norm": 34.90807342529297, "learning_rate": 2.2355102040816327e-05, "loss": 1.4719, "step": 13556 }, { "epoch": 55.33469387755102, "grad_norm": 29.976211547851562, "learning_rate": 2.2353061224489797e-05, "loss": 1.8971, "step": 13557 }, { "epoch": 55.33877551020408, "grad_norm": 23.561290740966797, "learning_rate": 2.2351020408163268e-05, "loss": 2.2975, "step": 13558 }, { "epoch": 55.34285714285714, "grad_norm": 49.25872039794922, "learning_rate": 2.2348979591836735e-05, "loss": 0.9231, "step": 13559 }, { "epoch": 55.3469387755102, "grad_norm": 17.018726348876953, "learning_rate": 2.2346938775510205e-05, "loss": 2.5342, "step": 13560 }, { "epoch": 55.351020408163265, "grad_norm": 27.45561408996582, "learning_rate": 2.2344897959183676e-05, "loss": 1.3628, "step": 13561 }, { "epoch": 55.35510204081633, "grad_norm": 24.329143524169922, "learning_rate": 2.2342857142857143e-05, "loss": 1.8341, "step": 13562 }, { "epoch": 55.35918367346939, "grad_norm": 22.02680778503418, "learning_rate": 2.2340816326530613e-05, "loss": 2.5545, "step": 13563 }, { "epoch": 55.36326530612245, "grad_norm": 32.50920486450195, "learning_rate": 2.2338775510204084e-05, "loss": 1.1361, "step": 13564 }, { "epoch": 55.36734693877551, "grad_norm": 28.28559684753418, "learning_rate": 2.2336734693877554e-05, "loss": 1.7632, "step": 13565 }, { "epoch": 55.371428571428574, "grad_norm": 28.000017166137695, "learning_rate": 2.233469387755102e-05, "loss": 1.7928, "step": 13566 }, { "epoch": 55.375510204081635, "grad_norm": 26.254226684570312, "learning_rate": 2.233265306122449e-05, "loss": 1.2181, "step": 13567 }, { "epoch": 55.3795918367347, "grad_norm": 20.16672134399414, "learning_rate": 2.2330612244897963e-05, "loss": 2.1035, "step": 13568 }, { "epoch": 55.38367346938775, "grad_norm": 30.622709274291992, "learning_rate": 2.232857142857143e-05, "loss": 1.1934, "step": 13569 }, { "epoch": 55.38775510204081, "grad_norm": 38.482913970947266, "learning_rate": 2.23265306122449e-05, "loss": 1.4174, "step": 13570 }, { "epoch": 55.391836734693875, "grad_norm": 33.04939651489258, "learning_rate": 2.2324489795918367e-05, "loss": 1.3419, "step": 13571 }, { "epoch": 55.39591836734694, "grad_norm": 21.50530433654785, "learning_rate": 2.2322448979591838e-05, "loss": 2.1174, "step": 13572 }, { "epoch": 55.4, "grad_norm": 41.397525787353516, "learning_rate": 2.232040816326531e-05, "loss": 0.8435, "step": 13573 }, { "epoch": 55.40408163265306, "grad_norm": 46.37124252319336, "learning_rate": 2.2318367346938775e-05, "loss": 1.0549, "step": 13574 }, { "epoch": 55.40816326530612, "grad_norm": 26.472654342651367, "learning_rate": 2.2316326530612246e-05, "loss": 1.742, "step": 13575 }, { "epoch": 55.412244897959184, "grad_norm": 32.79700469970703, "learning_rate": 2.2314285714285717e-05, "loss": 1.774, "step": 13576 }, { "epoch": 55.416326530612245, "grad_norm": 34.82058334350586, "learning_rate": 2.2312244897959184e-05, "loss": 1.701, "step": 13577 }, { "epoch": 55.42040816326531, "grad_norm": 39.404232025146484, "learning_rate": 2.2310204081632654e-05, "loss": 1.2866, "step": 13578 }, { "epoch": 55.42448979591837, "grad_norm": 32.04530715942383, "learning_rate": 2.230816326530612e-05, "loss": 1.4723, "step": 13579 }, { "epoch": 55.42857142857143, "grad_norm": 30.209548950195312, "learning_rate": 2.2306122448979595e-05, "loss": 0.3449, "step": 13580 }, { "epoch": 55.43265306122449, "grad_norm": 15.962193489074707, "learning_rate": 2.2304081632653062e-05, "loss": 2.7384, "step": 13581 }, { "epoch": 55.436734693877554, "grad_norm": 13.470762252807617, "learning_rate": 2.230204081632653e-05, "loss": 2.7484, "step": 13582 }, { "epoch": 55.440816326530616, "grad_norm": 24.610496520996094, "learning_rate": 2.23e-05, "loss": 2.2911, "step": 13583 }, { "epoch": 55.44489795918367, "grad_norm": 34.46613693237305, "learning_rate": 2.229795918367347e-05, "loss": 1.7708, "step": 13584 }, { "epoch": 55.44897959183673, "grad_norm": 30.99812126159668, "learning_rate": 2.229591836734694e-05, "loss": 1.4567, "step": 13585 }, { "epoch": 55.453061224489794, "grad_norm": 19.173812866210938, "learning_rate": 2.2293877551020408e-05, "loss": 2.1237, "step": 13586 }, { "epoch": 55.457142857142856, "grad_norm": 41.873870849609375, "learning_rate": 2.229183673469388e-05, "loss": 0.9169, "step": 13587 }, { "epoch": 55.46122448979592, "grad_norm": 30.64879608154297, "learning_rate": 2.228979591836735e-05, "loss": 1.0724, "step": 13588 }, { "epoch": 55.46530612244898, "grad_norm": 45.74679183959961, "learning_rate": 2.2287755102040816e-05, "loss": 0.9163, "step": 13589 }, { "epoch": 55.46938775510204, "grad_norm": 27.092435836791992, "learning_rate": 2.2285714285714287e-05, "loss": 2.1083, "step": 13590 }, { "epoch": 55.4734693877551, "grad_norm": 28.91520118713379, "learning_rate": 2.2283673469387757e-05, "loss": 0.2271, "step": 13591 }, { "epoch": 55.477551020408164, "grad_norm": 47.63884735107422, "learning_rate": 2.2281632653061228e-05, "loss": 0.4548, "step": 13592 }, { "epoch": 55.481632653061226, "grad_norm": 19.647676467895508, "learning_rate": 2.2279591836734695e-05, "loss": 2.1653, "step": 13593 }, { "epoch": 55.48571428571429, "grad_norm": 29.402814865112305, "learning_rate": 2.2277551020408162e-05, "loss": 2.0502, "step": 13594 }, { "epoch": 55.48979591836735, "grad_norm": 28.63002586364746, "learning_rate": 2.2275510204081636e-05, "loss": 2.0452, "step": 13595 }, { "epoch": 55.49387755102041, "grad_norm": 25.1059627532959, "learning_rate": 2.2273469387755103e-05, "loss": 1.9813, "step": 13596 }, { "epoch": 55.49795918367347, "grad_norm": 28.341398239135742, "learning_rate": 2.2271428571428574e-05, "loss": 1.4144, "step": 13597 }, { "epoch": 55.50204081632653, "grad_norm": 18.63382339477539, "learning_rate": 2.226938775510204e-05, "loss": 2.0656, "step": 13598 }, { "epoch": 55.50612244897959, "grad_norm": 43.49832534790039, "learning_rate": 2.226734693877551e-05, "loss": 1.3908, "step": 13599 }, { "epoch": 55.51020408163265, "grad_norm": 34.07382583618164, "learning_rate": 2.2265306122448982e-05, "loss": 0.7463, "step": 13600 }, { "epoch": 55.51428571428571, "grad_norm": 20.523616790771484, "learning_rate": 2.226326530612245e-05, "loss": 1.8187, "step": 13601 }, { "epoch": 55.518367346938774, "grad_norm": 25.440107345581055, "learning_rate": 2.226122448979592e-05, "loss": 1.6906, "step": 13602 }, { "epoch": 55.522448979591836, "grad_norm": 26.70699119567871, "learning_rate": 2.225918367346939e-05, "loss": 1.7878, "step": 13603 }, { "epoch": 55.5265306122449, "grad_norm": 32.72554016113281, "learning_rate": 2.2257142857142857e-05, "loss": 1.0026, "step": 13604 }, { "epoch": 55.53061224489796, "grad_norm": 29.24652671813965, "learning_rate": 2.2255102040816328e-05, "loss": 1.4251, "step": 13605 }, { "epoch": 55.53469387755102, "grad_norm": 37.165245056152344, "learning_rate": 2.2253061224489795e-05, "loss": 0.3295, "step": 13606 }, { "epoch": 55.53877551020408, "grad_norm": 30.688892364501953, "learning_rate": 2.225102040816327e-05, "loss": 1.1481, "step": 13607 }, { "epoch": 55.542857142857144, "grad_norm": 41.360721588134766, "learning_rate": 2.2248979591836736e-05, "loss": 0.7348, "step": 13608 }, { "epoch": 55.546938775510206, "grad_norm": 21.419952392578125, "learning_rate": 2.2246938775510203e-05, "loss": 2.8201, "step": 13609 }, { "epoch": 55.55102040816327, "grad_norm": 30.824462890625, "learning_rate": 2.2244897959183673e-05, "loss": 1.5763, "step": 13610 }, { "epoch": 55.55510204081633, "grad_norm": 32.542564392089844, "learning_rate": 2.2242857142857144e-05, "loss": 1.175, "step": 13611 }, { "epoch": 55.55918367346939, "grad_norm": 15.074952125549316, "learning_rate": 2.2240816326530614e-05, "loss": 2.6364, "step": 13612 }, { "epoch": 55.563265306122446, "grad_norm": 21.863418579101562, "learning_rate": 2.223877551020408e-05, "loss": 2.5094, "step": 13613 }, { "epoch": 55.56734693877551, "grad_norm": 12.534488677978516, "learning_rate": 2.2236734693877552e-05, "loss": 3.0503, "step": 13614 }, { "epoch": 55.57142857142857, "grad_norm": 34.61323928833008, "learning_rate": 2.2234693877551022e-05, "loss": 1.6601, "step": 13615 }, { "epoch": 55.57551020408163, "grad_norm": 25.997222900390625, "learning_rate": 2.223265306122449e-05, "loss": 1.9263, "step": 13616 }, { "epoch": 55.57959183673469, "grad_norm": 25.715167999267578, "learning_rate": 2.223061224489796e-05, "loss": 1.9284, "step": 13617 }, { "epoch": 55.583673469387755, "grad_norm": 34.49104690551758, "learning_rate": 2.222857142857143e-05, "loss": 1.4461, "step": 13618 }, { "epoch": 55.587755102040816, "grad_norm": 34.556461334228516, "learning_rate": 2.22265306122449e-05, "loss": 1.3938, "step": 13619 }, { "epoch": 55.59183673469388, "grad_norm": 59.967472076416016, "learning_rate": 2.2224489795918368e-05, "loss": 0.8224, "step": 13620 }, { "epoch": 55.59591836734694, "grad_norm": 34.86832046508789, "learning_rate": 2.2222448979591835e-05, "loss": 1.3701, "step": 13621 }, { "epoch": 55.6, "grad_norm": 45.62580108642578, "learning_rate": 2.222040816326531e-05, "loss": 1.4461, "step": 13622 }, { "epoch": 55.60408163265306, "grad_norm": 33.4414176940918, "learning_rate": 2.2218367346938776e-05, "loss": 0.2855, "step": 13623 }, { "epoch": 55.608163265306125, "grad_norm": 52.36882400512695, "learning_rate": 2.2216326530612247e-05, "loss": 0.9755, "step": 13624 }, { "epoch": 55.61224489795919, "grad_norm": 14.069771766662598, "learning_rate": 2.2214285714285714e-05, "loss": 2.9227, "step": 13625 }, { "epoch": 55.61632653061225, "grad_norm": 24.349872589111328, "learning_rate": 2.2212244897959185e-05, "loss": 2.2022, "step": 13626 }, { "epoch": 55.6204081632653, "grad_norm": 37.60521697998047, "learning_rate": 2.2210204081632655e-05, "loss": 1.8829, "step": 13627 }, { "epoch": 55.624489795918365, "grad_norm": 31.238739013671875, "learning_rate": 2.2208163265306122e-05, "loss": 1.7689, "step": 13628 }, { "epoch": 55.628571428571426, "grad_norm": 18.993425369262695, "learning_rate": 2.2206122448979593e-05, "loss": 1.9919, "step": 13629 }, { "epoch": 55.63265306122449, "grad_norm": 19.978185653686523, "learning_rate": 2.2204081632653063e-05, "loss": 2.5289, "step": 13630 }, { "epoch": 55.63673469387755, "grad_norm": 36.77748107910156, "learning_rate": 2.220204081632653e-05, "loss": 1.0499, "step": 13631 }, { "epoch": 55.64081632653061, "grad_norm": 42.50947189331055, "learning_rate": 2.22e-05, "loss": 0.928, "step": 13632 }, { "epoch": 55.64489795918367, "grad_norm": 36.076175689697266, "learning_rate": 2.2197959183673468e-05, "loss": 0.843, "step": 13633 }, { "epoch": 55.648979591836735, "grad_norm": 26.919559478759766, "learning_rate": 2.2195918367346942e-05, "loss": 2.0222, "step": 13634 }, { "epoch": 55.6530612244898, "grad_norm": 5.934274673461914, "learning_rate": 2.219387755102041e-05, "loss": 0.0719, "step": 13635 }, { "epoch": 55.65714285714286, "grad_norm": 41.57183074951172, "learning_rate": 2.219183673469388e-05, "loss": 1.2623, "step": 13636 }, { "epoch": 55.66122448979592, "grad_norm": 17.071273803710938, "learning_rate": 2.2189795918367347e-05, "loss": 2.5404, "step": 13637 }, { "epoch": 55.66530612244898, "grad_norm": 32.9473762512207, "learning_rate": 2.2187755102040817e-05, "loss": 1.3303, "step": 13638 }, { "epoch": 55.66938775510204, "grad_norm": 22.26316261291504, "learning_rate": 2.2185714285714288e-05, "loss": 1.9814, "step": 13639 }, { "epoch": 55.673469387755105, "grad_norm": 34.65692901611328, "learning_rate": 2.2183673469387755e-05, "loss": 0.5255, "step": 13640 }, { "epoch": 55.67755102040816, "grad_norm": 27.933635711669922, "learning_rate": 2.2181632653061225e-05, "loss": 1.5311, "step": 13641 }, { "epoch": 55.68163265306122, "grad_norm": 24.396108627319336, "learning_rate": 2.2179591836734696e-05, "loss": 1.9045, "step": 13642 }, { "epoch": 55.68571428571428, "grad_norm": 20.536991119384766, "learning_rate": 2.2177551020408163e-05, "loss": 2.1328, "step": 13643 }, { "epoch": 55.689795918367345, "grad_norm": 29.370866775512695, "learning_rate": 2.2175510204081634e-05, "loss": 0.9073, "step": 13644 }, { "epoch": 55.69387755102041, "grad_norm": 52.30563735961914, "learning_rate": 2.2173469387755104e-05, "loss": 1.0496, "step": 13645 }, { "epoch": 55.69795918367347, "grad_norm": 19.876901626586914, "learning_rate": 2.2171428571428575e-05, "loss": 2.3127, "step": 13646 }, { "epoch": 55.70204081632653, "grad_norm": 22.994094848632812, "learning_rate": 2.216938775510204e-05, "loss": 2.2298, "step": 13647 }, { "epoch": 55.70612244897959, "grad_norm": 28.22929573059082, "learning_rate": 2.216734693877551e-05, "loss": 1.6364, "step": 13648 }, { "epoch": 55.710204081632654, "grad_norm": 39.40762710571289, "learning_rate": 2.2165306122448983e-05, "loss": 0.724, "step": 13649 }, { "epoch": 55.714285714285715, "grad_norm": 24.287933349609375, "learning_rate": 2.216326530612245e-05, "loss": 2.0083, "step": 13650 }, { "epoch": 55.71836734693878, "grad_norm": 31.16489601135254, "learning_rate": 2.216122448979592e-05, "loss": 1.5541, "step": 13651 }, { "epoch": 55.72244897959184, "grad_norm": 35.14290237426758, "learning_rate": 2.2159183673469387e-05, "loss": 1.819, "step": 13652 }, { "epoch": 55.7265306122449, "grad_norm": 31.695049285888672, "learning_rate": 2.2157142857142858e-05, "loss": 1.1884, "step": 13653 }, { "epoch": 55.73061224489796, "grad_norm": 29.549571990966797, "learning_rate": 2.215510204081633e-05, "loss": 1.5354, "step": 13654 }, { "epoch": 55.734693877551024, "grad_norm": 22.785554885864258, "learning_rate": 2.2153061224489796e-05, "loss": 2.0057, "step": 13655 }, { "epoch": 55.73877551020408, "grad_norm": 27.347640991210938, "learning_rate": 2.2151020408163266e-05, "loss": 1.5503, "step": 13656 }, { "epoch": 55.74285714285714, "grad_norm": 33.89360427856445, "learning_rate": 2.2148979591836737e-05, "loss": 1.1953, "step": 13657 }, { "epoch": 55.7469387755102, "grad_norm": 31.9371395111084, "learning_rate": 2.2146938775510207e-05, "loss": 1.3018, "step": 13658 }, { "epoch": 55.751020408163264, "grad_norm": 36.650508880615234, "learning_rate": 2.2144897959183674e-05, "loss": 0.9148, "step": 13659 }, { "epoch": 55.755102040816325, "grad_norm": 32.88972473144531, "learning_rate": 2.214285714285714e-05, "loss": 0.6847, "step": 13660 }, { "epoch": 55.75918367346939, "grad_norm": 30.992767333984375, "learning_rate": 2.2140816326530615e-05, "loss": 0.7375, "step": 13661 }, { "epoch": 55.76326530612245, "grad_norm": 21.079492568969727, "learning_rate": 2.2138775510204082e-05, "loss": 2.0581, "step": 13662 }, { "epoch": 55.76734693877551, "grad_norm": 41.95075225830078, "learning_rate": 2.2136734693877553e-05, "loss": 1.0429, "step": 13663 }, { "epoch": 55.77142857142857, "grad_norm": 34.83749771118164, "learning_rate": 2.213469387755102e-05, "loss": 1.4745, "step": 13664 }, { "epoch": 55.775510204081634, "grad_norm": 33.90214157104492, "learning_rate": 2.213265306122449e-05, "loss": 1.7279, "step": 13665 }, { "epoch": 55.779591836734696, "grad_norm": 53.01645278930664, "learning_rate": 2.213061224489796e-05, "loss": 1.0262, "step": 13666 }, { "epoch": 55.78367346938776, "grad_norm": 31.36781883239746, "learning_rate": 2.2128571428571428e-05, "loss": 1.638, "step": 13667 }, { "epoch": 55.78775510204082, "grad_norm": 45.73792266845703, "learning_rate": 2.21265306122449e-05, "loss": 1.7683, "step": 13668 }, { "epoch": 55.79183673469388, "grad_norm": 9.009425163269043, "learning_rate": 2.212448979591837e-05, "loss": 3.2828, "step": 13669 }, { "epoch": 55.795918367346935, "grad_norm": 22.15412139892578, "learning_rate": 2.2122448979591836e-05, "loss": 2.3262, "step": 13670 }, { "epoch": 55.8, "grad_norm": 26.260231018066406, "learning_rate": 2.2120408163265307e-05, "loss": 1.9316, "step": 13671 }, { "epoch": 55.80408163265306, "grad_norm": 33.06019592285156, "learning_rate": 2.2118367346938777e-05, "loss": 1.5344, "step": 13672 }, { "epoch": 55.80816326530612, "grad_norm": 28.937393188476562, "learning_rate": 2.2116326530612248e-05, "loss": 1.8357, "step": 13673 }, { "epoch": 55.81224489795918, "grad_norm": 33.21651077270508, "learning_rate": 2.2114285714285715e-05, "loss": 0.7679, "step": 13674 }, { "epoch": 55.816326530612244, "grad_norm": 23.335559844970703, "learning_rate": 2.2112244897959182e-05, "loss": 0.1677, "step": 13675 }, { "epoch": 55.820408163265306, "grad_norm": 31.62356948852539, "learning_rate": 2.2110204081632656e-05, "loss": 1.4265, "step": 13676 }, { "epoch": 55.82448979591837, "grad_norm": 67.83121490478516, "learning_rate": 2.2108163265306123e-05, "loss": 0.3602, "step": 13677 }, { "epoch": 55.82857142857143, "grad_norm": 20.445568084716797, "learning_rate": 2.2106122448979594e-05, "loss": 2.2945, "step": 13678 }, { "epoch": 55.83265306122449, "grad_norm": 21.9834041595459, "learning_rate": 2.210408163265306e-05, "loss": 2.5499, "step": 13679 }, { "epoch": 55.83673469387755, "grad_norm": 36.327491760253906, "learning_rate": 2.210204081632653e-05, "loss": 0.9717, "step": 13680 }, { "epoch": 55.840816326530614, "grad_norm": 30.287395477294922, "learning_rate": 2.2100000000000002e-05, "loss": 2.0239, "step": 13681 }, { "epoch": 55.844897959183676, "grad_norm": 41.06196594238281, "learning_rate": 2.209795918367347e-05, "loss": 0.7406, "step": 13682 }, { "epoch": 55.84897959183674, "grad_norm": 18.363962173461914, "learning_rate": 2.209591836734694e-05, "loss": 2.6942, "step": 13683 }, { "epoch": 55.85306122448979, "grad_norm": 38.91974639892578, "learning_rate": 2.209387755102041e-05, "loss": 0.6617, "step": 13684 }, { "epoch": 55.857142857142854, "grad_norm": 20.10504722595215, "learning_rate": 2.209183673469388e-05, "loss": 2.1354, "step": 13685 }, { "epoch": 55.861224489795916, "grad_norm": 20.524991989135742, "learning_rate": 2.2089795918367348e-05, "loss": 2.6193, "step": 13686 }, { "epoch": 55.86530612244898, "grad_norm": 29.182279586791992, "learning_rate": 2.2087755102040815e-05, "loss": 1.8588, "step": 13687 }, { "epoch": 55.86938775510204, "grad_norm": 24.393972396850586, "learning_rate": 2.208571428571429e-05, "loss": 1.9414, "step": 13688 }, { "epoch": 55.8734693877551, "grad_norm": 42.45082473754883, "learning_rate": 2.2083673469387756e-05, "loss": 1.919, "step": 13689 }, { "epoch": 55.87755102040816, "grad_norm": 42.154808044433594, "learning_rate": 2.2081632653061226e-05, "loss": 1.9115, "step": 13690 }, { "epoch": 55.881632653061224, "grad_norm": 35.5080451965332, "learning_rate": 2.2079591836734693e-05, "loss": 0.8243, "step": 13691 }, { "epoch": 55.885714285714286, "grad_norm": 47.3405876159668, "learning_rate": 2.2077551020408164e-05, "loss": 1.1315, "step": 13692 }, { "epoch": 55.88979591836735, "grad_norm": 21.68158721923828, "learning_rate": 2.2075510204081634e-05, "loss": 2.2557, "step": 13693 }, { "epoch": 55.89387755102041, "grad_norm": 16.790903091430664, "learning_rate": 2.20734693877551e-05, "loss": 2.5532, "step": 13694 }, { "epoch": 55.89795918367347, "grad_norm": 15.942130088806152, "learning_rate": 2.2071428571428572e-05, "loss": 2.4876, "step": 13695 }, { "epoch": 55.90204081632653, "grad_norm": 21.594282150268555, "learning_rate": 2.2069387755102043e-05, "loss": 2.4868, "step": 13696 }, { "epoch": 55.906122448979595, "grad_norm": 16.289793014526367, "learning_rate": 2.206734693877551e-05, "loss": 2.6638, "step": 13697 }, { "epoch": 55.910204081632656, "grad_norm": 32.00336837768555, "learning_rate": 2.206530612244898e-05, "loss": 1.6029, "step": 13698 }, { "epoch": 55.91428571428571, "grad_norm": 33.19144058227539, "learning_rate": 2.206326530612245e-05, "loss": 0.9341, "step": 13699 }, { "epoch": 55.91836734693877, "grad_norm": 24.91140365600586, "learning_rate": 2.206122448979592e-05, "loss": 2.2103, "step": 13700 }, { "epoch": 55.922448979591834, "grad_norm": 23.13422966003418, "learning_rate": 2.205918367346939e-05, "loss": 2.4502, "step": 13701 }, { "epoch": 55.926530612244896, "grad_norm": 21.563932418823242, "learning_rate": 2.205714285714286e-05, "loss": 2.2586, "step": 13702 }, { "epoch": 55.93061224489796, "grad_norm": 35.62735366821289, "learning_rate": 2.205510204081633e-05, "loss": 0.9247, "step": 13703 }, { "epoch": 55.93469387755102, "grad_norm": 53.60158920288086, "learning_rate": 2.2053061224489797e-05, "loss": 0.9838, "step": 13704 }, { "epoch": 55.93877551020408, "grad_norm": 34.31617736816406, "learning_rate": 2.2051020408163267e-05, "loss": 1.4901, "step": 13705 }, { "epoch": 55.94285714285714, "grad_norm": 22.213144302368164, "learning_rate": 2.2048979591836734e-05, "loss": 2.1171, "step": 13706 }, { "epoch": 55.946938775510205, "grad_norm": 50.95606231689453, "learning_rate": 2.2046938775510205e-05, "loss": 1.9448, "step": 13707 }, { "epoch": 55.95102040816327, "grad_norm": 21.067846298217773, "learning_rate": 2.2044897959183675e-05, "loss": 2.2523, "step": 13708 }, { "epoch": 55.95510204081633, "grad_norm": 29.69450569152832, "learning_rate": 2.2042857142857142e-05, "loss": 1.8216, "step": 13709 }, { "epoch": 55.95918367346939, "grad_norm": 27.05759620666504, "learning_rate": 2.2040816326530613e-05, "loss": 2.1407, "step": 13710 }, { "epoch": 55.96326530612245, "grad_norm": 34.46790313720703, "learning_rate": 2.2038775510204083e-05, "loss": 1.2633, "step": 13711 }, { "epoch": 55.96734693877551, "grad_norm": 39.96002960205078, "learning_rate": 2.2036734693877554e-05, "loss": 0.9817, "step": 13712 }, { "epoch": 55.97142857142857, "grad_norm": 14.110036849975586, "learning_rate": 2.203469387755102e-05, "loss": 2.6916, "step": 13713 }, { "epoch": 55.97551020408163, "grad_norm": 27.172714233398438, "learning_rate": 2.2032653061224488e-05, "loss": 0.8687, "step": 13714 }, { "epoch": 55.97959183673469, "grad_norm": 34.30957794189453, "learning_rate": 2.2030612244897962e-05, "loss": 0.7757, "step": 13715 }, { "epoch": 55.98367346938775, "grad_norm": 26.629011154174805, "learning_rate": 2.202857142857143e-05, "loss": 1.7119, "step": 13716 }, { "epoch": 55.987755102040815, "grad_norm": 32.96318054199219, "learning_rate": 2.20265306122449e-05, "loss": 0.3438, "step": 13717 }, { "epoch": 55.99183673469388, "grad_norm": 30.582490921020508, "learning_rate": 2.2024489795918367e-05, "loss": 1.206, "step": 13718 }, { "epoch": 55.99591836734694, "grad_norm": 22.492372512817383, "learning_rate": 2.2022448979591837e-05, "loss": 2.1558, "step": 13719 }, { "epoch": 56.0, "grad_norm": 31.83599090576172, "learning_rate": 2.2020408163265308e-05, "loss": 2.1886, "step": 13720 }, { "epoch": 56.00408163265306, "grad_norm": 30.054676055908203, "learning_rate": 2.2018367346938775e-05, "loss": 1.0695, "step": 13721 }, { "epoch": 56.00816326530612, "grad_norm": 28.683923721313477, "learning_rate": 2.2016326530612245e-05, "loss": 1.5518, "step": 13722 }, { "epoch": 56.012244897959185, "grad_norm": 54.870635986328125, "learning_rate": 2.2014285714285716e-05, "loss": 0.7675, "step": 13723 }, { "epoch": 56.01632653061225, "grad_norm": 27.642602920532227, "learning_rate": 2.2012244897959187e-05, "loss": 1.3138, "step": 13724 }, { "epoch": 56.02040816326531, "grad_norm": 38.80282974243164, "learning_rate": 2.2010204081632654e-05, "loss": 1.2498, "step": 13725 }, { "epoch": 56.02448979591837, "grad_norm": 29.689754486083984, "learning_rate": 2.2008163265306124e-05, "loss": 0.3088, "step": 13726 }, { "epoch": 56.02857142857143, "grad_norm": 27.384536743164062, "learning_rate": 2.2006122448979595e-05, "loss": 1.4396, "step": 13727 }, { "epoch": 56.03265306122449, "grad_norm": 19.895057678222656, "learning_rate": 2.2004081632653062e-05, "loss": 2.2131, "step": 13728 }, { "epoch": 56.03673469387755, "grad_norm": 40.056434631347656, "learning_rate": 2.2002040816326532e-05, "loss": 0.2904, "step": 13729 }, { "epoch": 56.04081632653061, "grad_norm": 37.61022186279297, "learning_rate": 2.2000000000000003e-05, "loss": 0.6526, "step": 13730 }, { "epoch": 56.04489795918367, "grad_norm": 31.711177825927734, "learning_rate": 2.199795918367347e-05, "loss": 1.7091, "step": 13731 }, { "epoch": 56.04897959183673, "grad_norm": 35.004146575927734, "learning_rate": 2.199591836734694e-05, "loss": 1.0807, "step": 13732 }, { "epoch": 56.053061224489795, "grad_norm": 37.45641326904297, "learning_rate": 2.1993877551020408e-05, "loss": 1.2412, "step": 13733 }, { "epoch": 56.05714285714286, "grad_norm": 24.323768615722656, "learning_rate": 2.1991836734693878e-05, "loss": 1.8881, "step": 13734 }, { "epoch": 56.06122448979592, "grad_norm": 32.404300689697266, "learning_rate": 2.198979591836735e-05, "loss": 0.9548, "step": 13735 }, { "epoch": 56.06530612244898, "grad_norm": 37.52985763549805, "learning_rate": 2.1987755102040816e-05, "loss": 1.7343, "step": 13736 }, { "epoch": 56.06938775510204, "grad_norm": 13.705957412719727, "learning_rate": 2.1985714285714286e-05, "loss": 2.712, "step": 13737 }, { "epoch": 56.073469387755104, "grad_norm": 24.061256408691406, "learning_rate": 2.1983673469387757e-05, "loss": 2.0707, "step": 13738 }, { "epoch": 56.077551020408166, "grad_norm": 38.453060150146484, "learning_rate": 2.1981632653061227e-05, "loss": 0.7908, "step": 13739 }, { "epoch": 56.08163265306123, "grad_norm": 14.07752799987793, "learning_rate": 2.1979591836734694e-05, "loss": 2.9958, "step": 13740 }, { "epoch": 56.08571428571429, "grad_norm": 27.205158233642578, "learning_rate": 2.197755102040816e-05, "loss": 1.6114, "step": 13741 }, { "epoch": 56.089795918367344, "grad_norm": 31.916603088378906, "learning_rate": 2.1975510204081635e-05, "loss": 1.3755, "step": 13742 }, { "epoch": 56.093877551020405, "grad_norm": 45.389984130859375, "learning_rate": 2.1973469387755103e-05, "loss": 0.6489, "step": 13743 }, { "epoch": 56.09795918367347, "grad_norm": 28.56932830810547, "learning_rate": 2.1971428571428573e-05, "loss": 1.2138, "step": 13744 }, { "epoch": 56.10204081632653, "grad_norm": 20.478235244750977, "learning_rate": 2.196938775510204e-05, "loss": 2.069, "step": 13745 }, { "epoch": 56.10612244897959, "grad_norm": 37.35090637207031, "learning_rate": 2.1967346938775514e-05, "loss": 1.6274, "step": 13746 }, { "epoch": 56.11020408163265, "grad_norm": 9.214181900024414, "learning_rate": 2.196530612244898e-05, "loss": 3.2416, "step": 13747 }, { "epoch": 56.114285714285714, "grad_norm": 40.757110595703125, "learning_rate": 2.196326530612245e-05, "loss": 0.841, "step": 13748 }, { "epoch": 56.118367346938776, "grad_norm": 28.194717407226562, "learning_rate": 2.196122448979592e-05, "loss": 1.2244, "step": 13749 }, { "epoch": 56.12244897959184, "grad_norm": 17.235286712646484, "learning_rate": 2.195918367346939e-05, "loss": 2.5887, "step": 13750 }, { "epoch": 56.1265306122449, "grad_norm": 28.491304397583008, "learning_rate": 2.195714285714286e-05, "loss": 1.4925, "step": 13751 }, { "epoch": 56.13061224489796, "grad_norm": 21.173715591430664, "learning_rate": 2.1955102040816327e-05, "loss": 1.9542, "step": 13752 }, { "epoch": 56.13469387755102, "grad_norm": 27.389371871948242, "learning_rate": 2.1953061224489798e-05, "loss": 1.2661, "step": 13753 }, { "epoch": 56.138775510204084, "grad_norm": 20.284521102905273, "learning_rate": 2.1951020408163268e-05, "loss": 2.1449, "step": 13754 }, { "epoch": 56.142857142857146, "grad_norm": 42.02445602416992, "learning_rate": 2.1948979591836735e-05, "loss": 1.795, "step": 13755 }, { "epoch": 56.14693877551021, "grad_norm": 35.32441329956055, "learning_rate": 2.1946938775510206e-05, "loss": 0.2992, "step": 13756 }, { "epoch": 56.15102040816326, "grad_norm": 18.144079208374023, "learning_rate": 2.1944897959183676e-05, "loss": 1.9082, "step": 13757 }, { "epoch": 56.155102040816324, "grad_norm": 47.12718200683594, "learning_rate": 2.1942857142857143e-05, "loss": 0.9828, "step": 13758 }, { "epoch": 56.159183673469386, "grad_norm": 47.6768798828125, "learning_rate": 2.1940816326530614e-05, "loss": 0.8144, "step": 13759 }, { "epoch": 56.16326530612245, "grad_norm": 22.411985397338867, "learning_rate": 2.193877551020408e-05, "loss": 2.1381, "step": 13760 }, { "epoch": 56.16734693877551, "grad_norm": 45.6354866027832, "learning_rate": 2.193673469387755e-05, "loss": 0.9687, "step": 13761 }, { "epoch": 56.17142857142857, "grad_norm": 32.29465103149414, "learning_rate": 2.1934693877551022e-05, "loss": 1.9277, "step": 13762 }, { "epoch": 56.17551020408163, "grad_norm": 19.893125534057617, "learning_rate": 2.193265306122449e-05, "loss": 2.19, "step": 13763 }, { "epoch": 56.179591836734694, "grad_norm": 22.34231185913086, "learning_rate": 2.193061224489796e-05, "loss": 2.0329, "step": 13764 }, { "epoch": 56.183673469387756, "grad_norm": 41.75069808959961, "learning_rate": 2.192857142857143e-05, "loss": 1.0466, "step": 13765 }, { "epoch": 56.18775510204082, "grad_norm": 16.990703582763672, "learning_rate": 2.19265306122449e-05, "loss": 2.4768, "step": 13766 }, { "epoch": 56.19183673469388, "grad_norm": 14.481586456298828, "learning_rate": 2.1924489795918368e-05, "loss": 2.5742, "step": 13767 }, { "epoch": 56.19591836734694, "grad_norm": 24.649776458740234, "learning_rate": 2.1922448979591835e-05, "loss": 1.7858, "step": 13768 }, { "epoch": 56.2, "grad_norm": 22.030441284179688, "learning_rate": 2.192040816326531e-05, "loss": 2.0722, "step": 13769 }, { "epoch": 56.204081632653065, "grad_norm": 40.98118209838867, "learning_rate": 2.1918367346938776e-05, "loss": 0.9448, "step": 13770 }, { "epoch": 56.20816326530612, "grad_norm": 40.11695098876953, "learning_rate": 2.1916326530612246e-05, "loss": 0.7972, "step": 13771 }, { "epoch": 56.21224489795918, "grad_norm": 30.8267879486084, "learning_rate": 2.1914285714285714e-05, "loss": 0.3604, "step": 13772 }, { "epoch": 56.21632653061224, "grad_norm": 35.44868087768555, "learning_rate": 2.1912244897959187e-05, "loss": 0.4776, "step": 13773 }, { "epoch": 56.220408163265304, "grad_norm": 45.063743591308594, "learning_rate": 2.1910204081632655e-05, "loss": 1.3114, "step": 13774 }, { "epoch": 56.224489795918366, "grad_norm": 43.1828727722168, "learning_rate": 2.1908163265306122e-05, "loss": 1.7963, "step": 13775 }, { "epoch": 56.22857142857143, "grad_norm": 33.00761413574219, "learning_rate": 2.1906122448979592e-05, "loss": 1.7647, "step": 13776 }, { "epoch": 56.23265306122449, "grad_norm": 25.99562644958496, "learning_rate": 2.1904081632653063e-05, "loss": 1.8273, "step": 13777 }, { "epoch": 56.23673469387755, "grad_norm": 40.84093475341797, "learning_rate": 2.1902040816326533e-05, "loss": 1.2734, "step": 13778 }, { "epoch": 56.24081632653061, "grad_norm": 31.488306045532227, "learning_rate": 2.19e-05, "loss": 1.7959, "step": 13779 }, { "epoch": 56.244897959183675, "grad_norm": 51.520503997802734, "learning_rate": 2.189795918367347e-05, "loss": 0.9085, "step": 13780 }, { "epoch": 56.248979591836736, "grad_norm": 26.62300682067871, "learning_rate": 2.189591836734694e-05, "loss": 1.3898, "step": 13781 }, { "epoch": 56.2530612244898, "grad_norm": 18.801389694213867, "learning_rate": 2.189387755102041e-05, "loss": 2.0956, "step": 13782 }, { "epoch": 56.25714285714286, "grad_norm": 25.536090850830078, "learning_rate": 2.189183673469388e-05, "loss": 1.8247, "step": 13783 }, { "epoch": 56.26122448979592, "grad_norm": 23.866844177246094, "learning_rate": 2.188979591836735e-05, "loss": 2.106, "step": 13784 }, { "epoch": 56.265306122448976, "grad_norm": 36.05190658569336, "learning_rate": 2.1887755102040817e-05, "loss": 1.6636, "step": 13785 }, { "epoch": 56.26938775510204, "grad_norm": 58.237979888916016, "learning_rate": 2.1885714285714287e-05, "loss": 1.7058, "step": 13786 }, { "epoch": 56.2734693877551, "grad_norm": 29.12493324279785, "learning_rate": 2.1883673469387754e-05, "loss": 0.2877, "step": 13787 }, { "epoch": 56.27755102040816, "grad_norm": 45.032875061035156, "learning_rate": 2.1881632653061225e-05, "loss": 0.9098, "step": 13788 }, { "epoch": 56.28163265306122, "grad_norm": 63.06733703613281, "learning_rate": 2.1879591836734695e-05, "loss": 0.7491, "step": 13789 }, { "epoch": 56.285714285714285, "grad_norm": 14.910412788391113, "learning_rate": 2.1877551020408162e-05, "loss": 2.7074, "step": 13790 }, { "epoch": 56.289795918367346, "grad_norm": 20.135257720947266, "learning_rate": 2.1875510204081633e-05, "loss": 2.0976, "step": 13791 }, { "epoch": 56.29387755102041, "grad_norm": 21.645122528076172, "learning_rate": 2.1873469387755104e-05, "loss": 2.1932, "step": 13792 }, { "epoch": 56.29795918367347, "grad_norm": 32.61667251586914, "learning_rate": 2.1871428571428574e-05, "loss": 1.7141, "step": 13793 }, { "epoch": 56.30204081632653, "grad_norm": 35.01045608520508, "learning_rate": 2.186938775510204e-05, "loss": 1.0969, "step": 13794 }, { "epoch": 56.30612244897959, "grad_norm": 30.58026885986328, "learning_rate": 2.186734693877551e-05, "loss": 0.6479, "step": 13795 }, { "epoch": 56.310204081632655, "grad_norm": 26.87844467163086, "learning_rate": 2.1865306122448982e-05, "loss": 1.3826, "step": 13796 }, { "epoch": 56.31428571428572, "grad_norm": 22.985271453857422, "learning_rate": 2.186326530612245e-05, "loss": 2.1772, "step": 13797 }, { "epoch": 56.31836734693878, "grad_norm": 35.77719497680664, "learning_rate": 2.186122448979592e-05, "loss": 1.1768, "step": 13798 }, { "epoch": 56.32244897959184, "grad_norm": 30.5205135345459, "learning_rate": 2.1859183673469387e-05, "loss": 1.9089, "step": 13799 }, { "epoch": 56.326530612244895, "grad_norm": 34.7447395324707, "learning_rate": 2.185714285714286e-05, "loss": 1.3622, "step": 13800 }, { "epoch": 56.33061224489796, "grad_norm": 26.289424896240234, "learning_rate": 2.1855102040816328e-05, "loss": 1.8198, "step": 13801 }, { "epoch": 56.33469387755102, "grad_norm": 32.61309814453125, "learning_rate": 2.1853061224489795e-05, "loss": 1.0749, "step": 13802 }, { "epoch": 56.33877551020408, "grad_norm": 33.08140563964844, "learning_rate": 2.1851020408163266e-05, "loss": 1.7687, "step": 13803 }, { "epoch": 56.34285714285714, "grad_norm": 31.700029373168945, "learning_rate": 2.1848979591836736e-05, "loss": 1.8178, "step": 13804 }, { "epoch": 56.3469387755102, "grad_norm": 33.772762298583984, "learning_rate": 2.1846938775510207e-05, "loss": 1.3292, "step": 13805 }, { "epoch": 56.351020408163265, "grad_norm": 35.309486389160156, "learning_rate": 2.1844897959183674e-05, "loss": 1.3778, "step": 13806 }, { "epoch": 56.35510204081633, "grad_norm": 30.844213485717773, "learning_rate": 2.1842857142857144e-05, "loss": 1.871, "step": 13807 }, { "epoch": 56.35918367346939, "grad_norm": 22.83456039428711, "learning_rate": 2.1840816326530615e-05, "loss": 2.3733, "step": 13808 }, { "epoch": 56.36326530612245, "grad_norm": 21.377670288085938, "learning_rate": 2.1838775510204082e-05, "loss": 2.1171, "step": 13809 }, { "epoch": 56.36734693877551, "grad_norm": 36.92837142944336, "learning_rate": 2.1836734693877552e-05, "loss": 1.62, "step": 13810 }, { "epoch": 56.371428571428574, "grad_norm": 17.75177001953125, "learning_rate": 2.1834693877551023e-05, "loss": 2.5391, "step": 13811 }, { "epoch": 56.375510204081635, "grad_norm": 46.02458953857422, "learning_rate": 2.183265306122449e-05, "loss": 0.864, "step": 13812 }, { "epoch": 56.3795918367347, "grad_norm": 25.428497314453125, "learning_rate": 2.183061224489796e-05, "loss": 1.7456, "step": 13813 }, { "epoch": 56.38367346938775, "grad_norm": 31.808324813842773, "learning_rate": 2.1828571428571428e-05, "loss": 0.8405, "step": 13814 }, { "epoch": 56.38775510204081, "grad_norm": 24.03592300415039, "learning_rate": 2.1826530612244898e-05, "loss": 1.9596, "step": 13815 }, { "epoch": 56.391836734693875, "grad_norm": 35.901554107666016, "learning_rate": 2.182448979591837e-05, "loss": 2.1222, "step": 13816 }, { "epoch": 56.39591836734694, "grad_norm": 25.762630462646484, "learning_rate": 2.182244897959184e-05, "loss": 2.2762, "step": 13817 }, { "epoch": 56.4, "grad_norm": 36.67552185058594, "learning_rate": 2.1820408163265306e-05, "loss": 0.642, "step": 13818 }, { "epoch": 56.40408163265306, "grad_norm": 31.12050437927246, "learning_rate": 2.1818367346938777e-05, "loss": 1.5232, "step": 13819 }, { "epoch": 56.40816326530612, "grad_norm": 36.56972122192383, "learning_rate": 2.1816326530612247e-05, "loss": 0.7218, "step": 13820 }, { "epoch": 56.412244897959184, "grad_norm": 31.44158935546875, "learning_rate": 2.1814285714285715e-05, "loss": 0.6909, "step": 13821 }, { "epoch": 56.416326530612245, "grad_norm": 24.54730987548828, "learning_rate": 2.1812244897959185e-05, "loss": 2.024, "step": 13822 }, { "epoch": 56.42040816326531, "grad_norm": 29.377382278442383, "learning_rate": 2.1810204081632656e-05, "loss": 2.0036, "step": 13823 }, { "epoch": 56.42448979591837, "grad_norm": 17.264785766601562, "learning_rate": 2.1808163265306123e-05, "loss": 2.5808, "step": 13824 }, { "epoch": 56.42857142857143, "grad_norm": 35.27145004272461, "learning_rate": 2.1806122448979593e-05, "loss": 1.4504, "step": 13825 }, { "epoch": 56.43265306122449, "grad_norm": 44.56678009033203, "learning_rate": 2.180408163265306e-05, "loss": 1.0132, "step": 13826 }, { "epoch": 56.436734693877554, "grad_norm": 45.1174430847168, "learning_rate": 2.1802040816326534e-05, "loss": 0.8642, "step": 13827 }, { "epoch": 56.440816326530616, "grad_norm": 26.06285285949707, "learning_rate": 2.18e-05, "loss": 2.1789, "step": 13828 }, { "epoch": 56.44489795918367, "grad_norm": 19.730012893676758, "learning_rate": 2.179795918367347e-05, "loss": 2.2605, "step": 13829 }, { "epoch": 56.44897959183673, "grad_norm": 39.143524169921875, "learning_rate": 2.179591836734694e-05, "loss": 1.2179, "step": 13830 }, { "epoch": 56.453061224489794, "grad_norm": 27.319305419921875, "learning_rate": 2.179387755102041e-05, "loss": 1.3357, "step": 13831 }, { "epoch": 56.457142857142856, "grad_norm": 37.944210052490234, "learning_rate": 2.179183673469388e-05, "loss": 0.8966, "step": 13832 }, { "epoch": 56.46122448979592, "grad_norm": 32.376956939697266, "learning_rate": 2.1789795918367347e-05, "loss": 0.8394, "step": 13833 }, { "epoch": 56.46530612244898, "grad_norm": 28.09884262084961, "learning_rate": 2.1787755102040818e-05, "loss": 1.7195, "step": 13834 }, { "epoch": 56.46938775510204, "grad_norm": 35.02009201049805, "learning_rate": 2.1785714285714288e-05, "loss": 1.5081, "step": 13835 }, { "epoch": 56.4734693877551, "grad_norm": 25.85509490966797, "learning_rate": 2.1783673469387755e-05, "loss": 2.578, "step": 13836 }, { "epoch": 56.477551020408164, "grad_norm": 30.357715606689453, "learning_rate": 2.1781632653061226e-05, "loss": 1.5476, "step": 13837 }, { "epoch": 56.481632653061226, "grad_norm": 31.24637222290039, "learning_rate": 2.1779591836734693e-05, "loss": 1.4029, "step": 13838 }, { "epoch": 56.48571428571429, "grad_norm": 39.536865234375, "learning_rate": 2.1777551020408167e-05, "loss": 1.3036, "step": 13839 }, { "epoch": 56.48979591836735, "grad_norm": 25.6369571685791, "learning_rate": 2.1775510204081634e-05, "loss": 0.2782, "step": 13840 }, { "epoch": 56.49387755102041, "grad_norm": 19.68649673461914, "learning_rate": 2.17734693877551e-05, "loss": 2.5181, "step": 13841 }, { "epoch": 56.49795918367347, "grad_norm": 36.60153579711914, "learning_rate": 2.177142857142857e-05, "loss": 1.6588, "step": 13842 }, { "epoch": 56.50204081632653, "grad_norm": 36.27928161621094, "learning_rate": 2.1769387755102042e-05, "loss": 0.7129, "step": 13843 }, { "epoch": 56.50612244897959, "grad_norm": 30.73777198791504, "learning_rate": 2.1767346938775513e-05, "loss": 2.0296, "step": 13844 }, { "epoch": 56.51020408163265, "grad_norm": 38.80967712402344, "learning_rate": 2.176530612244898e-05, "loss": 0.8747, "step": 13845 }, { "epoch": 56.51428571428571, "grad_norm": 31.852930068969727, "learning_rate": 2.176326530612245e-05, "loss": 1.6092, "step": 13846 }, { "epoch": 56.518367346938774, "grad_norm": 26.523508071899414, "learning_rate": 2.176122448979592e-05, "loss": 0.2923, "step": 13847 }, { "epoch": 56.522448979591836, "grad_norm": 36.14175796508789, "learning_rate": 2.1759183673469388e-05, "loss": 1.5613, "step": 13848 }, { "epoch": 56.5265306122449, "grad_norm": 26.501773834228516, "learning_rate": 2.175714285714286e-05, "loss": 2.0162, "step": 13849 }, { "epoch": 56.53061224489796, "grad_norm": 35.44417190551758, "learning_rate": 2.175510204081633e-05, "loss": 0.7056, "step": 13850 }, { "epoch": 56.53469387755102, "grad_norm": 31.3056640625, "learning_rate": 2.1753061224489796e-05, "loss": 1.6686, "step": 13851 }, { "epoch": 56.53877551020408, "grad_norm": 46.23148727416992, "learning_rate": 2.1751020408163267e-05, "loss": 0.677, "step": 13852 }, { "epoch": 56.542857142857144, "grad_norm": 35.59496307373047, "learning_rate": 2.1748979591836734e-05, "loss": 1.1593, "step": 13853 }, { "epoch": 56.546938775510206, "grad_norm": 18.282142639160156, "learning_rate": 2.1746938775510208e-05, "loss": 2.5534, "step": 13854 }, { "epoch": 56.55102040816327, "grad_norm": 42.579341888427734, "learning_rate": 2.1744897959183675e-05, "loss": 1.0729, "step": 13855 }, { "epoch": 56.55510204081633, "grad_norm": 30.628475189208984, "learning_rate": 2.1742857142857142e-05, "loss": 1.9284, "step": 13856 }, { "epoch": 56.55918367346939, "grad_norm": 42.607730865478516, "learning_rate": 2.1740816326530612e-05, "loss": 0.9333, "step": 13857 }, { "epoch": 56.563265306122446, "grad_norm": 22.879329681396484, "learning_rate": 2.1738775510204083e-05, "loss": 1.8026, "step": 13858 }, { "epoch": 56.56734693877551, "grad_norm": 22.5564022064209, "learning_rate": 2.1736734693877553e-05, "loss": 2.5098, "step": 13859 }, { "epoch": 56.57142857142857, "grad_norm": 40.11919403076172, "learning_rate": 2.173469387755102e-05, "loss": 0.7383, "step": 13860 }, { "epoch": 56.57551020408163, "grad_norm": 31.49956703186035, "learning_rate": 2.173265306122449e-05, "loss": 1.0839, "step": 13861 }, { "epoch": 56.57959183673469, "grad_norm": 42.4164924621582, "learning_rate": 2.173061224489796e-05, "loss": 0.9216, "step": 13862 }, { "epoch": 56.583673469387755, "grad_norm": 26.05362892150879, "learning_rate": 2.172857142857143e-05, "loss": 1.9293, "step": 13863 }, { "epoch": 56.587755102040816, "grad_norm": 34.65319061279297, "learning_rate": 2.17265306122449e-05, "loss": 1.3748, "step": 13864 }, { "epoch": 56.59183673469388, "grad_norm": 42.86471176147461, "learning_rate": 2.1724489795918366e-05, "loss": 0.6775, "step": 13865 }, { "epoch": 56.59591836734694, "grad_norm": 25.453256607055664, "learning_rate": 2.172244897959184e-05, "loss": 1.3818, "step": 13866 }, { "epoch": 56.6, "grad_norm": 35.108177185058594, "learning_rate": 2.1720408163265307e-05, "loss": 1.5385, "step": 13867 }, { "epoch": 56.60408163265306, "grad_norm": 45.50444030761719, "learning_rate": 2.1718367346938774e-05, "loss": 1.2031, "step": 13868 }, { "epoch": 56.608163265306125, "grad_norm": 19.72184944152832, "learning_rate": 2.1716326530612245e-05, "loss": 2.264, "step": 13869 }, { "epoch": 56.61224489795919, "grad_norm": 28.007709503173828, "learning_rate": 2.1714285714285715e-05, "loss": 1.2786, "step": 13870 }, { "epoch": 56.61632653061225, "grad_norm": 34.96933364868164, "learning_rate": 2.1712244897959186e-05, "loss": 1.7658, "step": 13871 }, { "epoch": 56.6204081632653, "grad_norm": 17.161401748657227, "learning_rate": 2.1710204081632653e-05, "loss": 2.4193, "step": 13872 }, { "epoch": 56.624489795918365, "grad_norm": 30.962322235107422, "learning_rate": 2.1708163265306124e-05, "loss": 2.0046, "step": 13873 }, { "epoch": 56.628571428571426, "grad_norm": 23.984373092651367, "learning_rate": 2.1706122448979594e-05, "loss": 0.16, "step": 13874 }, { "epoch": 56.63265306122449, "grad_norm": 44.07102966308594, "learning_rate": 2.170408163265306e-05, "loss": 1.5644, "step": 13875 }, { "epoch": 56.63673469387755, "grad_norm": 23.1572208404541, "learning_rate": 2.1702040816326532e-05, "loss": 2.509, "step": 13876 }, { "epoch": 56.64081632653061, "grad_norm": 37.43503952026367, "learning_rate": 2.1700000000000002e-05, "loss": 0.4059, "step": 13877 }, { "epoch": 56.64489795918367, "grad_norm": 49.37178421020508, "learning_rate": 2.169795918367347e-05, "loss": 1.4406, "step": 13878 }, { "epoch": 56.648979591836735, "grad_norm": 38.31398391723633, "learning_rate": 2.169591836734694e-05, "loss": 1.274, "step": 13879 }, { "epoch": 56.6530612244898, "grad_norm": 29.924694061279297, "learning_rate": 2.1693877551020407e-05, "loss": 1.5085, "step": 13880 }, { "epoch": 56.65714285714286, "grad_norm": 25.68225860595703, "learning_rate": 2.169183673469388e-05, "loss": 0.4368, "step": 13881 }, { "epoch": 56.66122448979592, "grad_norm": 40.23659133911133, "learning_rate": 2.1689795918367348e-05, "loss": 1.1065, "step": 13882 }, { "epoch": 56.66530612244898, "grad_norm": 38.21619415283203, "learning_rate": 2.168775510204082e-05, "loss": 1.4512, "step": 13883 }, { "epoch": 56.66938775510204, "grad_norm": 55.24000930786133, "learning_rate": 2.1685714285714286e-05, "loss": 0.9422, "step": 13884 }, { "epoch": 56.673469387755105, "grad_norm": 31.448392868041992, "learning_rate": 2.1683673469387756e-05, "loss": 1.8862, "step": 13885 }, { "epoch": 56.67755102040816, "grad_norm": 40.87101364135742, "learning_rate": 2.1681632653061227e-05, "loss": 1.2265, "step": 13886 }, { "epoch": 56.68163265306122, "grad_norm": 34.050601959228516, "learning_rate": 2.1679591836734694e-05, "loss": 1.1883, "step": 13887 }, { "epoch": 56.68571428571428, "grad_norm": 35.59162521362305, "learning_rate": 2.1677551020408164e-05, "loss": 2.1092, "step": 13888 }, { "epoch": 56.689795918367345, "grad_norm": 20.69904899597168, "learning_rate": 2.1675510204081635e-05, "loss": 2.0709, "step": 13889 }, { "epoch": 56.69387755102041, "grad_norm": 21.1741886138916, "learning_rate": 2.1673469387755102e-05, "loss": 1.8714, "step": 13890 }, { "epoch": 56.69795918367347, "grad_norm": 30.793296813964844, "learning_rate": 2.1671428571428573e-05, "loss": 1.5135, "step": 13891 }, { "epoch": 56.70204081632653, "grad_norm": 22.79261589050293, "learning_rate": 2.166938775510204e-05, "loss": 2.1992, "step": 13892 }, { "epoch": 56.70612244897959, "grad_norm": 38.06837844848633, "learning_rate": 2.1667346938775514e-05, "loss": 0.9103, "step": 13893 }, { "epoch": 56.710204081632654, "grad_norm": 23.552433013916016, "learning_rate": 2.166530612244898e-05, "loss": 1.9275, "step": 13894 }, { "epoch": 56.714285714285715, "grad_norm": 28.978515625, "learning_rate": 2.1663265306122448e-05, "loss": 2.2604, "step": 13895 }, { "epoch": 56.71836734693878, "grad_norm": 24.484725952148438, "learning_rate": 2.166122448979592e-05, "loss": 1.8288, "step": 13896 }, { "epoch": 56.72244897959184, "grad_norm": 20.66058349609375, "learning_rate": 2.165918367346939e-05, "loss": 1.9697, "step": 13897 }, { "epoch": 56.7265306122449, "grad_norm": 25.880414962768555, "learning_rate": 2.165714285714286e-05, "loss": 2.0395, "step": 13898 }, { "epoch": 56.73061224489796, "grad_norm": 25.30929946899414, "learning_rate": 2.1655102040816326e-05, "loss": 2.3059, "step": 13899 }, { "epoch": 56.734693877551024, "grad_norm": 28.169727325439453, "learning_rate": 2.1653061224489797e-05, "loss": 1.9177, "step": 13900 }, { "epoch": 56.73877551020408, "grad_norm": 10.019929885864258, "learning_rate": 2.1651020408163268e-05, "loss": 0.0808, "step": 13901 }, { "epoch": 56.74285714285714, "grad_norm": 29.05846405029297, "learning_rate": 2.1648979591836735e-05, "loss": 1.6574, "step": 13902 }, { "epoch": 56.7469387755102, "grad_norm": 18.442094802856445, "learning_rate": 2.1646938775510205e-05, "loss": 2.6423, "step": 13903 }, { "epoch": 56.751020408163264, "grad_norm": 37.6002311706543, "learning_rate": 2.1644897959183676e-05, "loss": 0.7028, "step": 13904 }, { "epoch": 56.755102040816325, "grad_norm": 36.40834426879883, "learning_rate": 2.1642857142857146e-05, "loss": 1.4311, "step": 13905 }, { "epoch": 56.75918367346939, "grad_norm": 25.483417510986328, "learning_rate": 2.1640816326530613e-05, "loss": 2.0891, "step": 13906 }, { "epoch": 56.76326530612245, "grad_norm": 35.228546142578125, "learning_rate": 2.163877551020408e-05, "loss": 0.3768, "step": 13907 }, { "epoch": 56.76734693877551, "grad_norm": 32.29264450073242, "learning_rate": 2.1636734693877554e-05, "loss": 1.7983, "step": 13908 }, { "epoch": 56.77142857142857, "grad_norm": 24.11456871032715, "learning_rate": 2.163469387755102e-05, "loss": 2.4883, "step": 13909 }, { "epoch": 56.775510204081634, "grad_norm": 26.535564422607422, "learning_rate": 2.1632653061224492e-05, "loss": 0.3358, "step": 13910 }, { "epoch": 56.779591836734696, "grad_norm": 28.89615249633789, "learning_rate": 2.163061224489796e-05, "loss": 0.8231, "step": 13911 }, { "epoch": 56.78367346938776, "grad_norm": 34.48553466796875, "learning_rate": 2.162857142857143e-05, "loss": 1.8638, "step": 13912 }, { "epoch": 56.78775510204082, "grad_norm": 15.169008255004883, "learning_rate": 2.16265306122449e-05, "loss": 2.9213, "step": 13913 }, { "epoch": 56.79183673469388, "grad_norm": 34.439537048339844, "learning_rate": 2.1624489795918367e-05, "loss": 1.825, "step": 13914 }, { "epoch": 56.795918367346935, "grad_norm": 22.513696670532227, "learning_rate": 2.1622448979591838e-05, "loss": 2.1748, "step": 13915 }, { "epoch": 56.8, "grad_norm": 24.620769500732422, "learning_rate": 2.1620408163265308e-05, "loss": 2.3062, "step": 13916 }, { "epoch": 56.80408163265306, "grad_norm": 49.11113739013672, "learning_rate": 2.1618367346938775e-05, "loss": 0.9426, "step": 13917 }, { "epoch": 56.80816326530612, "grad_norm": 24.913482666015625, "learning_rate": 2.1616326530612246e-05, "loss": 1.9985, "step": 13918 }, { "epoch": 56.81224489795918, "grad_norm": 23.128414154052734, "learning_rate": 2.1614285714285713e-05, "loss": 1.9407, "step": 13919 }, { "epoch": 56.816326530612244, "grad_norm": 20.135244369506836, "learning_rate": 2.1612244897959187e-05, "loss": 2.8103, "step": 13920 }, { "epoch": 56.820408163265306, "grad_norm": 36.20223617553711, "learning_rate": 2.1610204081632654e-05, "loss": 0.8087, "step": 13921 }, { "epoch": 56.82448979591837, "grad_norm": 51.54083251953125, "learning_rate": 2.160816326530612e-05, "loss": 1.1578, "step": 13922 }, { "epoch": 56.82857142857143, "grad_norm": 21.05156135559082, "learning_rate": 2.1606122448979592e-05, "loss": 2.2785, "step": 13923 }, { "epoch": 56.83265306122449, "grad_norm": 34.195594787597656, "learning_rate": 2.1604081632653062e-05, "loss": 1.6714, "step": 13924 }, { "epoch": 56.83673469387755, "grad_norm": 27.0849609375, "learning_rate": 2.1602040816326533e-05, "loss": 0.8163, "step": 13925 }, { "epoch": 56.840816326530614, "grad_norm": 26.907840728759766, "learning_rate": 2.16e-05, "loss": 1.4639, "step": 13926 }, { "epoch": 56.844897959183676, "grad_norm": 16.91615104675293, "learning_rate": 2.159795918367347e-05, "loss": 2.6282, "step": 13927 }, { "epoch": 56.84897959183674, "grad_norm": 36.36181640625, "learning_rate": 2.159591836734694e-05, "loss": 1.1383, "step": 13928 }, { "epoch": 56.85306122448979, "grad_norm": 28.02385139465332, "learning_rate": 2.1593877551020408e-05, "loss": 2.017, "step": 13929 }, { "epoch": 56.857142857142854, "grad_norm": 47.19792556762695, "learning_rate": 2.159183673469388e-05, "loss": 0.7001, "step": 13930 }, { "epoch": 56.861224489795916, "grad_norm": 36.08311462402344, "learning_rate": 2.158979591836735e-05, "loss": 1.2362, "step": 13931 }, { "epoch": 56.86530612244898, "grad_norm": 39.729286193847656, "learning_rate": 2.158775510204082e-05, "loss": 0.3771, "step": 13932 }, { "epoch": 56.86938775510204, "grad_norm": 48.220245361328125, "learning_rate": 2.1585714285714287e-05, "loss": 1.1709, "step": 13933 }, { "epoch": 56.8734693877551, "grad_norm": 37.75824737548828, "learning_rate": 2.1583673469387754e-05, "loss": 1.2236, "step": 13934 }, { "epoch": 56.87755102040816, "grad_norm": 31.478910446166992, "learning_rate": 2.1581632653061228e-05, "loss": 1.649, "step": 13935 }, { "epoch": 56.881632653061224, "grad_norm": 40.784576416015625, "learning_rate": 2.1579591836734695e-05, "loss": 1.5246, "step": 13936 }, { "epoch": 56.885714285714286, "grad_norm": 50.55076599121094, "learning_rate": 2.1577551020408165e-05, "loss": 1.9485, "step": 13937 }, { "epoch": 56.88979591836735, "grad_norm": 41.48076248168945, "learning_rate": 2.1575510204081632e-05, "loss": 0.354, "step": 13938 }, { "epoch": 56.89387755102041, "grad_norm": 28.08345603942871, "learning_rate": 2.1573469387755103e-05, "loss": 1.9614, "step": 13939 }, { "epoch": 56.89795918367347, "grad_norm": 32.42351531982422, "learning_rate": 2.1571428571428574e-05, "loss": 1.8339, "step": 13940 }, { "epoch": 56.90204081632653, "grad_norm": 22.612524032592773, "learning_rate": 2.156938775510204e-05, "loss": 2.167, "step": 13941 }, { "epoch": 56.906122448979595, "grad_norm": 27.141695022583008, "learning_rate": 2.156734693877551e-05, "loss": 2.1787, "step": 13942 }, { "epoch": 56.910204081632656, "grad_norm": 27.508623123168945, "learning_rate": 2.156530612244898e-05, "loss": 1.6815, "step": 13943 }, { "epoch": 56.91428571428571, "grad_norm": 62.09965515136719, "learning_rate": 2.156326530612245e-05, "loss": 0.3754, "step": 13944 }, { "epoch": 56.91836734693877, "grad_norm": 30.21448516845703, "learning_rate": 2.156122448979592e-05, "loss": 1.5503, "step": 13945 }, { "epoch": 56.922448979591834, "grad_norm": 21.28028106689453, "learning_rate": 2.1559183673469386e-05, "loss": 2.1559, "step": 13946 }, { "epoch": 56.926530612244896, "grad_norm": 23.129201889038086, "learning_rate": 2.155714285714286e-05, "loss": 1.8105, "step": 13947 }, { "epoch": 56.93061224489796, "grad_norm": 45.73680114746094, "learning_rate": 2.1555102040816327e-05, "loss": 1.4864, "step": 13948 }, { "epoch": 56.93469387755102, "grad_norm": 20.321502685546875, "learning_rate": 2.1553061224489795e-05, "loss": 2.5892, "step": 13949 }, { "epoch": 56.93877551020408, "grad_norm": 22.28325843811035, "learning_rate": 2.1551020408163265e-05, "loss": 0.2304, "step": 13950 }, { "epoch": 56.94285714285714, "grad_norm": 28.913604736328125, "learning_rate": 2.1548979591836736e-05, "loss": 1.4908, "step": 13951 }, { "epoch": 56.946938775510205, "grad_norm": 27.891794204711914, "learning_rate": 2.1546938775510206e-05, "loss": 2.0929, "step": 13952 }, { "epoch": 56.95102040816327, "grad_norm": 22.23944091796875, "learning_rate": 2.1544897959183673e-05, "loss": 2.1032, "step": 13953 }, { "epoch": 56.95510204081633, "grad_norm": 36.421791076660156, "learning_rate": 2.1542857142857144e-05, "loss": 0.698, "step": 13954 }, { "epoch": 56.95918367346939, "grad_norm": 19.675989151000977, "learning_rate": 2.1540816326530614e-05, "loss": 2.4832, "step": 13955 }, { "epoch": 56.96326530612245, "grad_norm": 36.03703308105469, "learning_rate": 2.153877551020408e-05, "loss": 0.8343, "step": 13956 }, { "epoch": 56.96734693877551, "grad_norm": 21.248624801635742, "learning_rate": 2.1536734693877552e-05, "loss": 2.1641, "step": 13957 }, { "epoch": 56.97142857142857, "grad_norm": 21.495601654052734, "learning_rate": 2.1534693877551022e-05, "loss": 2.2188, "step": 13958 }, { "epoch": 56.97551020408163, "grad_norm": 29.21912956237793, "learning_rate": 2.1532653061224493e-05, "loss": 1.867, "step": 13959 }, { "epoch": 56.97959183673469, "grad_norm": 21.915260314941406, "learning_rate": 2.153061224489796e-05, "loss": 2.1985, "step": 13960 }, { "epoch": 56.98367346938775, "grad_norm": 28.768840789794922, "learning_rate": 2.1528571428571427e-05, "loss": 1.226, "step": 13961 }, { "epoch": 56.987755102040815, "grad_norm": 38.7043342590332, "learning_rate": 2.15265306122449e-05, "loss": 0.9575, "step": 13962 }, { "epoch": 56.99183673469388, "grad_norm": 20.891605377197266, "learning_rate": 2.1524489795918368e-05, "loss": 2.102, "step": 13963 }, { "epoch": 56.99591836734694, "grad_norm": 23.91046142578125, "learning_rate": 2.152244897959184e-05, "loss": 2.0117, "step": 13964 }, { "epoch": 57.0, "grad_norm": 31.44554328918457, "learning_rate": 2.1520408163265306e-05, "loss": 1.4123, "step": 13965 }, { "epoch": 57.00408163265306, "grad_norm": 38.434200286865234, "learning_rate": 2.1518367346938776e-05, "loss": 1.711, "step": 13966 }, { "epoch": 57.00816326530612, "grad_norm": 23.240781784057617, "learning_rate": 2.1516326530612247e-05, "loss": 2.0301, "step": 13967 }, { "epoch": 57.012244897959185, "grad_norm": 23.280860900878906, "learning_rate": 2.1514285714285714e-05, "loss": 1.3266, "step": 13968 }, { "epoch": 57.01632653061225, "grad_norm": 38.85354232788086, "learning_rate": 2.1512244897959185e-05, "loss": 1.5931, "step": 13969 }, { "epoch": 57.02040816326531, "grad_norm": 41.4456672668457, "learning_rate": 2.1510204081632655e-05, "loss": 0.3908, "step": 13970 }, { "epoch": 57.02448979591837, "grad_norm": 27.32046127319336, "learning_rate": 2.1508163265306122e-05, "loss": 1.8274, "step": 13971 }, { "epoch": 57.02857142857143, "grad_norm": 18.74811553955078, "learning_rate": 2.1506122448979593e-05, "loss": 2.5018, "step": 13972 }, { "epoch": 57.03265306122449, "grad_norm": 36.3180046081543, "learning_rate": 2.150408163265306e-05, "loss": 0.5808, "step": 13973 }, { "epoch": 57.03673469387755, "grad_norm": 53.814361572265625, "learning_rate": 2.1502040816326534e-05, "loss": 0.6584, "step": 13974 }, { "epoch": 57.04081632653061, "grad_norm": 33.60234451293945, "learning_rate": 2.15e-05, "loss": 1.569, "step": 13975 }, { "epoch": 57.04489795918367, "grad_norm": 30.722408294677734, "learning_rate": 2.149795918367347e-05, "loss": 1.4335, "step": 13976 }, { "epoch": 57.04897959183673, "grad_norm": 21.49254608154297, "learning_rate": 2.149591836734694e-05, "loss": 2.1882, "step": 13977 }, { "epoch": 57.053061224489795, "grad_norm": 32.28623580932617, "learning_rate": 2.149387755102041e-05, "loss": 0.4336, "step": 13978 }, { "epoch": 57.05714285714286, "grad_norm": 21.667327880859375, "learning_rate": 2.149183673469388e-05, "loss": 1.9995, "step": 13979 }, { "epoch": 57.06122448979592, "grad_norm": 34.07347869873047, "learning_rate": 2.1489795918367347e-05, "loss": 0.3525, "step": 13980 }, { "epoch": 57.06530612244898, "grad_norm": 22.90837860107422, "learning_rate": 2.1487755102040817e-05, "loss": 1.8217, "step": 13981 }, { "epoch": 57.06938775510204, "grad_norm": 38.01032257080078, "learning_rate": 2.1485714285714288e-05, "loss": 1.1506, "step": 13982 }, { "epoch": 57.073469387755104, "grad_norm": 54.19544982910156, "learning_rate": 2.1483673469387755e-05, "loss": 0.7459, "step": 13983 }, { "epoch": 57.077551020408166, "grad_norm": 36.80575942993164, "learning_rate": 2.1481632653061225e-05, "loss": 1.1576, "step": 13984 }, { "epoch": 57.08163265306123, "grad_norm": 30.95118522644043, "learning_rate": 2.1479591836734696e-05, "loss": 1.8325, "step": 13985 }, { "epoch": 57.08571428571429, "grad_norm": 24.35060691833496, "learning_rate": 2.1477551020408166e-05, "loss": 1.9938, "step": 13986 }, { "epoch": 57.089795918367344, "grad_norm": 15.038908958435059, "learning_rate": 2.1475510204081633e-05, "loss": 2.6514, "step": 13987 }, { "epoch": 57.093877551020405, "grad_norm": 19.911022186279297, "learning_rate": 2.14734693877551e-05, "loss": 2.069, "step": 13988 }, { "epoch": 57.09795918367347, "grad_norm": 27.178466796875, "learning_rate": 2.1471428571428574e-05, "loss": 1.7311, "step": 13989 }, { "epoch": 57.10204081632653, "grad_norm": 28.446426391601562, "learning_rate": 2.146938775510204e-05, "loss": 1.5568, "step": 13990 }, { "epoch": 57.10612244897959, "grad_norm": 36.35219192504883, "learning_rate": 2.1467346938775512e-05, "loss": 1.3132, "step": 13991 }, { "epoch": 57.11020408163265, "grad_norm": 56.66889572143555, "learning_rate": 2.146530612244898e-05, "loss": 1.633, "step": 13992 }, { "epoch": 57.114285714285714, "grad_norm": 30.37124252319336, "learning_rate": 2.146326530612245e-05, "loss": 0.7934, "step": 13993 }, { "epoch": 57.118367346938776, "grad_norm": 36.389495849609375, "learning_rate": 2.146122448979592e-05, "loss": 0.6011, "step": 13994 }, { "epoch": 57.12244897959184, "grad_norm": 40.37611770629883, "learning_rate": 2.1459183673469387e-05, "loss": 0.8142, "step": 13995 }, { "epoch": 57.1265306122449, "grad_norm": 24.82866096496582, "learning_rate": 2.1457142857142858e-05, "loss": 1.8956, "step": 13996 }, { "epoch": 57.13061224489796, "grad_norm": 29.577104568481445, "learning_rate": 2.145510204081633e-05, "loss": 1.1066, "step": 13997 }, { "epoch": 57.13469387755102, "grad_norm": 20.52748680114746, "learning_rate": 2.14530612244898e-05, "loss": 2.4194, "step": 13998 }, { "epoch": 57.138775510204084, "grad_norm": 25.686824798583984, "learning_rate": 2.1451020408163266e-05, "loss": 1.9415, "step": 13999 }, { "epoch": 57.142857142857146, "grad_norm": 26.030460357666016, "learning_rate": 2.1448979591836733e-05, "loss": 2.2152, "step": 14000 }, { "epoch": 57.14693877551021, "grad_norm": 17.177324295043945, "learning_rate": 2.1446938775510207e-05, "loss": 2.5773, "step": 14001 }, { "epoch": 57.15102040816326, "grad_norm": 29.247838973999023, "learning_rate": 2.1444897959183674e-05, "loss": 0.7832, "step": 14002 }, { "epoch": 57.155102040816324, "grad_norm": 23.78310203552246, "learning_rate": 2.1442857142857145e-05, "loss": 1.8303, "step": 14003 }, { "epoch": 57.159183673469386, "grad_norm": 27.39095687866211, "learning_rate": 2.1440816326530612e-05, "loss": 2.2142, "step": 14004 }, { "epoch": 57.16326530612245, "grad_norm": 42.79404067993164, "learning_rate": 2.1438775510204082e-05, "loss": 1.0054, "step": 14005 }, { "epoch": 57.16734693877551, "grad_norm": 43.60683822631836, "learning_rate": 2.1436734693877553e-05, "loss": 1.7507, "step": 14006 }, { "epoch": 57.17142857142857, "grad_norm": 30.660551071166992, "learning_rate": 2.143469387755102e-05, "loss": 0.264, "step": 14007 }, { "epoch": 57.17551020408163, "grad_norm": 35.98509979248047, "learning_rate": 2.143265306122449e-05, "loss": 1.5852, "step": 14008 }, { "epoch": 57.179591836734694, "grad_norm": 21.19936752319336, "learning_rate": 2.143061224489796e-05, "loss": 1.87, "step": 14009 }, { "epoch": 57.183673469387756, "grad_norm": 45.91212463378906, "learning_rate": 2.1428571428571428e-05, "loss": 1.5465, "step": 14010 }, { "epoch": 57.18775510204082, "grad_norm": 54.99390411376953, "learning_rate": 2.14265306122449e-05, "loss": 0.4671, "step": 14011 }, { "epoch": 57.19183673469388, "grad_norm": 38.26334762573242, "learning_rate": 2.142448979591837e-05, "loss": 0.3857, "step": 14012 }, { "epoch": 57.19591836734694, "grad_norm": 19.652589797973633, "learning_rate": 2.142244897959184e-05, "loss": 2.7239, "step": 14013 }, { "epoch": 57.2, "grad_norm": 37.048912048339844, "learning_rate": 2.1420408163265307e-05, "loss": 1.4856, "step": 14014 }, { "epoch": 57.204081632653065, "grad_norm": 28.54840850830078, "learning_rate": 2.1418367346938774e-05, "loss": 1.4309, "step": 14015 }, { "epoch": 57.20816326530612, "grad_norm": 32.86915969848633, "learning_rate": 2.1416326530612248e-05, "loss": 0.7587, "step": 14016 }, { "epoch": 57.21224489795918, "grad_norm": 38.40452575683594, "learning_rate": 2.1414285714285715e-05, "loss": 0.2507, "step": 14017 }, { "epoch": 57.21632653061224, "grad_norm": 30.23277473449707, "learning_rate": 2.1412244897959185e-05, "loss": 2.1004, "step": 14018 }, { "epoch": 57.220408163265304, "grad_norm": 29.577619552612305, "learning_rate": 2.1410204081632653e-05, "loss": 1.3981, "step": 14019 }, { "epoch": 57.224489795918366, "grad_norm": 31.051313400268555, "learning_rate": 2.1408163265306127e-05, "loss": 1.9607, "step": 14020 }, { "epoch": 57.22857142857143, "grad_norm": 21.096542358398438, "learning_rate": 2.1406122448979594e-05, "loss": 2.1703, "step": 14021 }, { "epoch": 57.23265306122449, "grad_norm": 26.862306594848633, "learning_rate": 2.140408163265306e-05, "loss": 2.163, "step": 14022 }, { "epoch": 57.23673469387755, "grad_norm": 28.218578338623047, "learning_rate": 2.140204081632653e-05, "loss": 1.88, "step": 14023 }, { "epoch": 57.24081632653061, "grad_norm": 29.714670181274414, "learning_rate": 2.1400000000000002e-05, "loss": 1.5559, "step": 14024 }, { "epoch": 57.244897959183675, "grad_norm": 27.342260360717773, "learning_rate": 2.1397959183673472e-05, "loss": 1.3438, "step": 14025 }, { "epoch": 57.248979591836736, "grad_norm": 32.65438461303711, "learning_rate": 2.139591836734694e-05, "loss": 1.0892, "step": 14026 }, { "epoch": 57.2530612244898, "grad_norm": 26.375307083129883, "learning_rate": 2.1393877551020407e-05, "loss": 1.6729, "step": 14027 }, { "epoch": 57.25714285714286, "grad_norm": 23.156414031982422, "learning_rate": 2.139183673469388e-05, "loss": 1.9856, "step": 14028 }, { "epoch": 57.26122448979592, "grad_norm": 34.020713806152344, "learning_rate": 2.1389795918367348e-05, "loss": 1.034, "step": 14029 }, { "epoch": 57.265306122448976, "grad_norm": 31.376686096191406, "learning_rate": 2.1387755102040818e-05, "loss": 1.5734, "step": 14030 }, { "epoch": 57.26938775510204, "grad_norm": 29.038816452026367, "learning_rate": 2.1385714285714285e-05, "loss": 1.7481, "step": 14031 }, { "epoch": 57.2734693877551, "grad_norm": 31.701894760131836, "learning_rate": 2.1383673469387756e-05, "loss": 1.4262, "step": 14032 }, { "epoch": 57.27755102040816, "grad_norm": 34.268043518066406, "learning_rate": 2.1381632653061226e-05, "loss": 1.7075, "step": 14033 }, { "epoch": 57.28163265306122, "grad_norm": 41.79128646850586, "learning_rate": 2.1379591836734693e-05, "loss": 0.8601, "step": 14034 }, { "epoch": 57.285714285714285, "grad_norm": 34.260902404785156, "learning_rate": 2.1377551020408164e-05, "loss": 0.1905, "step": 14035 }, { "epoch": 57.289795918367346, "grad_norm": 23.95562171936035, "learning_rate": 2.1375510204081634e-05, "loss": 0.2359, "step": 14036 }, { "epoch": 57.29387755102041, "grad_norm": 40.78437423706055, "learning_rate": 2.13734693877551e-05, "loss": 1.3998, "step": 14037 }, { "epoch": 57.29795918367347, "grad_norm": 25.781679153442383, "learning_rate": 2.1371428571428572e-05, "loss": 1.6755, "step": 14038 }, { "epoch": 57.30204081632653, "grad_norm": 43.3612174987793, "learning_rate": 2.1369387755102043e-05, "loss": 0.8227, "step": 14039 }, { "epoch": 57.30612244897959, "grad_norm": 37.01230239868164, "learning_rate": 2.1367346938775513e-05, "loss": 1.7695, "step": 14040 }, { "epoch": 57.310204081632655, "grad_norm": 54.451820373535156, "learning_rate": 2.136530612244898e-05, "loss": 1.7207, "step": 14041 }, { "epoch": 57.31428571428572, "grad_norm": 45.40970230102539, "learning_rate": 2.136326530612245e-05, "loss": 0.8266, "step": 14042 }, { "epoch": 57.31836734693878, "grad_norm": 29.66217613220215, "learning_rate": 2.136122448979592e-05, "loss": 1.4646, "step": 14043 }, { "epoch": 57.32244897959184, "grad_norm": 22.601470947265625, "learning_rate": 2.135918367346939e-05, "loss": 2.1957, "step": 14044 }, { "epoch": 57.326530612244895, "grad_norm": 25.093698501586914, "learning_rate": 2.135714285714286e-05, "loss": 1.9055, "step": 14045 }, { "epoch": 57.33061224489796, "grad_norm": 42.0638427734375, "learning_rate": 2.1355102040816326e-05, "loss": 0.7692, "step": 14046 }, { "epoch": 57.33469387755102, "grad_norm": 44.81664276123047, "learning_rate": 2.1353061224489796e-05, "loss": 1.011, "step": 14047 }, { "epoch": 57.33877551020408, "grad_norm": 31.601335525512695, "learning_rate": 2.1351020408163267e-05, "loss": 1.4113, "step": 14048 }, { "epoch": 57.34285714285714, "grad_norm": 31.779569625854492, "learning_rate": 2.1348979591836734e-05, "loss": 0.4507, "step": 14049 }, { "epoch": 57.3469387755102, "grad_norm": 53.519744873046875, "learning_rate": 2.1346938775510205e-05, "loss": 0.959, "step": 14050 }, { "epoch": 57.351020408163265, "grad_norm": 39.85444259643555, "learning_rate": 2.1344897959183675e-05, "loss": 0.8041, "step": 14051 }, { "epoch": 57.35510204081633, "grad_norm": 44.22428894042969, "learning_rate": 2.1342857142857146e-05, "loss": 1.2568, "step": 14052 }, { "epoch": 57.35918367346939, "grad_norm": 19.04493522644043, "learning_rate": 2.1340816326530613e-05, "loss": 2.3849, "step": 14053 }, { "epoch": 57.36326530612245, "grad_norm": 33.6437873840332, "learning_rate": 2.133877551020408e-05, "loss": 0.6822, "step": 14054 }, { "epoch": 57.36734693877551, "grad_norm": 22.554895401000977, "learning_rate": 2.1336734693877554e-05, "loss": 2.4779, "step": 14055 }, { "epoch": 57.371428571428574, "grad_norm": 43.67931365966797, "learning_rate": 2.133469387755102e-05, "loss": 0.7547, "step": 14056 }, { "epoch": 57.375510204081635, "grad_norm": 45.61494064331055, "learning_rate": 2.133265306122449e-05, "loss": 1.3891, "step": 14057 }, { "epoch": 57.3795918367347, "grad_norm": 37.65639114379883, "learning_rate": 2.133061224489796e-05, "loss": 0.9405, "step": 14058 }, { "epoch": 57.38367346938775, "grad_norm": 32.184959411621094, "learning_rate": 2.132857142857143e-05, "loss": 1.6541, "step": 14059 }, { "epoch": 57.38775510204081, "grad_norm": 30.303396224975586, "learning_rate": 2.13265306122449e-05, "loss": 1.592, "step": 14060 }, { "epoch": 57.391836734693875, "grad_norm": 15.803215980529785, "learning_rate": 2.1324489795918367e-05, "loss": 2.6233, "step": 14061 }, { "epoch": 57.39591836734694, "grad_norm": 39.959922790527344, "learning_rate": 2.1322448979591837e-05, "loss": 0.8135, "step": 14062 }, { "epoch": 57.4, "grad_norm": 23.607135772705078, "learning_rate": 2.1320408163265308e-05, "loss": 1.9106, "step": 14063 }, { "epoch": 57.40408163265306, "grad_norm": 42.90335464477539, "learning_rate": 2.1318367346938778e-05, "loss": 0.8908, "step": 14064 }, { "epoch": 57.40816326530612, "grad_norm": 41.55110168457031, "learning_rate": 2.1316326530612245e-05, "loss": 1.1395, "step": 14065 }, { "epoch": 57.412244897959184, "grad_norm": 33.4460563659668, "learning_rate": 2.1314285714285716e-05, "loss": 1.2775, "step": 14066 }, { "epoch": 57.416326530612245, "grad_norm": 35.0050163269043, "learning_rate": 2.1312244897959186e-05, "loss": 1.4119, "step": 14067 }, { "epoch": 57.42040816326531, "grad_norm": 19.738405227661133, "learning_rate": 2.1310204081632654e-05, "loss": 2.1849, "step": 14068 }, { "epoch": 57.42448979591837, "grad_norm": 33.44235610961914, "learning_rate": 2.1308163265306124e-05, "loss": 1.7897, "step": 14069 }, { "epoch": 57.42857142857143, "grad_norm": 23.260257720947266, "learning_rate": 2.1306122448979595e-05, "loss": 2.0368, "step": 14070 }, { "epoch": 57.43265306122449, "grad_norm": 22.836095809936523, "learning_rate": 2.1304081632653062e-05, "loss": 2.4275, "step": 14071 }, { "epoch": 57.436734693877554, "grad_norm": 38.28921890258789, "learning_rate": 2.1302040816326532e-05, "loss": 0.6271, "step": 14072 }, { "epoch": 57.440816326530616, "grad_norm": 22.34581184387207, "learning_rate": 2.13e-05, "loss": 1.7285, "step": 14073 }, { "epoch": 57.44489795918367, "grad_norm": 39.50838088989258, "learning_rate": 2.129795918367347e-05, "loss": 0.9612, "step": 14074 }, { "epoch": 57.44897959183673, "grad_norm": 47.61606216430664, "learning_rate": 2.129591836734694e-05, "loss": 0.8605, "step": 14075 }, { "epoch": 57.453061224489794, "grad_norm": 19.149259567260742, "learning_rate": 2.1293877551020408e-05, "loss": 2.0802, "step": 14076 }, { "epoch": 57.457142857142856, "grad_norm": 22.36106300354004, "learning_rate": 2.1291836734693878e-05, "loss": 2.0366, "step": 14077 }, { "epoch": 57.46122448979592, "grad_norm": 15.166709899902344, "learning_rate": 2.128979591836735e-05, "loss": 3.0362, "step": 14078 }, { "epoch": 57.46530612244898, "grad_norm": 33.318634033203125, "learning_rate": 2.128775510204082e-05, "loss": 1.0699, "step": 14079 }, { "epoch": 57.46938775510204, "grad_norm": 40.18574905395508, "learning_rate": 2.1285714285714286e-05, "loss": 0.8593, "step": 14080 }, { "epoch": 57.4734693877551, "grad_norm": 20.94812774658203, "learning_rate": 2.1283673469387753e-05, "loss": 2.1019, "step": 14081 }, { "epoch": 57.477551020408164, "grad_norm": 34.95286178588867, "learning_rate": 2.1281632653061227e-05, "loss": 1.3186, "step": 14082 }, { "epoch": 57.481632653061226, "grad_norm": 16.84560203552246, "learning_rate": 2.1279591836734694e-05, "loss": 2.4011, "step": 14083 }, { "epoch": 57.48571428571429, "grad_norm": 44.24164962768555, "learning_rate": 2.1277551020408165e-05, "loss": 0.7449, "step": 14084 }, { "epoch": 57.48979591836735, "grad_norm": 37.44910430908203, "learning_rate": 2.1275510204081632e-05, "loss": 0.8342, "step": 14085 }, { "epoch": 57.49387755102041, "grad_norm": 32.47626495361328, "learning_rate": 2.1273469387755102e-05, "loss": 1.5029, "step": 14086 }, { "epoch": 57.49795918367347, "grad_norm": 48.79420852661133, "learning_rate": 2.1271428571428573e-05, "loss": 0.359, "step": 14087 }, { "epoch": 57.50204081632653, "grad_norm": 16.490339279174805, "learning_rate": 2.126938775510204e-05, "loss": 2.6136, "step": 14088 }, { "epoch": 57.50612244897959, "grad_norm": 33.389244079589844, "learning_rate": 2.126734693877551e-05, "loss": 1.8154, "step": 14089 }, { "epoch": 57.51020408163265, "grad_norm": 34.23705291748047, "learning_rate": 2.126530612244898e-05, "loss": 1.0608, "step": 14090 }, { "epoch": 57.51428571428571, "grad_norm": 19.688228607177734, "learning_rate": 2.126326530612245e-05, "loss": 2.2356, "step": 14091 }, { "epoch": 57.518367346938774, "grad_norm": 21.166488647460938, "learning_rate": 2.126122448979592e-05, "loss": 2.1725, "step": 14092 }, { "epoch": 57.522448979591836, "grad_norm": 48.878700256347656, "learning_rate": 2.125918367346939e-05, "loss": 0.9587, "step": 14093 }, { "epoch": 57.5265306122449, "grad_norm": 27.273983001708984, "learning_rate": 2.125714285714286e-05, "loss": 1.6977, "step": 14094 }, { "epoch": 57.53061224489796, "grad_norm": 35.706111907958984, "learning_rate": 2.1255102040816327e-05, "loss": 1.7162, "step": 14095 }, { "epoch": 57.53469387755102, "grad_norm": 19.042043685913086, "learning_rate": 2.1253061224489797e-05, "loss": 2.0109, "step": 14096 }, { "epoch": 57.53877551020408, "grad_norm": 32.221435546875, "learning_rate": 2.1251020408163268e-05, "loss": 1.454, "step": 14097 }, { "epoch": 57.542857142857144, "grad_norm": 21.272342681884766, "learning_rate": 2.1248979591836735e-05, "loss": 2.1714, "step": 14098 }, { "epoch": 57.546938775510206, "grad_norm": 50.19400405883789, "learning_rate": 2.1246938775510206e-05, "loss": 1.1532, "step": 14099 }, { "epoch": 57.55102040816327, "grad_norm": 22.250988006591797, "learning_rate": 2.1244897959183673e-05, "loss": 1.7514, "step": 14100 }, { "epoch": 57.55510204081633, "grad_norm": 10.659478187561035, "learning_rate": 2.1242857142857143e-05, "loss": 3.2514, "step": 14101 }, { "epoch": 57.55918367346939, "grad_norm": 28.90178871154785, "learning_rate": 2.1240816326530614e-05, "loss": 1.9679, "step": 14102 }, { "epoch": 57.563265306122446, "grad_norm": 23.95720672607422, "learning_rate": 2.123877551020408e-05, "loss": 1.2838, "step": 14103 }, { "epoch": 57.56734693877551, "grad_norm": 28.612878799438477, "learning_rate": 2.123673469387755e-05, "loss": 2.0614, "step": 14104 }, { "epoch": 57.57142857142857, "grad_norm": 30.050731658935547, "learning_rate": 2.1234693877551022e-05, "loss": 1.4041, "step": 14105 }, { "epoch": 57.57551020408163, "grad_norm": 18.339420318603516, "learning_rate": 2.1232653061224492e-05, "loss": 2.111, "step": 14106 }, { "epoch": 57.57959183673469, "grad_norm": 35.101341247558594, "learning_rate": 2.123061224489796e-05, "loss": 0.3493, "step": 14107 }, { "epoch": 57.583673469387755, "grad_norm": 39.43903732299805, "learning_rate": 2.1228571428571427e-05, "loss": 1.2603, "step": 14108 }, { "epoch": 57.587755102040816, "grad_norm": 45.89805221557617, "learning_rate": 2.12265306122449e-05, "loss": 0.7995, "step": 14109 }, { "epoch": 57.59183673469388, "grad_norm": 14.802783966064453, "learning_rate": 2.1224489795918368e-05, "loss": 2.859, "step": 14110 }, { "epoch": 57.59591836734694, "grad_norm": 23.58647346496582, "learning_rate": 2.1222448979591838e-05, "loss": 2.0821, "step": 14111 }, { "epoch": 57.6, "grad_norm": 22.42970085144043, "learning_rate": 2.1220408163265305e-05, "loss": 2.2131, "step": 14112 }, { "epoch": 57.60408163265306, "grad_norm": 24.451679229736328, "learning_rate": 2.121836734693878e-05, "loss": 2.1839, "step": 14113 }, { "epoch": 57.608163265306125, "grad_norm": 36.46600341796875, "learning_rate": 2.1216326530612246e-05, "loss": 0.7166, "step": 14114 }, { "epoch": 57.61224489795919, "grad_norm": 35.028621673583984, "learning_rate": 2.1214285714285713e-05, "loss": 1.2797, "step": 14115 }, { "epoch": 57.61632653061225, "grad_norm": 17.528568267822266, "learning_rate": 2.1212244897959184e-05, "loss": 2.495, "step": 14116 }, { "epoch": 57.6204081632653, "grad_norm": 17.77323341369629, "learning_rate": 2.1210204081632655e-05, "loss": 1.9226, "step": 14117 }, { "epoch": 57.624489795918365, "grad_norm": 37.807029724121094, "learning_rate": 2.1208163265306125e-05, "loss": 1.152, "step": 14118 }, { "epoch": 57.628571428571426, "grad_norm": 38.03892517089844, "learning_rate": 2.1206122448979592e-05, "loss": 1.2126, "step": 14119 }, { "epoch": 57.63265306122449, "grad_norm": 31.274330139160156, "learning_rate": 2.1204081632653063e-05, "loss": 1.9966, "step": 14120 }, { "epoch": 57.63673469387755, "grad_norm": 30.643400192260742, "learning_rate": 2.1202040816326533e-05, "loss": 1.7625, "step": 14121 }, { "epoch": 57.64081632653061, "grad_norm": 18.717451095581055, "learning_rate": 2.12e-05, "loss": 2.6149, "step": 14122 }, { "epoch": 57.64489795918367, "grad_norm": 37.26710891723633, "learning_rate": 2.119795918367347e-05, "loss": 1.5215, "step": 14123 }, { "epoch": 57.648979591836735, "grad_norm": 46.34246063232422, "learning_rate": 2.119591836734694e-05, "loss": 0.7171, "step": 14124 }, { "epoch": 57.6530612244898, "grad_norm": 30.691726684570312, "learning_rate": 2.119387755102041e-05, "loss": 1.492, "step": 14125 }, { "epoch": 57.65714285714286, "grad_norm": 55.139793395996094, "learning_rate": 2.119183673469388e-05, "loss": 0.9555, "step": 14126 }, { "epoch": 57.66122448979592, "grad_norm": 38.40305709838867, "learning_rate": 2.1189795918367346e-05, "loss": 0.9456, "step": 14127 }, { "epoch": 57.66530612244898, "grad_norm": 36.94872283935547, "learning_rate": 2.1187755102040817e-05, "loss": 1.3208, "step": 14128 }, { "epoch": 57.66938775510204, "grad_norm": 31.876571655273438, "learning_rate": 2.1185714285714287e-05, "loss": 1.9093, "step": 14129 }, { "epoch": 57.673469387755105, "grad_norm": 35.061187744140625, "learning_rate": 2.1183673469387754e-05, "loss": 1.4259, "step": 14130 }, { "epoch": 57.67755102040816, "grad_norm": 31.062915802001953, "learning_rate": 2.1181632653061225e-05, "loss": 1.2146, "step": 14131 }, { "epoch": 57.68163265306122, "grad_norm": 24.72551727294922, "learning_rate": 2.1179591836734695e-05, "loss": 1.8017, "step": 14132 }, { "epoch": 57.68571428571428, "grad_norm": 32.34797668457031, "learning_rate": 2.1177551020408166e-05, "loss": 1.5659, "step": 14133 }, { "epoch": 57.689795918367345, "grad_norm": 33.63745880126953, "learning_rate": 2.1175510204081633e-05, "loss": 1.0368, "step": 14134 }, { "epoch": 57.69387755102041, "grad_norm": 26.254432678222656, "learning_rate": 2.1173469387755103e-05, "loss": 1.8376, "step": 14135 }, { "epoch": 57.69795918367347, "grad_norm": 30.080339431762695, "learning_rate": 2.1171428571428574e-05, "loss": 1.6978, "step": 14136 }, { "epoch": 57.70204081632653, "grad_norm": 57.06148147583008, "learning_rate": 2.116938775510204e-05, "loss": 0.7359, "step": 14137 }, { "epoch": 57.70612244897959, "grad_norm": 33.01674270629883, "learning_rate": 2.116734693877551e-05, "loss": 0.5944, "step": 14138 }, { "epoch": 57.710204081632654, "grad_norm": 21.00027084350586, "learning_rate": 2.116530612244898e-05, "loss": 2.5411, "step": 14139 }, { "epoch": 57.714285714285715, "grad_norm": 34.500999450683594, "learning_rate": 2.1163265306122453e-05, "loss": 1.6893, "step": 14140 }, { "epoch": 57.71836734693878, "grad_norm": 33.889774322509766, "learning_rate": 2.116122448979592e-05, "loss": 1.9557, "step": 14141 }, { "epoch": 57.72244897959184, "grad_norm": 43.207881927490234, "learning_rate": 2.1159183673469387e-05, "loss": 0.8271, "step": 14142 }, { "epoch": 57.7265306122449, "grad_norm": 38.20365524291992, "learning_rate": 2.1157142857142857e-05, "loss": 0.6286, "step": 14143 }, { "epoch": 57.73061224489796, "grad_norm": 23.418466567993164, "learning_rate": 2.1155102040816328e-05, "loss": 2.1185, "step": 14144 }, { "epoch": 57.734693877551024, "grad_norm": 50.556419372558594, "learning_rate": 2.11530612244898e-05, "loss": 1.0674, "step": 14145 }, { "epoch": 57.73877551020408, "grad_norm": 23.808244705200195, "learning_rate": 2.1151020408163266e-05, "loss": 2.0274, "step": 14146 }, { "epoch": 57.74285714285714, "grad_norm": 37.50324630737305, "learning_rate": 2.1148979591836736e-05, "loss": 0.875, "step": 14147 }, { "epoch": 57.7469387755102, "grad_norm": 28.075450897216797, "learning_rate": 2.1146938775510207e-05, "loss": 2.047, "step": 14148 }, { "epoch": 57.751020408163264, "grad_norm": 30.7669620513916, "learning_rate": 2.1144897959183674e-05, "loss": 1.7064, "step": 14149 }, { "epoch": 57.755102040816325, "grad_norm": 28.851171493530273, "learning_rate": 2.1142857142857144e-05, "loss": 1.341, "step": 14150 }, { "epoch": 57.75918367346939, "grad_norm": 38.32035446166992, "learning_rate": 2.1140816326530615e-05, "loss": 1.5952, "step": 14151 }, { "epoch": 57.76326530612245, "grad_norm": 28.628372192382812, "learning_rate": 2.1138775510204082e-05, "loss": 1.2379, "step": 14152 }, { "epoch": 57.76734693877551, "grad_norm": 39.18703079223633, "learning_rate": 2.1136734693877552e-05, "loss": 1.4044, "step": 14153 }, { "epoch": 57.77142857142857, "grad_norm": 54.12276840209961, "learning_rate": 2.113469387755102e-05, "loss": 0.2822, "step": 14154 }, { "epoch": 57.775510204081634, "grad_norm": 26.71623420715332, "learning_rate": 2.113265306122449e-05, "loss": 1.8613, "step": 14155 }, { "epoch": 57.779591836734696, "grad_norm": 29.526243209838867, "learning_rate": 2.113061224489796e-05, "loss": 2.0839, "step": 14156 }, { "epoch": 57.78367346938776, "grad_norm": 24.527284622192383, "learning_rate": 2.112857142857143e-05, "loss": 1.9931, "step": 14157 }, { "epoch": 57.78775510204082, "grad_norm": 14.01229476928711, "learning_rate": 2.1126530612244898e-05, "loss": 2.7448, "step": 14158 }, { "epoch": 57.79183673469388, "grad_norm": 20.933202743530273, "learning_rate": 2.112448979591837e-05, "loss": 2.1571, "step": 14159 }, { "epoch": 57.795918367346935, "grad_norm": 41.51237106323242, "learning_rate": 2.112244897959184e-05, "loss": 1.401, "step": 14160 }, { "epoch": 57.8, "grad_norm": 21.728246688842773, "learning_rate": 2.1120408163265306e-05, "loss": 2.0648, "step": 14161 }, { "epoch": 57.80408163265306, "grad_norm": 32.04793167114258, "learning_rate": 2.1118367346938777e-05, "loss": 1.6405, "step": 14162 }, { "epoch": 57.80816326530612, "grad_norm": 6.0951828956604, "learning_rate": 2.1116326530612247e-05, "loss": 0.0665, "step": 14163 }, { "epoch": 57.81224489795918, "grad_norm": 38.34560775756836, "learning_rate": 2.1114285714285714e-05, "loss": 1.2915, "step": 14164 }, { "epoch": 57.816326530612244, "grad_norm": 25.363033294677734, "learning_rate": 2.1112244897959185e-05, "loss": 2.37, "step": 14165 }, { "epoch": 57.820408163265306, "grad_norm": 37.043296813964844, "learning_rate": 2.1110204081632652e-05, "loss": 0.7767, "step": 14166 }, { "epoch": 57.82448979591837, "grad_norm": 42.69901657104492, "learning_rate": 2.1108163265306126e-05, "loss": 0.3305, "step": 14167 }, { "epoch": 57.82857142857143, "grad_norm": 21.49468421936035, "learning_rate": 2.1106122448979593e-05, "loss": 2.1342, "step": 14168 }, { "epoch": 57.83265306122449, "grad_norm": 25.32516860961914, "learning_rate": 2.110408163265306e-05, "loss": 0.2182, "step": 14169 }, { "epoch": 57.83673469387755, "grad_norm": 34.53044891357422, "learning_rate": 2.110204081632653e-05, "loss": 0.9944, "step": 14170 }, { "epoch": 57.840816326530614, "grad_norm": 45.579307556152344, "learning_rate": 2.11e-05, "loss": 0.91, "step": 14171 }, { "epoch": 57.844897959183676, "grad_norm": 31.914203643798828, "learning_rate": 2.1097959183673472e-05, "loss": 1.2666, "step": 14172 }, { "epoch": 57.84897959183674, "grad_norm": 33.65547180175781, "learning_rate": 2.109591836734694e-05, "loss": 1.1522, "step": 14173 }, { "epoch": 57.85306122448979, "grad_norm": 17.01496696472168, "learning_rate": 2.109387755102041e-05, "loss": 2.5217, "step": 14174 }, { "epoch": 57.857142857142854, "grad_norm": 35.47276306152344, "learning_rate": 2.109183673469388e-05, "loss": 0.6016, "step": 14175 }, { "epoch": 57.861224489795916, "grad_norm": 27.800399780273438, "learning_rate": 2.1089795918367347e-05, "loss": 0.3181, "step": 14176 }, { "epoch": 57.86530612244898, "grad_norm": 27.75749969482422, "learning_rate": 2.1087755102040818e-05, "loss": 1.6519, "step": 14177 }, { "epoch": 57.86938775510204, "grad_norm": 27.468599319458008, "learning_rate": 2.1085714285714288e-05, "loss": 2.042, "step": 14178 }, { "epoch": 57.8734693877551, "grad_norm": 18.08457374572754, "learning_rate": 2.108367346938776e-05, "loss": 2.5594, "step": 14179 }, { "epoch": 57.87755102040816, "grad_norm": 21.93035125732422, "learning_rate": 2.1081632653061226e-05, "loss": 2.1428, "step": 14180 }, { "epoch": 57.881632653061224, "grad_norm": 36.722042083740234, "learning_rate": 2.1079591836734693e-05, "loss": 2.0676, "step": 14181 }, { "epoch": 57.885714285714286, "grad_norm": 37.62165832519531, "learning_rate": 2.1077551020408163e-05, "loss": 0.311, "step": 14182 }, { "epoch": 57.88979591836735, "grad_norm": 30.498186111450195, "learning_rate": 2.1075510204081634e-05, "loss": 1.3803, "step": 14183 }, { "epoch": 57.89387755102041, "grad_norm": 36.60252380371094, "learning_rate": 2.1073469387755104e-05, "loss": 1.1302, "step": 14184 }, { "epoch": 57.89795918367347, "grad_norm": 29.529855728149414, "learning_rate": 2.107142857142857e-05, "loss": 1.7463, "step": 14185 }, { "epoch": 57.90204081632653, "grad_norm": 40.059139251708984, "learning_rate": 2.1069387755102042e-05, "loss": 1.6322, "step": 14186 }, { "epoch": 57.906122448979595, "grad_norm": 35.22725296020508, "learning_rate": 2.1067346938775513e-05, "loss": 1.0987, "step": 14187 }, { "epoch": 57.910204081632656, "grad_norm": 31.06999969482422, "learning_rate": 2.106530612244898e-05, "loss": 1.3076, "step": 14188 }, { "epoch": 57.91428571428571, "grad_norm": 25.41783332824707, "learning_rate": 2.106326530612245e-05, "loss": 1.9351, "step": 14189 }, { "epoch": 57.91836734693877, "grad_norm": 30.3536434173584, "learning_rate": 2.106122448979592e-05, "loss": 1.7635, "step": 14190 }, { "epoch": 57.922448979591834, "grad_norm": 24.654878616333008, "learning_rate": 2.1059183673469388e-05, "loss": 1.9373, "step": 14191 }, { "epoch": 57.926530612244896, "grad_norm": 22.994497299194336, "learning_rate": 2.105714285714286e-05, "loss": 2.1975, "step": 14192 }, { "epoch": 57.93061224489796, "grad_norm": 34.50171661376953, "learning_rate": 2.1055102040816325e-05, "loss": 1.3367, "step": 14193 }, { "epoch": 57.93469387755102, "grad_norm": 35.43311309814453, "learning_rate": 2.10530612244898e-05, "loss": 0.9022, "step": 14194 }, { "epoch": 57.93877551020408, "grad_norm": 20.201736450195312, "learning_rate": 2.1051020408163266e-05, "loss": 2.5275, "step": 14195 }, { "epoch": 57.94285714285714, "grad_norm": 40.44487380981445, "learning_rate": 2.1048979591836734e-05, "loss": 1.3644, "step": 14196 }, { "epoch": 57.946938775510205, "grad_norm": 23.137659072875977, "learning_rate": 2.1046938775510204e-05, "loss": 1.8569, "step": 14197 }, { "epoch": 57.95102040816327, "grad_norm": 30.576900482177734, "learning_rate": 2.1044897959183675e-05, "loss": 1.8469, "step": 14198 }, { "epoch": 57.95510204081633, "grad_norm": 38.0658073425293, "learning_rate": 2.1042857142857145e-05, "loss": 0.7739, "step": 14199 }, { "epoch": 57.95918367346939, "grad_norm": 32.435237884521484, "learning_rate": 2.1040816326530612e-05, "loss": 1.1623, "step": 14200 }, { "epoch": 57.96326530612245, "grad_norm": 28.932693481445312, "learning_rate": 2.1038775510204083e-05, "loss": 1.8908, "step": 14201 }, { "epoch": 57.96734693877551, "grad_norm": 45.75848388671875, "learning_rate": 2.1036734693877553e-05, "loss": 0.6598, "step": 14202 }, { "epoch": 57.97142857142857, "grad_norm": 34.00948715209961, "learning_rate": 2.103469387755102e-05, "loss": 1.9379, "step": 14203 }, { "epoch": 57.97551020408163, "grad_norm": 20.38723373413086, "learning_rate": 2.103265306122449e-05, "loss": 2.2643, "step": 14204 }, { "epoch": 57.97959183673469, "grad_norm": 38.28936767578125, "learning_rate": 2.103061224489796e-05, "loss": 1.1594, "step": 14205 }, { "epoch": 57.98367346938775, "grad_norm": 25.071857452392578, "learning_rate": 2.1028571428571432e-05, "loss": 1.7895, "step": 14206 }, { "epoch": 57.987755102040815, "grad_norm": 44.53068923950195, "learning_rate": 2.10265306122449e-05, "loss": 1.8458, "step": 14207 }, { "epoch": 57.99183673469388, "grad_norm": 23.17690086364746, "learning_rate": 2.1024489795918366e-05, "loss": 2.1675, "step": 14208 }, { "epoch": 57.99591836734694, "grad_norm": 24.073766708374023, "learning_rate": 2.1022448979591837e-05, "loss": 2.5646, "step": 14209 }, { "epoch": 58.0, "grad_norm": 35.97403335571289, "learning_rate": 2.1020408163265307e-05, "loss": 1.2029, "step": 14210 }, { "epoch": 58.00408163265306, "grad_norm": 23.288864135742188, "learning_rate": 2.1018367346938778e-05, "loss": 1.7328, "step": 14211 }, { "epoch": 58.00816326530612, "grad_norm": 16.352188110351562, "learning_rate": 2.1016326530612245e-05, "loss": 2.5371, "step": 14212 }, { "epoch": 58.012244897959185, "grad_norm": 24.70336151123047, "learning_rate": 2.1014285714285715e-05, "loss": 1.7693, "step": 14213 }, { "epoch": 58.01632653061225, "grad_norm": 27.548261642456055, "learning_rate": 2.1012244897959186e-05, "loss": 1.8845, "step": 14214 }, { "epoch": 58.02040816326531, "grad_norm": 20.924236297607422, "learning_rate": 2.1010204081632653e-05, "loss": 2.7124, "step": 14215 }, { "epoch": 58.02448979591837, "grad_norm": 50.06898880004883, "learning_rate": 2.1008163265306124e-05, "loss": 0.666, "step": 14216 }, { "epoch": 58.02857142857143, "grad_norm": 6.7878031730651855, "learning_rate": 2.1006122448979594e-05, "loss": 0.0528, "step": 14217 }, { "epoch": 58.03265306122449, "grad_norm": 19.71645164489746, "learning_rate": 2.100408163265306e-05, "loss": 1.8982, "step": 14218 }, { "epoch": 58.03673469387755, "grad_norm": 37.04182052612305, "learning_rate": 2.1002040816326532e-05, "loss": 0.3531, "step": 14219 }, { "epoch": 58.04081632653061, "grad_norm": 46.29936218261719, "learning_rate": 2.1e-05, "loss": 1.5757, "step": 14220 }, { "epoch": 58.04489795918367, "grad_norm": 29.9000186920166, "learning_rate": 2.0997959183673473e-05, "loss": 0.5522, "step": 14221 }, { "epoch": 58.04897959183673, "grad_norm": 33.940738677978516, "learning_rate": 2.099591836734694e-05, "loss": 1.0392, "step": 14222 }, { "epoch": 58.053061224489795, "grad_norm": 24.623491287231445, "learning_rate": 2.099387755102041e-05, "loss": 1.8675, "step": 14223 }, { "epoch": 58.05714285714286, "grad_norm": 41.77304458618164, "learning_rate": 2.0991836734693878e-05, "loss": 0.8723, "step": 14224 }, { "epoch": 58.06122448979592, "grad_norm": 40.244590759277344, "learning_rate": 2.0989795918367348e-05, "loss": 1.4343, "step": 14225 }, { "epoch": 58.06530612244898, "grad_norm": 24.879745483398438, "learning_rate": 2.098775510204082e-05, "loss": 1.7413, "step": 14226 }, { "epoch": 58.06938775510204, "grad_norm": 14.947033882141113, "learning_rate": 2.0985714285714286e-05, "loss": 2.9746, "step": 14227 }, { "epoch": 58.073469387755104, "grad_norm": 40.54572677612305, "learning_rate": 2.0983673469387756e-05, "loss": 1.6221, "step": 14228 }, { "epoch": 58.077551020408166, "grad_norm": 37.320064544677734, "learning_rate": 2.0981632653061227e-05, "loss": 1.9094, "step": 14229 }, { "epoch": 58.08163265306123, "grad_norm": 23.95728874206543, "learning_rate": 2.0979591836734694e-05, "loss": 2.18, "step": 14230 }, { "epoch": 58.08571428571429, "grad_norm": 38.47828674316406, "learning_rate": 2.0977551020408164e-05, "loss": 0.7497, "step": 14231 }, { "epoch": 58.089795918367344, "grad_norm": 19.920148849487305, "learning_rate": 2.097551020408163e-05, "loss": 2.1006, "step": 14232 }, { "epoch": 58.093877551020405, "grad_norm": 25.213642120361328, "learning_rate": 2.0973469387755105e-05, "loss": 1.7361, "step": 14233 }, { "epoch": 58.09795918367347, "grad_norm": 27.51225471496582, "learning_rate": 2.0971428571428572e-05, "loss": 1.5332, "step": 14234 }, { "epoch": 58.10204081632653, "grad_norm": 40.385108947753906, "learning_rate": 2.096938775510204e-05, "loss": 0.6389, "step": 14235 }, { "epoch": 58.10612244897959, "grad_norm": 22.970823287963867, "learning_rate": 2.096734693877551e-05, "loss": 1.6891, "step": 14236 }, { "epoch": 58.11020408163265, "grad_norm": 35.6764030456543, "learning_rate": 2.096530612244898e-05, "loss": 0.8174, "step": 14237 }, { "epoch": 58.114285714285714, "grad_norm": 25.185731887817383, "learning_rate": 2.096326530612245e-05, "loss": 1.1543, "step": 14238 }, { "epoch": 58.118367346938776, "grad_norm": 56.27057647705078, "learning_rate": 2.0961224489795918e-05, "loss": 0.3736, "step": 14239 }, { "epoch": 58.12244897959184, "grad_norm": 23.408039093017578, "learning_rate": 2.095918367346939e-05, "loss": 2.4331, "step": 14240 }, { "epoch": 58.1265306122449, "grad_norm": 19.145938873291016, "learning_rate": 2.095714285714286e-05, "loss": 2.5448, "step": 14241 }, { "epoch": 58.13061224489796, "grad_norm": 32.33305358886719, "learning_rate": 2.0955102040816326e-05, "loss": 0.6544, "step": 14242 }, { "epoch": 58.13469387755102, "grad_norm": 30.048072814941406, "learning_rate": 2.0953061224489797e-05, "loss": 1.3658, "step": 14243 }, { "epoch": 58.138775510204084, "grad_norm": 27.0633602142334, "learning_rate": 2.0951020408163267e-05, "loss": 1.3501, "step": 14244 }, { "epoch": 58.142857142857146, "grad_norm": 32.931396484375, "learning_rate": 2.0948979591836738e-05, "loss": 1.7242, "step": 14245 }, { "epoch": 58.14693877551021, "grad_norm": 29.650381088256836, "learning_rate": 2.0946938775510205e-05, "loss": 0.9645, "step": 14246 }, { "epoch": 58.15102040816326, "grad_norm": 25.37499237060547, "learning_rate": 2.0944897959183672e-05, "loss": 1.3322, "step": 14247 }, { "epoch": 58.155102040816324, "grad_norm": 24.96588897705078, "learning_rate": 2.0942857142857146e-05, "loss": 2.0041, "step": 14248 }, { "epoch": 58.159183673469386, "grad_norm": 30.479352951049805, "learning_rate": 2.0940816326530613e-05, "loss": 1.5672, "step": 14249 }, { "epoch": 58.16326530612245, "grad_norm": 49.10356521606445, "learning_rate": 2.0938775510204084e-05, "loss": 0.8765, "step": 14250 }, { "epoch": 58.16734693877551, "grad_norm": 36.25208282470703, "learning_rate": 2.093673469387755e-05, "loss": 1.779, "step": 14251 }, { "epoch": 58.17142857142857, "grad_norm": 32.385902404785156, "learning_rate": 2.093469387755102e-05, "loss": 1.6519, "step": 14252 }, { "epoch": 58.17551020408163, "grad_norm": 33.53501510620117, "learning_rate": 2.0932653061224492e-05, "loss": 1.3212, "step": 14253 }, { "epoch": 58.179591836734694, "grad_norm": 17.087894439697266, "learning_rate": 2.093061224489796e-05, "loss": 2.6326, "step": 14254 }, { "epoch": 58.183673469387756, "grad_norm": 44.10512924194336, "learning_rate": 2.092857142857143e-05, "loss": 0.8314, "step": 14255 }, { "epoch": 58.18775510204082, "grad_norm": 30.315763473510742, "learning_rate": 2.09265306122449e-05, "loss": 1.9146, "step": 14256 }, { "epoch": 58.19183673469388, "grad_norm": 46.04977798461914, "learning_rate": 2.0924489795918367e-05, "loss": 0.605, "step": 14257 }, { "epoch": 58.19591836734694, "grad_norm": 29.876441955566406, "learning_rate": 2.0922448979591838e-05, "loss": 1.9144, "step": 14258 }, { "epoch": 58.2, "grad_norm": 24.182952880859375, "learning_rate": 2.0920408163265305e-05, "loss": 2.0378, "step": 14259 }, { "epoch": 58.204081632653065, "grad_norm": 18.37230110168457, "learning_rate": 2.091836734693878e-05, "loss": 2.1479, "step": 14260 }, { "epoch": 58.20816326530612, "grad_norm": 36.235748291015625, "learning_rate": 2.0916326530612246e-05, "loss": 0.2676, "step": 14261 }, { "epoch": 58.21224489795918, "grad_norm": 32.28998565673828, "learning_rate": 2.0914285714285713e-05, "loss": 1.3108, "step": 14262 }, { "epoch": 58.21632653061224, "grad_norm": 33.4655647277832, "learning_rate": 2.0912244897959183e-05, "loss": 0.9685, "step": 14263 }, { "epoch": 58.220408163265304, "grad_norm": 36.684879302978516, "learning_rate": 2.0910204081632654e-05, "loss": 1.3286, "step": 14264 }, { "epoch": 58.224489795918366, "grad_norm": 22.095781326293945, "learning_rate": 2.0908163265306125e-05, "loss": 2.1179, "step": 14265 }, { "epoch": 58.22857142857143, "grad_norm": 30.02969741821289, "learning_rate": 2.090612244897959e-05, "loss": 1.6699, "step": 14266 }, { "epoch": 58.23265306122449, "grad_norm": 32.371883392333984, "learning_rate": 2.0904081632653062e-05, "loss": 1.5233, "step": 14267 }, { "epoch": 58.23673469387755, "grad_norm": 31.70011329650879, "learning_rate": 2.0902040816326533e-05, "loss": 1.4359, "step": 14268 }, { "epoch": 58.24081632653061, "grad_norm": 34.03482437133789, "learning_rate": 2.09e-05, "loss": 1.1534, "step": 14269 }, { "epoch": 58.244897959183675, "grad_norm": 30.852705001831055, "learning_rate": 2.089795918367347e-05, "loss": 0.2649, "step": 14270 }, { "epoch": 58.248979591836736, "grad_norm": 45.149471282958984, "learning_rate": 2.089591836734694e-05, "loss": 0.2792, "step": 14271 }, { "epoch": 58.2530612244898, "grad_norm": 18.1193790435791, "learning_rate": 2.089387755102041e-05, "loss": 2.3703, "step": 14272 }, { "epoch": 58.25714285714286, "grad_norm": 50.448219299316406, "learning_rate": 2.089183673469388e-05, "loss": 0.6534, "step": 14273 }, { "epoch": 58.26122448979592, "grad_norm": 36.31187057495117, "learning_rate": 2.0889795918367346e-05, "loss": 1.5988, "step": 14274 }, { "epoch": 58.265306122448976, "grad_norm": 28.42975425720215, "learning_rate": 2.088775510204082e-05, "loss": 2.0401, "step": 14275 }, { "epoch": 58.26938775510204, "grad_norm": 36.316646575927734, "learning_rate": 2.0885714285714287e-05, "loss": 1.2039, "step": 14276 }, { "epoch": 58.2734693877551, "grad_norm": 18.868356704711914, "learning_rate": 2.0883673469387757e-05, "loss": 2.1262, "step": 14277 }, { "epoch": 58.27755102040816, "grad_norm": 29.332138061523438, "learning_rate": 2.0881632653061224e-05, "loss": 1.8113, "step": 14278 }, { "epoch": 58.28163265306122, "grad_norm": 41.810302734375, "learning_rate": 2.0879591836734695e-05, "loss": 1.0893, "step": 14279 }, { "epoch": 58.285714285714285, "grad_norm": 32.22765350341797, "learning_rate": 2.0877551020408165e-05, "loss": 1.6111, "step": 14280 }, { "epoch": 58.289795918367346, "grad_norm": 32.90528869628906, "learning_rate": 2.0875510204081632e-05, "loss": 1.8689, "step": 14281 }, { "epoch": 58.29387755102041, "grad_norm": 43.30984115600586, "learning_rate": 2.0873469387755103e-05, "loss": 1.2541, "step": 14282 }, { "epoch": 58.29795918367347, "grad_norm": 34.05714797973633, "learning_rate": 2.0871428571428573e-05, "loss": 1.0995, "step": 14283 }, { "epoch": 58.30204081632653, "grad_norm": 30.872499465942383, "learning_rate": 2.086938775510204e-05, "loss": 1.6642, "step": 14284 }, { "epoch": 58.30612244897959, "grad_norm": 51.91169738769531, "learning_rate": 2.086734693877551e-05, "loss": 1.7651, "step": 14285 }, { "epoch": 58.310204081632655, "grad_norm": 36.22483825683594, "learning_rate": 2.0865306122448978e-05, "loss": 1.0578, "step": 14286 }, { "epoch": 58.31428571428572, "grad_norm": 23.079511642456055, "learning_rate": 2.0863265306122452e-05, "loss": 1.9999, "step": 14287 }, { "epoch": 58.31836734693878, "grad_norm": 38.79644012451172, "learning_rate": 2.086122448979592e-05, "loss": 1.2759, "step": 14288 }, { "epoch": 58.32244897959184, "grad_norm": 16.613622665405273, "learning_rate": 2.0859183673469386e-05, "loss": 2.4333, "step": 14289 }, { "epoch": 58.326530612244895, "grad_norm": 21.617389678955078, "learning_rate": 2.0857142857142857e-05, "loss": 2.4576, "step": 14290 }, { "epoch": 58.33061224489796, "grad_norm": 30.94727325439453, "learning_rate": 2.0855102040816327e-05, "loss": 1.5868, "step": 14291 }, { "epoch": 58.33469387755102, "grad_norm": 20.371177673339844, "learning_rate": 2.0853061224489798e-05, "loss": 1.8629, "step": 14292 }, { "epoch": 58.33877551020408, "grad_norm": 39.60472869873047, "learning_rate": 2.0851020408163265e-05, "loss": 0.3766, "step": 14293 }, { "epoch": 58.34285714285714, "grad_norm": 31.288238525390625, "learning_rate": 2.0848979591836736e-05, "loss": 0.7717, "step": 14294 }, { "epoch": 58.3469387755102, "grad_norm": 38.601707458496094, "learning_rate": 2.0846938775510206e-05, "loss": 0.6818, "step": 14295 }, { "epoch": 58.351020408163265, "grad_norm": 37.86075973510742, "learning_rate": 2.0844897959183673e-05, "loss": 0.5481, "step": 14296 }, { "epoch": 58.35510204081633, "grad_norm": 46.69743347167969, "learning_rate": 2.0842857142857144e-05, "loss": 0.6711, "step": 14297 }, { "epoch": 58.35918367346939, "grad_norm": 47.992095947265625, "learning_rate": 2.0840816326530614e-05, "loss": 1.6545, "step": 14298 }, { "epoch": 58.36326530612245, "grad_norm": 37.12521743774414, "learning_rate": 2.0838775510204085e-05, "loss": 1.8755, "step": 14299 }, { "epoch": 58.36734693877551, "grad_norm": 35.67746353149414, "learning_rate": 2.0836734693877552e-05, "loss": 1.1352, "step": 14300 }, { "epoch": 58.371428571428574, "grad_norm": 24.28814125061035, "learning_rate": 2.083469387755102e-05, "loss": 1.7083, "step": 14301 }, { "epoch": 58.375510204081635, "grad_norm": 28.149080276489258, "learning_rate": 2.0832653061224493e-05, "loss": 0.2092, "step": 14302 }, { "epoch": 58.3795918367347, "grad_norm": 21.01846694946289, "learning_rate": 2.083061224489796e-05, "loss": 0.1595, "step": 14303 }, { "epoch": 58.38367346938775, "grad_norm": 35.12958526611328, "learning_rate": 2.082857142857143e-05, "loss": 0.5737, "step": 14304 }, { "epoch": 58.38775510204081, "grad_norm": 24.419620513916016, "learning_rate": 2.0826530612244898e-05, "loss": 1.8559, "step": 14305 }, { "epoch": 58.391836734693875, "grad_norm": 21.970897674560547, "learning_rate": 2.0824489795918368e-05, "loss": 2.4221, "step": 14306 }, { "epoch": 58.39591836734694, "grad_norm": 42.251548767089844, "learning_rate": 2.082244897959184e-05, "loss": 0.8159, "step": 14307 }, { "epoch": 58.4, "grad_norm": 38.42338562011719, "learning_rate": 2.0820408163265306e-05, "loss": 0.9482, "step": 14308 }, { "epoch": 58.40408163265306, "grad_norm": 41.7346305847168, "learning_rate": 2.0818367346938776e-05, "loss": 1.7761, "step": 14309 }, { "epoch": 58.40816326530612, "grad_norm": 28.196287155151367, "learning_rate": 2.0816326530612247e-05, "loss": 1.4086, "step": 14310 }, { "epoch": 58.412244897959184, "grad_norm": 40.06315994262695, "learning_rate": 2.0814285714285714e-05, "loss": 1.1432, "step": 14311 }, { "epoch": 58.416326530612245, "grad_norm": 27.404390335083008, "learning_rate": 2.0812244897959184e-05, "loss": 2.247, "step": 14312 }, { "epoch": 58.42040816326531, "grad_norm": 29.242692947387695, "learning_rate": 2.081020408163265e-05, "loss": 1.744, "step": 14313 }, { "epoch": 58.42448979591837, "grad_norm": 30.56104850769043, "learning_rate": 2.0808163265306125e-05, "loss": 1.1784, "step": 14314 }, { "epoch": 58.42857142857143, "grad_norm": 18.612070083618164, "learning_rate": 2.0806122448979593e-05, "loss": 2.4466, "step": 14315 }, { "epoch": 58.43265306122449, "grad_norm": 20.229806900024414, "learning_rate": 2.0804081632653063e-05, "loss": 2.0441, "step": 14316 }, { "epoch": 58.436734693877554, "grad_norm": 37.8874626159668, "learning_rate": 2.080204081632653e-05, "loss": 0.7359, "step": 14317 }, { "epoch": 58.440816326530616, "grad_norm": 24.9514102935791, "learning_rate": 2.08e-05, "loss": 2.0242, "step": 14318 }, { "epoch": 58.44489795918367, "grad_norm": 31.716764450073242, "learning_rate": 2.079795918367347e-05, "loss": 0.7872, "step": 14319 }, { "epoch": 58.44897959183673, "grad_norm": 40.71302032470703, "learning_rate": 2.079591836734694e-05, "loss": 1.181, "step": 14320 }, { "epoch": 58.453061224489794, "grad_norm": 32.64891815185547, "learning_rate": 2.079387755102041e-05, "loss": 0.5541, "step": 14321 }, { "epoch": 58.457142857142856, "grad_norm": 22.96073341369629, "learning_rate": 2.079183673469388e-05, "loss": 2.1084, "step": 14322 }, { "epoch": 58.46122448979592, "grad_norm": 36.51578903198242, "learning_rate": 2.0789795918367347e-05, "loss": 1.2071, "step": 14323 }, { "epoch": 58.46530612244898, "grad_norm": 38.40245819091797, "learning_rate": 2.0787755102040817e-05, "loss": 1.0802, "step": 14324 }, { "epoch": 58.46938775510204, "grad_norm": 29.69894027709961, "learning_rate": 2.0785714285714288e-05, "loss": 1.6341, "step": 14325 }, { "epoch": 58.4734693877551, "grad_norm": 31.068180084228516, "learning_rate": 2.0783673469387758e-05, "loss": 1.6746, "step": 14326 }, { "epoch": 58.477551020408164, "grad_norm": 23.93779182434082, "learning_rate": 2.0781632653061225e-05, "loss": 1.9296, "step": 14327 }, { "epoch": 58.481632653061226, "grad_norm": 18.75810432434082, "learning_rate": 2.0779591836734692e-05, "loss": 2.4497, "step": 14328 }, { "epoch": 58.48571428571429, "grad_norm": 22.1436710357666, "learning_rate": 2.0777551020408166e-05, "loss": 2.311, "step": 14329 }, { "epoch": 58.48979591836735, "grad_norm": 39.50859832763672, "learning_rate": 2.0775510204081633e-05, "loss": 0.8098, "step": 14330 }, { "epoch": 58.49387755102041, "grad_norm": 23.85402488708496, "learning_rate": 2.0773469387755104e-05, "loss": 0.2405, "step": 14331 }, { "epoch": 58.49795918367347, "grad_norm": 27.996313095092773, "learning_rate": 2.077142857142857e-05, "loss": 1.8456, "step": 14332 }, { "epoch": 58.50204081632653, "grad_norm": 30.779481887817383, "learning_rate": 2.076938775510204e-05, "loss": 0.8867, "step": 14333 }, { "epoch": 58.50612244897959, "grad_norm": 29.975444793701172, "learning_rate": 2.0767346938775512e-05, "loss": 1.2502, "step": 14334 }, { "epoch": 58.51020408163265, "grad_norm": 34.45295333862305, "learning_rate": 2.076530612244898e-05, "loss": 1.0602, "step": 14335 }, { "epoch": 58.51428571428571, "grad_norm": 24.951282501220703, "learning_rate": 2.076326530612245e-05, "loss": 1.9943, "step": 14336 }, { "epoch": 58.518367346938774, "grad_norm": 32.579994201660156, "learning_rate": 2.076122448979592e-05, "loss": 0.7811, "step": 14337 }, { "epoch": 58.522448979591836, "grad_norm": 35.85816955566406, "learning_rate": 2.075918367346939e-05, "loss": 0.8324, "step": 14338 }, { "epoch": 58.5265306122449, "grad_norm": 30.432836532592773, "learning_rate": 2.0757142857142858e-05, "loss": 1.4263, "step": 14339 }, { "epoch": 58.53061224489796, "grad_norm": 24.638641357421875, "learning_rate": 2.0755102040816325e-05, "loss": 2.0793, "step": 14340 }, { "epoch": 58.53469387755102, "grad_norm": 25.972585678100586, "learning_rate": 2.07530612244898e-05, "loss": 0.2447, "step": 14341 }, { "epoch": 58.53877551020408, "grad_norm": 24.298219680786133, "learning_rate": 2.0751020408163266e-05, "loss": 1.719, "step": 14342 }, { "epoch": 58.542857142857144, "grad_norm": 28.170106887817383, "learning_rate": 2.0748979591836736e-05, "loss": 1.8588, "step": 14343 }, { "epoch": 58.546938775510206, "grad_norm": 37.53443908691406, "learning_rate": 2.0746938775510204e-05, "loss": 0.8037, "step": 14344 }, { "epoch": 58.55102040816327, "grad_norm": 24.285736083984375, "learning_rate": 2.0744897959183674e-05, "loss": 1.8005, "step": 14345 }, { "epoch": 58.55510204081633, "grad_norm": 24.89823341369629, "learning_rate": 2.0742857142857145e-05, "loss": 1.645, "step": 14346 }, { "epoch": 58.55918367346939, "grad_norm": 36.124916076660156, "learning_rate": 2.0740816326530612e-05, "loss": 0.2786, "step": 14347 }, { "epoch": 58.563265306122446, "grad_norm": 40.277565002441406, "learning_rate": 2.0738775510204082e-05, "loss": 1.6112, "step": 14348 }, { "epoch": 58.56734693877551, "grad_norm": 33.10337448120117, "learning_rate": 2.0736734693877553e-05, "loss": 1.6793, "step": 14349 }, { "epoch": 58.57142857142857, "grad_norm": 43.628623962402344, "learning_rate": 2.073469387755102e-05, "loss": 1.1852, "step": 14350 }, { "epoch": 58.57551020408163, "grad_norm": 31.260360717773438, "learning_rate": 2.073265306122449e-05, "loss": 1.3838, "step": 14351 }, { "epoch": 58.57959183673469, "grad_norm": 31.658205032348633, "learning_rate": 2.073061224489796e-05, "loss": 1.6565, "step": 14352 }, { "epoch": 58.583673469387755, "grad_norm": 33.75206756591797, "learning_rate": 2.072857142857143e-05, "loss": 1.2914, "step": 14353 }, { "epoch": 58.587755102040816, "grad_norm": 42.88728332519531, "learning_rate": 2.07265306122449e-05, "loss": 0.3845, "step": 14354 }, { "epoch": 58.59183673469388, "grad_norm": 22.205331802368164, "learning_rate": 2.0724489795918366e-05, "loss": 2.4083, "step": 14355 }, { "epoch": 58.59591836734694, "grad_norm": 30.563121795654297, "learning_rate": 2.072244897959184e-05, "loss": 2.0375, "step": 14356 }, { "epoch": 58.6, "grad_norm": 21.26589012145996, "learning_rate": 2.0720408163265307e-05, "loss": 2.0542, "step": 14357 }, { "epoch": 58.60408163265306, "grad_norm": 21.140701293945312, "learning_rate": 2.0718367346938777e-05, "loss": 2.5183, "step": 14358 }, { "epoch": 58.608163265306125, "grad_norm": 35.80423355102539, "learning_rate": 2.0716326530612244e-05, "loss": 1.3337, "step": 14359 }, { "epoch": 58.61224489795919, "grad_norm": 18.292560577392578, "learning_rate": 2.0714285714285718e-05, "loss": 2.5567, "step": 14360 }, { "epoch": 58.61632653061225, "grad_norm": 25.849199295043945, "learning_rate": 2.0712244897959185e-05, "loss": 1.3718, "step": 14361 }, { "epoch": 58.6204081632653, "grad_norm": 23.407800674438477, "learning_rate": 2.0710204081632653e-05, "loss": 1.8713, "step": 14362 }, { "epoch": 58.624489795918365, "grad_norm": 30.179014205932617, "learning_rate": 2.0708163265306123e-05, "loss": 1.7098, "step": 14363 }, { "epoch": 58.628571428571426, "grad_norm": 25.18525505065918, "learning_rate": 2.0706122448979594e-05, "loss": 1.8596, "step": 14364 }, { "epoch": 58.63265306122449, "grad_norm": 43.520755767822266, "learning_rate": 2.0704081632653064e-05, "loss": 1.5138, "step": 14365 }, { "epoch": 58.63673469387755, "grad_norm": 31.251096725463867, "learning_rate": 2.070204081632653e-05, "loss": 1.3622, "step": 14366 }, { "epoch": 58.64081632653061, "grad_norm": 21.861085891723633, "learning_rate": 2.07e-05, "loss": 2.5155, "step": 14367 }, { "epoch": 58.64489795918367, "grad_norm": 52.996826171875, "learning_rate": 2.0697959183673472e-05, "loss": 1.8202, "step": 14368 }, { "epoch": 58.648979591836735, "grad_norm": 32.04841232299805, "learning_rate": 2.069591836734694e-05, "loss": 1.4726, "step": 14369 }, { "epoch": 58.6530612244898, "grad_norm": 18.157855987548828, "learning_rate": 2.069387755102041e-05, "loss": 2.5846, "step": 14370 }, { "epoch": 58.65714285714286, "grad_norm": 31.516376495361328, "learning_rate": 2.0691836734693877e-05, "loss": 1.9107, "step": 14371 }, { "epoch": 58.66122448979592, "grad_norm": 49.714847564697266, "learning_rate": 2.0689795918367348e-05, "loss": 1.0086, "step": 14372 }, { "epoch": 58.66530612244898, "grad_norm": 22.974212646484375, "learning_rate": 2.0687755102040818e-05, "loss": 2.1581, "step": 14373 }, { "epoch": 58.66938775510204, "grad_norm": 32.571598052978516, "learning_rate": 2.0685714285714285e-05, "loss": 0.446, "step": 14374 }, { "epoch": 58.673469387755105, "grad_norm": 37.78055191040039, "learning_rate": 2.0683673469387756e-05, "loss": 0.3328, "step": 14375 }, { "epoch": 58.67755102040816, "grad_norm": 20.5414981842041, "learning_rate": 2.0681632653061226e-05, "loss": 2.0125, "step": 14376 }, { "epoch": 58.68163265306122, "grad_norm": 32.83479690551758, "learning_rate": 2.0679591836734693e-05, "loss": 2.1655, "step": 14377 }, { "epoch": 58.68571428571428, "grad_norm": 21.506759643554688, "learning_rate": 2.0677551020408164e-05, "loss": 2.0772, "step": 14378 }, { "epoch": 58.689795918367345, "grad_norm": 35.194427490234375, "learning_rate": 2.0675510204081634e-05, "loss": 1.3287, "step": 14379 }, { "epoch": 58.69387755102041, "grad_norm": 34.77595901489258, "learning_rate": 2.0673469387755105e-05, "loss": 0.3078, "step": 14380 }, { "epoch": 58.69795918367347, "grad_norm": 30.205591201782227, "learning_rate": 2.0671428571428572e-05, "loss": 0.784, "step": 14381 }, { "epoch": 58.70204081632653, "grad_norm": 34.9768180847168, "learning_rate": 2.0669387755102042e-05, "loss": 1.3435, "step": 14382 }, { "epoch": 58.70612244897959, "grad_norm": 38.96018600463867, "learning_rate": 2.0667346938775513e-05, "loss": 1.4583, "step": 14383 }, { "epoch": 58.710204081632654, "grad_norm": 40.929466247558594, "learning_rate": 2.066530612244898e-05, "loss": 0.6955, "step": 14384 }, { "epoch": 58.714285714285715, "grad_norm": 24.99992561340332, "learning_rate": 2.066326530612245e-05, "loss": 2.049, "step": 14385 }, { "epoch": 58.71836734693878, "grad_norm": 38.447959899902344, "learning_rate": 2.0661224489795918e-05, "loss": 1.7198, "step": 14386 }, { "epoch": 58.72244897959184, "grad_norm": 45.65725326538086, "learning_rate": 2.065918367346939e-05, "loss": 0.9552, "step": 14387 }, { "epoch": 58.7265306122449, "grad_norm": 42.62186813354492, "learning_rate": 2.065714285714286e-05, "loss": 0.6459, "step": 14388 }, { "epoch": 58.73061224489796, "grad_norm": 53.037879943847656, "learning_rate": 2.0655102040816326e-05, "loss": 0.8439, "step": 14389 }, { "epoch": 58.734693877551024, "grad_norm": 32.52500534057617, "learning_rate": 2.0653061224489796e-05, "loss": 1.583, "step": 14390 }, { "epoch": 58.73877551020408, "grad_norm": 44.88209915161133, "learning_rate": 2.0651020408163267e-05, "loss": 1.5287, "step": 14391 }, { "epoch": 58.74285714285714, "grad_norm": 54.01069259643555, "learning_rate": 2.0648979591836737e-05, "loss": 1.0352, "step": 14392 }, { "epoch": 58.7469387755102, "grad_norm": 32.863800048828125, "learning_rate": 2.0646938775510205e-05, "loss": 0.8761, "step": 14393 }, { "epoch": 58.751020408163264, "grad_norm": 24.354671478271484, "learning_rate": 2.0644897959183672e-05, "loss": 1.692, "step": 14394 }, { "epoch": 58.755102040816325, "grad_norm": 21.942968368530273, "learning_rate": 2.0642857142857146e-05, "loss": 0.3364, "step": 14395 }, { "epoch": 58.75918367346939, "grad_norm": 45.31880569458008, "learning_rate": 2.0640816326530613e-05, "loss": 1.4655, "step": 14396 }, { "epoch": 58.76326530612245, "grad_norm": 28.136430740356445, "learning_rate": 2.0638775510204083e-05, "loss": 1.9941, "step": 14397 }, { "epoch": 58.76734693877551, "grad_norm": 21.322751998901367, "learning_rate": 2.063673469387755e-05, "loss": 2.1564, "step": 14398 }, { "epoch": 58.77142857142857, "grad_norm": 46.082332611083984, "learning_rate": 2.063469387755102e-05, "loss": 0.8284, "step": 14399 }, { "epoch": 58.775510204081634, "grad_norm": 15.772808074951172, "learning_rate": 2.063265306122449e-05, "loss": 2.8599, "step": 14400 }, { "epoch": 58.779591836734696, "grad_norm": 22.982505798339844, "learning_rate": 2.063061224489796e-05, "loss": 2.0199, "step": 14401 }, { "epoch": 58.78367346938776, "grad_norm": 37.290863037109375, "learning_rate": 2.062857142857143e-05, "loss": 0.6928, "step": 14402 }, { "epoch": 58.78775510204082, "grad_norm": 40.69795608520508, "learning_rate": 2.06265306122449e-05, "loss": 0.6653, "step": 14403 }, { "epoch": 58.79183673469388, "grad_norm": 20.251005172729492, "learning_rate": 2.062448979591837e-05, "loss": 1.9847, "step": 14404 }, { "epoch": 58.795918367346935, "grad_norm": 56.338050842285156, "learning_rate": 2.0622448979591837e-05, "loss": 1.1636, "step": 14405 }, { "epoch": 58.8, "grad_norm": 28.329639434814453, "learning_rate": 2.0620408163265308e-05, "loss": 1.4811, "step": 14406 }, { "epoch": 58.80408163265306, "grad_norm": 32.046722412109375, "learning_rate": 2.0618367346938778e-05, "loss": 1.4312, "step": 14407 }, { "epoch": 58.80816326530612, "grad_norm": 48.971012115478516, "learning_rate": 2.0616326530612245e-05, "loss": 0.8332, "step": 14408 }, { "epoch": 58.81224489795918, "grad_norm": 28.908897399902344, "learning_rate": 2.0614285714285716e-05, "loss": 1.2601, "step": 14409 }, { "epoch": 58.816326530612244, "grad_norm": 18.768430709838867, "learning_rate": 2.0612244897959186e-05, "loss": 2.519, "step": 14410 }, { "epoch": 58.820408163265306, "grad_norm": 23.287622451782227, "learning_rate": 2.0610204081632653e-05, "loss": 2.0979, "step": 14411 }, { "epoch": 58.82448979591837, "grad_norm": 28.116918563842773, "learning_rate": 2.0608163265306124e-05, "loss": 1.991, "step": 14412 }, { "epoch": 58.82857142857143, "grad_norm": 39.412574768066406, "learning_rate": 2.060612244897959e-05, "loss": 1.5893, "step": 14413 }, { "epoch": 58.83265306122449, "grad_norm": 25.96437644958496, "learning_rate": 2.0604081632653065e-05, "loss": 1.938, "step": 14414 }, { "epoch": 58.83673469387755, "grad_norm": 31.14518928527832, "learning_rate": 2.0602040816326532e-05, "loss": 2.0144, "step": 14415 }, { "epoch": 58.840816326530614, "grad_norm": 27.541852951049805, "learning_rate": 2.06e-05, "loss": 2.2628, "step": 14416 }, { "epoch": 58.844897959183676, "grad_norm": 44.767208099365234, "learning_rate": 2.059795918367347e-05, "loss": 1.0089, "step": 14417 }, { "epoch": 58.84897959183674, "grad_norm": 37.46706771850586, "learning_rate": 2.059591836734694e-05, "loss": 1.3168, "step": 14418 }, { "epoch": 58.85306122448979, "grad_norm": 43.82708740234375, "learning_rate": 2.059387755102041e-05, "loss": 1.201, "step": 14419 }, { "epoch": 58.857142857142854, "grad_norm": 44.16986083984375, "learning_rate": 2.0591836734693878e-05, "loss": 0.9306, "step": 14420 }, { "epoch": 58.861224489795916, "grad_norm": 39.03099822998047, "learning_rate": 2.0589795918367345e-05, "loss": 0.6234, "step": 14421 }, { "epoch": 58.86530612244898, "grad_norm": 20.482227325439453, "learning_rate": 2.058775510204082e-05, "loss": 2.0928, "step": 14422 }, { "epoch": 58.86938775510204, "grad_norm": 37.1757698059082, "learning_rate": 2.0585714285714286e-05, "loss": 0.4529, "step": 14423 }, { "epoch": 58.8734693877551, "grad_norm": 29.70869255065918, "learning_rate": 2.0583673469387757e-05, "loss": 2.1035, "step": 14424 }, { "epoch": 58.87755102040816, "grad_norm": 22.333772659301758, "learning_rate": 2.0581632653061224e-05, "loss": 1.7224, "step": 14425 }, { "epoch": 58.881632653061224, "grad_norm": 34.738739013671875, "learning_rate": 2.0579591836734694e-05, "loss": 1.3754, "step": 14426 }, { "epoch": 58.885714285714286, "grad_norm": 19.781978607177734, "learning_rate": 2.0577551020408165e-05, "loss": 2.2292, "step": 14427 }, { "epoch": 58.88979591836735, "grad_norm": 39.784000396728516, "learning_rate": 2.0575510204081632e-05, "loss": 0.7596, "step": 14428 }, { "epoch": 58.89387755102041, "grad_norm": 28.338451385498047, "learning_rate": 2.0573469387755102e-05, "loss": 1.9632, "step": 14429 }, { "epoch": 58.89795918367347, "grad_norm": 29.94654083251953, "learning_rate": 2.0571428571428573e-05, "loss": 1.0302, "step": 14430 }, { "epoch": 58.90204081632653, "grad_norm": 39.68574523925781, "learning_rate": 2.0569387755102043e-05, "loss": 0.7696, "step": 14431 }, { "epoch": 58.906122448979595, "grad_norm": 41.5012092590332, "learning_rate": 2.056734693877551e-05, "loss": 1.5309, "step": 14432 }, { "epoch": 58.910204081632656, "grad_norm": 51.37248229980469, "learning_rate": 2.056530612244898e-05, "loss": 0.8526, "step": 14433 }, { "epoch": 58.91428571428571, "grad_norm": 38.88767623901367, "learning_rate": 2.056326530612245e-05, "loss": 1.0793, "step": 14434 }, { "epoch": 58.91836734693877, "grad_norm": 34.928375244140625, "learning_rate": 2.056122448979592e-05, "loss": 1.1431, "step": 14435 }, { "epoch": 58.922448979591834, "grad_norm": 25.321739196777344, "learning_rate": 2.055918367346939e-05, "loss": 2.2651, "step": 14436 }, { "epoch": 58.926530612244896, "grad_norm": 28.38962745666504, "learning_rate": 2.055714285714286e-05, "loss": 1.6218, "step": 14437 }, { "epoch": 58.93061224489796, "grad_norm": 20.9283390045166, "learning_rate": 2.0555102040816327e-05, "loss": 2.0902, "step": 14438 }, { "epoch": 58.93469387755102, "grad_norm": 12.721954345703125, "learning_rate": 2.0553061224489797e-05, "loss": 2.7043, "step": 14439 }, { "epoch": 58.93877551020408, "grad_norm": 35.24855422973633, "learning_rate": 2.0551020408163265e-05, "loss": 1.3094, "step": 14440 }, { "epoch": 58.94285714285714, "grad_norm": 46.09721374511719, "learning_rate": 2.054897959183674e-05, "loss": 0.9121, "step": 14441 }, { "epoch": 58.946938775510205, "grad_norm": 20.288284301757812, "learning_rate": 2.0546938775510206e-05, "loss": 2.2116, "step": 14442 }, { "epoch": 58.95102040816327, "grad_norm": 10.536820411682129, "learning_rate": 2.0544897959183673e-05, "loss": 3.2555, "step": 14443 }, { "epoch": 58.95510204081633, "grad_norm": 19.013031005859375, "learning_rate": 2.0542857142857143e-05, "loss": 2.0829, "step": 14444 }, { "epoch": 58.95918367346939, "grad_norm": 31.354183197021484, "learning_rate": 2.0540816326530614e-05, "loss": 1.3407, "step": 14445 }, { "epoch": 58.96326530612245, "grad_norm": 25.531652450561523, "learning_rate": 2.0538775510204084e-05, "loss": 2.1104, "step": 14446 }, { "epoch": 58.96734693877551, "grad_norm": 37.78909683227539, "learning_rate": 2.053673469387755e-05, "loss": 1.2318, "step": 14447 }, { "epoch": 58.97142857142857, "grad_norm": 43.8112678527832, "learning_rate": 2.053469387755102e-05, "loss": 0.8777, "step": 14448 }, { "epoch": 58.97551020408163, "grad_norm": 30.862436294555664, "learning_rate": 2.0532653061224492e-05, "loss": 1.7948, "step": 14449 }, { "epoch": 58.97959183673469, "grad_norm": 24.43141746520996, "learning_rate": 2.053061224489796e-05, "loss": 2.1444, "step": 14450 }, { "epoch": 58.98367346938775, "grad_norm": 28.160079956054688, "learning_rate": 2.052857142857143e-05, "loss": 1.5122, "step": 14451 }, { "epoch": 58.987755102040815, "grad_norm": 29.627954483032227, "learning_rate": 2.0526530612244897e-05, "loss": 1.1787, "step": 14452 }, { "epoch": 58.99183673469388, "grad_norm": 27.039539337158203, "learning_rate": 2.052448979591837e-05, "loss": 2.2334, "step": 14453 }, { "epoch": 58.99591836734694, "grad_norm": 38.54659652709961, "learning_rate": 2.0522448979591838e-05, "loss": 1.1767, "step": 14454 }, { "epoch": 59.0, "grad_norm": 15.03075122833252, "learning_rate": 2.0520408163265305e-05, "loss": 2.5958, "step": 14455 }, { "epoch": 59.00408163265306, "grad_norm": 22.541271209716797, "learning_rate": 2.0518367346938776e-05, "loss": 1.7131, "step": 14456 }, { "epoch": 59.00816326530612, "grad_norm": 20.908771514892578, "learning_rate": 2.0516326530612246e-05, "loss": 2.0807, "step": 14457 }, { "epoch": 59.012244897959185, "grad_norm": 18.732587814331055, "learning_rate": 2.0514285714285717e-05, "loss": 2.5389, "step": 14458 }, { "epoch": 59.01632653061225, "grad_norm": 46.34751510620117, "learning_rate": 2.0512244897959184e-05, "loss": 0.5885, "step": 14459 }, { "epoch": 59.02040816326531, "grad_norm": 38.75695037841797, "learning_rate": 2.0510204081632654e-05, "loss": 0.5825, "step": 14460 }, { "epoch": 59.02448979591837, "grad_norm": 28.10372543334961, "learning_rate": 2.0508163265306125e-05, "loss": 1.892, "step": 14461 }, { "epoch": 59.02857142857143, "grad_norm": 29.111831665039062, "learning_rate": 2.0506122448979592e-05, "loss": 2.1947, "step": 14462 }, { "epoch": 59.03265306122449, "grad_norm": 43.99485778808594, "learning_rate": 2.0504081632653063e-05, "loss": 0.7548, "step": 14463 }, { "epoch": 59.03673469387755, "grad_norm": 30.105825424194336, "learning_rate": 2.0502040816326533e-05, "loss": 1.8973, "step": 14464 }, { "epoch": 59.04081632653061, "grad_norm": 37.261070251464844, "learning_rate": 2.05e-05, "loss": 0.6308, "step": 14465 }, { "epoch": 59.04489795918367, "grad_norm": 43.87989807128906, "learning_rate": 2.049795918367347e-05, "loss": 0.8005, "step": 14466 }, { "epoch": 59.04897959183673, "grad_norm": 45.504913330078125, "learning_rate": 2.0495918367346938e-05, "loss": 0.9424, "step": 14467 }, { "epoch": 59.053061224489795, "grad_norm": 26.019014358520508, "learning_rate": 2.049387755102041e-05, "loss": 0.2131, "step": 14468 }, { "epoch": 59.05714285714286, "grad_norm": 42.152076721191406, "learning_rate": 2.049183673469388e-05, "loss": 1.1686, "step": 14469 }, { "epoch": 59.06122448979592, "grad_norm": 32.366397857666016, "learning_rate": 2.0489795918367346e-05, "loss": 1.3106, "step": 14470 }, { "epoch": 59.06530612244898, "grad_norm": 34.76481628417969, "learning_rate": 2.0487755102040817e-05, "loss": 0.2635, "step": 14471 }, { "epoch": 59.06938775510204, "grad_norm": 29.14705467224121, "learning_rate": 2.0485714285714287e-05, "loss": 1.2112, "step": 14472 }, { "epoch": 59.073469387755104, "grad_norm": 30.32951545715332, "learning_rate": 2.0483673469387758e-05, "loss": 2.0802, "step": 14473 }, { "epoch": 59.077551020408166, "grad_norm": 19.06818389892578, "learning_rate": 2.0481632653061225e-05, "loss": 1.984, "step": 14474 }, { "epoch": 59.08163265306123, "grad_norm": 30.567493438720703, "learning_rate": 2.0479591836734695e-05, "loss": 0.7684, "step": 14475 }, { "epoch": 59.08571428571429, "grad_norm": 30.09627342224121, "learning_rate": 2.0477551020408166e-05, "loss": 1.4815, "step": 14476 }, { "epoch": 59.089795918367344, "grad_norm": 42.256858825683594, "learning_rate": 2.0475510204081633e-05, "loss": 1.0758, "step": 14477 }, { "epoch": 59.093877551020405, "grad_norm": 29.67367935180664, "learning_rate": 2.0473469387755103e-05, "loss": 1.76, "step": 14478 }, { "epoch": 59.09795918367347, "grad_norm": 35.47828674316406, "learning_rate": 2.047142857142857e-05, "loss": 1.3854, "step": 14479 }, { "epoch": 59.10204081632653, "grad_norm": 37.63603210449219, "learning_rate": 2.0469387755102044e-05, "loss": 1.6071, "step": 14480 }, { "epoch": 59.10612244897959, "grad_norm": 34.597084045410156, "learning_rate": 2.046734693877551e-05, "loss": 1.8857, "step": 14481 }, { "epoch": 59.11020408163265, "grad_norm": 45.59149169921875, "learning_rate": 2.046530612244898e-05, "loss": 1.6928, "step": 14482 }, { "epoch": 59.114285714285714, "grad_norm": 22.493242263793945, "learning_rate": 2.046326530612245e-05, "loss": 1.8272, "step": 14483 }, { "epoch": 59.118367346938776, "grad_norm": 39.37517166137695, "learning_rate": 2.046122448979592e-05, "loss": 0.8854, "step": 14484 }, { "epoch": 59.12244897959184, "grad_norm": 34.04744338989258, "learning_rate": 2.045918367346939e-05, "loss": 1.077, "step": 14485 }, { "epoch": 59.1265306122449, "grad_norm": 37.60344696044922, "learning_rate": 2.0457142857142857e-05, "loss": 1.1997, "step": 14486 }, { "epoch": 59.13061224489796, "grad_norm": 28.843059539794922, "learning_rate": 2.0455102040816328e-05, "loss": 0.9908, "step": 14487 }, { "epoch": 59.13469387755102, "grad_norm": 40.61507034301758, "learning_rate": 2.04530612244898e-05, "loss": 0.4949, "step": 14488 }, { "epoch": 59.138775510204084, "grad_norm": 21.970619201660156, "learning_rate": 2.0451020408163265e-05, "loss": 2.0391, "step": 14489 }, { "epoch": 59.142857142857146, "grad_norm": 38.12936019897461, "learning_rate": 2.0448979591836736e-05, "loss": 1.0539, "step": 14490 }, { "epoch": 59.14693877551021, "grad_norm": 39.79224395751953, "learning_rate": 2.0446938775510206e-05, "loss": 1.1794, "step": 14491 }, { "epoch": 59.15102040816326, "grad_norm": 34.795413970947266, "learning_rate": 2.0444897959183674e-05, "loss": 1.6957, "step": 14492 }, { "epoch": 59.155102040816324, "grad_norm": 39.9692268371582, "learning_rate": 2.0442857142857144e-05, "loss": 1.1168, "step": 14493 }, { "epoch": 59.159183673469386, "grad_norm": 21.444110870361328, "learning_rate": 2.044081632653061e-05, "loss": 2.3409, "step": 14494 }, { "epoch": 59.16326530612245, "grad_norm": 18.70961570739746, "learning_rate": 2.0438775510204082e-05, "loss": 1.9692, "step": 14495 }, { "epoch": 59.16734693877551, "grad_norm": 35.551025390625, "learning_rate": 2.0436734693877552e-05, "loss": 1.2183, "step": 14496 }, { "epoch": 59.17142857142857, "grad_norm": 31.957460403442383, "learning_rate": 2.0434693877551023e-05, "loss": 0.7823, "step": 14497 }, { "epoch": 59.17551020408163, "grad_norm": 28.633953094482422, "learning_rate": 2.043265306122449e-05, "loss": 1.7556, "step": 14498 }, { "epoch": 59.179591836734694, "grad_norm": 22.06824493408203, "learning_rate": 2.043061224489796e-05, "loss": 2.7047, "step": 14499 }, { "epoch": 59.183673469387756, "grad_norm": 34.76753616333008, "learning_rate": 2.042857142857143e-05, "loss": 1.0319, "step": 14500 }, { "epoch": 59.18775510204082, "grad_norm": 20.18297576904297, "learning_rate": 2.0426530612244898e-05, "loss": 2.0991, "step": 14501 }, { "epoch": 59.19183673469388, "grad_norm": 33.21517562866211, "learning_rate": 2.042448979591837e-05, "loss": 1.8596, "step": 14502 }, { "epoch": 59.19591836734694, "grad_norm": 31.89466667175293, "learning_rate": 2.042244897959184e-05, "loss": 1.8071, "step": 14503 }, { "epoch": 59.2, "grad_norm": 46.03926086425781, "learning_rate": 2.0420408163265306e-05, "loss": 1.3369, "step": 14504 }, { "epoch": 59.204081632653065, "grad_norm": 11.096477508544922, "learning_rate": 2.0418367346938777e-05, "loss": 3.2108, "step": 14505 }, { "epoch": 59.20816326530612, "grad_norm": 53.5891227722168, "learning_rate": 2.0416326530612244e-05, "loss": 0.2665, "step": 14506 }, { "epoch": 59.21224489795918, "grad_norm": 24.303483963012695, "learning_rate": 2.0414285714285718e-05, "loss": 2.0915, "step": 14507 }, { "epoch": 59.21632653061224, "grad_norm": 34.89690017700195, "learning_rate": 2.0412244897959185e-05, "loss": 1.1513, "step": 14508 }, { "epoch": 59.220408163265304, "grad_norm": 51.265724182128906, "learning_rate": 2.0410204081632652e-05, "loss": 1.4681, "step": 14509 }, { "epoch": 59.224489795918366, "grad_norm": 44.01893615722656, "learning_rate": 2.0408163265306123e-05, "loss": 0.9569, "step": 14510 }, { "epoch": 59.22857142857143, "grad_norm": 35.39427185058594, "learning_rate": 2.0406122448979593e-05, "loss": 1.5503, "step": 14511 }, { "epoch": 59.23265306122449, "grad_norm": 29.74632453918457, "learning_rate": 2.0404081632653064e-05, "loss": 1.2904, "step": 14512 }, { "epoch": 59.23673469387755, "grad_norm": 33.25933074951172, "learning_rate": 2.040204081632653e-05, "loss": 1.3974, "step": 14513 }, { "epoch": 59.24081632653061, "grad_norm": 33.118194580078125, "learning_rate": 2.04e-05, "loss": 2.0135, "step": 14514 }, { "epoch": 59.244897959183675, "grad_norm": 23.623422622680664, "learning_rate": 2.0397959183673472e-05, "loss": 2.0177, "step": 14515 }, { "epoch": 59.248979591836736, "grad_norm": 23.80025863647461, "learning_rate": 2.039591836734694e-05, "loss": 2.1022, "step": 14516 }, { "epoch": 59.2530612244898, "grad_norm": 56.49142074584961, "learning_rate": 2.039387755102041e-05, "loss": 0.9324, "step": 14517 }, { "epoch": 59.25714285714286, "grad_norm": 49.01517868041992, "learning_rate": 2.039183673469388e-05, "loss": 0.6818, "step": 14518 }, { "epoch": 59.26122448979592, "grad_norm": 37.1065788269043, "learning_rate": 2.038979591836735e-05, "loss": 0.7566, "step": 14519 }, { "epoch": 59.265306122448976, "grad_norm": 26.743925094604492, "learning_rate": 2.0387755102040817e-05, "loss": 1.8731, "step": 14520 }, { "epoch": 59.26938775510204, "grad_norm": 25.71792221069336, "learning_rate": 2.0385714285714285e-05, "loss": 1.8898, "step": 14521 }, { "epoch": 59.2734693877551, "grad_norm": 22.765567779541016, "learning_rate": 2.0383673469387755e-05, "loss": 2.1077, "step": 14522 }, { "epoch": 59.27755102040816, "grad_norm": 28.453927993774414, "learning_rate": 2.0381632653061226e-05, "loss": 1.7482, "step": 14523 }, { "epoch": 59.28163265306122, "grad_norm": 20.578142166137695, "learning_rate": 2.0379591836734696e-05, "loss": 2.4134, "step": 14524 }, { "epoch": 59.285714285714285, "grad_norm": 22.62549591064453, "learning_rate": 2.0377551020408163e-05, "loss": 1.981, "step": 14525 }, { "epoch": 59.289795918367346, "grad_norm": 38.94062042236328, "learning_rate": 2.0375510204081634e-05, "loss": 0.2472, "step": 14526 }, { "epoch": 59.29387755102041, "grad_norm": 32.751277923583984, "learning_rate": 2.0373469387755104e-05, "loss": 1.797, "step": 14527 }, { "epoch": 59.29795918367347, "grad_norm": 20.591028213500977, "learning_rate": 2.037142857142857e-05, "loss": 1.9772, "step": 14528 }, { "epoch": 59.30204081632653, "grad_norm": 43.95689010620117, "learning_rate": 2.0369387755102042e-05, "loss": 0.7578, "step": 14529 }, { "epoch": 59.30612244897959, "grad_norm": 31.082237243652344, "learning_rate": 2.0367346938775512e-05, "loss": 1.8685, "step": 14530 }, { "epoch": 59.310204081632655, "grad_norm": 40.77449417114258, "learning_rate": 2.036530612244898e-05, "loss": 0.8016, "step": 14531 }, { "epoch": 59.31428571428572, "grad_norm": 23.448671340942383, "learning_rate": 2.036326530612245e-05, "loss": 2.0483, "step": 14532 }, { "epoch": 59.31836734693878, "grad_norm": 26.021142959594727, "learning_rate": 2.0361224489795917e-05, "loss": 1.7733, "step": 14533 }, { "epoch": 59.32244897959184, "grad_norm": 20.87877082824707, "learning_rate": 2.035918367346939e-05, "loss": 2.0432, "step": 14534 }, { "epoch": 59.326530612244895, "grad_norm": 36.790252685546875, "learning_rate": 2.0357142857142858e-05, "loss": 1.3262, "step": 14535 }, { "epoch": 59.33061224489796, "grad_norm": 47.404930114746094, "learning_rate": 2.0355102040816325e-05, "loss": 1.0497, "step": 14536 }, { "epoch": 59.33469387755102, "grad_norm": 30.795114517211914, "learning_rate": 2.0353061224489796e-05, "loss": 1.594, "step": 14537 }, { "epoch": 59.33877551020408, "grad_norm": 21.821836471557617, "learning_rate": 2.0351020408163266e-05, "loss": 1.9527, "step": 14538 }, { "epoch": 59.34285714285714, "grad_norm": 18.76136589050293, "learning_rate": 2.0348979591836737e-05, "loss": 2.1578, "step": 14539 }, { "epoch": 59.3469387755102, "grad_norm": 28.892751693725586, "learning_rate": 2.0346938775510204e-05, "loss": 1.3125, "step": 14540 }, { "epoch": 59.351020408163265, "grad_norm": 29.691150665283203, "learning_rate": 2.0344897959183675e-05, "loss": 1.6884, "step": 14541 }, { "epoch": 59.35510204081633, "grad_norm": 50.191749572753906, "learning_rate": 2.0342857142857145e-05, "loss": 1.7663, "step": 14542 }, { "epoch": 59.35918367346939, "grad_norm": 33.759342193603516, "learning_rate": 2.0340816326530612e-05, "loss": 1.4726, "step": 14543 }, { "epoch": 59.36326530612245, "grad_norm": 24.634109497070312, "learning_rate": 2.0338775510204083e-05, "loss": 0.2279, "step": 14544 }, { "epoch": 59.36734693877551, "grad_norm": 19.3729305267334, "learning_rate": 2.0336734693877553e-05, "loss": 2.4268, "step": 14545 }, { "epoch": 59.371428571428574, "grad_norm": 28.597715377807617, "learning_rate": 2.0334693877551024e-05, "loss": 1.6051, "step": 14546 }, { "epoch": 59.375510204081635, "grad_norm": 40.05888748168945, "learning_rate": 2.033265306122449e-05, "loss": 1.4571, "step": 14547 }, { "epoch": 59.3795918367347, "grad_norm": 25.5787296295166, "learning_rate": 2.0330612244897958e-05, "loss": 1.7036, "step": 14548 }, { "epoch": 59.38367346938775, "grad_norm": 31.656246185302734, "learning_rate": 2.032857142857143e-05, "loss": 1.5422, "step": 14549 }, { "epoch": 59.38775510204081, "grad_norm": 20.8039608001709, "learning_rate": 2.03265306122449e-05, "loss": 1.9469, "step": 14550 }, { "epoch": 59.391836734693875, "grad_norm": 22.60296058654785, "learning_rate": 2.032448979591837e-05, "loss": 2.5695, "step": 14551 }, { "epoch": 59.39591836734694, "grad_norm": 24.662059783935547, "learning_rate": 2.0322448979591837e-05, "loss": 1.9732, "step": 14552 }, { "epoch": 59.4, "grad_norm": 34.597843170166016, "learning_rate": 2.0320408163265307e-05, "loss": 0.7553, "step": 14553 }, { "epoch": 59.40408163265306, "grad_norm": 40.20823669433594, "learning_rate": 2.0318367346938778e-05, "loss": 0.8138, "step": 14554 }, { "epoch": 59.40816326530612, "grad_norm": 42.54928207397461, "learning_rate": 2.0316326530612245e-05, "loss": 1.086, "step": 14555 }, { "epoch": 59.412244897959184, "grad_norm": 32.390472412109375, "learning_rate": 2.0314285714285715e-05, "loss": 0.7728, "step": 14556 }, { "epoch": 59.416326530612245, "grad_norm": 39.902809143066406, "learning_rate": 2.0312244897959186e-05, "loss": 1.1346, "step": 14557 }, { "epoch": 59.42040816326531, "grad_norm": 35.211570739746094, "learning_rate": 2.0310204081632653e-05, "loss": 1.5572, "step": 14558 }, { "epoch": 59.42448979591837, "grad_norm": 37.67086410522461, "learning_rate": 2.0308163265306123e-05, "loss": 0.7266, "step": 14559 }, { "epoch": 59.42857142857143, "grad_norm": 24.591264724731445, "learning_rate": 2.030612244897959e-05, "loss": 1.659, "step": 14560 }, { "epoch": 59.43265306122449, "grad_norm": 34.41080093383789, "learning_rate": 2.0304081632653065e-05, "loss": 0.5404, "step": 14561 }, { "epoch": 59.436734693877554, "grad_norm": 27.77263069152832, "learning_rate": 2.030204081632653e-05, "loss": 1.7246, "step": 14562 }, { "epoch": 59.440816326530616, "grad_norm": 30.54340934753418, "learning_rate": 2.0300000000000002e-05, "loss": 0.2327, "step": 14563 }, { "epoch": 59.44489795918367, "grad_norm": 37.79328155517578, "learning_rate": 2.029795918367347e-05, "loss": 1.1079, "step": 14564 }, { "epoch": 59.44897959183673, "grad_norm": 46.9593620300293, "learning_rate": 2.029591836734694e-05, "loss": 0.812, "step": 14565 }, { "epoch": 59.453061224489794, "grad_norm": 32.62362289428711, "learning_rate": 2.029387755102041e-05, "loss": 0.8718, "step": 14566 }, { "epoch": 59.457142857142856, "grad_norm": 28.645341873168945, "learning_rate": 2.0291836734693877e-05, "loss": 1.5508, "step": 14567 }, { "epoch": 59.46122448979592, "grad_norm": 26.78755760192871, "learning_rate": 2.0289795918367348e-05, "loss": 1.8949, "step": 14568 }, { "epoch": 59.46530612244898, "grad_norm": 40.71685791015625, "learning_rate": 2.028775510204082e-05, "loss": 0.7808, "step": 14569 }, { "epoch": 59.46938775510204, "grad_norm": 39.74605941772461, "learning_rate": 2.0285714285714286e-05, "loss": 0.9149, "step": 14570 }, { "epoch": 59.4734693877551, "grad_norm": 28.49668312072754, "learning_rate": 2.0283673469387756e-05, "loss": 0.9512, "step": 14571 }, { "epoch": 59.477551020408164, "grad_norm": 29.150747299194336, "learning_rate": 2.0281632653061227e-05, "loss": 1.8233, "step": 14572 }, { "epoch": 59.481632653061226, "grad_norm": 42.97678756713867, "learning_rate": 2.0279591836734697e-05, "loss": 0.7991, "step": 14573 }, { "epoch": 59.48571428571429, "grad_norm": 27.730215072631836, "learning_rate": 2.0277551020408164e-05, "loss": 1.9253, "step": 14574 }, { "epoch": 59.48979591836735, "grad_norm": 33.203800201416016, "learning_rate": 2.027551020408163e-05, "loss": 1.4997, "step": 14575 }, { "epoch": 59.49387755102041, "grad_norm": 31.622722625732422, "learning_rate": 2.0273469387755102e-05, "loss": 1.6858, "step": 14576 }, { "epoch": 59.49795918367347, "grad_norm": 38.69182586669922, "learning_rate": 2.0271428571428572e-05, "loss": 0.7122, "step": 14577 }, { "epoch": 59.50204081632653, "grad_norm": 48.865848541259766, "learning_rate": 2.0269387755102043e-05, "loss": 0.6887, "step": 14578 }, { "epoch": 59.50612244897959, "grad_norm": 58.96220016479492, "learning_rate": 2.026734693877551e-05, "loss": 0.6526, "step": 14579 }, { "epoch": 59.51020408163265, "grad_norm": 38.064544677734375, "learning_rate": 2.026530612244898e-05, "loss": 0.7915, "step": 14580 }, { "epoch": 59.51428571428571, "grad_norm": 38.53519058227539, "learning_rate": 2.026326530612245e-05, "loss": 1.1602, "step": 14581 }, { "epoch": 59.518367346938774, "grad_norm": 21.81465721130371, "learning_rate": 2.0261224489795918e-05, "loss": 1.9447, "step": 14582 }, { "epoch": 59.522448979591836, "grad_norm": 27.711721420288086, "learning_rate": 2.025918367346939e-05, "loss": 1.8634, "step": 14583 }, { "epoch": 59.5265306122449, "grad_norm": 19.147560119628906, "learning_rate": 2.025714285714286e-05, "loss": 2.4729, "step": 14584 }, { "epoch": 59.53061224489796, "grad_norm": 36.804893493652344, "learning_rate": 2.0255102040816326e-05, "loss": 1.4044, "step": 14585 }, { "epoch": 59.53469387755102, "grad_norm": 29.9130802154541, "learning_rate": 2.0253061224489797e-05, "loss": 1.6259, "step": 14586 }, { "epoch": 59.53877551020408, "grad_norm": 40.07453536987305, "learning_rate": 2.0251020408163264e-05, "loss": 1.3583, "step": 14587 }, { "epoch": 59.542857142857144, "grad_norm": 23.87749671936035, "learning_rate": 2.0248979591836738e-05, "loss": 1.2898, "step": 14588 }, { "epoch": 59.546938775510206, "grad_norm": 24.500926971435547, "learning_rate": 2.0246938775510205e-05, "loss": 2.0562, "step": 14589 }, { "epoch": 59.55102040816327, "grad_norm": 21.87116050720215, "learning_rate": 2.0244897959183676e-05, "loss": 1.8587, "step": 14590 }, { "epoch": 59.55510204081633, "grad_norm": 33.58361053466797, "learning_rate": 2.0242857142857143e-05, "loss": 1.2679, "step": 14591 }, { "epoch": 59.55918367346939, "grad_norm": 34.907257080078125, "learning_rate": 2.0240816326530613e-05, "loss": 1.7253, "step": 14592 }, { "epoch": 59.563265306122446, "grad_norm": 24.401613235473633, "learning_rate": 2.0238775510204084e-05, "loss": 1.875, "step": 14593 }, { "epoch": 59.56734693877551, "grad_norm": 30.201566696166992, "learning_rate": 2.023673469387755e-05, "loss": 1.3996, "step": 14594 }, { "epoch": 59.57142857142857, "grad_norm": 3.7933876514434814, "learning_rate": 2.023469387755102e-05, "loss": 0.0617, "step": 14595 }, { "epoch": 59.57551020408163, "grad_norm": 17.897232055664062, "learning_rate": 2.0232653061224492e-05, "loss": 2.5436, "step": 14596 }, { "epoch": 59.57959183673469, "grad_norm": 39.50078201293945, "learning_rate": 2.023061224489796e-05, "loss": 1.7011, "step": 14597 }, { "epoch": 59.583673469387755, "grad_norm": 48.40945816040039, "learning_rate": 2.022857142857143e-05, "loss": 0.6313, "step": 14598 }, { "epoch": 59.587755102040816, "grad_norm": 34.28467559814453, "learning_rate": 2.02265306122449e-05, "loss": 0.963, "step": 14599 }, { "epoch": 59.59183673469388, "grad_norm": 24.44293975830078, "learning_rate": 2.022448979591837e-05, "loss": 0.3263, "step": 14600 }, { "epoch": 59.59591836734694, "grad_norm": 23.58382225036621, "learning_rate": 2.0222448979591838e-05, "loss": 2.196, "step": 14601 }, { "epoch": 59.6, "grad_norm": 28.15789031982422, "learning_rate": 2.0220408163265305e-05, "loss": 0.3883, "step": 14602 }, { "epoch": 59.60408163265306, "grad_norm": 31.519622802734375, "learning_rate": 2.0218367346938775e-05, "loss": 1.5381, "step": 14603 }, { "epoch": 59.608163265306125, "grad_norm": 43.272705078125, "learning_rate": 2.0216326530612246e-05, "loss": 0.7276, "step": 14604 }, { "epoch": 59.61224489795919, "grad_norm": 24.102275848388672, "learning_rate": 2.0214285714285716e-05, "loss": 1.7459, "step": 14605 }, { "epoch": 59.61632653061225, "grad_norm": 47.247962951660156, "learning_rate": 2.0212244897959183e-05, "loss": 1.5963, "step": 14606 }, { "epoch": 59.6204081632653, "grad_norm": 49.27781295776367, "learning_rate": 2.0210204081632654e-05, "loss": 0.7407, "step": 14607 }, { "epoch": 59.624489795918365, "grad_norm": 27.199295043945312, "learning_rate": 2.0208163265306124e-05, "loss": 1.1105, "step": 14608 }, { "epoch": 59.628571428571426, "grad_norm": 27.6036434173584, "learning_rate": 2.020612244897959e-05, "loss": 1.3424, "step": 14609 }, { "epoch": 59.63265306122449, "grad_norm": 40.33688735961914, "learning_rate": 2.0204081632653062e-05, "loss": 0.951, "step": 14610 }, { "epoch": 59.63673469387755, "grad_norm": 37.40092849731445, "learning_rate": 2.0202040816326533e-05, "loss": 0.7942, "step": 14611 }, { "epoch": 59.64081632653061, "grad_norm": 19.001850128173828, "learning_rate": 2.0200000000000003e-05, "loss": 2.445, "step": 14612 }, { "epoch": 59.64489795918367, "grad_norm": 24.00244903564453, "learning_rate": 2.019795918367347e-05, "loss": 2.5214, "step": 14613 }, { "epoch": 59.648979591836735, "grad_norm": 25.740764617919922, "learning_rate": 2.0195918367346937e-05, "loss": 1.847, "step": 14614 }, { "epoch": 59.6530612244898, "grad_norm": 32.7957763671875, "learning_rate": 2.019387755102041e-05, "loss": 0.6123, "step": 14615 }, { "epoch": 59.65714285714286, "grad_norm": 38.76192855834961, "learning_rate": 2.019183673469388e-05, "loss": 0.5693, "step": 14616 }, { "epoch": 59.66122448979592, "grad_norm": 25.415918350219727, "learning_rate": 2.018979591836735e-05, "loss": 2.1726, "step": 14617 }, { "epoch": 59.66530612244898, "grad_norm": 32.325748443603516, "learning_rate": 2.0187755102040816e-05, "loss": 1.2431, "step": 14618 }, { "epoch": 59.66938775510204, "grad_norm": 41.149234771728516, "learning_rate": 2.0185714285714287e-05, "loss": 1.0868, "step": 14619 }, { "epoch": 59.673469387755105, "grad_norm": 23.355648040771484, "learning_rate": 2.0183673469387757e-05, "loss": 2.0034, "step": 14620 }, { "epoch": 59.67755102040816, "grad_norm": 25.872909545898438, "learning_rate": 2.0181632653061224e-05, "loss": 0.1542, "step": 14621 }, { "epoch": 59.68163265306122, "grad_norm": 20.932580947875977, "learning_rate": 2.0179591836734695e-05, "loss": 2.4135, "step": 14622 }, { "epoch": 59.68571428571428, "grad_norm": 19.69974136352539, "learning_rate": 2.0177551020408165e-05, "loss": 2.1405, "step": 14623 }, { "epoch": 59.689795918367345, "grad_norm": 26.16789436340332, "learning_rate": 2.0175510204081632e-05, "loss": 1.3532, "step": 14624 }, { "epoch": 59.69387755102041, "grad_norm": 36.563385009765625, "learning_rate": 2.0173469387755103e-05, "loss": 0.7566, "step": 14625 }, { "epoch": 59.69795918367347, "grad_norm": 29.699832916259766, "learning_rate": 2.0171428571428573e-05, "loss": 1.1839, "step": 14626 }, { "epoch": 59.70204081632653, "grad_norm": 19.82676124572754, "learning_rate": 2.0169387755102044e-05, "loss": 2.0213, "step": 14627 }, { "epoch": 59.70612244897959, "grad_norm": 17.65481185913086, "learning_rate": 2.016734693877551e-05, "loss": 2.6495, "step": 14628 }, { "epoch": 59.710204081632654, "grad_norm": 31.218669891357422, "learning_rate": 2.0165306122448978e-05, "loss": 1.1595, "step": 14629 }, { "epoch": 59.714285714285715, "grad_norm": 40.070823669433594, "learning_rate": 2.016326530612245e-05, "loss": 1.4945, "step": 14630 }, { "epoch": 59.71836734693878, "grad_norm": 33.16935348510742, "learning_rate": 2.016122448979592e-05, "loss": 2.0819, "step": 14631 }, { "epoch": 59.72244897959184, "grad_norm": 15.914169311523438, "learning_rate": 2.015918367346939e-05, "loss": 2.5564, "step": 14632 }, { "epoch": 59.7265306122449, "grad_norm": 43.54285430908203, "learning_rate": 2.0157142857142857e-05, "loss": 1.8022, "step": 14633 }, { "epoch": 59.73061224489796, "grad_norm": 29.31782341003418, "learning_rate": 2.0155102040816327e-05, "loss": 1.3349, "step": 14634 }, { "epoch": 59.734693877551024, "grad_norm": 17.591487884521484, "learning_rate": 2.0153061224489798e-05, "loss": 2.3847, "step": 14635 }, { "epoch": 59.73877551020408, "grad_norm": 14.521256446838379, "learning_rate": 2.0151020408163265e-05, "loss": 2.9896, "step": 14636 }, { "epoch": 59.74285714285714, "grad_norm": 25.61763572692871, "learning_rate": 2.0148979591836735e-05, "loss": 1.6352, "step": 14637 }, { "epoch": 59.7469387755102, "grad_norm": 38.43281555175781, "learning_rate": 2.0146938775510206e-05, "loss": 1.5166, "step": 14638 }, { "epoch": 59.751020408163264, "grad_norm": 26.737695693969727, "learning_rate": 2.0144897959183676e-05, "loss": 2.3358, "step": 14639 }, { "epoch": 59.755102040816325, "grad_norm": 35.93463134765625, "learning_rate": 2.0142857142857144e-05, "loss": 1.0499, "step": 14640 }, { "epoch": 59.75918367346939, "grad_norm": 38.60224533081055, "learning_rate": 2.014081632653061e-05, "loss": 1.3213, "step": 14641 }, { "epoch": 59.76326530612245, "grad_norm": 29.933521270751953, "learning_rate": 2.0138775510204085e-05, "loss": 1.3057, "step": 14642 }, { "epoch": 59.76734693877551, "grad_norm": 20.95151138305664, "learning_rate": 2.0136734693877552e-05, "loss": 2.0248, "step": 14643 }, { "epoch": 59.77142857142857, "grad_norm": 27.77915382385254, "learning_rate": 2.0134693877551022e-05, "loss": 1.6164, "step": 14644 }, { "epoch": 59.775510204081634, "grad_norm": 26.791717529296875, "learning_rate": 2.013265306122449e-05, "loss": 1.632, "step": 14645 }, { "epoch": 59.779591836734696, "grad_norm": 33.082984924316406, "learning_rate": 2.013061224489796e-05, "loss": 1.1793, "step": 14646 }, { "epoch": 59.78367346938776, "grad_norm": 39.14973068237305, "learning_rate": 2.012857142857143e-05, "loss": 1.1806, "step": 14647 }, { "epoch": 59.78775510204082, "grad_norm": 31.135276794433594, "learning_rate": 2.0126530612244898e-05, "loss": 1.3926, "step": 14648 }, { "epoch": 59.79183673469388, "grad_norm": 18.679052352905273, "learning_rate": 2.0124489795918368e-05, "loss": 2.4734, "step": 14649 }, { "epoch": 59.795918367346935, "grad_norm": 22.941852569580078, "learning_rate": 2.012244897959184e-05, "loss": 2.0104, "step": 14650 }, { "epoch": 59.8, "grad_norm": 38.47343444824219, "learning_rate": 2.0120408163265306e-05, "loss": 0.6447, "step": 14651 }, { "epoch": 59.80408163265306, "grad_norm": 44.51221466064453, "learning_rate": 2.0118367346938776e-05, "loss": 0.9184, "step": 14652 }, { "epoch": 59.80816326530612, "grad_norm": 30.800146102905273, "learning_rate": 2.0116326530612243e-05, "loss": 1.5035, "step": 14653 }, { "epoch": 59.81224489795918, "grad_norm": 31.881938934326172, "learning_rate": 2.0114285714285717e-05, "loss": 1.6521, "step": 14654 }, { "epoch": 59.816326530612244, "grad_norm": 26.30948829650879, "learning_rate": 2.0112244897959184e-05, "loss": 1.9007, "step": 14655 }, { "epoch": 59.820408163265306, "grad_norm": 32.03022003173828, "learning_rate": 2.0110204081632655e-05, "loss": 0.2809, "step": 14656 }, { "epoch": 59.82448979591837, "grad_norm": 23.899940490722656, "learning_rate": 2.0108163265306122e-05, "loss": 2.4537, "step": 14657 }, { "epoch": 59.82857142857143, "grad_norm": 39.6251335144043, "learning_rate": 2.0106122448979593e-05, "loss": 0.7933, "step": 14658 }, { "epoch": 59.83265306122449, "grad_norm": 22.990604400634766, "learning_rate": 2.0104081632653063e-05, "loss": 2.0129, "step": 14659 }, { "epoch": 59.83673469387755, "grad_norm": 35.65025329589844, "learning_rate": 2.010204081632653e-05, "loss": 0.3566, "step": 14660 }, { "epoch": 59.840816326530614, "grad_norm": 22.868459701538086, "learning_rate": 2.01e-05, "loss": 2.0926, "step": 14661 }, { "epoch": 59.844897959183676, "grad_norm": 22.483013153076172, "learning_rate": 2.009795918367347e-05, "loss": 2.1876, "step": 14662 }, { "epoch": 59.84897959183674, "grad_norm": 22.088285446166992, "learning_rate": 2.009591836734694e-05, "loss": 1.6585, "step": 14663 }, { "epoch": 59.85306122448979, "grad_norm": 24.784719467163086, "learning_rate": 2.009387755102041e-05, "loss": 2.0082, "step": 14664 }, { "epoch": 59.857142857142854, "grad_norm": 21.63052749633789, "learning_rate": 2.009183673469388e-05, "loss": 2.0634, "step": 14665 }, { "epoch": 59.861224489795916, "grad_norm": 44.31734848022461, "learning_rate": 2.008979591836735e-05, "loss": 1.3084, "step": 14666 }, { "epoch": 59.86530612244898, "grad_norm": 18.84845542907715, "learning_rate": 2.0087755102040817e-05, "loss": 2.1804, "step": 14667 }, { "epoch": 59.86938775510204, "grad_norm": 27.396591186523438, "learning_rate": 2.0085714285714284e-05, "loss": 1.9953, "step": 14668 }, { "epoch": 59.8734693877551, "grad_norm": 35.25820541381836, "learning_rate": 2.0083673469387758e-05, "loss": 1.0089, "step": 14669 }, { "epoch": 59.87755102040816, "grad_norm": 29.952564239501953, "learning_rate": 2.0081632653061225e-05, "loss": 1.8018, "step": 14670 }, { "epoch": 59.881632653061224, "grad_norm": 16.706111907958984, "learning_rate": 2.0079591836734696e-05, "loss": 2.5137, "step": 14671 }, { "epoch": 59.885714285714286, "grad_norm": 33.11524200439453, "learning_rate": 2.0077551020408163e-05, "loss": 1.4274, "step": 14672 }, { "epoch": 59.88979591836735, "grad_norm": 31.431013107299805, "learning_rate": 2.0075510204081633e-05, "loss": 0.2915, "step": 14673 }, { "epoch": 59.89387755102041, "grad_norm": 31.96652603149414, "learning_rate": 2.0073469387755104e-05, "loss": 1.7343, "step": 14674 }, { "epoch": 59.89795918367347, "grad_norm": 32.40903091430664, "learning_rate": 2.007142857142857e-05, "loss": 0.6853, "step": 14675 }, { "epoch": 59.90204081632653, "grad_norm": 41.29420471191406, "learning_rate": 2.006938775510204e-05, "loss": 1.091, "step": 14676 }, { "epoch": 59.906122448979595, "grad_norm": 31.423274993896484, "learning_rate": 2.0067346938775512e-05, "loss": 0.3995, "step": 14677 }, { "epoch": 59.910204081632656, "grad_norm": 24.52015495300293, "learning_rate": 2.0065306122448982e-05, "loss": 1.8396, "step": 14678 }, { "epoch": 59.91428571428571, "grad_norm": 34.104339599609375, "learning_rate": 2.006326530612245e-05, "loss": 1.5126, "step": 14679 }, { "epoch": 59.91836734693877, "grad_norm": 34.21719741821289, "learning_rate": 2.0061224489795917e-05, "loss": 1.2746, "step": 14680 }, { "epoch": 59.922448979591834, "grad_norm": 48.678611755371094, "learning_rate": 2.005918367346939e-05, "loss": 0.3035, "step": 14681 }, { "epoch": 59.926530612244896, "grad_norm": 27.82846450805664, "learning_rate": 2.0057142857142858e-05, "loss": 1.637, "step": 14682 }, { "epoch": 59.93061224489796, "grad_norm": 34.341026306152344, "learning_rate": 2.0055102040816328e-05, "loss": 1.588, "step": 14683 }, { "epoch": 59.93469387755102, "grad_norm": 35.24601745605469, "learning_rate": 2.0053061224489795e-05, "loss": 0.542, "step": 14684 }, { "epoch": 59.93877551020408, "grad_norm": 43.16026306152344, "learning_rate": 2.0051020408163266e-05, "loss": 0.8971, "step": 14685 }, { "epoch": 59.94285714285714, "grad_norm": 40.274757385253906, "learning_rate": 2.0048979591836736e-05, "loss": 1.2936, "step": 14686 }, { "epoch": 59.946938775510205, "grad_norm": 33.47318649291992, "learning_rate": 2.0046938775510204e-05, "loss": 0.6473, "step": 14687 }, { "epoch": 59.95102040816327, "grad_norm": 16.290987014770508, "learning_rate": 2.0044897959183674e-05, "loss": 2.8695, "step": 14688 }, { "epoch": 59.95510204081633, "grad_norm": 25.5930233001709, "learning_rate": 2.0042857142857145e-05, "loss": 2.0489, "step": 14689 }, { "epoch": 59.95918367346939, "grad_norm": 35.86928176879883, "learning_rate": 2.004081632653061e-05, "loss": 1.5724, "step": 14690 }, { "epoch": 59.96326530612245, "grad_norm": 30.770736694335938, "learning_rate": 2.0038775510204082e-05, "loss": 0.9452, "step": 14691 }, { "epoch": 59.96734693877551, "grad_norm": 43.097320556640625, "learning_rate": 2.0036734693877553e-05, "loss": 1.22, "step": 14692 }, { "epoch": 59.97142857142857, "grad_norm": 35.12722396850586, "learning_rate": 2.0034693877551023e-05, "loss": 0.3606, "step": 14693 }, { "epoch": 59.97551020408163, "grad_norm": 46.096126556396484, "learning_rate": 2.003265306122449e-05, "loss": 0.3187, "step": 14694 }, { "epoch": 59.97959183673469, "grad_norm": 13.14577865600586, "learning_rate": 2.0030612244897957e-05, "loss": 2.688, "step": 14695 }, { "epoch": 59.98367346938775, "grad_norm": 31.009227752685547, "learning_rate": 2.002857142857143e-05, "loss": 1.7287, "step": 14696 }, { "epoch": 59.987755102040815, "grad_norm": 30.760683059692383, "learning_rate": 2.00265306122449e-05, "loss": 1.8922, "step": 14697 }, { "epoch": 59.99183673469388, "grad_norm": 19.418310165405273, "learning_rate": 2.002448979591837e-05, "loss": 2.4937, "step": 14698 }, { "epoch": 59.99591836734694, "grad_norm": 26.677797317504883, "learning_rate": 2.0022448979591836e-05, "loss": 0.2496, "step": 14699 }, { "epoch": 60.0, "grad_norm": 41.38298416137695, "learning_rate": 2.002040816326531e-05, "loss": 0.809, "step": 14700 }, { "epoch": 60.00408163265306, "grad_norm": 28.6014347076416, "learning_rate": 2.0018367346938777e-05, "loss": 1.2042, "step": 14701 }, { "epoch": 60.00816326530612, "grad_norm": 20.54133415222168, "learning_rate": 2.0016326530612244e-05, "loss": 2.1161, "step": 14702 }, { "epoch": 60.012244897959185, "grad_norm": 39.83079147338867, "learning_rate": 2.0014285714285715e-05, "loss": 1.0477, "step": 14703 }, { "epoch": 60.01632653061225, "grad_norm": 23.839174270629883, "learning_rate": 2.0012244897959185e-05, "loss": 1.7406, "step": 14704 }, { "epoch": 60.02040816326531, "grad_norm": 35.014854431152344, "learning_rate": 2.0010204081632656e-05, "loss": 1.4146, "step": 14705 }, { "epoch": 60.02448979591837, "grad_norm": 23.37152099609375, "learning_rate": 2.0008163265306123e-05, "loss": 1.8784, "step": 14706 }, { "epoch": 60.02857142857143, "grad_norm": 41.71222686767578, "learning_rate": 2.000612244897959e-05, "loss": 0.6916, "step": 14707 }, { "epoch": 60.03265306122449, "grad_norm": 24.070768356323242, "learning_rate": 2.0004081632653064e-05, "loss": 1.8915, "step": 14708 }, { "epoch": 60.03673469387755, "grad_norm": 57.77893829345703, "learning_rate": 2.000204081632653e-05, "loss": 0.7299, "step": 14709 }, { "epoch": 60.04081632653061, "grad_norm": 25.101573944091797, "learning_rate": 2e-05, "loss": 1.9884, "step": 14710 }, { "epoch": 60.04489795918367, "grad_norm": 31.17038917541504, "learning_rate": 1.999795918367347e-05, "loss": 1.514, "step": 14711 }, { "epoch": 60.04897959183673, "grad_norm": 24.67905616760254, "learning_rate": 1.999591836734694e-05, "loss": 1.9096, "step": 14712 }, { "epoch": 60.053061224489795, "grad_norm": 25.136878967285156, "learning_rate": 1.999387755102041e-05, "loss": 1.708, "step": 14713 }, { "epoch": 60.05714285714286, "grad_norm": 23.48326873779297, "learning_rate": 1.9991836734693877e-05, "loss": 1.9717, "step": 14714 }, { "epoch": 60.06122448979592, "grad_norm": 38.51264953613281, "learning_rate": 1.9989795918367347e-05, "loss": 0.4998, "step": 14715 }, { "epoch": 60.06530612244898, "grad_norm": 19.88616180419922, "learning_rate": 1.9987755102040818e-05, "loss": 2.3961, "step": 14716 }, { "epoch": 60.06938775510204, "grad_norm": 21.391679763793945, "learning_rate": 1.9985714285714285e-05, "loss": 2.3194, "step": 14717 }, { "epoch": 60.073469387755104, "grad_norm": 17.49686622619629, "learning_rate": 1.9983673469387756e-05, "loss": 2.0775, "step": 14718 }, { "epoch": 60.077551020408166, "grad_norm": 41.238460540771484, "learning_rate": 1.9981632653061226e-05, "loss": 1.5472, "step": 14719 }, { "epoch": 60.08163265306123, "grad_norm": 24.78862190246582, "learning_rate": 1.9979591836734697e-05, "loss": 1.9071, "step": 14720 }, { "epoch": 60.08571428571429, "grad_norm": 38.90577697753906, "learning_rate": 1.9977551020408164e-05, "loss": 0.7343, "step": 14721 }, { "epoch": 60.089795918367344, "grad_norm": 48.91804122924805, "learning_rate": 1.9975510204081634e-05, "loss": 1.5151, "step": 14722 }, { "epoch": 60.093877551020405, "grad_norm": 37.480106353759766, "learning_rate": 1.9973469387755105e-05, "loss": 0.8623, "step": 14723 }, { "epoch": 60.09795918367347, "grad_norm": 35.838321685791016, "learning_rate": 1.9971428571428572e-05, "loss": 0.7283, "step": 14724 }, { "epoch": 60.10204081632653, "grad_norm": 14.376574516296387, "learning_rate": 1.9969387755102042e-05, "loss": 2.9301, "step": 14725 }, { "epoch": 60.10612244897959, "grad_norm": 21.2133731842041, "learning_rate": 1.996734693877551e-05, "loss": 2.3654, "step": 14726 }, { "epoch": 60.11020408163265, "grad_norm": 22.04370880126953, "learning_rate": 1.9965306122448983e-05, "loss": 2.4555, "step": 14727 }, { "epoch": 60.114285714285714, "grad_norm": 28.95000648498535, "learning_rate": 1.996326530612245e-05, "loss": 1.038, "step": 14728 }, { "epoch": 60.118367346938776, "grad_norm": 25.043533325195312, "learning_rate": 1.9961224489795918e-05, "loss": 2.0973, "step": 14729 }, { "epoch": 60.12244897959184, "grad_norm": 32.83892822265625, "learning_rate": 1.9959183673469388e-05, "loss": 1.1281, "step": 14730 }, { "epoch": 60.1265306122449, "grad_norm": 34.1063117980957, "learning_rate": 1.995714285714286e-05, "loss": 1.7225, "step": 14731 }, { "epoch": 60.13061224489796, "grad_norm": 33.34548568725586, "learning_rate": 1.995510204081633e-05, "loss": 1.5577, "step": 14732 }, { "epoch": 60.13469387755102, "grad_norm": 33.532596588134766, "learning_rate": 1.9953061224489796e-05, "loss": 1.1791, "step": 14733 }, { "epoch": 60.138775510204084, "grad_norm": 45.168052673339844, "learning_rate": 1.9951020408163263e-05, "loss": 0.7443, "step": 14734 }, { "epoch": 60.142857142857146, "grad_norm": 30.554058074951172, "learning_rate": 1.9948979591836737e-05, "loss": 1.5368, "step": 14735 }, { "epoch": 60.14693877551021, "grad_norm": 35.82551574707031, "learning_rate": 1.9946938775510204e-05, "loss": 0.2354, "step": 14736 }, { "epoch": 60.15102040816326, "grad_norm": 57.89506149291992, "learning_rate": 1.9944897959183675e-05, "loss": 0.3998, "step": 14737 }, { "epoch": 60.155102040816324, "grad_norm": 25.295698165893555, "learning_rate": 1.9942857142857142e-05, "loss": 1.7358, "step": 14738 }, { "epoch": 60.159183673469386, "grad_norm": 23.622516632080078, "learning_rate": 1.9940816326530613e-05, "loss": 0.2064, "step": 14739 }, { "epoch": 60.16326530612245, "grad_norm": 25.071067810058594, "learning_rate": 1.9938775510204083e-05, "loss": 2.3828, "step": 14740 }, { "epoch": 60.16734693877551, "grad_norm": 33.121578216552734, "learning_rate": 1.993673469387755e-05, "loss": 1.3211, "step": 14741 }, { "epoch": 60.17142857142857, "grad_norm": 33.13759994506836, "learning_rate": 1.993469387755102e-05, "loss": 1.828, "step": 14742 }, { "epoch": 60.17551020408163, "grad_norm": 29.5508975982666, "learning_rate": 1.993265306122449e-05, "loss": 1.9481, "step": 14743 }, { "epoch": 60.179591836734694, "grad_norm": 33.55131912231445, "learning_rate": 1.993061224489796e-05, "loss": 1.8795, "step": 14744 }, { "epoch": 60.183673469387756, "grad_norm": 26.94346809387207, "learning_rate": 1.992857142857143e-05, "loss": 1.7125, "step": 14745 }, { "epoch": 60.18775510204082, "grad_norm": 37.75902557373047, "learning_rate": 1.99265306122449e-05, "loss": 0.7315, "step": 14746 }, { "epoch": 60.19183673469388, "grad_norm": 20.587820053100586, "learning_rate": 1.992448979591837e-05, "loss": 1.9411, "step": 14747 }, { "epoch": 60.19591836734694, "grad_norm": 38.78495407104492, "learning_rate": 1.9922448979591837e-05, "loss": 1.4765, "step": 14748 }, { "epoch": 60.2, "grad_norm": 40.153663635253906, "learning_rate": 1.9920408163265308e-05, "loss": 0.6216, "step": 14749 }, { "epoch": 60.204081632653065, "grad_norm": 26.269617080688477, "learning_rate": 1.9918367346938778e-05, "loss": 1.907, "step": 14750 }, { "epoch": 60.20816326530612, "grad_norm": 42.63725662231445, "learning_rate": 1.9916326530612245e-05, "loss": 1.1371, "step": 14751 }, { "epoch": 60.21224489795918, "grad_norm": 63.58323287963867, "learning_rate": 1.9914285714285716e-05, "loss": 0.4256, "step": 14752 }, { "epoch": 60.21632653061224, "grad_norm": 34.62395095825195, "learning_rate": 1.9912244897959183e-05, "loss": 1.047, "step": 14753 }, { "epoch": 60.220408163265304, "grad_norm": 45.446495056152344, "learning_rate": 1.9910204081632657e-05, "loss": 0.5899, "step": 14754 }, { "epoch": 60.224489795918366, "grad_norm": 24.207069396972656, "learning_rate": 1.9908163265306124e-05, "loss": 1.9342, "step": 14755 }, { "epoch": 60.22857142857143, "grad_norm": 31.913618087768555, "learning_rate": 1.990612244897959e-05, "loss": 1.1492, "step": 14756 }, { "epoch": 60.23265306122449, "grad_norm": 29.965044021606445, "learning_rate": 1.990408163265306e-05, "loss": 1.1587, "step": 14757 }, { "epoch": 60.23673469387755, "grad_norm": 37.53242111206055, "learning_rate": 1.9902040816326532e-05, "loss": 0.8553, "step": 14758 }, { "epoch": 60.24081632653061, "grad_norm": 46.562313079833984, "learning_rate": 1.9900000000000003e-05, "loss": 1.7051, "step": 14759 }, { "epoch": 60.244897959183675, "grad_norm": 29.571317672729492, "learning_rate": 1.989795918367347e-05, "loss": 1.7565, "step": 14760 }, { "epoch": 60.248979591836736, "grad_norm": 18.063129425048828, "learning_rate": 1.9895918367346937e-05, "loss": 2.4906, "step": 14761 }, { "epoch": 60.2530612244898, "grad_norm": 29.522830963134766, "learning_rate": 1.989387755102041e-05, "loss": 1.8629, "step": 14762 }, { "epoch": 60.25714285714286, "grad_norm": 29.6392879486084, "learning_rate": 1.9891836734693878e-05, "loss": 1.5724, "step": 14763 }, { "epoch": 60.26122448979592, "grad_norm": 41.72633743286133, "learning_rate": 1.988979591836735e-05, "loss": 0.9838, "step": 14764 }, { "epoch": 60.265306122448976, "grad_norm": 27.630762100219727, "learning_rate": 1.9887755102040816e-05, "loss": 1.8807, "step": 14765 }, { "epoch": 60.26938775510204, "grad_norm": 32.760467529296875, "learning_rate": 1.9885714285714286e-05, "loss": 0.3589, "step": 14766 }, { "epoch": 60.2734693877551, "grad_norm": 28.226070404052734, "learning_rate": 1.9883673469387757e-05, "loss": 1.5284, "step": 14767 }, { "epoch": 60.27755102040816, "grad_norm": 38.308753967285156, "learning_rate": 1.9881632653061224e-05, "loss": 1.5845, "step": 14768 }, { "epoch": 60.28163265306122, "grad_norm": 43.98503112792969, "learning_rate": 1.9879591836734694e-05, "loss": 0.7697, "step": 14769 }, { "epoch": 60.285714285714285, "grad_norm": 23.981847763061523, "learning_rate": 1.9877551020408165e-05, "loss": 1.8165, "step": 14770 }, { "epoch": 60.289795918367346, "grad_norm": 22.722753524780273, "learning_rate": 1.9875510204081635e-05, "loss": 2.3851, "step": 14771 }, { "epoch": 60.29387755102041, "grad_norm": 30.791994094848633, "learning_rate": 1.9873469387755102e-05, "loss": 1.8962, "step": 14772 }, { "epoch": 60.29795918367347, "grad_norm": 32.2259407043457, "learning_rate": 1.9871428571428573e-05, "loss": 0.6153, "step": 14773 }, { "epoch": 60.30204081632653, "grad_norm": 53.280540466308594, "learning_rate": 1.9869387755102043e-05, "loss": 0.9609, "step": 14774 }, { "epoch": 60.30612244897959, "grad_norm": 28.32669448852539, "learning_rate": 1.986734693877551e-05, "loss": 0.3072, "step": 14775 }, { "epoch": 60.310204081632655, "grad_norm": 24.804183959960938, "learning_rate": 1.986530612244898e-05, "loss": 2.0406, "step": 14776 }, { "epoch": 60.31428571428572, "grad_norm": 39.62801742553711, "learning_rate": 1.986326530612245e-05, "loss": 0.2821, "step": 14777 }, { "epoch": 60.31836734693878, "grad_norm": 36.27521514892578, "learning_rate": 1.986122448979592e-05, "loss": 1.6971, "step": 14778 }, { "epoch": 60.32244897959184, "grad_norm": 36.26323318481445, "learning_rate": 1.985918367346939e-05, "loss": 0.9021, "step": 14779 }, { "epoch": 60.326530612244895, "grad_norm": 40.46192169189453, "learning_rate": 1.9857142857142856e-05, "loss": 1.0931, "step": 14780 }, { "epoch": 60.33061224489796, "grad_norm": 31.30030059814453, "learning_rate": 1.985510204081633e-05, "loss": 0.3293, "step": 14781 }, { "epoch": 60.33469387755102, "grad_norm": 47.56621551513672, "learning_rate": 1.9853061224489797e-05, "loss": 0.8077, "step": 14782 }, { "epoch": 60.33877551020408, "grad_norm": 26.115644454956055, "learning_rate": 1.9851020408163264e-05, "loss": 1.6297, "step": 14783 }, { "epoch": 60.34285714285714, "grad_norm": 29.543643951416016, "learning_rate": 1.9848979591836735e-05, "loss": 1.8342, "step": 14784 }, { "epoch": 60.3469387755102, "grad_norm": 30.925426483154297, "learning_rate": 1.9846938775510205e-05, "loss": 1.6435, "step": 14785 }, { "epoch": 60.351020408163265, "grad_norm": 29.58223533630371, "learning_rate": 1.9844897959183676e-05, "loss": 1.6958, "step": 14786 }, { "epoch": 60.35510204081633, "grad_norm": 21.35145378112793, "learning_rate": 1.9842857142857143e-05, "loss": 2.1587, "step": 14787 }, { "epoch": 60.35918367346939, "grad_norm": 31.22771644592285, "learning_rate": 1.984081632653061e-05, "loss": 0.7633, "step": 14788 }, { "epoch": 60.36326530612245, "grad_norm": 36.31178665161133, "learning_rate": 1.9838775510204084e-05, "loss": 1.2575, "step": 14789 }, { "epoch": 60.36734693877551, "grad_norm": 26.34796142578125, "learning_rate": 1.983673469387755e-05, "loss": 1.2322, "step": 14790 }, { "epoch": 60.371428571428574, "grad_norm": 37.17867660522461, "learning_rate": 1.9834693877551022e-05, "loss": 1.1207, "step": 14791 }, { "epoch": 60.375510204081635, "grad_norm": 40.35176467895508, "learning_rate": 1.983265306122449e-05, "loss": 1.1898, "step": 14792 }, { "epoch": 60.3795918367347, "grad_norm": 53.60362243652344, "learning_rate": 1.9830612244897963e-05, "loss": 0.7956, "step": 14793 }, { "epoch": 60.38367346938775, "grad_norm": 26.848186492919922, "learning_rate": 1.982857142857143e-05, "loss": 2.0719, "step": 14794 }, { "epoch": 60.38775510204081, "grad_norm": 28.63614273071289, "learning_rate": 1.9826530612244897e-05, "loss": 1.7766, "step": 14795 }, { "epoch": 60.391836734693875, "grad_norm": 35.4514274597168, "learning_rate": 1.9824489795918368e-05, "loss": 0.7598, "step": 14796 }, { "epoch": 60.39591836734694, "grad_norm": 29.77752685546875, "learning_rate": 1.9822448979591838e-05, "loss": 1.3311, "step": 14797 }, { "epoch": 60.4, "grad_norm": 32.10457992553711, "learning_rate": 1.982040816326531e-05, "loss": 1.4486, "step": 14798 }, { "epoch": 60.40408163265306, "grad_norm": 50.59089279174805, "learning_rate": 1.9818367346938776e-05, "loss": 0.6024, "step": 14799 }, { "epoch": 60.40816326530612, "grad_norm": 41.82783508300781, "learning_rate": 1.9816326530612246e-05, "loss": 0.8243, "step": 14800 }, { "epoch": 60.412244897959184, "grad_norm": 31.50291633605957, "learning_rate": 1.9814285714285717e-05, "loss": 1.4344, "step": 14801 }, { "epoch": 60.416326530612245, "grad_norm": 37.57386779785156, "learning_rate": 1.9812244897959184e-05, "loss": 0.5624, "step": 14802 }, { "epoch": 60.42040816326531, "grad_norm": 38.707340240478516, "learning_rate": 1.9810204081632654e-05, "loss": 1.0141, "step": 14803 }, { "epoch": 60.42448979591837, "grad_norm": 35.757816314697266, "learning_rate": 1.9808163265306125e-05, "loss": 1.4008, "step": 14804 }, { "epoch": 60.42857142857143, "grad_norm": 25.729482650756836, "learning_rate": 1.9806122448979592e-05, "loss": 2.4123, "step": 14805 }, { "epoch": 60.43265306122449, "grad_norm": 37.636573791503906, "learning_rate": 1.9804081632653063e-05, "loss": 0.2652, "step": 14806 }, { "epoch": 60.436734693877554, "grad_norm": 20.97486114501953, "learning_rate": 1.980204081632653e-05, "loss": 2.0153, "step": 14807 }, { "epoch": 60.440816326530616, "grad_norm": 29.729524612426758, "learning_rate": 1.9800000000000004e-05, "loss": 1.6181, "step": 14808 }, { "epoch": 60.44489795918367, "grad_norm": 27.595630645751953, "learning_rate": 1.979795918367347e-05, "loss": 1.3076, "step": 14809 }, { "epoch": 60.44897959183673, "grad_norm": 28.2786865234375, "learning_rate": 1.9795918367346938e-05, "loss": 0.3616, "step": 14810 }, { "epoch": 60.453061224489794, "grad_norm": 48.627384185791016, "learning_rate": 1.979387755102041e-05, "loss": 0.2538, "step": 14811 }, { "epoch": 60.457142857142856, "grad_norm": 22.701847076416016, "learning_rate": 1.979183673469388e-05, "loss": 1.6148, "step": 14812 }, { "epoch": 60.46122448979592, "grad_norm": 24.786745071411133, "learning_rate": 1.978979591836735e-05, "loss": 1.2231, "step": 14813 }, { "epoch": 60.46530612244898, "grad_norm": 26.16187286376953, "learning_rate": 1.9787755102040816e-05, "loss": 2.1949, "step": 14814 }, { "epoch": 60.46938775510204, "grad_norm": 22.60343360900879, "learning_rate": 1.9785714285714287e-05, "loss": 2.0185, "step": 14815 }, { "epoch": 60.4734693877551, "grad_norm": 30.76664161682129, "learning_rate": 1.9783673469387757e-05, "loss": 1.7477, "step": 14816 }, { "epoch": 60.477551020408164, "grad_norm": 36.223140716552734, "learning_rate": 1.9781632653061225e-05, "loss": 0.7109, "step": 14817 }, { "epoch": 60.481632653061226, "grad_norm": 20.238872528076172, "learning_rate": 1.9779591836734695e-05, "loss": 1.9939, "step": 14818 }, { "epoch": 60.48571428571429, "grad_norm": 25.57234001159668, "learning_rate": 1.9777551020408162e-05, "loss": 0.234, "step": 14819 }, { "epoch": 60.48979591836735, "grad_norm": 44.5155029296875, "learning_rate": 1.9775510204081636e-05, "loss": 0.5696, "step": 14820 }, { "epoch": 60.49387755102041, "grad_norm": 31.071346282958984, "learning_rate": 1.9773469387755103e-05, "loss": 1.2992, "step": 14821 }, { "epoch": 60.49795918367347, "grad_norm": 32.027706146240234, "learning_rate": 1.977142857142857e-05, "loss": 0.2467, "step": 14822 }, { "epoch": 60.50204081632653, "grad_norm": 29.41572380065918, "learning_rate": 1.976938775510204e-05, "loss": 2.3224, "step": 14823 }, { "epoch": 60.50612244897959, "grad_norm": 2.122802972793579, "learning_rate": 1.976734693877551e-05, "loss": 0.0447, "step": 14824 }, { "epoch": 60.51020408163265, "grad_norm": 31.202434539794922, "learning_rate": 1.9765306122448982e-05, "loss": 0.9551, "step": 14825 }, { "epoch": 60.51428571428571, "grad_norm": 71.77546691894531, "learning_rate": 1.976326530612245e-05, "loss": 1.4579, "step": 14826 }, { "epoch": 60.518367346938774, "grad_norm": 45.13373565673828, "learning_rate": 1.976122448979592e-05, "loss": 0.682, "step": 14827 }, { "epoch": 60.522448979591836, "grad_norm": 32.766231536865234, "learning_rate": 1.975918367346939e-05, "loss": 0.8305, "step": 14828 }, { "epoch": 60.5265306122449, "grad_norm": 46.11073303222656, "learning_rate": 1.9757142857142857e-05, "loss": 1.7114, "step": 14829 }, { "epoch": 60.53061224489796, "grad_norm": 29.630115509033203, "learning_rate": 1.9755102040816328e-05, "loss": 1.0956, "step": 14830 }, { "epoch": 60.53469387755102, "grad_norm": 32.22903060913086, "learning_rate": 1.9753061224489798e-05, "loss": 1.4762, "step": 14831 }, { "epoch": 60.53877551020408, "grad_norm": 26.371807098388672, "learning_rate": 1.9751020408163265e-05, "loss": 1.5848, "step": 14832 }, { "epoch": 60.542857142857144, "grad_norm": 32.61091995239258, "learning_rate": 1.9748979591836736e-05, "loss": 0.9458, "step": 14833 }, { "epoch": 60.546938775510206, "grad_norm": 29.112064361572266, "learning_rate": 1.9746938775510203e-05, "loss": 1.2715, "step": 14834 }, { "epoch": 60.55102040816327, "grad_norm": 33.507205963134766, "learning_rate": 1.9744897959183677e-05, "loss": 1.5237, "step": 14835 }, { "epoch": 60.55510204081633, "grad_norm": 37.66048812866211, "learning_rate": 1.9742857142857144e-05, "loss": 1.5313, "step": 14836 }, { "epoch": 60.55918367346939, "grad_norm": 40.81640625, "learning_rate": 1.9740816326530615e-05, "loss": 0.5961, "step": 14837 }, { "epoch": 60.563265306122446, "grad_norm": 21.400789260864258, "learning_rate": 1.973877551020408e-05, "loss": 2.005, "step": 14838 }, { "epoch": 60.56734693877551, "grad_norm": 40.13288116455078, "learning_rate": 1.9736734693877552e-05, "loss": 0.664, "step": 14839 }, { "epoch": 60.57142857142857, "grad_norm": 34.78828430175781, "learning_rate": 1.9734693877551023e-05, "loss": 0.7667, "step": 14840 }, { "epoch": 60.57551020408163, "grad_norm": 22.798826217651367, "learning_rate": 1.973265306122449e-05, "loss": 2.1121, "step": 14841 }, { "epoch": 60.57959183673469, "grad_norm": 34.18034744262695, "learning_rate": 1.973061224489796e-05, "loss": 0.6559, "step": 14842 }, { "epoch": 60.583673469387755, "grad_norm": 28.099483489990234, "learning_rate": 1.972857142857143e-05, "loss": 1.8593, "step": 14843 }, { "epoch": 60.587755102040816, "grad_norm": 32.35219955444336, "learning_rate": 1.9726530612244898e-05, "loss": 1.6486, "step": 14844 }, { "epoch": 60.59183673469388, "grad_norm": 43.04336929321289, "learning_rate": 1.972448979591837e-05, "loss": 1.5767, "step": 14845 }, { "epoch": 60.59591836734694, "grad_norm": 20.998281478881836, "learning_rate": 1.9722448979591836e-05, "loss": 1.9169, "step": 14846 }, { "epoch": 60.6, "grad_norm": 48.35021209716797, "learning_rate": 1.972040816326531e-05, "loss": 0.837, "step": 14847 }, { "epoch": 60.60408163265306, "grad_norm": 20.95734214782715, "learning_rate": 1.9718367346938777e-05, "loss": 2.46, "step": 14848 }, { "epoch": 60.608163265306125, "grad_norm": 25.938777923583984, "learning_rate": 1.9716326530612244e-05, "loss": 2.0419, "step": 14849 }, { "epoch": 60.61224489795919, "grad_norm": 15.375076293945312, "learning_rate": 1.9714285714285714e-05, "loss": 2.5114, "step": 14850 }, { "epoch": 60.61632653061225, "grad_norm": 27.32445526123047, "learning_rate": 1.9712244897959185e-05, "loss": 1.2141, "step": 14851 }, { "epoch": 60.6204081632653, "grad_norm": 44.88677978515625, "learning_rate": 1.9710204081632655e-05, "loss": 1.527, "step": 14852 }, { "epoch": 60.624489795918365, "grad_norm": 46.613563537597656, "learning_rate": 1.9708163265306122e-05, "loss": 0.4026, "step": 14853 }, { "epoch": 60.628571428571426, "grad_norm": 17.296232223510742, "learning_rate": 1.9706122448979593e-05, "loss": 2.3529, "step": 14854 }, { "epoch": 60.63265306122449, "grad_norm": 45.174537658691406, "learning_rate": 1.9704081632653063e-05, "loss": 0.6313, "step": 14855 }, { "epoch": 60.63673469387755, "grad_norm": 32.17926788330078, "learning_rate": 1.970204081632653e-05, "loss": 0.6167, "step": 14856 }, { "epoch": 60.64081632653061, "grad_norm": 35.02042007446289, "learning_rate": 1.97e-05, "loss": 1.045, "step": 14857 }, { "epoch": 60.64489795918367, "grad_norm": 27.67391014099121, "learning_rate": 1.969795918367347e-05, "loss": 1.7104, "step": 14858 }, { "epoch": 60.648979591836735, "grad_norm": 37.10515594482422, "learning_rate": 1.9695918367346942e-05, "loss": 1.8792, "step": 14859 }, { "epoch": 60.6530612244898, "grad_norm": 26.72231101989746, "learning_rate": 1.969387755102041e-05, "loss": 1.9549, "step": 14860 }, { "epoch": 60.65714285714286, "grad_norm": 27.531553268432617, "learning_rate": 1.9691836734693876e-05, "loss": 1.8493, "step": 14861 }, { "epoch": 60.66122448979592, "grad_norm": 31.7125186920166, "learning_rate": 1.9689795918367347e-05, "loss": 0.2688, "step": 14862 }, { "epoch": 60.66530612244898, "grad_norm": 40.096492767333984, "learning_rate": 1.9687755102040817e-05, "loss": 1.098, "step": 14863 }, { "epoch": 60.66938775510204, "grad_norm": 44.51883316040039, "learning_rate": 1.9685714285714288e-05, "loss": 0.7306, "step": 14864 }, { "epoch": 60.673469387755105, "grad_norm": 34.26435852050781, "learning_rate": 1.9683673469387755e-05, "loss": 1.2973, "step": 14865 }, { "epoch": 60.67755102040816, "grad_norm": 50.919979095458984, "learning_rate": 1.9681632653061226e-05, "loss": 0.914, "step": 14866 }, { "epoch": 60.68163265306122, "grad_norm": 22.236209869384766, "learning_rate": 1.9679591836734696e-05, "loss": 2.1146, "step": 14867 }, { "epoch": 60.68571428571428, "grad_norm": 13.7327880859375, "learning_rate": 1.9677551020408163e-05, "loss": 2.6436, "step": 14868 }, { "epoch": 60.689795918367345, "grad_norm": 44.12220001220703, "learning_rate": 1.9675510204081634e-05, "loss": 0.6234, "step": 14869 }, { "epoch": 60.69387755102041, "grad_norm": 30.009735107421875, "learning_rate": 1.9673469387755104e-05, "loss": 1.9652, "step": 14870 }, { "epoch": 60.69795918367347, "grad_norm": 31.403377532958984, "learning_rate": 1.967142857142857e-05, "loss": 2.0158, "step": 14871 }, { "epoch": 60.70204081632653, "grad_norm": 36.105316162109375, "learning_rate": 1.9669387755102042e-05, "loss": 0.6684, "step": 14872 }, { "epoch": 60.70612244897959, "grad_norm": 42.58486557006836, "learning_rate": 1.966734693877551e-05, "loss": 1.1947, "step": 14873 }, { "epoch": 60.710204081632654, "grad_norm": 31.74567985534668, "learning_rate": 1.9665306122448983e-05, "loss": 1.0499, "step": 14874 }, { "epoch": 60.714285714285715, "grad_norm": 29.383953094482422, "learning_rate": 1.966326530612245e-05, "loss": 1.8071, "step": 14875 }, { "epoch": 60.71836734693878, "grad_norm": 38.08372497558594, "learning_rate": 1.9661224489795917e-05, "loss": 0.9951, "step": 14876 }, { "epoch": 60.72244897959184, "grad_norm": 21.445955276489258, "learning_rate": 1.9659183673469388e-05, "loss": 2.7285, "step": 14877 }, { "epoch": 60.7265306122449, "grad_norm": 29.241117477416992, "learning_rate": 1.9657142857142858e-05, "loss": 2.053, "step": 14878 }, { "epoch": 60.73061224489796, "grad_norm": 20.643545150756836, "learning_rate": 1.965510204081633e-05, "loss": 1.9408, "step": 14879 }, { "epoch": 60.734693877551024, "grad_norm": 20.602519989013672, "learning_rate": 1.9653061224489796e-05, "loss": 2.0174, "step": 14880 }, { "epoch": 60.73877551020408, "grad_norm": 29.4907169342041, "learning_rate": 1.9651020408163266e-05, "loss": 1.7, "step": 14881 }, { "epoch": 60.74285714285714, "grad_norm": 37.33301544189453, "learning_rate": 1.9648979591836737e-05, "loss": 0.2452, "step": 14882 }, { "epoch": 60.7469387755102, "grad_norm": 34.8198356628418, "learning_rate": 1.9646938775510204e-05, "loss": 1.0026, "step": 14883 }, { "epoch": 60.751020408163264, "grad_norm": 37.5835075378418, "learning_rate": 1.9644897959183674e-05, "loss": 0.5838, "step": 14884 }, { "epoch": 60.755102040816325, "grad_norm": 23.839807510375977, "learning_rate": 1.9642857142857145e-05, "loss": 1.7107, "step": 14885 }, { "epoch": 60.75918367346939, "grad_norm": 17.804729461669922, "learning_rate": 1.9640816326530616e-05, "loss": 2.4665, "step": 14886 }, { "epoch": 60.76326530612245, "grad_norm": 39.31233596801758, "learning_rate": 1.9638775510204083e-05, "loss": 1.287, "step": 14887 }, { "epoch": 60.76734693877551, "grad_norm": 49.303306579589844, "learning_rate": 1.963673469387755e-05, "loss": 0.5881, "step": 14888 }, { "epoch": 60.77142857142857, "grad_norm": 22.291854858398438, "learning_rate": 1.963469387755102e-05, "loss": 2.0423, "step": 14889 }, { "epoch": 60.775510204081634, "grad_norm": 21.441177368164062, "learning_rate": 1.963265306122449e-05, "loss": 2.1207, "step": 14890 }, { "epoch": 60.779591836734696, "grad_norm": 41.46263122558594, "learning_rate": 1.963061224489796e-05, "loss": 0.8827, "step": 14891 }, { "epoch": 60.78367346938776, "grad_norm": 43.42036437988281, "learning_rate": 1.962857142857143e-05, "loss": 1.2165, "step": 14892 }, { "epoch": 60.78775510204082, "grad_norm": 19.017179489135742, "learning_rate": 1.96265306122449e-05, "loss": 2.5432, "step": 14893 }, { "epoch": 60.79183673469388, "grad_norm": 32.12361145019531, "learning_rate": 1.962448979591837e-05, "loss": 1.3719, "step": 14894 }, { "epoch": 60.795918367346935, "grad_norm": 21.755281448364258, "learning_rate": 1.9622448979591837e-05, "loss": 0.1519, "step": 14895 }, { "epoch": 60.8, "grad_norm": 38.57910919189453, "learning_rate": 1.9620408163265307e-05, "loss": 1.1327, "step": 14896 }, { "epoch": 60.80408163265306, "grad_norm": 41.52518081665039, "learning_rate": 1.9618367346938778e-05, "loss": 1.4677, "step": 14897 }, { "epoch": 60.80816326530612, "grad_norm": 34.028907775878906, "learning_rate": 1.9616326530612245e-05, "loss": 1.1827, "step": 14898 }, { "epoch": 60.81224489795918, "grad_norm": 39.907772064208984, "learning_rate": 1.9614285714285715e-05, "loss": 1.3419, "step": 14899 }, { "epoch": 60.816326530612244, "grad_norm": 22.338207244873047, "learning_rate": 1.9612244897959182e-05, "loss": 2.0572, "step": 14900 }, { "epoch": 60.820408163265306, "grad_norm": 8.97912311553955, "learning_rate": 1.9610204081632656e-05, "loss": 3.2225, "step": 14901 }, { "epoch": 60.82448979591837, "grad_norm": 21.739742279052734, "learning_rate": 1.9608163265306123e-05, "loss": 2.1211, "step": 14902 }, { "epoch": 60.82857142857143, "grad_norm": 32.05424880981445, "learning_rate": 1.9606122448979594e-05, "loss": 1.2987, "step": 14903 }, { "epoch": 60.83265306122449, "grad_norm": 37.25935363769531, "learning_rate": 1.960408163265306e-05, "loss": 1.0506, "step": 14904 }, { "epoch": 60.83673469387755, "grad_norm": 36.030052185058594, "learning_rate": 1.960204081632653e-05, "loss": 0.7214, "step": 14905 }, { "epoch": 60.840816326530614, "grad_norm": 34.57461929321289, "learning_rate": 1.9600000000000002e-05, "loss": 1.5837, "step": 14906 }, { "epoch": 60.844897959183676, "grad_norm": 26.745407104492188, "learning_rate": 1.959795918367347e-05, "loss": 1.8799, "step": 14907 }, { "epoch": 60.84897959183674, "grad_norm": 34.25203323364258, "learning_rate": 1.959591836734694e-05, "loss": 1.6564, "step": 14908 }, { "epoch": 60.85306122448979, "grad_norm": 31.57447052001953, "learning_rate": 1.959387755102041e-05, "loss": 1.3861, "step": 14909 }, { "epoch": 60.857142857142854, "grad_norm": 17.01935577392578, "learning_rate": 1.9591836734693877e-05, "loss": 2.6447, "step": 14910 }, { "epoch": 60.861224489795916, "grad_norm": 30.006824493408203, "learning_rate": 1.9589795918367348e-05, "loss": 1.9389, "step": 14911 }, { "epoch": 60.86530612244898, "grad_norm": 29.76368522644043, "learning_rate": 1.958775510204082e-05, "loss": 1.6418, "step": 14912 }, { "epoch": 60.86938775510204, "grad_norm": 36.066162109375, "learning_rate": 1.958571428571429e-05, "loss": 1.4317, "step": 14913 }, { "epoch": 60.8734693877551, "grad_norm": 35.052818298339844, "learning_rate": 1.9583673469387756e-05, "loss": 1.2258, "step": 14914 }, { "epoch": 60.87755102040816, "grad_norm": 17.23240852355957, "learning_rate": 1.9581632653061223e-05, "loss": 2.5352, "step": 14915 }, { "epoch": 60.881632653061224, "grad_norm": 28.665254592895508, "learning_rate": 1.9579591836734694e-05, "loss": 2.1382, "step": 14916 }, { "epoch": 60.885714285714286, "grad_norm": 42.8066520690918, "learning_rate": 1.9577551020408164e-05, "loss": 1.5456, "step": 14917 }, { "epoch": 60.88979591836735, "grad_norm": 25.581872940063477, "learning_rate": 1.9575510204081635e-05, "loss": 1.777, "step": 14918 }, { "epoch": 60.89387755102041, "grad_norm": 39.55072784423828, "learning_rate": 1.9573469387755102e-05, "loss": 0.8399, "step": 14919 }, { "epoch": 60.89795918367347, "grad_norm": 24.215829849243164, "learning_rate": 1.9571428571428572e-05, "loss": 2.0938, "step": 14920 }, { "epoch": 60.90204081632653, "grad_norm": 39.872840881347656, "learning_rate": 1.9569387755102043e-05, "loss": 1.4634, "step": 14921 }, { "epoch": 60.906122448979595, "grad_norm": 40.90892791748047, "learning_rate": 1.956734693877551e-05, "loss": 0.829, "step": 14922 }, { "epoch": 60.910204081632656, "grad_norm": 29.614341735839844, "learning_rate": 1.956530612244898e-05, "loss": 1.8067, "step": 14923 }, { "epoch": 60.91428571428571, "grad_norm": 28.885215759277344, "learning_rate": 1.956326530612245e-05, "loss": 1.6957, "step": 14924 }, { "epoch": 60.91836734693877, "grad_norm": 36.459800720214844, "learning_rate": 1.9561224489795918e-05, "loss": 1.7023, "step": 14925 }, { "epoch": 60.922448979591834, "grad_norm": 34.98222351074219, "learning_rate": 1.955918367346939e-05, "loss": 0.932, "step": 14926 }, { "epoch": 60.926530612244896, "grad_norm": 37.15756607055664, "learning_rate": 1.9557142857142856e-05, "loss": 1.037, "step": 14927 }, { "epoch": 60.93061224489796, "grad_norm": 49.11848449707031, "learning_rate": 1.955510204081633e-05, "loss": 1.0985, "step": 14928 }, { "epoch": 60.93469387755102, "grad_norm": 35.19624710083008, "learning_rate": 1.9553061224489797e-05, "loss": 0.7207, "step": 14929 }, { "epoch": 60.93877551020408, "grad_norm": 30.37981414794922, "learning_rate": 1.9551020408163267e-05, "loss": 1.1459, "step": 14930 }, { "epoch": 60.94285714285714, "grad_norm": 17.44118881225586, "learning_rate": 1.9548979591836734e-05, "loss": 2.3989, "step": 14931 }, { "epoch": 60.946938775510205, "grad_norm": 39.106258392333984, "learning_rate": 1.9546938775510205e-05, "loss": 0.7621, "step": 14932 }, { "epoch": 60.95102040816327, "grad_norm": 34.78974151611328, "learning_rate": 1.9544897959183675e-05, "loss": 0.7558, "step": 14933 }, { "epoch": 60.95510204081633, "grad_norm": 26.715892791748047, "learning_rate": 1.9542857142857143e-05, "loss": 1.7805, "step": 14934 }, { "epoch": 60.95918367346939, "grad_norm": 19.80733299255371, "learning_rate": 1.9540816326530613e-05, "loss": 1.8519, "step": 14935 }, { "epoch": 60.96326530612245, "grad_norm": 15.286402702331543, "learning_rate": 1.9538775510204084e-05, "loss": 2.8039, "step": 14936 }, { "epoch": 60.96734693877551, "grad_norm": 32.07541275024414, "learning_rate": 1.953673469387755e-05, "loss": 1.5671, "step": 14937 }, { "epoch": 60.97142857142857, "grad_norm": 20.486778259277344, "learning_rate": 1.953469387755102e-05, "loss": 2.5431, "step": 14938 }, { "epoch": 60.97551020408163, "grad_norm": 24.591184616088867, "learning_rate": 1.9532653061224492e-05, "loss": 2.1344, "step": 14939 }, { "epoch": 60.97959183673469, "grad_norm": 37.90061950683594, "learning_rate": 1.9530612244897962e-05, "loss": 1.9252, "step": 14940 }, { "epoch": 60.98367346938775, "grad_norm": 32.761619567871094, "learning_rate": 1.952857142857143e-05, "loss": 1.3622, "step": 14941 }, { "epoch": 60.987755102040815, "grad_norm": 32.69620895385742, "learning_rate": 1.9526530612244897e-05, "loss": 1.6722, "step": 14942 }, { "epoch": 60.99183673469388, "grad_norm": 33.371742248535156, "learning_rate": 1.9524489795918367e-05, "loss": 1.3662, "step": 14943 }, { "epoch": 60.99591836734694, "grad_norm": 40.1732063293457, "learning_rate": 1.9522448979591838e-05, "loss": 1.3363, "step": 14944 }, { "epoch": 61.0, "grad_norm": 26.75432586669922, "learning_rate": 1.9520408163265308e-05, "loss": 2.0382, "step": 14945 }, { "epoch": 61.00408163265306, "grad_norm": 24.052419662475586, "learning_rate": 1.9518367346938775e-05, "loss": 1.1952, "step": 14946 }, { "epoch": 61.00816326530612, "grad_norm": 27.397937774658203, "learning_rate": 1.9516326530612246e-05, "loss": 1.7866, "step": 14947 }, { "epoch": 61.012244897959185, "grad_norm": 33.32393264770508, "learning_rate": 1.9514285714285716e-05, "loss": 1.2691, "step": 14948 }, { "epoch": 61.01632653061225, "grad_norm": 34.660396575927734, "learning_rate": 1.9512244897959183e-05, "loss": 1.1654, "step": 14949 }, { "epoch": 61.02040816326531, "grad_norm": 23.47490119934082, "learning_rate": 1.9510204081632654e-05, "loss": 2.2248, "step": 14950 }, { "epoch": 61.02448979591837, "grad_norm": 41.78560256958008, "learning_rate": 1.9508163265306124e-05, "loss": 0.9669, "step": 14951 }, { "epoch": 61.02857142857143, "grad_norm": 32.7418212890625, "learning_rate": 1.9506122448979595e-05, "loss": 1.4371, "step": 14952 }, { "epoch": 61.03265306122449, "grad_norm": 30.617677688598633, "learning_rate": 1.9504081632653062e-05, "loss": 0.6247, "step": 14953 }, { "epoch": 61.03673469387755, "grad_norm": 26.3444881439209, "learning_rate": 1.950204081632653e-05, "loss": 1.7352, "step": 14954 }, { "epoch": 61.04081632653061, "grad_norm": 30.099843978881836, "learning_rate": 1.9500000000000003e-05, "loss": 1.4905, "step": 14955 }, { "epoch": 61.04489795918367, "grad_norm": 36.55332946777344, "learning_rate": 1.949795918367347e-05, "loss": 1.1543, "step": 14956 }, { "epoch": 61.04897959183673, "grad_norm": 18.35079574584961, "learning_rate": 1.949591836734694e-05, "loss": 2.4268, "step": 14957 }, { "epoch": 61.053061224489795, "grad_norm": 39.73329544067383, "learning_rate": 1.9493877551020408e-05, "loss": 0.7134, "step": 14958 }, { "epoch": 61.05714285714286, "grad_norm": 21.501323699951172, "learning_rate": 1.949183673469388e-05, "loss": 1.8678, "step": 14959 }, { "epoch": 61.06122448979592, "grad_norm": 30.123708724975586, "learning_rate": 1.948979591836735e-05, "loss": 1.7965, "step": 14960 }, { "epoch": 61.06530612244898, "grad_norm": 28.66166877746582, "learning_rate": 1.9487755102040816e-05, "loss": 1.1085, "step": 14961 }, { "epoch": 61.06938775510204, "grad_norm": 15.441080093383789, "learning_rate": 1.9485714285714286e-05, "loss": 2.4337, "step": 14962 }, { "epoch": 61.073469387755104, "grad_norm": 39.62030029296875, "learning_rate": 1.9483673469387757e-05, "loss": 0.678, "step": 14963 }, { "epoch": 61.077551020408166, "grad_norm": 32.870914459228516, "learning_rate": 1.9481632653061224e-05, "loss": 0.5877, "step": 14964 }, { "epoch": 61.08163265306123, "grad_norm": 31.960857391357422, "learning_rate": 1.9479591836734695e-05, "loss": 1.7675, "step": 14965 }, { "epoch": 61.08571428571429, "grad_norm": 32.67567825317383, "learning_rate": 1.9477551020408165e-05, "loss": 1.5162, "step": 14966 }, { "epoch": 61.089795918367344, "grad_norm": 39.506126403808594, "learning_rate": 1.9475510204081636e-05, "loss": 0.7326, "step": 14967 }, { "epoch": 61.093877551020405, "grad_norm": 36.449676513671875, "learning_rate": 1.9473469387755103e-05, "loss": 0.8517, "step": 14968 }, { "epoch": 61.09795918367347, "grad_norm": 28.46704864501953, "learning_rate": 1.947142857142857e-05, "loss": 1.4057, "step": 14969 }, { "epoch": 61.10204081632653, "grad_norm": 25.393394470214844, "learning_rate": 1.946938775510204e-05, "loss": 1.58, "step": 14970 }, { "epoch": 61.10612244897959, "grad_norm": 31.454544067382812, "learning_rate": 1.946734693877551e-05, "loss": 1.1619, "step": 14971 }, { "epoch": 61.11020408163265, "grad_norm": 47.391441345214844, "learning_rate": 1.946530612244898e-05, "loss": 1.6014, "step": 14972 }, { "epoch": 61.114285714285714, "grad_norm": 29.254125595092773, "learning_rate": 1.946326530612245e-05, "loss": 1.6904, "step": 14973 }, { "epoch": 61.118367346938776, "grad_norm": 35.33570098876953, "learning_rate": 1.946122448979592e-05, "loss": 1.1713, "step": 14974 }, { "epoch": 61.12244897959184, "grad_norm": 27.0948486328125, "learning_rate": 1.945918367346939e-05, "loss": 1.8103, "step": 14975 }, { "epoch": 61.1265306122449, "grad_norm": 39.25185012817383, "learning_rate": 1.9457142857142857e-05, "loss": 0.9921, "step": 14976 }, { "epoch": 61.13061224489796, "grad_norm": 22.551389694213867, "learning_rate": 1.9455102040816327e-05, "loss": 1.943, "step": 14977 }, { "epoch": 61.13469387755102, "grad_norm": 45.072391510009766, "learning_rate": 1.9453061224489798e-05, "loss": 1.0684, "step": 14978 }, { "epoch": 61.138775510204084, "grad_norm": 51.92067337036133, "learning_rate": 1.9451020408163268e-05, "loss": 0.9475, "step": 14979 }, { "epoch": 61.142857142857146, "grad_norm": 28.1011962890625, "learning_rate": 1.9448979591836735e-05, "loss": 1.7663, "step": 14980 }, { "epoch": 61.14693877551021, "grad_norm": 35.86972427368164, "learning_rate": 1.9446938775510203e-05, "loss": 1.1618, "step": 14981 }, { "epoch": 61.15102040816326, "grad_norm": 42.585044860839844, "learning_rate": 1.9444897959183676e-05, "loss": 0.5897, "step": 14982 }, { "epoch": 61.155102040816324, "grad_norm": 42.768829345703125, "learning_rate": 1.9442857142857144e-05, "loss": 1.5876, "step": 14983 }, { "epoch": 61.159183673469386, "grad_norm": 19.797683715820312, "learning_rate": 1.9440816326530614e-05, "loss": 2.3478, "step": 14984 }, { "epoch": 61.16326530612245, "grad_norm": 29.34148406982422, "learning_rate": 1.943877551020408e-05, "loss": 1.3126, "step": 14985 }, { "epoch": 61.16734693877551, "grad_norm": 32.89227294921875, "learning_rate": 1.943673469387755e-05, "loss": 0.6116, "step": 14986 }, { "epoch": 61.17142857142857, "grad_norm": 28.772218704223633, "learning_rate": 1.9434693877551022e-05, "loss": 1.2759, "step": 14987 }, { "epoch": 61.17551020408163, "grad_norm": 43.491546630859375, "learning_rate": 1.943265306122449e-05, "loss": 1.6885, "step": 14988 }, { "epoch": 61.179591836734694, "grad_norm": 30.813827514648438, "learning_rate": 1.943061224489796e-05, "loss": 1.8572, "step": 14989 }, { "epoch": 61.183673469387756, "grad_norm": 24.620452880859375, "learning_rate": 1.942857142857143e-05, "loss": 2.003, "step": 14990 }, { "epoch": 61.18775510204082, "grad_norm": 24.831010818481445, "learning_rate": 1.9426530612244897e-05, "loss": 1.8939, "step": 14991 }, { "epoch": 61.19183673469388, "grad_norm": 40.691043853759766, "learning_rate": 1.9424489795918368e-05, "loss": 1.8325, "step": 14992 }, { "epoch": 61.19591836734694, "grad_norm": 26.504915237426758, "learning_rate": 1.942244897959184e-05, "loss": 1.6625, "step": 14993 }, { "epoch": 61.2, "grad_norm": 54.89020538330078, "learning_rate": 1.942040816326531e-05, "loss": 0.7317, "step": 14994 }, { "epoch": 61.204081632653065, "grad_norm": 23.54068946838379, "learning_rate": 1.9418367346938776e-05, "loss": 1.9313, "step": 14995 }, { "epoch": 61.20816326530612, "grad_norm": 63.55466079711914, "learning_rate": 1.9416326530612247e-05, "loss": 0.7005, "step": 14996 }, { "epoch": 61.21224489795918, "grad_norm": 5.924291610717773, "learning_rate": 1.9414285714285714e-05, "loss": 0.0464, "step": 14997 }, { "epoch": 61.21632653061224, "grad_norm": 17.3558292388916, "learning_rate": 1.9412244897959184e-05, "loss": 2.4389, "step": 14998 }, { "epoch": 61.220408163265304, "grad_norm": 24.18872833251953, "learning_rate": 1.9410204081632655e-05, "loss": 0.2345, "step": 14999 }, { "epoch": 61.224489795918366, "grad_norm": 15.509238243103027, "learning_rate": 1.9408163265306122e-05, "loss": 2.7148, "step": 15000 }, { "epoch": 61.22857142857143, "grad_norm": 23.680082321166992, "learning_rate": 1.9406122448979592e-05, "loss": 1.6886, "step": 15001 }, { "epoch": 61.23265306122449, "grad_norm": 27.38319206237793, "learning_rate": 1.9404081632653063e-05, "loss": 1.2303, "step": 15002 }, { "epoch": 61.23673469387755, "grad_norm": 20.764204025268555, "learning_rate": 1.940204081632653e-05, "loss": 0.134, "step": 15003 }, { "epoch": 61.24081632653061, "grad_norm": 31.66935157775879, "learning_rate": 1.94e-05, "loss": 1.7497, "step": 15004 }, { "epoch": 61.244897959183675, "grad_norm": 36.612613677978516, "learning_rate": 1.939795918367347e-05, "loss": 1.1115, "step": 15005 }, { "epoch": 61.248979591836736, "grad_norm": 37.95356750488281, "learning_rate": 1.939591836734694e-05, "loss": 1.0973, "step": 15006 }, { "epoch": 61.2530612244898, "grad_norm": 49.5533561706543, "learning_rate": 1.939387755102041e-05, "loss": 0.8414, "step": 15007 }, { "epoch": 61.25714285714286, "grad_norm": 21.61667823791504, "learning_rate": 1.9391836734693876e-05, "loss": 2.3253, "step": 15008 }, { "epoch": 61.26122448979592, "grad_norm": 35.9632682800293, "learning_rate": 1.938979591836735e-05, "loss": 1.2444, "step": 15009 }, { "epoch": 61.265306122448976, "grad_norm": 32.810977935791016, "learning_rate": 1.9387755102040817e-05, "loss": 1.9188, "step": 15010 }, { "epoch": 61.26938775510204, "grad_norm": 43.875789642333984, "learning_rate": 1.9385714285714287e-05, "loss": 1.0392, "step": 15011 }, { "epoch": 61.2734693877551, "grad_norm": 23.03438949584961, "learning_rate": 1.9383673469387755e-05, "loss": 1.953, "step": 15012 }, { "epoch": 61.27755102040816, "grad_norm": 25.081356048583984, "learning_rate": 1.9381632653061225e-05, "loss": 2.0859, "step": 15013 }, { "epoch": 61.28163265306122, "grad_norm": 52.95274353027344, "learning_rate": 1.9379591836734696e-05, "loss": 0.6434, "step": 15014 }, { "epoch": 61.285714285714285, "grad_norm": 54.626033782958984, "learning_rate": 1.9377551020408163e-05, "loss": 0.7882, "step": 15015 }, { "epoch": 61.289795918367346, "grad_norm": 31.73244857788086, "learning_rate": 1.9375510204081633e-05, "loss": 1.3339, "step": 15016 }, { "epoch": 61.29387755102041, "grad_norm": 10.02429485321045, "learning_rate": 1.9373469387755104e-05, "loss": 3.1946, "step": 15017 }, { "epoch": 61.29795918367347, "grad_norm": 30.644088745117188, "learning_rate": 1.9371428571428574e-05, "loss": 1.9607, "step": 15018 }, { "epoch": 61.30204081632653, "grad_norm": 24.803497314453125, "learning_rate": 1.936938775510204e-05, "loss": 1.7666, "step": 15019 }, { "epoch": 61.30612244897959, "grad_norm": 25.751419067382812, "learning_rate": 1.9367346938775512e-05, "loss": 1.6807, "step": 15020 }, { "epoch": 61.310204081632655, "grad_norm": 19.87405014038086, "learning_rate": 1.9365306122448982e-05, "loss": 2.1033, "step": 15021 }, { "epoch": 61.31428571428572, "grad_norm": 38.89920425415039, "learning_rate": 1.936326530612245e-05, "loss": 1.0092, "step": 15022 }, { "epoch": 61.31836734693878, "grad_norm": 24.317962646484375, "learning_rate": 1.936122448979592e-05, "loss": 0.3326, "step": 15023 }, { "epoch": 61.32244897959184, "grad_norm": 34.50401306152344, "learning_rate": 1.9359183673469387e-05, "loss": 1.231, "step": 15024 }, { "epoch": 61.326530612244895, "grad_norm": 23.717615127563477, "learning_rate": 1.9357142857142858e-05, "loss": 1.9202, "step": 15025 }, { "epoch": 61.33061224489796, "grad_norm": 49.63738250732422, "learning_rate": 1.9355102040816328e-05, "loss": 0.6159, "step": 15026 }, { "epoch": 61.33469387755102, "grad_norm": 34.489566802978516, "learning_rate": 1.9353061224489795e-05, "loss": 1.497, "step": 15027 }, { "epoch": 61.33877551020408, "grad_norm": 34.0672492980957, "learning_rate": 1.9351020408163266e-05, "loss": 1.5847, "step": 15028 }, { "epoch": 61.34285714285714, "grad_norm": 55.2671012878418, "learning_rate": 1.9348979591836736e-05, "loss": 0.824, "step": 15029 }, { "epoch": 61.3469387755102, "grad_norm": 42.632442474365234, "learning_rate": 1.9346938775510203e-05, "loss": 0.639, "step": 15030 }, { "epoch": 61.351020408163265, "grad_norm": 32.935611724853516, "learning_rate": 1.9344897959183674e-05, "loss": 1.6085, "step": 15031 }, { "epoch": 61.35510204081633, "grad_norm": 33.94658279418945, "learning_rate": 1.9342857142857144e-05, "loss": 1.3472, "step": 15032 }, { "epoch": 61.35918367346939, "grad_norm": 34.573463439941406, "learning_rate": 1.9340816326530615e-05, "loss": 1.6978, "step": 15033 }, { "epoch": 61.36326530612245, "grad_norm": 24.23348045349121, "learning_rate": 1.9338775510204082e-05, "loss": 2.0195, "step": 15034 }, { "epoch": 61.36734693877551, "grad_norm": 20.33273696899414, "learning_rate": 1.933673469387755e-05, "loss": 1.7806, "step": 15035 }, { "epoch": 61.371428571428574, "grad_norm": 45.290321350097656, "learning_rate": 1.9334693877551023e-05, "loss": 0.2591, "step": 15036 }, { "epoch": 61.375510204081635, "grad_norm": 36.16609191894531, "learning_rate": 1.933265306122449e-05, "loss": 0.8451, "step": 15037 }, { "epoch": 61.3795918367347, "grad_norm": 23.214815139770508, "learning_rate": 1.933061224489796e-05, "loss": 1.9085, "step": 15038 }, { "epoch": 61.38367346938775, "grad_norm": 22.79665184020996, "learning_rate": 1.9328571428571428e-05, "loss": 1.9048, "step": 15039 }, { "epoch": 61.38775510204081, "grad_norm": 22.62353515625, "learning_rate": 1.9326530612244902e-05, "loss": 1.9932, "step": 15040 }, { "epoch": 61.391836734693875, "grad_norm": 28.6478328704834, "learning_rate": 1.932448979591837e-05, "loss": 1.4909, "step": 15041 }, { "epoch": 61.39591836734694, "grad_norm": 28.707454681396484, "learning_rate": 1.9322448979591836e-05, "loss": 0.4973, "step": 15042 }, { "epoch": 61.4, "grad_norm": 33.23479080200195, "learning_rate": 1.9320408163265307e-05, "loss": 1.3106, "step": 15043 }, { "epoch": 61.40408163265306, "grad_norm": 48.67391586303711, "learning_rate": 1.9318367346938777e-05, "loss": 0.6128, "step": 15044 }, { "epoch": 61.40816326530612, "grad_norm": 21.070575714111328, "learning_rate": 1.9316326530612248e-05, "loss": 2.0578, "step": 15045 }, { "epoch": 61.412244897959184, "grad_norm": 43.748966217041016, "learning_rate": 1.9314285714285715e-05, "loss": 1.0878, "step": 15046 }, { "epoch": 61.416326530612245, "grad_norm": 34.54265213012695, "learning_rate": 1.9312244897959185e-05, "loss": 1.4063, "step": 15047 }, { "epoch": 61.42040816326531, "grad_norm": 25.716739654541016, "learning_rate": 1.9310204081632656e-05, "loss": 1.6938, "step": 15048 }, { "epoch": 61.42448979591837, "grad_norm": 32.537593841552734, "learning_rate": 1.9308163265306123e-05, "loss": 2.1748, "step": 15049 }, { "epoch": 61.42857142857143, "grad_norm": 36.68583679199219, "learning_rate": 1.9306122448979593e-05, "loss": 0.7312, "step": 15050 }, { "epoch": 61.43265306122449, "grad_norm": 24.59351348876953, "learning_rate": 1.930408163265306e-05, "loss": 2.397, "step": 15051 }, { "epoch": 61.436734693877554, "grad_norm": 23.245981216430664, "learning_rate": 1.930204081632653e-05, "loss": 1.9114, "step": 15052 }, { "epoch": 61.440816326530616, "grad_norm": 33.03227996826172, "learning_rate": 1.93e-05, "loss": 0.5237, "step": 15053 }, { "epoch": 61.44489795918367, "grad_norm": 32.5844841003418, "learning_rate": 1.929795918367347e-05, "loss": 0.8879, "step": 15054 }, { "epoch": 61.44897959183673, "grad_norm": 26.167926788330078, "learning_rate": 1.929591836734694e-05, "loss": 1.6892, "step": 15055 }, { "epoch": 61.453061224489794, "grad_norm": 23.278066635131836, "learning_rate": 1.929387755102041e-05, "loss": 2.4772, "step": 15056 }, { "epoch": 61.457142857142856, "grad_norm": 23.736148834228516, "learning_rate": 1.9291836734693877e-05, "loss": 2.091, "step": 15057 }, { "epoch": 61.46122448979592, "grad_norm": 21.893829345703125, "learning_rate": 1.9289795918367347e-05, "loss": 2.01, "step": 15058 }, { "epoch": 61.46530612244898, "grad_norm": 28.375497817993164, "learning_rate": 1.9287755102040818e-05, "loss": 0.8841, "step": 15059 }, { "epoch": 61.46938775510204, "grad_norm": 20.5994930267334, "learning_rate": 1.928571428571429e-05, "loss": 2.4436, "step": 15060 }, { "epoch": 61.4734693877551, "grad_norm": 39.97484588623047, "learning_rate": 1.9283673469387756e-05, "loss": 1.3828, "step": 15061 }, { "epoch": 61.477551020408164, "grad_norm": 30.882164001464844, "learning_rate": 1.9281632653061226e-05, "loss": 1.4976, "step": 15062 }, { "epoch": 61.481632653061226, "grad_norm": 36.916587829589844, "learning_rate": 1.9279591836734697e-05, "loss": 1.1968, "step": 15063 }, { "epoch": 61.48571428571429, "grad_norm": 37.97343063354492, "learning_rate": 1.9277551020408164e-05, "loss": 0.7389, "step": 15064 }, { "epoch": 61.48979591836735, "grad_norm": 25.956073760986328, "learning_rate": 1.9275510204081634e-05, "loss": 1.615, "step": 15065 }, { "epoch": 61.49387755102041, "grad_norm": 30.81673812866211, "learning_rate": 1.92734693877551e-05, "loss": 1.7544, "step": 15066 }, { "epoch": 61.49795918367347, "grad_norm": 35.20026779174805, "learning_rate": 1.9271428571428575e-05, "loss": 1.0508, "step": 15067 }, { "epoch": 61.50204081632653, "grad_norm": 31.76648712158203, "learning_rate": 1.9269387755102042e-05, "loss": 1.5101, "step": 15068 }, { "epoch": 61.50612244897959, "grad_norm": 21.202983856201172, "learning_rate": 1.926734693877551e-05, "loss": 2.665, "step": 15069 }, { "epoch": 61.51020408163265, "grad_norm": 34.37229919433594, "learning_rate": 1.926530612244898e-05, "loss": 1.1697, "step": 15070 }, { "epoch": 61.51428571428571, "grad_norm": 42.41856002807617, "learning_rate": 1.926326530612245e-05, "loss": 0.8031, "step": 15071 }, { "epoch": 61.518367346938774, "grad_norm": 39.918121337890625, "learning_rate": 1.926122448979592e-05, "loss": 0.5532, "step": 15072 }, { "epoch": 61.522448979591836, "grad_norm": 32.52479934692383, "learning_rate": 1.9259183673469388e-05, "loss": 0.7391, "step": 15073 }, { "epoch": 61.5265306122449, "grad_norm": 35.519989013671875, "learning_rate": 1.9257142857142855e-05, "loss": 0.3051, "step": 15074 }, { "epoch": 61.53061224489796, "grad_norm": 33.11399841308594, "learning_rate": 1.925510204081633e-05, "loss": 1.7933, "step": 15075 }, { "epoch": 61.53469387755102, "grad_norm": 22.332063674926758, "learning_rate": 1.9253061224489796e-05, "loss": 2.0067, "step": 15076 }, { "epoch": 61.53877551020408, "grad_norm": 49.02263641357422, "learning_rate": 1.9251020408163267e-05, "loss": 1.7563, "step": 15077 }, { "epoch": 61.542857142857144, "grad_norm": 29.153854370117188, "learning_rate": 1.9248979591836734e-05, "loss": 1.5434, "step": 15078 }, { "epoch": 61.546938775510206, "grad_norm": 27.875261306762695, "learning_rate": 1.9246938775510204e-05, "loss": 1.752, "step": 15079 }, { "epoch": 61.55102040816327, "grad_norm": 31.21656608581543, "learning_rate": 1.9244897959183675e-05, "loss": 1.702, "step": 15080 }, { "epoch": 61.55510204081633, "grad_norm": 36.030609130859375, "learning_rate": 1.9242857142857142e-05, "loss": 1.3133, "step": 15081 }, { "epoch": 61.55918367346939, "grad_norm": 26.125757217407227, "learning_rate": 1.9240816326530613e-05, "loss": 0.2306, "step": 15082 }, { "epoch": 61.563265306122446, "grad_norm": 34.872642517089844, "learning_rate": 1.9238775510204083e-05, "loss": 1.4172, "step": 15083 }, { "epoch": 61.56734693877551, "grad_norm": 27.005016326904297, "learning_rate": 1.923673469387755e-05, "loss": 1.676, "step": 15084 }, { "epoch": 61.57142857142857, "grad_norm": 22.678064346313477, "learning_rate": 1.923469387755102e-05, "loss": 2.1674, "step": 15085 }, { "epoch": 61.57551020408163, "grad_norm": 23.642515182495117, "learning_rate": 1.923265306122449e-05, "loss": 1.9893, "step": 15086 }, { "epoch": 61.57959183673469, "grad_norm": 53.15922927856445, "learning_rate": 1.9230612244897962e-05, "loss": 1.5324, "step": 15087 }, { "epoch": 61.583673469387755, "grad_norm": 41.84519577026367, "learning_rate": 1.922857142857143e-05, "loss": 0.7343, "step": 15088 }, { "epoch": 61.587755102040816, "grad_norm": 40.94475555419922, "learning_rate": 1.92265306122449e-05, "loss": 1.0097, "step": 15089 }, { "epoch": 61.59183673469388, "grad_norm": 24.42313003540039, "learning_rate": 1.922448979591837e-05, "loss": 1.9064, "step": 15090 }, { "epoch": 61.59591836734694, "grad_norm": 30.066781997680664, "learning_rate": 1.9222448979591837e-05, "loss": 1.3117, "step": 15091 }, { "epoch": 61.6, "grad_norm": 21.311702728271484, "learning_rate": 1.9220408163265308e-05, "loss": 2.0534, "step": 15092 }, { "epoch": 61.60408163265306, "grad_norm": 35.04526138305664, "learning_rate": 1.9218367346938775e-05, "loss": 1.6319, "step": 15093 }, { "epoch": 61.608163265306125, "grad_norm": 40.75277328491211, "learning_rate": 1.921632653061225e-05, "loss": 0.5692, "step": 15094 }, { "epoch": 61.61224489795919, "grad_norm": 29.3126220703125, "learning_rate": 1.9214285714285716e-05, "loss": 1.2462, "step": 15095 }, { "epoch": 61.61632653061225, "grad_norm": 28.96963119506836, "learning_rate": 1.9212244897959183e-05, "loss": 1.2299, "step": 15096 }, { "epoch": 61.6204081632653, "grad_norm": 20.990320205688477, "learning_rate": 1.9210204081632653e-05, "loss": 2.4541, "step": 15097 }, { "epoch": 61.624489795918365, "grad_norm": 34.73749542236328, "learning_rate": 1.9208163265306124e-05, "loss": 1.0776, "step": 15098 }, { "epoch": 61.628571428571426, "grad_norm": 43.59996032714844, "learning_rate": 1.9206122448979594e-05, "loss": 0.5392, "step": 15099 }, { "epoch": 61.63265306122449, "grad_norm": 35.653114318847656, "learning_rate": 1.920408163265306e-05, "loss": 1.4511, "step": 15100 }, { "epoch": 61.63673469387755, "grad_norm": 13.306585311889648, "learning_rate": 1.920204081632653e-05, "loss": 2.6307, "step": 15101 }, { "epoch": 61.64081632653061, "grad_norm": 14.435717582702637, "learning_rate": 1.9200000000000003e-05, "loss": 2.9394, "step": 15102 }, { "epoch": 61.64489795918367, "grad_norm": 40.70137023925781, "learning_rate": 1.919795918367347e-05, "loss": 1.144, "step": 15103 }, { "epoch": 61.648979591836735, "grad_norm": 55.3969612121582, "learning_rate": 1.919591836734694e-05, "loss": 0.3381, "step": 15104 }, { "epoch": 61.6530612244898, "grad_norm": 22.050615310668945, "learning_rate": 1.9193877551020407e-05, "loss": 1.967, "step": 15105 }, { "epoch": 61.65714285714286, "grad_norm": 38.52500534057617, "learning_rate": 1.9191836734693878e-05, "loss": 0.8394, "step": 15106 }, { "epoch": 61.66122448979592, "grad_norm": 19.055419921875, "learning_rate": 1.918979591836735e-05, "loss": 2.544, "step": 15107 }, { "epoch": 61.66530612244898, "grad_norm": 33.061824798583984, "learning_rate": 1.9187755102040815e-05, "loss": 0.2367, "step": 15108 }, { "epoch": 61.66938775510204, "grad_norm": 37.006797790527344, "learning_rate": 1.9185714285714286e-05, "loss": 1.0058, "step": 15109 }, { "epoch": 61.673469387755105, "grad_norm": 29.477123260498047, "learning_rate": 1.9183673469387756e-05, "loss": 1.5767, "step": 15110 }, { "epoch": 61.67755102040816, "grad_norm": 37.901180267333984, "learning_rate": 1.9181632653061227e-05, "loss": 0.7344, "step": 15111 }, { "epoch": 61.68163265306122, "grad_norm": 33.40666961669922, "learning_rate": 1.9179591836734694e-05, "loss": 0.7195, "step": 15112 }, { "epoch": 61.68571428571428, "grad_norm": 43.062435150146484, "learning_rate": 1.9177551020408165e-05, "loss": 1.0849, "step": 15113 }, { "epoch": 61.689795918367345, "grad_norm": 33.259971618652344, "learning_rate": 1.9175510204081635e-05, "loss": 1.431, "step": 15114 }, { "epoch": 61.69387755102041, "grad_norm": 30.83941078186035, "learning_rate": 1.9173469387755102e-05, "loss": 1.8653, "step": 15115 }, { "epoch": 61.69795918367347, "grad_norm": 36.81071472167969, "learning_rate": 1.9171428571428573e-05, "loss": 0.5287, "step": 15116 }, { "epoch": 61.70204081632653, "grad_norm": 29.803863525390625, "learning_rate": 1.9169387755102043e-05, "loss": 1.8836, "step": 15117 }, { "epoch": 61.70612244897959, "grad_norm": 38.117469787597656, "learning_rate": 1.916734693877551e-05, "loss": 1.5276, "step": 15118 }, { "epoch": 61.710204081632654, "grad_norm": 31.51442527770996, "learning_rate": 1.916530612244898e-05, "loss": 1.5971, "step": 15119 }, { "epoch": 61.714285714285715, "grad_norm": 23.18885040283203, "learning_rate": 1.9163265306122448e-05, "loss": 1.9177, "step": 15120 }, { "epoch": 61.71836734693878, "grad_norm": 35.320098876953125, "learning_rate": 1.9161224489795922e-05, "loss": 1.5963, "step": 15121 }, { "epoch": 61.72244897959184, "grad_norm": 38.997894287109375, "learning_rate": 1.915918367346939e-05, "loss": 0.7731, "step": 15122 }, { "epoch": 61.7265306122449, "grad_norm": 31.907129287719727, "learning_rate": 1.9157142857142856e-05, "loss": 1.2429, "step": 15123 }, { "epoch": 61.73061224489796, "grad_norm": 24.004024505615234, "learning_rate": 1.9155102040816327e-05, "loss": 2.1374, "step": 15124 }, { "epoch": 61.734693877551024, "grad_norm": 33.01132583618164, "learning_rate": 1.9153061224489797e-05, "loss": 1.1481, "step": 15125 }, { "epoch": 61.73877551020408, "grad_norm": 33.600975036621094, "learning_rate": 1.9151020408163268e-05, "loss": 1.6843, "step": 15126 }, { "epoch": 61.74285714285714, "grad_norm": 42.168968200683594, "learning_rate": 1.9148979591836735e-05, "loss": 1.3065, "step": 15127 }, { "epoch": 61.7469387755102, "grad_norm": 20.37784194946289, "learning_rate": 1.9146938775510202e-05, "loss": 2.529, "step": 15128 }, { "epoch": 61.751020408163264, "grad_norm": 17.84909439086914, "learning_rate": 1.9144897959183676e-05, "loss": 2.4248, "step": 15129 }, { "epoch": 61.755102040816325, "grad_norm": 44.550682067871094, "learning_rate": 1.9142857142857143e-05, "loss": 0.707, "step": 15130 }, { "epoch": 61.75918367346939, "grad_norm": 30.051387786865234, "learning_rate": 1.9140816326530614e-05, "loss": 0.295, "step": 15131 }, { "epoch": 61.76326530612245, "grad_norm": 39.44209671020508, "learning_rate": 1.913877551020408e-05, "loss": 0.7028, "step": 15132 }, { "epoch": 61.76734693877551, "grad_norm": 23.028423309326172, "learning_rate": 1.9136734693877555e-05, "loss": 1.8469, "step": 15133 }, { "epoch": 61.77142857142857, "grad_norm": 23.36487579345703, "learning_rate": 1.913469387755102e-05, "loss": 1.7804, "step": 15134 }, { "epoch": 61.775510204081634, "grad_norm": 29.474077224731445, "learning_rate": 1.913265306122449e-05, "loss": 0.5747, "step": 15135 }, { "epoch": 61.779591836734696, "grad_norm": 36.90398025512695, "learning_rate": 1.913061224489796e-05, "loss": 1.0579, "step": 15136 }, { "epoch": 61.78367346938776, "grad_norm": 26.311763763427734, "learning_rate": 1.912857142857143e-05, "loss": 2.0864, "step": 15137 }, { "epoch": 61.78775510204082, "grad_norm": 51.56848907470703, "learning_rate": 1.91265306122449e-05, "loss": 0.9399, "step": 15138 }, { "epoch": 61.79183673469388, "grad_norm": 28.747495651245117, "learning_rate": 1.9124489795918367e-05, "loss": 1.8797, "step": 15139 }, { "epoch": 61.795918367346935, "grad_norm": 21.949541091918945, "learning_rate": 1.9122448979591838e-05, "loss": 2.3676, "step": 15140 }, { "epoch": 61.8, "grad_norm": 23.45539093017578, "learning_rate": 1.912040816326531e-05, "loss": 0.2288, "step": 15141 }, { "epoch": 61.80408163265306, "grad_norm": 30.63471221923828, "learning_rate": 1.9118367346938776e-05, "loss": 1.5608, "step": 15142 }, { "epoch": 61.80816326530612, "grad_norm": 35.58173751831055, "learning_rate": 1.9116326530612246e-05, "loss": 1.1319, "step": 15143 }, { "epoch": 61.81224489795918, "grad_norm": 24.181758880615234, "learning_rate": 1.9114285714285717e-05, "loss": 1.8717, "step": 15144 }, { "epoch": 61.816326530612244, "grad_norm": 23.567081451416016, "learning_rate": 1.9112244897959184e-05, "loss": 2.3958, "step": 15145 }, { "epoch": 61.820408163265306, "grad_norm": 45.81187057495117, "learning_rate": 1.9110204081632654e-05, "loss": 0.7856, "step": 15146 }, { "epoch": 61.82448979591837, "grad_norm": 32.681373596191406, "learning_rate": 1.910816326530612e-05, "loss": 0.2995, "step": 15147 }, { "epoch": 61.82857142857143, "grad_norm": 25.06782341003418, "learning_rate": 1.9106122448979595e-05, "loss": 0.3145, "step": 15148 }, { "epoch": 61.83265306122449, "grad_norm": 17.696168899536133, "learning_rate": 1.9104081632653062e-05, "loss": 2.3267, "step": 15149 }, { "epoch": 61.83673469387755, "grad_norm": 54.14014434814453, "learning_rate": 1.910204081632653e-05, "loss": 1.1429, "step": 15150 }, { "epoch": 61.840816326530614, "grad_norm": 31.210887908935547, "learning_rate": 1.91e-05, "loss": 0.228, "step": 15151 }, { "epoch": 61.844897959183676, "grad_norm": 26.242233276367188, "learning_rate": 1.909795918367347e-05, "loss": 1.6911, "step": 15152 }, { "epoch": 61.84897959183674, "grad_norm": 40.60202407836914, "learning_rate": 1.909591836734694e-05, "loss": 1.0018, "step": 15153 }, { "epoch": 61.85306122448979, "grad_norm": 44.381752014160156, "learning_rate": 1.9093877551020408e-05, "loss": 1.1854, "step": 15154 }, { "epoch": 61.857142857142854, "grad_norm": 30.036352157592773, "learning_rate": 1.909183673469388e-05, "loss": 1.4215, "step": 15155 }, { "epoch": 61.861224489795916, "grad_norm": 34.18632125854492, "learning_rate": 1.908979591836735e-05, "loss": 0.7351, "step": 15156 }, { "epoch": 61.86530612244898, "grad_norm": 50.715328216552734, "learning_rate": 1.9087755102040816e-05, "loss": 0.7745, "step": 15157 }, { "epoch": 61.86938775510204, "grad_norm": 36.55667495727539, "learning_rate": 1.9085714285714287e-05, "loss": 1.4018, "step": 15158 }, { "epoch": 61.8734693877551, "grad_norm": 44.681549072265625, "learning_rate": 1.9083673469387754e-05, "loss": 0.8836, "step": 15159 }, { "epoch": 61.87755102040816, "grad_norm": 36.4136962890625, "learning_rate": 1.9081632653061228e-05, "loss": 1.7231, "step": 15160 }, { "epoch": 61.881632653061224, "grad_norm": 25.149333953857422, "learning_rate": 1.9079591836734695e-05, "loss": 2.0694, "step": 15161 }, { "epoch": 61.885714285714286, "grad_norm": 24.291019439697266, "learning_rate": 1.9077551020408162e-05, "loss": 2.1288, "step": 15162 }, { "epoch": 61.88979591836735, "grad_norm": 21.54486083984375, "learning_rate": 1.9075510204081633e-05, "loss": 2.3848, "step": 15163 }, { "epoch": 61.89387755102041, "grad_norm": 25.55476951599121, "learning_rate": 1.9073469387755103e-05, "loss": 1.8859, "step": 15164 }, { "epoch": 61.89795918367347, "grad_norm": 33.40636444091797, "learning_rate": 1.9071428571428574e-05, "loss": 1.0872, "step": 15165 }, { "epoch": 61.90204081632653, "grad_norm": 23.473102569580078, "learning_rate": 1.906938775510204e-05, "loss": 2.025, "step": 15166 }, { "epoch": 61.906122448979595, "grad_norm": 34.039798736572266, "learning_rate": 1.906734693877551e-05, "loss": 1.1833, "step": 15167 }, { "epoch": 61.910204081632656, "grad_norm": 25.544057846069336, "learning_rate": 1.9065306122448982e-05, "loss": 2.0384, "step": 15168 }, { "epoch": 61.91428571428571, "grad_norm": 33.66389465332031, "learning_rate": 1.906326530612245e-05, "loss": 1.2844, "step": 15169 }, { "epoch": 61.91836734693877, "grad_norm": 24.04547691345215, "learning_rate": 1.906122448979592e-05, "loss": 2.146, "step": 15170 }, { "epoch": 61.922448979591834, "grad_norm": 30.872373580932617, "learning_rate": 1.905918367346939e-05, "loss": 1.6217, "step": 15171 }, { "epoch": 61.926530612244896, "grad_norm": 46.90549087524414, "learning_rate": 1.9057142857142857e-05, "loss": 1.026, "step": 15172 }, { "epoch": 61.93061224489796, "grad_norm": 27.529756546020508, "learning_rate": 1.9055102040816328e-05, "loss": 1.27, "step": 15173 }, { "epoch": 61.93469387755102, "grad_norm": 30.59695816040039, "learning_rate": 1.9053061224489795e-05, "loss": 1.9847, "step": 15174 }, { "epoch": 61.93877551020408, "grad_norm": 37.89039993286133, "learning_rate": 1.905102040816327e-05, "loss": 0.8514, "step": 15175 }, { "epoch": 61.94285714285714, "grad_norm": 17.504297256469727, "learning_rate": 1.9048979591836736e-05, "loss": 2.6054, "step": 15176 }, { "epoch": 61.946938775510205, "grad_norm": 37.53104019165039, "learning_rate": 1.9046938775510206e-05, "loss": 0.5915, "step": 15177 }, { "epoch": 61.95102040816327, "grad_norm": 44.415283203125, "learning_rate": 1.9044897959183673e-05, "loss": 0.3377, "step": 15178 }, { "epoch": 61.95510204081633, "grad_norm": 22.189151763916016, "learning_rate": 1.9042857142857144e-05, "loss": 1.9848, "step": 15179 }, { "epoch": 61.95918367346939, "grad_norm": 41.275108337402344, "learning_rate": 1.9040816326530614e-05, "loss": 0.6928, "step": 15180 }, { "epoch": 61.96326530612245, "grad_norm": 42.23701477050781, "learning_rate": 1.903877551020408e-05, "loss": 1.4441, "step": 15181 }, { "epoch": 61.96734693877551, "grad_norm": 30.082077026367188, "learning_rate": 1.9036734693877552e-05, "loss": 1.4676, "step": 15182 }, { "epoch": 61.97142857142857, "grad_norm": 41.79332733154297, "learning_rate": 1.9034693877551023e-05, "loss": 0.9035, "step": 15183 }, { "epoch": 61.97551020408163, "grad_norm": 32.74436950683594, "learning_rate": 1.903265306122449e-05, "loss": 0.2547, "step": 15184 }, { "epoch": 61.97959183673469, "grad_norm": 45.71562194824219, "learning_rate": 1.903061224489796e-05, "loss": 0.2438, "step": 15185 }, { "epoch": 61.98367346938775, "grad_norm": 31.79197120666504, "learning_rate": 1.9028571428571427e-05, "loss": 0.4038, "step": 15186 }, { "epoch": 61.987755102040815, "grad_norm": 46.56317138671875, "learning_rate": 1.90265306122449e-05, "loss": 0.7952, "step": 15187 }, { "epoch": 61.99183673469388, "grad_norm": 25.236080169677734, "learning_rate": 1.902448979591837e-05, "loss": 1.9549, "step": 15188 }, { "epoch": 61.99591836734694, "grad_norm": 37.00984573364258, "learning_rate": 1.9022448979591836e-05, "loss": 1.1385, "step": 15189 }, { "epoch": 62.0, "grad_norm": 29.33916473388672, "learning_rate": 1.9020408163265306e-05, "loss": 2.0561, "step": 15190 }, { "epoch": 62.00408163265306, "grad_norm": 25.514949798583984, "learning_rate": 1.9018367346938777e-05, "loss": 2.0376, "step": 15191 }, { "epoch": 62.00816326530612, "grad_norm": 20.710195541381836, "learning_rate": 1.9016326530612247e-05, "loss": 1.9316, "step": 15192 }, { "epoch": 62.012244897959185, "grad_norm": 14.124053001403809, "learning_rate": 1.9014285714285714e-05, "loss": 2.8791, "step": 15193 }, { "epoch": 62.01632653061225, "grad_norm": 42.412166595458984, "learning_rate": 1.9012244897959185e-05, "loss": 1.0508, "step": 15194 }, { "epoch": 62.02040816326531, "grad_norm": 26.96650505065918, "learning_rate": 1.9010204081632655e-05, "loss": 0.5516, "step": 15195 }, { "epoch": 62.02448979591837, "grad_norm": 28.773488998413086, "learning_rate": 1.9008163265306122e-05, "loss": 1.6587, "step": 15196 }, { "epoch": 62.02857142857143, "grad_norm": 22.805294036865234, "learning_rate": 1.9006122448979593e-05, "loss": 1.9088, "step": 15197 }, { "epoch": 62.03265306122449, "grad_norm": 28.438968658447266, "learning_rate": 1.9004081632653063e-05, "loss": 1.3298, "step": 15198 }, { "epoch": 62.03673469387755, "grad_norm": 59.21023941040039, "learning_rate": 1.9002040816326534e-05, "loss": 1.7067, "step": 15199 }, { "epoch": 62.04081632653061, "grad_norm": 26.814382553100586, "learning_rate": 1.9e-05, "loss": 1.6478, "step": 15200 }, { "epoch": 62.04489795918367, "grad_norm": 38.15702438354492, "learning_rate": 1.8997959183673468e-05, "loss": 0.5165, "step": 15201 }, { "epoch": 62.04897959183673, "grad_norm": 43.302650451660156, "learning_rate": 1.8995918367346942e-05, "loss": 0.718, "step": 15202 }, { "epoch": 62.053061224489795, "grad_norm": 25.028459548950195, "learning_rate": 1.899387755102041e-05, "loss": 1.4706, "step": 15203 }, { "epoch": 62.05714285714286, "grad_norm": 27.9370174407959, "learning_rate": 1.899183673469388e-05, "loss": 0.6688, "step": 15204 }, { "epoch": 62.06122448979592, "grad_norm": 33.17045593261719, "learning_rate": 1.8989795918367347e-05, "loss": 1.2022, "step": 15205 }, { "epoch": 62.06530612244898, "grad_norm": 20.016033172607422, "learning_rate": 1.8987755102040817e-05, "loss": 2.0003, "step": 15206 }, { "epoch": 62.06938775510204, "grad_norm": 34.529632568359375, "learning_rate": 1.8985714285714288e-05, "loss": 0.9146, "step": 15207 }, { "epoch": 62.073469387755104, "grad_norm": 40.692989349365234, "learning_rate": 1.8983673469387755e-05, "loss": 1.0787, "step": 15208 }, { "epoch": 62.077551020408166, "grad_norm": 35.47578811645508, "learning_rate": 1.8981632653061226e-05, "loss": 1.3772, "step": 15209 }, { "epoch": 62.08163265306123, "grad_norm": 30.92696189880371, "learning_rate": 1.8979591836734696e-05, "loss": 1.5786, "step": 15210 }, { "epoch": 62.08571428571429, "grad_norm": 36.3636360168457, "learning_rate": 1.8977551020408163e-05, "loss": 1.6564, "step": 15211 }, { "epoch": 62.089795918367344, "grad_norm": 27.924238204956055, "learning_rate": 1.8975510204081634e-05, "loss": 1.7624, "step": 15212 }, { "epoch": 62.093877551020405, "grad_norm": 14.819808006286621, "learning_rate": 1.89734693877551e-05, "loss": 2.5741, "step": 15213 }, { "epoch": 62.09795918367347, "grad_norm": 27.333459854125977, "learning_rate": 1.8971428571428575e-05, "loss": 1.5149, "step": 15214 }, { "epoch": 62.10204081632653, "grad_norm": 32.283470153808594, "learning_rate": 1.8969387755102042e-05, "loss": 1.6772, "step": 15215 }, { "epoch": 62.10612244897959, "grad_norm": 26.958599090576172, "learning_rate": 1.896734693877551e-05, "loss": 1.7188, "step": 15216 }, { "epoch": 62.11020408163265, "grad_norm": 24.16182518005371, "learning_rate": 1.896530612244898e-05, "loss": 1.531, "step": 15217 }, { "epoch": 62.114285714285714, "grad_norm": 34.02971267700195, "learning_rate": 1.896326530612245e-05, "loss": 1.7263, "step": 15218 }, { "epoch": 62.118367346938776, "grad_norm": 20.123197555541992, "learning_rate": 1.896122448979592e-05, "loss": 2.2446, "step": 15219 }, { "epoch": 62.12244897959184, "grad_norm": 36.34524154663086, "learning_rate": 1.8959183673469388e-05, "loss": 0.7326, "step": 15220 }, { "epoch": 62.1265306122449, "grad_norm": 43.86941909790039, "learning_rate": 1.8957142857142858e-05, "loss": 0.5382, "step": 15221 }, { "epoch": 62.13061224489796, "grad_norm": 23.588409423828125, "learning_rate": 1.895510204081633e-05, "loss": 2.0575, "step": 15222 }, { "epoch": 62.13469387755102, "grad_norm": 56.459747314453125, "learning_rate": 1.8953061224489796e-05, "loss": 0.3734, "step": 15223 }, { "epoch": 62.138775510204084, "grad_norm": 51.47343063354492, "learning_rate": 1.8951020408163266e-05, "loss": 0.9093, "step": 15224 }, { "epoch": 62.142857142857146, "grad_norm": 36.90794372558594, "learning_rate": 1.8948979591836737e-05, "loss": 1.0207, "step": 15225 }, { "epoch": 62.14693877551021, "grad_norm": 32.57299041748047, "learning_rate": 1.8946938775510207e-05, "loss": 0.2627, "step": 15226 }, { "epoch": 62.15102040816326, "grad_norm": 33.63544464111328, "learning_rate": 1.8944897959183674e-05, "loss": 1.7717, "step": 15227 }, { "epoch": 62.155102040816324, "grad_norm": 25.90880012512207, "learning_rate": 1.894285714285714e-05, "loss": 1.6482, "step": 15228 }, { "epoch": 62.159183673469386, "grad_norm": 36.77367401123047, "learning_rate": 1.8940816326530615e-05, "loss": 0.2466, "step": 15229 }, { "epoch": 62.16326530612245, "grad_norm": 29.49045181274414, "learning_rate": 1.8938775510204083e-05, "loss": 1.167, "step": 15230 }, { "epoch": 62.16734693877551, "grad_norm": 33.4786376953125, "learning_rate": 1.8936734693877553e-05, "loss": 0.5518, "step": 15231 }, { "epoch": 62.17142857142857, "grad_norm": 42.824092864990234, "learning_rate": 1.893469387755102e-05, "loss": 0.5111, "step": 15232 }, { "epoch": 62.17551020408163, "grad_norm": 34.15114212036133, "learning_rate": 1.893265306122449e-05, "loss": 0.6444, "step": 15233 }, { "epoch": 62.179591836734694, "grad_norm": 23.047731399536133, "learning_rate": 1.893061224489796e-05, "loss": 2.4018, "step": 15234 }, { "epoch": 62.183673469387756, "grad_norm": 36.32653045654297, "learning_rate": 1.892857142857143e-05, "loss": 1.2521, "step": 15235 }, { "epoch": 62.18775510204082, "grad_norm": 23.023645401000977, "learning_rate": 1.89265306122449e-05, "loss": 1.9721, "step": 15236 }, { "epoch": 62.19183673469388, "grad_norm": 34.028892517089844, "learning_rate": 1.892448979591837e-05, "loss": 1.1506, "step": 15237 }, { "epoch": 62.19591836734694, "grad_norm": 27.29311752319336, "learning_rate": 1.8922448979591837e-05, "loss": 1.5636, "step": 15238 }, { "epoch": 62.2, "grad_norm": 49.535186767578125, "learning_rate": 1.8920408163265307e-05, "loss": 1.6713, "step": 15239 }, { "epoch": 62.204081632653065, "grad_norm": 40.23527526855469, "learning_rate": 1.8918367346938774e-05, "loss": 1.4027, "step": 15240 }, { "epoch": 62.20816326530612, "grad_norm": 20.69368553161621, "learning_rate": 1.8916326530612248e-05, "loss": 2.592, "step": 15241 }, { "epoch": 62.21224489795918, "grad_norm": 31.322376251220703, "learning_rate": 1.8914285714285715e-05, "loss": 0.7793, "step": 15242 }, { "epoch": 62.21632653061224, "grad_norm": 24.047250747680664, "learning_rate": 1.8912244897959182e-05, "loss": 2.0478, "step": 15243 }, { "epoch": 62.220408163265304, "grad_norm": 23.22965431213379, "learning_rate": 1.8910204081632653e-05, "loss": 1.7447, "step": 15244 }, { "epoch": 62.224489795918366, "grad_norm": 20.667261123657227, "learning_rate": 1.8908163265306123e-05, "loss": 2.3211, "step": 15245 }, { "epoch": 62.22857142857143, "grad_norm": 37.56329345703125, "learning_rate": 1.8906122448979594e-05, "loss": 1.0076, "step": 15246 }, { "epoch": 62.23265306122449, "grad_norm": 38.11900329589844, "learning_rate": 1.890408163265306e-05, "loss": 1.1846, "step": 15247 }, { "epoch": 62.23673469387755, "grad_norm": 20.15996742248535, "learning_rate": 1.890204081632653e-05, "loss": 2.1095, "step": 15248 }, { "epoch": 62.24081632653061, "grad_norm": 29.64717674255371, "learning_rate": 1.8900000000000002e-05, "loss": 1.1684, "step": 15249 }, { "epoch": 62.244897959183675, "grad_norm": 34.116886138916016, "learning_rate": 1.889795918367347e-05, "loss": 0.4956, "step": 15250 }, { "epoch": 62.248979591836736, "grad_norm": 38.76277160644531, "learning_rate": 1.889591836734694e-05, "loss": 0.6308, "step": 15251 }, { "epoch": 62.2530612244898, "grad_norm": 24.36614990234375, "learning_rate": 1.889387755102041e-05, "loss": 1.8937, "step": 15252 }, { "epoch": 62.25714285714286, "grad_norm": 28.20188331604004, "learning_rate": 1.889183673469388e-05, "loss": 1.8383, "step": 15253 }, { "epoch": 62.26122448979592, "grad_norm": 26.733537673950195, "learning_rate": 1.8889795918367348e-05, "loss": 1.9734, "step": 15254 }, { "epoch": 62.265306122448976, "grad_norm": 25.631410598754883, "learning_rate": 1.8887755102040815e-05, "loss": 1.4567, "step": 15255 }, { "epoch": 62.26938775510204, "grad_norm": 17.6726016998291, "learning_rate": 1.888571428571429e-05, "loss": 2.3433, "step": 15256 }, { "epoch": 62.2734693877551, "grad_norm": 25.32906723022461, "learning_rate": 1.8883673469387756e-05, "loss": 2.1125, "step": 15257 }, { "epoch": 62.27755102040816, "grad_norm": 36.80586624145508, "learning_rate": 1.8881632653061226e-05, "loss": 1.3124, "step": 15258 }, { "epoch": 62.28163265306122, "grad_norm": 29.77088165283203, "learning_rate": 1.8879591836734694e-05, "loss": 0.8749, "step": 15259 }, { "epoch": 62.285714285714285, "grad_norm": 45.905303955078125, "learning_rate": 1.8877551020408164e-05, "loss": 0.5981, "step": 15260 }, { "epoch": 62.289795918367346, "grad_norm": 30.65291976928711, "learning_rate": 1.8875510204081635e-05, "loss": 1.8298, "step": 15261 }, { "epoch": 62.29387755102041, "grad_norm": 36.941253662109375, "learning_rate": 1.8873469387755102e-05, "loss": 1.1494, "step": 15262 }, { "epoch": 62.29795918367347, "grad_norm": 31.27004623413086, "learning_rate": 1.8871428571428572e-05, "loss": 0.3392, "step": 15263 }, { "epoch": 62.30204081632653, "grad_norm": 18.041162490844727, "learning_rate": 1.8869387755102043e-05, "loss": 2.4513, "step": 15264 }, { "epoch": 62.30612244897959, "grad_norm": 34.24306869506836, "learning_rate": 1.886734693877551e-05, "loss": 1.0277, "step": 15265 }, { "epoch": 62.310204081632655, "grad_norm": 29.989656448364258, "learning_rate": 1.886530612244898e-05, "loss": 0.6293, "step": 15266 }, { "epoch": 62.31428571428572, "grad_norm": 29.964244842529297, "learning_rate": 1.8863265306122448e-05, "loss": 1.556, "step": 15267 }, { "epoch": 62.31836734693878, "grad_norm": 35.18840026855469, "learning_rate": 1.886122448979592e-05, "loss": 1.3113, "step": 15268 }, { "epoch": 62.32244897959184, "grad_norm": 19.25577163696289, "learning_rate": 1.885918367346939e-05, "loss": 2.4629, "step": 15269 }, { "epoch": 62.326530612244895, "grad_norm": 67.34602355957031, "learning_rate": 1.885714285714286e-05, "loss": 0.3357, "step": 15270 }, { "epoch": 62.33061224489796, "grad_norm": 21.098726272583008, "learning_rate": 1.8855102040816326e-05, "loss": 2.4606, "step": 15271 }, { "epoch": 62.33469387755102, "grad_norm": 40.428810119628906, "learning_rate": 1.8853061224489797e-05, "loss": 1.1388, "step": 15272 }, { "epoch": 62.33877551020408, "grad_norm": 23.77947998046875, "learning_rate": 1.8851020408163267e-05, "loss": 0.1852, "step": 15273 }, { "epoch": 62.34285714285714, "grad_norm": 38.09374237060547, "learning_rate": 1.8848979591836734e-05, "loss": 0.5778, "step": 15274 }, { "epoch": 62.3469387755102, "grad_norm": 31.10660171508789, "learning_rate": 1.8846938775510205e-05, "loss": 1.7343, "step": 15275 }, { "epoch": 62.351020408163265, "grad_norm": 31.810972213745117, "learning_rate": 1.8844897959183675e-05, "loss": 0.7065, "step": 15276 }, { "epoch": 62.35510204081633, "grad_norm": 33.94179153442383, "learning_rate": 1.8842857142857143e-05, "loss": 1.4119, "step": 15277 }, { "epoch": 62.35918367346939, "grad_norm": 34.75617218017578, "learning_rate": 1.8840816326530613e-05, "loss": 0.7239, "step": 15278 }, { "epoch": 62.36326530612245, "grad_norm": 39.77949905395508, "learning_rate": 1.8838775510204084e-05, "loss": 0.748, "step": 15279 }, { "epoch": 62.36734693877551, "grad_norm": 32.604698181152344, "learning_rate": 1.8836734693877554e-05, "loss": 1.1812, "step": 15280 }, { "epoch": 62.371428571428574, "grad_norm": 36.98982238769531, "learning_rate": 1.883469387755102e-05, "loss": 1.6237, "step": 15281 }, { "epoch": 62.375510204081635, "grad_norm": 45.38766860961914, "learning_rate": 1.8832653061224488e-05, "loss": 0.6077, "step": 15282 }, { "epoch": 62.3795918367347, "grad_norm": NaN, "learning_rate": 1.883061224489796e-05, "loss": 1.0544, "step": 15283 }, { "epoch": 62.38367346938775, "grad_norm": 39.180419921875, "learning_rate": 1.883061224489796e-05, "loss": 0.7287, "step": 15284 }, { "epoch": 62.38775510204081, "grad_norm": 21.95017433166504, "learning_rate": 1.882857142857143e-05, "loss": 1.8859, "step": 15285 }, { "epoch": 62.391836734693875, "grad_norm": 34.08769607543945, "learning_rate": 1.88265306122449e-05, "loss": 1.486, "step": 15286 }, { "epoch": 62.39591836734694, "grad_norm": 23.707090377807617, "learning_rate": 1.8824489795918367e-05, "loss": 1.9487, "step": 15287 }, { "epoch": 62.4, "grad_norm": 25.724472045898438, "learning_rate": 1.8822448979591837e-05, "loss": 2.2346, "step": 15288 }, { "epoch": 62.40408163265306, "grad_norm": 31.405282974243164, "learning_rate": 1.8820408163265308e-05, "loss": 0.6685, "step": 15289 }, { "epoch": 62.40816326530612, "grad_norm": 39.05067825317383, "learning_rate": 1.8818367346938775e-05, "loss": 0.661, "step": 15290 }, { "epoch": 62.412244897959184, "grad_norm": 28.362144470214844, "learning_rate": 1.8816326530612246e-05, "loss": 0.3152, "step": 15291 }, { "epoch": 62.416326530612245, "grad_norm": 48.7198371887207, "learning_rate": 1.8814285714285716e-05, "loss": 0.5426, "step": 15292 }, { "epoch": 62.42040816326531, "grad_norm": 38.71328353881836, "learning_rate": 1.8812244897959187e-05, "loss": 1.142, "step": 15293 }, { "epoch": 62.42448979591837, "grad_norm": 35.557865142822266, "learning_rate": 1.8810204081632654e-05, "loss": 1.856, "step": 15294 }, { "epoch": 62.42857142857143, "grad_norm": 43.70738220214844, "learning_rate": 1.880816326530612e-05, "loss": 0.9641, "step": 15295 }, { "epoch": 62.43265306122449, "grad_norm": 30.552967071533203, "learning_rate": 1.8806122448979595e-05, "loss": 1.8603, "step": 15296 }, { "epoch": 62.436734693877554, "grad_norm": 27.258394241333008, "learning_rate": 1.8804081632653062e-05, "loss": 1.4822, "step": 15297 }, { "epoch": 62.440816326530616, "grad_norm": 19.97142219543457, "learning_rate": 1.8802040816326532e-05, "loss": 1.9228, "step": 15298 }, { "epoch": 62.44489795918367, "grad_norm": 30.24345588684082, "learning_rate": 1.88e-05, "loss": 0.2667, "step": 15299 }, { "epoch": 62.44897959183673, "grad_norm": 36.599578857421875, "learning_rate": 1.879795918367347e-05, "loss": 1.569, "step": 15300 }, { "epoch": 62.453061224489794, "grad_norm": 22.74473762512207, "learning_rate": 1.879591836734694e-05, "loss": 0.2622, "step": 15301 }, { "epoch": 62.457142857142856, "grad_norm": 37.423709869384766, "learning_rate": 1.8793877551020408e-05, "loss": 1.7142, "step": 15302 }, { "epoch": 62.46122448979592, "grad_norm": 37.05299758911133, "learning_rate": 1.8791836734693878e-05, "loss": 1.4384, "step": 15303 }, { "epoch": 62.46530612244898, "grad_norm": 22.476099014282227, "learning_rate": 1.878979591836735e-05, "loss": 1.8856, "step": 15304 }, { "epoch": 62.46938775510204, "grad_norm": 42.62679672241211, "learning_rate": 1.8787755102040816e-05, "loss": 0.815, "step": 15305 }, { "epoch": 62.4734693877551, "grad_norm": 33.29184341430664, "learning_rate": 1.8785714285714286e-05, "loss": 1.5039, "step": 15306 }, { "epoch": 62.477551020408164, "grad_norm": 38.01101303100586, "learning_rate": 1.8783673469387757e-05, "loss": 1.0531, "step": 15307 }, { "epoch": 62.481632653061226, "grad_norm": 29.219038009643555, "learning_rate": 1.8781632653061227e-05, "loss": 1.9109, "step": 15308 }, { "epoch": 62.48571428571429, "grad_norm": 42.129817962646484, "learning_rate": 1.8779591836734695e-05, "loss": 0.6457, "step": 15309 }, { "epoch": 62.48979591836735, "grad_norm": 41.619773864746094, "learning_rate": 1.877755102040816e-05, "loss": 0.7086, "step": 15310 }, { "epoch": 62.49387755102041, "grad_norm": 41.40730667114258, "learning_rate": 1.8775510204081632e-05, "loss": 0.8407, "step": 15311 }, { "epoch": 62.49795918367347, "grad_norm": 27.02667808532715, "learning_rate": 1.8773469387755103e-05, "loss": 1.9378, "step": 15312 }, { "epoch": 62.50204081632653, "grad_norm": 44.17335510253906, "learning_rate": 1.8771428571428573e-05, "loss": 1.0223, "step": 15313 }, { "epoch": 62.50612244897959, "grad_norm": 33.6751823425293, "learning_rate": 1.876938775510204e-05, "loss": 0.4984, "step": 15314 }, { "epoch": 62.51020408163265, "grad_norm": 32.725852966308594, "learning_rate": 1.876734693877551e-05, "loss": 1.0556, "step": 15315 }, { "epoch": 62.51428571428571, "grad_norm": 24.257585525512695, "learning_rate": 1.876530612244898e-05, "loss": 1.6507, "step": 15316 }, { "epoch": 62.518367346938774, "grad_norm": 33.66005325317383, "learning_rate": 1.876326530612245e-05, "loss": 0.5416, "step": 15317 }, { "epoch": 62.522448979591836, "grad_norm": 37.681053161621094, "learning_rate": 1.876122448979592e-05, "loss": 1.2312, "step": 15318 }, { "epoch": 62.5265306122449, "grad_norm": 34.09798812866211, "learning_rate": 1.875918367346939e-05, "loss": 1.3486, "step": 15319 }, { "epoch": 62.53061224489796, "grad_norm": 28.291643142700195, "learning_rate": 1.875714285714286e-05, "loss": 1.2094, "step": 15320 }, { "epoch": 62.53469387755102, "grad_norm": 33.49834442138672, "learning_rate": 1.8755102040816327e-05, "loss": 1.9183, "step": 15321 }, { "epoch": 62.53877551020408, "grad_norm": 28.942140579223633, "learning_rate": 1.8753061224489794e-05, "loss": 1.9228, "step": 15322 }, { "epoch": 62.542857142857144, "grad_norm": 39.868385314941406, "learning_rate": 1.8751020408163268e-05, "loss": 1.2098, "step": 15323 }, { "epoch": 62.546938775510206, "grad_norm": 21.868492126464844, "learning_rate": 1.8748979591836735e-05, "loss": 1.9375, "step": 15324 }, { "epoch": 62.55102040816327, "grad_norm": 17.416301727294922, "learning_rate": 1.8746938775510206e-05, "loss": 2.758, "step": 15325 }, { "epoch": 62.55510204081633, "grad_norm": 15.64976978302002, "learning_rate": 1.8744897959183673e-05, "loss": 2.469, "step": 15326 }, { "epoch": 62.55918367346939, "grad_norm": 48.54969024658203, "learning_rate": 1.8742857142857143e-05, "loss": 0.7625, "step": 15327 }, { "epoch": 62.563265306122446, "grad_norm": 37.77290725708008, "learning_rate": 1.8740816326530614e-05, "loss": 0.5218, "step": 15328 }, { "epoch": 62.56734693877551, "grad_norm": 48.871341705322266, "learning_rate": 1.873877551020408e-05, "loss": 0.8222, "step": 15329 }, { "epoch": 62.57142857142857, "grad_norm": 31.781217575073242, "learning_rate": 1.873673469387755e-05, "loss": 1.8344, "step": 15330 }, { "epoch": 62.57551020408163, "grad_norm": 24.473344802856445, "learning_rate": 1.8734693877551022e-05, "loss": 2.0738, "step": 15331 }, { "epoch": 62.57959183673469, "grad_norm": 36.5560188293457, "learning_rate": 1.873265306122449e-05, "loss": 1.4773, "step": 15332 }, { "epoch": 62.583673469387755, "grad_norm": 24.993366241455078, "learning_rate": 1.873061224489796e-05, "loss": 0.1873, "step": 15333 }, { "epoch": 62.587755102040816, "grad_norm": 32.0965576171875, "learning_rate": 1.872857142857143e-05, "loss": 0.6729, "step": 15334 }, { "epoch": 62.59183673469388, "grad_norm": 28.660764694213867, "learning_rate": 1.87265306122449e-05, "loss": 1.124, "step": 15335 }, { "epoch": 62.59591836734694, "grad_norm": 37.012489318847656, "learning_rate": 1.8724489795918368e-05, "loss": 1.613, "step": 15336 }, { "epoch": 62.6, "grad_norm": 20.880306243896484, "learning_rate": 1.872244897959184e-05, "loss": 2.3667, "step": 15337 }, { "epoch": 62.60408163265306, "grad_norm": 31.187023162841797, "learning_rate": 1.8720408163265306e-05, "loss": 1.47, "step": 15338 }, { "epoch": 62.608163265306125, "grad_norm": 45.66391372680664, "learning_rate": 1.8718367346938776e-05, "loss": 0.8718, "step": 15339 }, { "epoch": 62.61224489795919, "grad_norm": 35.667503356933594, "learning_rate": 1.8716326530612247e-05, "loss": 0.6437, "step": 15340 }, { "epoch": 62.61632653061225, "grad_norm": 31.07919692993164, "learning_rate": 1.8714285714285714e-05, "loss": 1.4122, "step": 15341 }, { "epoch": 62.6204081632653, "grad_norm": 20.089834213256836, "learning_rate": 1.8712244897959184e-05, "loss": 0.2587, "step": 15342 }, { "epoch": 62.624489795918365, "grad_norm": 37.228538513183594, "learning_rate": 1.8710204081632655e-05, "loss": 1.0298, "step": 15343 }, { "epoch": 62.628571428571426, "grad_norm": 16.525104522705078, "learning_rate": 1.8708163265306122e-05, "loss": 2.5657, "step": 15344 }, { "epoch": 62.63265306122449, "grad_norm": 34.87412643432617, "learning_rate": 1.8706122448979592e-05, "loss": 1.3835, "step": 15345 }, { "epoch": 62.63673469387755, "grad_norm": 27.783069610595703, "learning_rate": 1.8704081632653063e-05, "loss": 1.7289, "step": 15346 }, { "epoch": 62.64081632653061, "grad_norm": 22.405302047729492, "learning_rate": 1.8702040816326533e-05, "loss": 1.9511, "step": 15347 }, { "epoch": 62.64489795918367, "grad_norm": 26.557518005371094, "learning_rate": 1.87e-05, "loss": 2.0848, "step": 15348 }, { "epoch": 62.648979591836735, "grad_norm": 29.634973526000977, "learning_rate": 1.8697959183673468e-05, "loss": 0.5039, "step": 15349 }, { "epoch": 62.6530612244898, "grad_norm": 29.622116088867188, "learning_rate": 1.869591836734694e-05, "loss": 1.8492, "step": 15350 }, { "epoch": 62.65714285714286, "grad_norm": 25.33285903930664, "learning_rate": 1.869387755102041e-05, "loss": 1.9831, "step": 15351 }, { "epoch": 62.66122448979592, "grad_norm": 28.332460403442383, "learning_rate": 1.869183673469388e-05, "loss": 0.2229, "step": 15352 }, { "epoch": 62.66530612244898, "grad_norm": 28.246606826782227, "learning_rate": 1.8689795918367346e-05, "loss": 1.4529, "step": 15353 }, { "epoch": 62.66938775510204, "grad_norm": 22.421464920043945, "learning_rate": 1.8687755102040817e-05, "loss": 1.8814, "step": 15354 }, { "epoch": 62.673469387755105, "grad_norm": 36.17433547973633, "learning_rate": 1.8685714285714287e-05, "loss": 0.9372, "step": 15355 }, { "epoch": 62.67755102040816, "grad_norm": 28.174718856811523, "learning_rate": 1.8683673469387754e-05, "loss": 0.2311, "step": 15356 }, { "epoch": 62.68163265306122, "grad_norm": 40.1771240234375, "learning_rate": 1.8681632653061225e-05, "loss": 1.1776, "step": 15357 }, { "epoch": 62.68571428571428, "grad_norm": 22.065465927124023, "learning_rate": 1.8679591836734696e-05, "loss": 1.9583, "step": 15358 }, { "epoch": 62.689795918367345, "grad_norm": 28.646286010742188, "learning_rate": 1.8677551020408166e-05, "loss": 1.6632, "step": 15359 }, { "epoch": 62.69387755102041, "grad_norm": 18.858789443969727, "learning_rate": 1.8675510204081633e-05, "loss": 2.3194, "step": 15360 }, { "epoch": 62.69795918367347, "grad_norm": 28.43981170654297, "learning_rate": 1.8673469387755104e-05, "loss": 1.6657, "step": 15361 }, { "epoch": 62.70204081632653, "grad_norm": 35.95901107788086, "learning_rate": 1.8671428571428574e-05, "loss": 0.663, "step": 15362 }, { "epoch": 62.70612244897959, "grad_norm": 33.98337936401367, "learning_rate": 1.866938775510204e-05, "loss": 1.4736, "step": 15363 }, { "epoch": 62.710204081632654, "grad_norm": 36.55190658569336, "learning_rate": 1.8667346938775512e-05, "loss": 1.0288, "step": 15364 }, { "epoch": 62.714285714285715, "grad_norm": 22.806800842285156, "learning_rate": 1.866530612244898e-05, "loss": 2.0493, "step": 15365 }, { "epoch": 62.71836734693878, "grad_norm": 29.185375213623047, "learning_rate": 1.866326530612245e-05, "loss": 0.2312, "step": 15366 }, { "epoch": 62.72244897959184, "grad_norm": 35.15608215332031, "learning_rate": 1.866122448979592e-05, "loss": 1.5273, "step": 15367 }, { "epoch": 62.7265306122449, "grad_norm": 34.37796401977539, "learning_rate": 1.8659183673469387e-05, "loss": 1.4669, "step": 15368 }, { "epoch": 62.73061224489796, "grad_norm": 50.25755310058594, "learning_rate": 1.8657142857142858e-05, "loss": 1.319, "step": 15369 }, { "epoch": 62.734693877551024, "grad_norm": 31.692642211914062, "learning_rate": 1.8655102040816328e-05, "loss": 1.2939, "step": 15370 }, { "epoch": 62.73877551020408, "grad_norm": 52.800331115722656, "learning_rate": 1.8653061224489795e-05, "loss": 0.9416, "step": 15371 }, { "epoch": 62.74285714285714, "grad_norm": 33.1436653137207, "learning_rate": 1.8651020408163266e-05, "loss": 1.389, "step": 15372 }, { "epoch": 62.7469387755102, "grad_norm": 32.90663528442383, "learning_rate": 1.8648979591836736e-05, "loss": 1.2534, "step": 15373 }, { "epoch": 62.751020408163264, "grad_norm": 37.591495513916016, "learning_rate": 1.8646938775510207e-05, "loss": 0.8468, "step": 15374 }, { "epoch": 62.755102040816325, "grad_norm": 23.14034080505371, "learning_rate": 1.8644897959183674e-05, "loss": 1.9439, "step": 15375 }, { "epoch": 62.75918367346939, "grad_norm": 22.769609451293945, "learning_rate": 1.864285714285714e-05, "loss": 1.6095, "step": 15376 }, { "epoch": 62.76326530612245, "grad_norm": 43.273860931396484, "learning_rate": 1.8640816326530615e-05, "loss": 1.3538, "step": 15377 }, { "epoch": 62.76734693877551, "grad_norm": 28.960420608520508, "learning_rate": 1.8638775510204082e-05, "loss": 1.5331, "step": 15378 }, { "epoch": 62.77142857142857, "grad_norm": 53.655029296875, "learning_rate": 1.8636734693877553e-05, "loss": 0.7679, "step": 15379 }, { "epoch": 62.775510204081634, "grad_norm": 38.178565979003906, "learning_rate": 1.863469387755102e-05, "loss": 0.8786, "step": 15380 }, { "epoch": 62.779591836734696, "grad_norm": 9.241040229797363, "learning_rate": 1.8632653061224494e-05, "loss": 3.2009, "step": 15381 }, { "epoch": 62.78367346938776, "grad_norm": 37.367897033691406, "learning_rate": 1.863061224489796e-05, "loss": 1.1892, "step": 15382 }, { "epoch": 62.78775510204082, "grad_norm": 31.53868865966797, "learning_rate": 1.8628571428571428e-05, "loss": 0.9843, "step": 15383 }, { "epoch": 62.79183673469388, "grad_norm": 19.618013381958008, "learning_rate": 1.86265306122449e-05, "loss": 2.063, "step": 15384 }, { "epoch": 62.795918367346935, "grad_norm": 31.976856231689453, "learning_rate": 1.862448979591837e-05, "loss": 1.0493, "step": 15385 }, { "epoch": 62.8, "grad_norm": 23.47722625732422, "learning_rate": 1.862244897959184e-05, "loss": 1.7954, "step": 15386 }, { "epoch": 62.80408163265306, "grad_norm": 56.967567443847656, "learning_rate": 1.8620408163265307e-05, "loss": 0.6858, "step": 15387 }, { "epoch": 62.80816326530612, "grad_norm": 15.002640724182129, "learning_rate": 1.8618367346938777e-05, "loss": 0.1286, "step": 15388 }, { "epoch": 62.81224489795918, "grad_norm": 35.11648941040039, "learning_rate": 1.8616326530612248e-05, "loss": 0.6783, "step": 15389 }, { "epoch": 62.816326530612244, "grad_norm": 30.406763076782227, "learning_rate": 1.8614285714285715e-05, "loss": 1.6824, "step": 15390 }, { "epoch": 62.820408163265306, "grad_norm": 27.191112518310547, "learning_rate": 1.8612244897959185e-05, "loss": 1.8221, "step": 15391 }, { "epoch": 62.82448979591837, "grad_norm": 33.418338775634766, "learning_rate": 1.8610204081632652e-05, "loss": 1.5747, "step": 15392 }, { "epoch": 62.82857142857143, "grad_norm": 25.189464569091797, "learning_rate": 1.8608163265306123e-05, "loss": 2.0109, "step": 15393 }, { "epoch": 62.83265306122449, "grad_norm": 26.267568588256836, "learning_rate": 1.8606122448979593e-05, "loss": 1.798, "step": 15394 }, { "epoch": 62.83673469387755, "grad_norm": 37.30453872680664, "learning_rate": 1.860408163265306e-05, "loss": 0.6543, "step": 15395 }, { "epoch": 62.840816326530614, "grad_norm": 23.095735549926758, "learning_rate": 1.860204081632653e-05, "loss": 2.0597, "step": 15396 }, { "epoch": 62.844897959183676, "grad_norm": 17.9825382232666, "learning_rate": 1.86e-05, "loss": 2.353, "step": 15397 }, { "epoch": 62.84897959183674, "grad_norm": 36.94740295410156, "learning_rate": 1.859795918367347e-05, "loss": 1.248, "step": 15398 }, { "epoch": 62.85306122448979, "grad_norm": 24.15518569946289, "learning_rate": 1.859591836734694e-05, "loss": 1.696, "step": 15399 }, { "epoch": 62.857142857142854, "grad_norm": 31.41061019897461, "learning_rate": 1.859387755102041e-05, "loss": 1.1026, "step": 15400 }, { "epoch": 62.861224489795916, "grad_norm": 21.29252052307129, "learning_rate": 1.859183673469388e-05, "loss": 2.0067, "step": 15401 }, { "epoch": 62.86530612244898, "grad_norm": 25.055315017700195, "learning_rate": 1.8589795918367347e-05, "loss": 1.8242, "step": 15402 }, { "epoch": 62.86938775510204, "grad_norm": 39.41109085083008, "learning_rate": 1.8587755102040818e-05, "loss": 0.7358, "step": 15403 }, { "epoch": 62.8734693877551, "grad_norm": 18.216602325439453, "learning_rate": 1.858571428571429e-05, "loss": 2.474, "step": 15404 }, { "epoch": 62.87755102040816, "grad_norm": 20.29939079284668, "learning_rate": 1.8583673469387755e-05, "loss": 1.8919, "step": 15405 }, { "epoch": 62.881632653061224, "grad_norm": 34.67216110229492, "learning_rate": 1.8581632653061226e-05, "loss": 1.0735, "step": 15406 }, { "epoch": 62.885714285714286, "grad_norm": 38.05056381225586, "learning_rate": 1.8579591836734693e-05, "loss": 1.5478, "step": 15407 }, { "epoch": 62.88979591836735, "grad_norm": 23.25571060180664, "learning_rate": 1.8577551020408167e-05, "loss": 2.4508, "step": 15408 }, { "epoch": 62.89387755102041, "grad_norm": 20.603729248046875, "learning_rate": 1.8575510204081634e-05, "loss": 1.7769, "step": 15409 }, { "epoch": 62.89795918367347, "grad_norm": 35.40569305419922, "learning_rate": 1.85734693877551e-05, "loss": 1.0599, "step": 15410 }, { "epoch": 62.90204081632653, "grad_norm": 32.35540771484375, "learning_rate": 1.8571428571428572e-05, "loss": 1.3054, "step": 15411 }, { "epoch": 62.906122448979595, "grad_norm": 28.721847534179688, "learning_rate": 1.8569387755102042e-05, "loss": 1.7105, "step": 15412 }, { "epoch": 62.910204081632656, "grad_norm": 26.867658615112305, "learning_rate": 1.8567346938775513e-05, "loss": 1.2037, "step": 15413 }, { "epoch": 62.91428571428571, "grad_norm": 31.973520278930664, "learning_rate": 1.856530612244898e-05, "loss": 0.4915, "step": 15414 }, { "epoch": 62.91836734693877, "grad_norm": 51.7951774597168, "learning_rate": 1.856326530612245e-05, "loss": 1.5408, "step": 15415 }, { "epoch": 62.922448979591834, "grad_norm": 26.87525177001953, "learning_rate": 1.856122448979592e-05, "loss": 0.1743, "step": 15416 }, { "epoch": 62.926530612244896, "grad_norm": 23.56831169128418, "learning_rate": 1.8559183673469388e-05, "loss": 2.4435, "step": 15417 }, { "epoch": 62.93061224489796, "grad_norm": 24.810638427734375, "learning_rate": 1.855714285714286e-05, "loss": 1.9813, "step": 15418 }, { "epoch": 62.93469387755102, "grad_norm": 27.891361236572266, "learning_rate": 1.8555102040816326e-05, "loss": 1.2387, "step": 15419 }, { "epoch": 62.93877551020408, "grad_norm": 33.35541915893555, "learning_rate": 1.8553061224489796e-05, "loss": 1.3455, "step": 15420 }, { "epoch": 62.94285714285714, "grad_norm": 34.49517059326172, "learning_rate": 1.8551020408163267e-05, "loss": 0.3148, "step": 15421 }, { "epoch": 62.946938775510205, "grad_norm": 23.319673538208008, "learning_rate": 1.8548979591836734e-05, "loss": 1.9396, "step": 15422 }, { "epoch": 62.95102040816327, "grad_norm": 27.726848602294922, "learning_rate": 1.8546938775510204e-05, "loss": 1.7607, "step": 15423 }, { "epoch": 62.95510204081633, "grad_norm": 31.19962501525879, "learning_rate": 1.8544897959183675e-05, "loss": 1.4393, "step": 15424 }, { "epoch": 62.95918367346939, "grad_norm": 28.35543441772461, "learning_rate": 1.8542857142857142e-05, "loss": 1.5895, "step": 15425 }, { "epoch": 62.96326530612245, "grad_norm": 33.98202896118164, "learning_rate": 1.8540816326530613e-05, "loss": 0.9143, "step": 15426 }, { "epoch": 62.96734693877551, "grad_norm": 33.0522346496582, "learning_rate": 1.8538775510204083e-05, "loss": 1.8122, "step": 15427 }, { "epoch": 62.97142857142857, "grad_norm": 21.96626853942871, "learning_rate": 1.8536734693877554e-05, "loss": 2.3409, "step": 15428 }, { "epoch": 62.97551020408163, "grad_norm": 30.33650016784668, "learning_rate": 1.853469387755102e-05, "loss": 1.0966, "step": 15429 }, { "epoch": 62.97959183673469, "grad_norm": 23.015382766723633, "learning_rate": 1.853265306122449e-05, "loss": 2.1123, "step": 15430 }, { "epoch": 62.98367346938775, "grad_norm": 35.234004974365234, "learning_rate": 1.853061224489796e-05, "loss": 0.7947, "step": 15431 }, { "epoch": 62.987755102040815, "grad_norm": 22.05025291442871, "learning_rate": 1.852857142857143e-05, "loss": 2.3602, "step": 15432 }, { "epoch": 62.99183673469388, "grad_norm": 26.649085998535156, "learning_rate": 1.85265306122449e-05, "loss": 1.9578, "step": 15433 }, { "epoch": 62.99591836734694, "grad_norm": 36.468929290771484, "learning_rate": 1.8524489795918366e-05, "loss": 0.9833, "step": 15434 }, { "epoch": 63.0, "grad_norm": 3.2633299827575684, "learning_rate": 1.852244897959184e-05, "loss": 0.0533, "step": 15435 }, { "epoch": 63.00408163265306, "grad_norm": 21.726896286010742, "learning_rate": 1.8520408163265307e-05, "loss": 1.953, "step": 15436 }, { "epoch": 63.00816326530612, "grad_norm": 50.631473541259766, "learning_rate": 1.8518367346938775e-05, "loss": 1.1253, "step": 15437 }, { "epoch": 63.012244897959185, "grad_norm": 25.957590103149414, "learning_rate": 1.8516326530612245e-05, "loss": 1.7027, "step": 15438 }, { "epoch": 63.01632653061225, "grad_norm": 39.73928451538086, "learning_rate": 1.8514285714285716e-05, "loss": 0.728, "step": 15439 }, { "epoch": 63.02040816326531, "grad_norm": 23.48487091064453, "learning_rate": 1.8512244897959186e-05, "loss": 2.3225, "step": 15440 }, { "epoch": 63.02448979591837, "grad_norm": 30.38884735107422, "learning_rate": 1.8510204081632653e-05, "loss": 1.3642, "step": 15441 }, { "epoch": 63.02857142857143, "grad_norm": 24.653573989868164, "learning_rate": 1.8508163265306124e-05, "loss": 1.5532, "step": 15442 }, { "epoch": 63.03265306122449, "grad_norm": 36.80719757080078, "learning_rate": 1.8506122448979594e-05, "loss": 1.6358, "step": 15443 }, { "epoch": 63.03673469387755, "grad_norm": 29.67198944091797, "learning_rate": 1.850408163265306e-05, "loss": 1.3918, "step": 15444 }, { "epoch": 63.04081632653061, "grad_norm": 40.077674865722656, "learning_rate": 1.8502040816326532e-05, "loss": 0.5698, "step": 15445 }, { "epoch": 63.04489795918367, "grad_norm": 37.89451217651367, "learning_rate": 1.85e-05, "loss": 1.3792, "step": 15446 }, { "epoch": 63.04897959183673, "grad_norm": 22.144258499145508, "learning_rate": 1.849795918367347e-05, "loss": 2.03, "step": 15447 }, { "epoch": 63.053061224489795, "grad_norm": 50.330509185791016, "learning_rate": 1.849591836734694e-05, "loss": 0.6665, "step": 15448 }, { "epoch": 63.05714285714286, "grad_norm": 79.20462799072266, "learning_rate": 1.8493877551020407e-05, "loss": 0.6507, "step": 15449 }, { "epoch": 63.06122448979592, "grad_norm": 40.47023391723633, "learning_rate": 1.8491836734693878e-05, "loss": 1.3132, "step": 15450 }, { "epoch": 63.06530612244898, "grad_norm": 22.699092864990234, "learning_rate": 1.8489795918367348e-05, "loss": 2.0752, "step": 15451 }, { "epoch": 63.06938775510204, "grad_norm": 47.86487579345703, "learning_rate": 1.848775510204082e-05, "loss": 0.7056, "step": 15452 }, { "epoch": 63.073469387755104, "grad_norm": 29.297893524169922, "learning_rate": 1.8485714285714286e-05, "loss": 1.4171, "step": 15453 }, { "epoch": 63.077551020408166, "grad_norm": 30.950382232666016, "learning_rate": 1.8483673469387756e-05, "loss": 0.2079, "step": 15454 }, { "epoch": 63.08163265306123, "grad_norm": 34.81781768798828, "learning_rate": 1.8481632653061227e-05, "loss": 0.6556, "step": 15455 }, { "epoch": 63.08571428571429, "grad_norm": 30.995410919189453, "learning_rate": 1.8479591836734694e-05, "loss": 1.0085, "step": 15456 }, { "epoch": 63.089795918367344, "grad_norm": 42.32170104980469, "learning_rate": 1.8477551020408165e-05, "loss": 0.2514, "step": 15457 }, { "epoch": 63.093877551020405, "grad_norm": 42.34782791137695, "learning_rate": 1.8475510204081635e-05, "loss": 1.051, "step": 15458 }, { "epoch": 63.09795918367347, "grad_norm": 32.209163665771484, "learning_rate": 1.8473469387755102e-05, "loss": 1.3193, "step": 15459 }, { "epoch": 63.10204081632653, "grad_norm": 21.265087127685547, "learning_rate": 1.8471428571428573e-05, "loss": 0.124, "step": 15460 }, { "epoch": 63.10612244897959, "grad_norm": 30.68764877319336, "learning_rate": 1.846938775510204e-05, "loss": 1.0063, "step": 15461 }, { "epoch": 63.11020408163265, "grad_norm": 30.31456184387207, "learning_rate": 1.8467346938775514e-05, "loss": 0.9384, "step": 15462 }, { "epoch": 63.114285714285714, "grad_norm": 40.25093460083008, "learning_rate": 1.846530612244898e-05, "loss": 0.6106, "step": 15463 }, { "epoch": 63.118367346938776, "grad_norm": 18.353776931762695, "learning_rate": 1.8463265306122448e-05, "loss": 2.7241, "step": 15464 }, { "epoch": 63.12244897959184, "grad_norm": 35.90503692626953, "learning_rate": 1.846122448979592e-05, "loss": 0.6796, "step": 15465 }, { "epoch": 63.1265306122449, "grad_norm": 25.86520767211914, "learning_rate": 1.845918367346939e-05, "loss": 1.5912, "step": 15466 }, { "epoch": 63.13061224489796, "grad_norm": 39.29744338989258, "learning_rate": 1.845714285714286e-05, "loss": 1.3155, "step": 15467 }, { "epoch": 63.13469387755102, "grad_norm": 35.551795959472656, "learning_rate": 1.8455102040816327e-05, "loss": 1.4368, "step": 15468 }, { "epoch": 63.138775510204084, "grad_norm": 25.795320510864258, "learning_rate": 1.8453061224489797e-05, "loss": 0.1895, "step": 15469 }, { "epoch": 63.142857142857146, "grad_norm": 17.82875633239746, "learning_rate": 1.8451020408163268e-05, "loss": 2.4012, "step": 15470 }, { "epoch": 63.14693877551021, "grad_norm": 41.99028015136719, "learning_rate": 1.8448979591836735e-05, "loss": 0.9047, "step": 15471 }, { "epoch": 63.15102040816326, "grad_norm": 38.13845443725586, "learning_rate": 1.8446938775510205e-05, "loss": 1.106, "step": 15472 }, { "epoch": 63.155102040816324, "grad_norm": 32.209468841552734, "learning_rate": 1.8444897959183672e-05, "loss": 1.1268, "step": 15473 }, { "epoch": 63.159183673469386, "grad_norm": 43.87432098388672, "learning_rate": 1.8442857142857146e-05, "loss": 0.2505, "step": 15474 }, { "epoch": 63.16326530612245, "grad_norm": 21.660194396972656, "learning_rate": 1.8440816326530613e-05, "loss": 1.9359, "step": 15475 }, { "epoch": 63.16734693877551, "grad_norm": 23.549036026000977, "learning_rate": 1.843877551020408e-05, "loss": 1.8231, "step": 15476 }, { "epoch": 63.17142857142857, "grad_norm": 21.61585235595703, "learning_rate": 1.843673469387755e-05, "loss": 1.9007, "step": 15477 }, { "epoch": 63.17551020408163, "grad_norm": 32.987789154052734, "learning_rate": 1.843469387755102e-05, "loss": 0.4384, "step": 15478 }, { "epoch": 63.179591836734694, "grad_norm": 27.298524856567383, "learning_rate": 1.8432653061224492e-05, "loss": 1.6465, "step": 15479 }, { "epoch": 63.183673469387756, "grad_norm": 33.6776237487793, "learning_rate": 1.843061224489796e-05, "loss": 0.325, "step": 15480 }, { "epoch": 63.18775510204082, "grad_norm": 33.91073226928711, "learning_rate": 1.842857142857143e-05, "loss": 1.5634, "step": 15481 }, { "epoch": 63.19183673469388, "grad_norm": 33.13216781616211, "learning_rate": 1.84265306122449e-05, "loss": 1.7215, "step": 15482 }, { "epoch": 63.19591836734694, "grad_norm": 23.87906265258789, "learning_rate": 1.8424489795918367e-05, "loss": 1.8356, "step": 15483 }, { "epoch": 63.2, "grad_norm": 20.352855682373047, "learning_rate": 1.8422448979591838e-05, "loss": 2.3771, "step": 15484 }, { "epoch": 63.204081632653065, "grad_norm": 38.71623992919922, "learning_rate": 1.842040816326531e-05, "loss": 1.4716, "step": 15485 }, { "epoch": 63.20816326530612, "grad_norm": 29.41086769104004, "learning_rate": 1.8418367346938776e-05, "loss": 2.1109, "step": 15486 }, { "epoch": 63.21224489795918, "grad_norm": 33.637027740478516, "learning_rate": 1.8416326530612246e-05, "loss": 0.4414, "step": 15487 }, { "epoch": 63.21632653061224, "grad_norm": 41.6041259765625, "learning_rate": 1.8414285714285713e-05, "loss": 1.5931, "step": 15488 }, { "epoch": 63.220408163265304, "grad_norm": 20.343753814697266, "learning_rate": 1.8412244897959187e-05, "loss": 1.9956, "step": 15489 }, { "epoch": 63.224489795918366, "grad_norm": 25.771324157714844, "learning_rate": 1.8410204081632654e-05, "loss": 1.666, "step": 15490 }, { "epoch": 63.22857142857143, "grad_norm": 41.69642639160156, "learning_rate": 1.840816326530612e-05, "loss": 0.6688, "step": 15491 }, { "epoch": 63.23265306122449, "grad_norm": 32.24467468261719, "learning_rate": 1.8406122448979592e-05, "loss": 1.1594, "step": 15492 }, { "epoch": 63.23673469387755, "grad_norm": 26.709739685058594, "learning_rate": 1.8404081632653062e-05, "loss": 1.5883, "step": 15493 }, { "epoch": 63.24081632653061, "grad_norm": 37.02972412109375, "learning_rate": 1.8402040816326533e-05, "loss": 0.9191, "step": 15494 }, { "epoch": 63.244897959183675, "grad_norm": 29.545757293701172, "learning_rate": 1.84e-05, "loss": 1.1536, "step": 15495 }, { "epoch": 63.248979591836736, "grad_norm": 33.518341064453125, "learning_rate": 1.839795918367347e-05, "loss": 0.251, "step": 15496 }, { "epoch": 63.2530612244898, "grad_norm": 26.598154067993164, "learning_rate": 1.839591836734694e-05, "loss": 1.6868, "step": 15497 }, { "epoch": 63.25714285714286, "grad_norm": 36.930946350097656, "learning_rate": 1.8393877551020408e-05, "loss": 0.5465, "step": 15498 }, { "epoch": 63.26122448979592, "grad_norm": 38.01470184326172, "learning_rate": 1.839183673469388e-05, "loss": 1.4892, "step": 15499 }, { "epoch": 63.265306122448976, "grad_norm": 38.09669876098633, "learning_rate": 1.8389795918367346e-05, "loss": 1.1825, "step": 15500 }, { "epoch": 63.26938775510204, "grad_norm": 30.594018936157227, "learning_rate": 1.838775510204082e-05, "loss": 1.2744, "step": 15501 }, { "epoch": 63.2734693877551, "grad_norm": 39.53434371948242, "learning_rate": 1.8385714285714287e-05, "loss": 1.0577, "step": 15502 }, { "epoch": 63.27755102040816, "grad_norm": 39.560977935791016, "learning_rate": 1.8383673469387754e-05, "loss": 1.3389, "step": 15503 }, { "epoch": 63.28163265306122, "grad_norm": 27.83150291442871, "learning_rate": 1.8381632653061224e-05, "loss": 1.877, "step": 15504 }, { "epoch": 63.285714285714285, "grad_norm": 40.103782653808594, "learning_rate": 1.8379591836734695e-05, "loss": 1.3939, "step": 15505 }, { "epoch": 63.289795918367346, "grad_norm": 34.831119537353516, "learning_rate": 1.8377551020408165e-05, "loss": 1.2422, "step": 15506 }, { "epoch": 63.29387755102041, "grad_norm": 31.124536514282227, "learning_rate": 1.8375510204081633e-05, "loss": 1.7986, "step": 15507 }, { "epoch": 63.29795918367347, "grad_norm": 36.52110290527344, "learning_rate": 1.8373469387755103e-05, "loss": 0.7286, "step": 15508 }, { "epoch": 63.30204081632653, "grad_norm": 8.86708927154541, "learning_rate": 1.8371428571428574e-05, "loss": 3.1638, "step": 15509 }, { "epoch": 63.30612244897959, "grad_norm": 34.25190734863281, "learning_rate": 1.836938775510204e-05, "loss": 0.941, "step": 15510 }, { "epoch": 63.310204081632655, "grad_norm": 32.383583068847656, "learning_rate": 1.836734693877551e-05, "loss": 1.5535, "step": 15511 }, { "epoch": 63.31428571428572, "grad_norm": 45.40528106689453, "learning_rate": 1.8365306122448982e-05, "loss": 0.7681, "step": 15512 }, { "epoch": 63.31836734693878, "grad_norm": 23.774185180664062, "learning_rate": 1.836326530612245e-05, "loss": 1.8914, "step": 15513 }, { "epoch": 63.32244897959184, "grad_norm": 25.752792358398438, "learning_rate": 1.836122448979592e-05, "loss": 1.5512, "step": 15514 }, { "epoch": 63.326530612244895, "grad_norm": 29.96894073486328, "learning_rate": 1.8359183673469387e-05, "loss": 1.4149, "step": 15515 }, { "epoch": 63.33061224489796, "grad_norm": 22.896940231323242, "learning_rate": 1.835714285714286e-05, "loss": 1.9688, "step": 15516 }, { "epoch": 63.33469387755102, "grad_norm": 31.48733901977539, "learning_rate": 1.8355102040816328e-05, "loss": 1.0223, "step": 15517 }, { "epoch": 63.33877551020408, "grad_norm": 46.63149642944336, "learning_rate": 1.8353061224489798e-05, "loss": 0.506, "step": 15518 }, { "epoch": 63.34285714285714, "grad_norm": 32.190216064453125, "learning_rate": 1.8351020408163265e-05, "loss": 1.7949, "step": 15519 }, { "epoch": 63.3469387755102, "grad_norm": 21.830039978027344, "learning_rate": 1.8348979591836736e-05, "loss": 2.0245, "step": 15520 }, { "epoch": 63.351020408163265, "grad_norm": 21.64787483215332, "learning_rate": 1.8346938775510206e-05, "loss": 2.3024, "step": 15521 }, { "epoch": 63.35510204081633, "grad_norm": 23.795265197753906, "learning_rate": 1.8344897959183673e-05, "loss": 1.853, "step": 15522 }, { "epoch": 63.35918367346939, "grad_norm": 39.16214370727539, "learning_rate": 1.8342857142857144e-05, "loss": 1.1246, "step": 15523 }, { "epoch": 63.36326530612245, "grad_norm": 20.73482894897461, "learning_rate": 1.8340816326530614e-05, "loss": 1.8719, "step": 15524 }, { "epoch": 63.36734693877551, "grad_norm": 43.762046813964844, "learning_rate": 1.833877551020408e-05, "loss": 0.5042, "step": 15525 }, { "epoch": 63.371428571428574, "grad_norm": 35.070526123046875, "learning_rate": 1.8336734693877552e-05, "loss": 0.9287, "step": 15526 }, { "epoch": 63.375510204081635, "grad_norm": 22.38770294189453, "learning_rate": 1.833469387755102e-05, "loss": 1.9354, "step": 15527 }, { "epoch": 63.3795918367347, "grad_norm": 25.856836318969727, "learning_rate": 1.8332653061224493e-05, "loss": 1.9402, "step": 15528 }, { "epoch": 63.38367346938775, "grad_norm": 26.65512466430664, "learning_rate": 1.833061224489796e-05, "loss": 1.5162, "step": 15529 }, { "epoch": 63.38775510204081, "grad_norm": 30.40247917175293, "learning_rate": 1.8328571428571427e-05, "loss": 1.3412, "step": 15530 }, { "epoch": 63.391836734693875, "grad_norm": 29.451251983642578, "learning_rate": 1.8326530612244898e-05, "loss": 1.4806, "step": 15531 }, { "epoch": 63.39591836734694, "grad_norm": 50.647979736328125, "learning_rate": 1.832448979591837e-05, "loss": 1.3587, "step": 15532 }, { "epoch": 63.4, "grad_norm": 37.01511001586914, "learning_rate": 1.832244897959184e-05, "loss": 0.5233, "step": 15533 }, { "epoch": 63.40408163265306, "grad_norm": 28.430931091308594, "learning_rate": 1.8320408163265306e-05, "loss": 1.8586, "step": 15534 }, { "epoch": 63.40816326530612, "grad_norm": 37.91623306274414, "learning_rate": 1.8318367346938777e-05, "loss": 0.944, "step": 15535 }, { "epoch": 63.412244897959184, "grad_norm": 49.79373550415039, "learning_rate": 1.8316326530612247e-05, "loss": 1.226, "step": 15536 }, { "epoch": 63.416326530612245, "grad_norm": 30.488059997558594, "learning_rate": 1.8314285714285714e-05, "loss": 1.154, "step": 15537 }, { "epoch": 63.42040816326531, "grad_norm": 17.12932014465332, "learning_rate": 1.8312244897959185e-05, "loss": 2.5208, "step": 15538 }, { "epoch": 63.42448979591837, "grad_norm": 35.57474899291992, "learning_rate": 1.8310204081632655e-05, "loss": 0.5889, "step": 15539 }, { "epoch": 63.42857142857143, "grad_norm": 34.9144401550293, "learning_rate": 1.8308163265306126e-05, "loss": 0.7049, "step": 15540 }, { "epoch": 63.43265306122449, "grad_norm": 39.53435134887695, "learning_rate": 1.8306122448979593e-05, "loss": 0.9529, "step": 15541 }, { "epoch": 63.436734693877554, "grad_norm": 27.512901306152344, "learning_rate": 1.830408163265306e-05, "loss": 1.2329, "step": 15542 }, { "epoch": 63.440816326530616, "grad_norm": 35.620941162109375, "learning_rate": 1.8302040816326534e-05, "loss": 0.9522, "step": 15543 }, { "epoch": 63.44489795918367, "grad_norm": 43.91822052001953, "learning_rate": 1.83e-05, "loss": 1.1536, "step": 15544 }, { "epoch": 63.44897959183673, "grad_norm": 20.388961791992188, "learning_rate": 1.829795918367347e-05, "loss": 2.3954, "step": 15545 }, { "epoch": 63.453061224489794, "grad_norm": 35.43973922729492, "learning_rate": 1.829591836734694e-05, "loss": 1.5219, "step": 15546 }, { "epoch": 63.457142857142856, "grad_norm": 42.30274963378906, "learning_rate": 1.829387755102041e-05, "loss": 0.6618, "step": 15547 }, { "epoch": 63.46122448979592, "grad_norm": 35.48455047607422, "learning_rate": 1.829183673469388e-05, "loss": 1.5965, "step": 15548 }, { "epoch": 63.46530612244898, "grad_norm": 44.43866729736328, "learning_rate": 1.8289795918367347e-05, "loss": 1.6983, "step": 15549 }, { "epoch": 63.46938775510204, "grad_norm": 25.674762725830078, "learning_rate": 1.8287755102040817e-05, "loss": 1.7281, "step": 15550 }, { "epoch": 63.4734693877551, "grad_norm": 50.400447845458984, "learning_rate": 1.8285714285714288e-05, "loss": 0.6044, "step": 15551 }, { "epoch": 63.477551020408164, "grad_norm": 39.91388702392578, "learning_rate": 1.8283673469387755e-05, "loss": 0.8067, "step": 15552 }, { "epoch": 63.481632653061226, "grad_norm": 33.36920166015625, "learning_rate": 1.8281632653061225e-05, "loss": 1.2773, "step": 15553 }, { "epoch": 63.48571428571429, "grad_norm": 39.147735595703125, "learning_rate": 1.8279591836734693e-05, "loss": 0.6505, "step": 15554 }, { "epoch": 63.48979591836735, "grad_norm": 30.047462463378906, "learning_rate": 1.8277551020408166e-05, "loss": 1.8244, "step": 15555 }, { "epoch": 63.49387755102041, "grad_norm": 24.16335678100586, "learning_rate": 1.8275510204081634e-05, "loss": 1.8686, "step": 15556 }, { "epoch": 63.49795918367347, "grad_norm": 21.530033111572266, "learning_rate": 1.82734693877551e-05, "loss": 1.9239, "step": 15557 }, { "epoch": 63.50204081632653, "grad_norm": 39.36001205444336, "learning_rate": 1.827142857142857e-05, "loss": 1.1408, "step": 15558 }, { "epoch": 63.50612244897959, "grad_norm": 25.78133201599121, "learning_rate": 1.8269387755102042e-05, "loss": 1.9901, "step": 15559 }, { "epoch": 63.51020408163265, "grad_norm": 39.89518737792969, "learning_rate": 1.8267346938775512e-05, "loss": 1.4032, "step": 15560 }, { "epoch": 63.51428571428571, "grad_norm": 25.41191864013672, "learning_rate": 1.826530612244898e-05, "loss": 1.9916, "step": 15561 }, { "epoch": 63.518367346938774, "grad_norm": 32.806182861328125, "learning_rate": 1.826326530612245e-05, "loss": 1.2278, "step": 15562 }, { "epoch": 63.522448979591836, "grad_norm": 35.14654541015625, "learning_rate": 1.826122448979592e-05, "loss": 1.2818, "step": 15563 }, { "epoch": 63.5265306122449, "grad_norm": 33.92774200439453, "learning_rate": 1.8259183673469388e-05, "loss": 1.9039, "step": 15564 }, { "epoch": 63.53061224489796, "grad_norm": 46.852088928222656, "learning_rate": 1.8257142857142858e-05, "loss": 1.2434, "step": 15565 }, { "epoch": 63.53469387755102, "grad_norm": 39.69292068481445, "learning_rate": 1.825510204081633e-05, "loss": 0.8866, "step": 15566 }, { "epoch": 63.53877551020408, "grad_norm": 30.704944610595703, "learning_rate": 1.82530612244898e-05, "loss": 0.8598, "step": 15567 }, { "epoch": 63.542857142857144, "grad_norm": 37.139984130859375, "learning_rate": 1.8251020408163266e-05, "loss": 1.1567, "step": 15568 }, { "epoch": 63.546938775510206, "grad_norm": 22.68600845336914, "learning_rate": 1.8248979591836733e-05, "loss": 2.4825, "step": 15569 }, { "epoch": 63.55102040816327, "grad_norm": 25.699153900146484, "learning_rate": 1.8246938775510207e-05, "loss": 1.687, "step": 15570 }, { "epoch": 63.55510204081633, "grad_norm": 24.09618377685547, "learning_rate": 1.8244897959183674e-05, "loss": 2.028, "step": 15571 }, { "epoch": 63.55918367346939, "grad_norm": 23.65669059753418, "learning_rate": 1.8242857142857145e-05, "loss": 1.8529, "step": 15572 }, { "epoch": 63.563265306122446, "grad_norm": 38.16011428833008, "learning_rate": 1.8240816326530612e-05, "loss": 0.5305, "step": 15573 }, { "epoch": 63.56734693877551, "grad_norm": 24.392728805541992, "learning_rate": 1.8238775510204082e-05, "loss": 1.8851, "step": 15574 }, { "epoch": 63.57142857142857, "grad_norm": 52.119346618652344, "learning_rate": 1.8236734693877553e-05, "loss": 0.9231, "step": 15575 }, { "epoch": 63.57551020408163, "grad_norm": 30.379201889038086, "learning_rate": 1.823469387755102e-05, "loss": 1.8463, "step": 15576 }, { "epoch": 63.57959183673469, "grad_norm": 21.83431053161621, "learning_rate": 1.823265306122449e-05, "loss": 2.2616, "step": 15577 }, { "epoch": 63.583673469387755, "grad_norm": 24.833005905151367, "learning_rate": 1.823061224489796e-05, "loss": 2.3554, "step": 15578 }, { "epoch": 63.587755102040816, "grad_norm": 19.644559860229492, "learning_rate": 1.8228571428571428e-05, "loss": 2.4593, "step": 15579 }, { "epoch": 63.59183673469388, "grad_norm": 39.22370147705078, "learning_rate": 1.82265306122449e-05, "loss": 0.8241, "step": 15580 }, { "epoch": 63.59591836734694, "grad_norm": 23.715911865234375, "learning_rate": 1.8224489795918366e-05, "loss": 1.6137, "step": 15581 }, { "epoch": 63.6, "grad_norm": 41.72309875488281, "learning_rate": 1.822244897959184e-05, "loss": 0.7265, "step": 15582 }, { "epoch": 63.60408163265306, "grad_norm": 52.35475540161133, "learning_rate": 1.8220408163265307e-05, "loss": 1.7061, "step": 15583 }, { "epoch": 63.608163265306125, "grad_norm": 48.72509002685547, "learning_rate": 1.8218367346938774e-05, "loss": 0.3142, "step": 15584 }, { "epoch": 63.61224489795919, "grad_norm": 30.551034927368164, "learning_rate": 1.8216326530612245e-05, "loss": 1.7985, "step": 15585 }, { "epoch": 63.61632653061225, "grad_norm": 27.3542423248291, "learning_rate": 1.8214285714285715e-05, "loss": 1.7925, "step": 15586 }, { "epoch": 63.6204081632653, "grad_norm": 39.23455810546875, "learning_rate": 1.8212244897959186e-05, "loss": 0.6998, "step": 15587 }, { "epoch": 63.624489795918365, "grad_norm": 25.020776748657227, "learning_rate": 1.8210204081632653e-05, "loss": 0.2106, "step": 15588 }, { "epoch": 63.628571428571426, "grad_norm": 51.4839973449707, "learning_rate": 1.8208163265306123e-05, "loss": 0.5357, "step": 15589 }, { "epoch": 63.63265306122449, "grad_norm": 39.26972961425781, "learning_rate": 1.8206122448979594e-05, "loss": 0.6115, "step": 15590 }, { "epoch": 63.63673469387755, "grad_norm": 29.20758628845215, "learning_rate": 1.820408163265306e-05, "loss": 1.2676, "step": 15591 }, { "epoch": 63.64081632653061, "grad_norm": 33.115474700927734, "learning_rate": 1.820204081632653e-05, "loss": 1.7754, "step": 15592 }, { "epoch": 63.64489795918367, "grad_norm": 35.115943908691406, "learning_rate": 1.8200000000000002e-05, "loss": 0.4244, "step": 15593 }, { "epoch": 63.648979591836735, "grad_norm": 49.2297477722168, "learning_rate": 1.8197959183673472e-05, "loss": 1.1297, "step": 15594 }, { "epoch": 63.6530612244898, "grad_norm": 33.07425308227539, "learning_rate": 1.819591836734694e-05, "loss": 1.3296, "step": 15595 }, { "epoch": 63.65714285714286, "grad_norm": 16.465059280395508, "learning_rate": 1.8193877551020407e-05, "loss": 2.621, "step": 15596 }, { "epoch": 63.66122448979592, "grad_norm": 37.57265853881836, "learning_rate": 1.819183673469388e-05, "loss": 1.1966, "step": 15597 }, { "epoch": 63.66530612244898, "grad_norm": 33.6304817199707, "learning_rate": 1.8189795918367348e-05, "loss": 1.7766, "step": 15598 }, { "epoch": 63.66938775510204, "grad_norm": 29.031831741333008, "learning_rate": 1.8187755102040818e-05, "loss": 2.2671, "step": 15599 }, { "epoch": 63.673469387755105, "grad_norm": 33.96186065673828, "learning_rate": 1.8185714285714285e-05, "loss": 0.6258, "step": 15600 }, { "epoch": 63.67755102040816, "grad_norm": 32.928672790527344, "learning_rate": 1.8183673469387756e-05, "loss": 0.9308, "step": 15601 }, { "epoch": 63.68163265306122, "grad_norm": 35.63119125366211, "learning_rate": 1.8181632653061226e-05, "loss": 0.2804, "step": 15602 }, { "epoch": 63.68571428571428, "grad_norm": 50.04893112182617, "learning_rate": 1.8179591836734694e-05, "loss": 0.6969, "step": 15603 }, { "epoch": 63.689795918367345, "grad_norm": 35.061527252197266, "learning_rate": 1.8177551020408164e-05, "loss": 1.9299, "step": 15604 }, { "epoch": 63.69387755102041, "grad_norm": 28.123558044433594, "learning_rate": 1.8175510204081635e-05, "loss": 2.1052, "step": 15605 }, { "epoch": 63.69795918367347, "grad_norm": 25.83854103088379, "learning_rate": 1.81734693877551e-05, "loss": 1.9715, "step": 15606 }, { "epoch": 63.70204081632653, "grad_norm": 40.12925338745117, "learning_rate": 1.8171428571428572e-05, "loss": 0.9113, "step": 15607 }, { "epoch": 63.70612244897959, "grad_norm": 36.601314544677734, "learning_rate": 1.816938775510204e-05, "loss": 1.0505, "step": 15608 }, { "epoch": 63.710204081632654, "grad_norm": 42.85734939575195, "learning_rate": 1.8167346938775513e-05, "loss": 0.2463, "step": 15609 }, { "epoch": 63.714285714285715, "grad_norm": 36.70049285888672, "learning_rate": 1.816530612244898e-05, "loss": 1.4184, "step": 15610 }, { "epoch": 63.71836734693878, "grad_norm": 14.925567626953125, "learning_rate": 1.816326530612245e-05, "loss": 2.9146, "step": 15611 }, { "epoch": 63.72244897959184, "grad_norm": 21.51314926147461, "learning_rate": 1.8161224489795918e-05, "loss": 1.9229, "step": 15612 }, { "epoch": 63.7265306122449, "grad_norm": 19.428081512451172, "learning_rate": 1.815918367346939e-05, "loss": 0.2279, "step": 15613 }, { "epoch": 63.73061224489796, "grad_norm": 25.084856033325195, "learning_rate": 1.815714285714286e-05, "loss": 2.0622, "step": 15614 }, { "epoch": 63.734693877551024, "grad_norm": 33.54450607299805, "learning_rate": 1.8155102040816326e-05, "loss": 1.0822, "step": 15615 }, { "epoch": 63.73877551020408, "grad_norm": 20.736690521240234, "learning_rate": 1.8153061224489797e-05, "loss": 1.8298, "step": 15616 }, { "epoch": 63.74285714285714, "grad_norm": 27.0994930267334, "learning_rate": 1.8151020408163267e-05, "loss": 1.7036, "step": 15617 }, { "epoch": 63.7469387755102, "grad_norm": 44.67169189453125, "learning_rate": 1.8148979591836734e-05, "loss": 0.7223, "step": 15618 }, { "epoch": 63.751020408163264, "grad_norm": 37.48260498046875, "learning_rate": 1.8146938775510205e-05, "loss": 0.7282, "step": 15619 }, { "epoch": 63.755102040816325, "grad_norm": 27.485462188720703, "learning_rate": 1.8144897959183675e-05, "loss": 1.5399, "step": 15620 }, { "epoch": 63.75918367346939, "grad_norm": 36.46578598022461, "learning_rate": 1.8142857142857146e-05, "loss": 0.7624, "step": 15621 }, { "epoch": 63.76326530612245, "grad_norm": 46.554073333740234, "learning_rate": 1.8140816326530613e-05, "loss": 0.9288, "step": 15622 }, { "epoch": 63.76734693877551, "grad_norm": 37.59090042114258, "learning_rate": 1.813877551020408e-05, "loss": 0.7674, "step": 15623 }, { "epoch": 63.77142857142857, "grad_norm": 29.23438835144043, "learning_rate": 1.8136734693877554e-05, "loss": 1.1252, "step": 15624 }, { "epoch": 63.775510204081634, "grad_norm": 46.180755615234375, "learning_rate": 1.813469387755102e-05, "loss": 1.1327, "step": 15625 }, { "epoch": 63.779591836734696, "grad_norm": 19.598426818847656, "learning_rate": 1.813265306122449e-05, "loss": 2.0636, "step": 15626 }, { "epoch": 63.78367346938776, "grad_norm": 23.061805725097656, "learning_rate": 1.813061224489796e-05, "loss": 2.2812, "step": 15627 }, { "epoch": 63.78775510204082, "grad_norm": 30.467741012573242, "learning_rate": 1.812857142857143e-05, "loss": 1.0683, "step": 15628 }, { "epoch": 63.79183673469388, "grad_norm": 33.95182800292969, "learning_rate": 1.81265306122449e-05, "loss": 0.5899, "step": 15629 }, { "epoch": 63.795918367346935, "grad_norm": 39.56216049194336, "learning_rate": 1.8124489795918367e-05, "loss": 1.6393, "step": 15630 }, { "epoch": 63.8, "grad_norm": 40.95801544189453, "learning_rate": 1.8122448979591837e-05, "loss": 1.3982, "step": 15631 }, { "epoch": 63.80408163265306, "grad_norm": 29.208200454711914, "learning_rate": 1.8120408163265308e-05, "loss": 1.9187, "step": 15632 }, { "epoch": 63.80816326530612, "grad_norm": 18.84406089782715, "learning_rate": 1.811836734693878e-05, "loss": 1.7281, "step": 15633 }, { "epoch": 63.81224489795918, "grad_norm": 21.6976261138916, "learning_rate": 1.8116326530612246e-05, "loss": 1.9234, "step": 15634 }, { "epoch": 63.816326530612244, "grad_norm": 46.37610626220703, "learning_rate": 1.8114285714285713e-05, "loss": 0.7468, "step": 15635 }, { "epoch": 63.820408163265306, "grad_norm": 29.1920108795166, "learning_rate": 1.8112244897959187e-05, "loss": 1.1661, "step": 15636 }, { "epoch": 63.82448979591837, "grad_norm": 33.934043884277344, "learning_rate": 1.8110204081632654e-05, "loss": 0.1911, "step": 15637 }, { "epoch": 63.82857142857143, "grad_norm": 19.182924270629883, "learning_rate": 1.8108163265306124e-05, "loss": 2.3227, "step": 15638 }, { "epoch": 63.83265306122449, "grad_norm": 27.357765197753906, "learning_rate": 1.810612244897959e-05, "loss": 2.0628, "step": 15639 }, { "epoch": 63.83673469387755, "grad_norm": 23.278276443481445, "learning_rate": 1.8104081632653062e-05, "loss": 2.3107, "step": 15640 }, { "epoch": 63.840816326530614, "grad_norm": 33.46120834350586, "learning_rate": 1.8102040816326532e-05, "loss": 1.0453, "step": 15641 }, { "epoch": 63.844897959183676, "grad_norm": 26.184131622314453, "learning_rate": 1.81e-05, "loss": 1.7561, "step": 15642 }, { "epoch": 63.84897959183674, "grad_norm": 29.9832763671875, "learning_rate": 1.809795918367347e-05, "loss": 1.7997, "step": 15643 }, { "epoch": 63.85306122448979, "grad_norm": 31.276269912719727, "learning_rate": 1.809591836734694e-05, "loss": 0.6526, "step": 15644 }, { "epoch": 63.857142857142854, "grad_norm": 36.99864959716797, "learning_rate": 1.8093877551020408e-05, "loss": 1.0176, "step": 15645 }, { "epoch": 63.861224489795916, "grad_norm": 16.529298782348633, "learning_rate": 1.8091836734693878e-05, "loss": 2.2654, "step": 15646 }, { "epoch": 63.86530612244898, "grad_norm": 28.888505935668945, "learning_rate": 1.808979591836735e-05, "loss": 1.6852, "step": 15647 }, { "epoch": 63.86938775510204, "grad_norm": 33.78224182128906, "learning_rate": 1.808775510204082e-05, "loss": 1.5883, "step": 15648 }, { "epoch": 63.8734693877551, "grad_norm": 21.99409294128418, "learning_rate": 1.8085714285714286e-05, "loss": 2.6506, "step": 15649 }, { "epoch": 63.87755102040816, "grad_norm": 29.848840713500977, "learning_rate": 1.8083673469387753e-05, "loss": 1.5067, "step": 15650 }, { "epoch": 63.881632653061224, "grad_norm": 19.211828231811523, "learning_rate": 1.8081632653061227e-05, "loss": 2.3374, "step": 15651 }, { "epoch": 63.885714285714286, "grad_norm": 26.899118423461914, "learning_rate": 1.8079591836734694e-05, "loss": 0.2812, "step": 15652 }, { "epoch": 63.88979591836735, "grad_norm": 30.037275314331055, "learning_rate": 1.8077551020408165e-05, "loss": 1.7357, "step": 15653 }, { "epoch": 63.89387755102041, "grad_norm": 26.41374397277832, "learning_rate": 1.8075510204081632e-05, "loss": 1.6859, "step": 15654 }, { "epoch": 63.89795918367347, "grad_norm": 35.058650970458984, "learning_rate": 1.8073469387755103e-05, "loss": 1.6492, "step": 15655 }, { "epoch": 63.90204081632653, "grad_norm": 35.07313537597656, "learning_rate": 1.8071428571428573e-05, "loss": 1.1558, "step": 15656 }, { "epoch": 63.906122448979595, "grad_norm": 41.75319290161133, "learning_rate": 1.806938775510204e-05, "loss": 1.3049, "step": 15657 }, { "epoch": 63.910204081632656, "grad_norm": 35.95414352416992, "learning_rate": 1.806734693877551e-05, "loss": 0.3567, "step": 15658 }, { "epoch": 63.91428571428571, "grad_norm": 26.10798454284668, "learning_rate": 1.806530612244898e-05, "loss": 1.8139, "step": 15659 }, { "epoch": 63.91836734693877, "grad_norm": 41.857398986816406, "learning_rate": 1.8063265306122452e-05, "loss": 1.0244, "step": 15660 }, { "epoch": 63.922448979591834, "grad_norm": 47.06509017944336, "learning_rate": 1.806122448979592e-05, "loss": 0.8164, "step": 15661 }, { "epoch": 63.926530612244896, "grad_norm": 33.66363525390625, "learning_rate": 1.8059183673469386e-05, "loss": 0.5896, "step": 15662 }, { "epoch": 63.93061224489796, "grad_norm": 35.67156219482422, "learning_rate": 1.805714285714286e-05, "loss": 1.4522, "step": 15663 }, { "epoch": 63.93469387755102, "grad_norm": 30.501331329345703, "learning_rate": 1.8055102040816327e-05, "loss": 1.723, "step": 15664 }, { "epoch": 63.93877551020408, "grad_norm": 37.00687026977539, "learning_rate": 1.8053061224489798e-05, "loss": 0.224, "step": 15665 }, { "epoch": 63.94285714285714, "grad_norm": 33.69056701660156, "learning_rate": 1.8051020408163265e-05, "loss": 1.2053, "step": 15666 }, { "epoch": 63.946938775510205, "grad_norm": 16.441518783569336, "learning_rate": 1.8048979591836735e-05, "loss": 2.4837, "step": 15667 }, { "epoch": 63.95102040816327, "grad_norm": 42.421363830566406, "learning_rate": 1.8046938775510206e-05, "loss": 0.7767, "step": 15668 }, { "epoch": 63.95510204081633, "grad_norm": 30.262296676635742, "learning_rate": 1.8044897959183673e-05, "loss": 0.7747, "step": 15669 }, { "epoch": 63.95918367346939, "grad_norm": 27.886457443237305, "learning_rate": 1.8042857142857143e-05, "loss": 1.638, "step": 15670 }, { "epoch": 63.96326530612245, "grad_norm": 16.954069137573242, "learning_rate": 1.8040816326530614e-05, "loss": 0.2444, "step": 15671 }, { "epoch": 63.96734693877551, "grad_norm": 26.491546630859375, "learning_rate": 1.803877551020408e-05, "loss": 1.8726, "step": 15672 }, { "epoch": 63.97142857142857, "grad_norm": 46.21569061279297, "learning_rate": 1.803673469387755e-05, "loss": 0.5427, "step": 15673 }, { "epoch": 63.97551020408163, "grad_norm": 33.26683044433594, "learning_rate": 1.8034693877551022e-05, "loss": 1.494, "step": 15674 }, { "epoch": 63.97959183673469, "grad_norm": 8.923531532287598, "learning_rate": 1.8032653061224493e-05, "loss": 0.0579, "step": 15675 }, { "epoch": 63.98367346938775, "grad_norm": 67.44823455810547, "learning_rate": 1.803061224489796e-05, "loss": 1.5227, "step": 15676 }, { "epoch": 63.987755102040815, "grad_norm": 30.850717544555664, "learning_rate": 1.802857142857143e-05, "loss": 0.521, "step": 15677 }, { "epoch": 63.99183673469388, "grad_norm": 32.35529708862305, "learning_rate": 1.80265306122449e-05, "loss": 1.8264, "step": 15678 }, { "epoch": 63.99591836734694, "grad_norm": 30.490890502929688, "learning_rate": 1.8024489795918368e-05, "loss": 1.9816, "step": 15679 }, { "epoch": 64.0, "grad_norm": 18.871261596679688, "learning_rate": 1.802244897959184e-05, "loss": 2.4843, "step": 15680 }, { "epoch": 64.00408163265305, "grad_norm": 23.350704193115234, "learning_rate": 1.8020408163265305e-05, "loss": 1.8872, "step": 15681 }, { "epoch": 64.00816326530612, "grad_norm": 26.75653076171875, "learning_rate": 1.8018367346938776e-05, "loss": 1.6725, "step": 15682 }, { "epoch": 64.01224489795918, "grad_norm": 26.560087203979492, "learning_rate": 1.8016326530612247e-05, "loss": 1.1229, "step": 15683 }, { "epoch": 64.01632653061225, "grad_norm": 31.261035919189453, "learning_rate": 1.8014285714285714e-05, "loss": 1.389, "step": 15684 }, { "epoch": 64.0204081632653, "grad_norm": 21.754499435424805, "learning_rate": 1.8012244897959184e-05, "loss": 2.236, "step": 15685 }, { "epoch": 64.02448979591837, "grad_norm": 25.38349151611328, "learning_rate": 1.8010204081632655e-05, "loss": 1.892, "step": 15686 }, { "epoch": 64.02857142857142, "grad_norm": 32.74191665649414, "learning_rate": 1.8008163265306125e-05, "loss": 1.3013, "step": 15687 }, { "epoch": 64.0326530612245, "grad_norm": 36.619327545166016, "learning_rate": 1.8006122448979592e-05, "loss": 0.5829, "step": 15688 }, { "epoch": 64.03673469387755, "grad_norm": 34.240455627441406, "learning_rate": 1.800408163265306e-05, "loss": 1.5312, "step": 15689 }, { "epoch": 64.04081632653062, "grad_norm": 29.860071182250977, "learning_rate": 1.8002040816326533e-05, "loss": 0.9859, "step": 15690 }, { "epoch": 64.04489795918367, "grad_norm": 38.552608489990234, "learning_rate": 1.8e-05, "loss": 0.632, "step": 15691 }, { "epoch": 64.04897959183674, "grad_norm": 29.53631019592285, "learning_rate": 1.799795918367347e-05, "loss": 1.6771, "step": 15692 }, { "epoch": 64.0530612244898, "grad_norm": 35.65519714355469, "learning_rate": 1.7995918367346938e-05, "loss": 1.6368, "step": 15693 }, { "epoch": 64.05714285714286, "grad_norm": 41.47861099243164, "learning_rate": 1.799387755102041e-05, "loss": 0.6315, "step": 15694 }, { "epoch": 64.06122448979592, "grad_norm": 20.997438430786133, "learning_rate": 1.799183673469388e-05, "loss": 1.9785, "step": 15695 }, { "epoch": 64.06530612244897, "grad_norm": 29.685951232910156, "learning_rate": 1.7989795918367346e-05, "loss": 1.6748, "step": 15696 }, { "epoch": 64.06938775510204, "grad_norm": 23.95439338684082, "learning_rate": 1.7987755102040817e-05, "loss": 1.9557, "step": 15697 }, { "epoch": 64.0734693877551, "grad_norm": 34.44684600830078, "learning_rate": 1.7985714285714287e-05, "loss": 1.3825, "step": 15698 }, { "epoch": 64.07755102040817, "grad_norm": 22.162975311279297, "learning_rate": 1.7983673469387758e-05, "loss": 0.1777, "step": 15699 }, { "epoch": 64.08163265306122, "grad_norm": 23.99701499938965, "learning_rate": 1.7981632653061225e-05, "loss": 2.4262, "step": 15700 }, { "epoch": 64.08571428571429, "grad_norm": 22.022716522216797, "learning_rate": 1.7979591836734695e-05, "loss": 2.2753, "step": 15701 }, { "epoch": 64.08979591836734, "grad_norm": 32.68159484863281, "learning_rate": 1.7977551020408166e-05, "loss": 1.0739, "step": 15702 }, { "epoch": 64.09387755102041, "grad_norm": 51.686546325683594, "learning_rate": 1.7975510204081633e-05, "loss": 0.5774, "step": 15703 }, { "epoch": 64.09795918367347, "grad_norm": 30.65811538696289, "learning_rate": 1.7973469387755104e-05, "loss": 0.7505, "step": 15704 }, { "epoch": 64.10204081632654, "grad_norm": 21.59271812438965, "learning_rate": 1.797142857142857e-05, "loss": 2.2425, "step": 15705 }, { "epoch": 64.10612244897959, "grad_norm": 22.298513412475586, "learning_rate": 1.796938775510204e-05, "loss": 2.0838, "step": 15706 }, { "epoch": 64.11020408163266, "grad_norm": 39.331180572509766, "learning_rate": 1.7967346938775512e-05, "loss": 0.6889, "step": 15707 }, { "epoch": 64.11428571428571, "grad_norm": 34.166072845458984, "learning_rate": 1.796530612244898e-05, "loss": 1.0055, "step": 15708 }, { "epoch": 64.11836734693877, "grad_norm": 28.825973510742188, "learning_rate": 1.796326530612245e-05, "loss": 1.6084, "step": 15709 }, { "epoch": 64.12244897959184, "grad_norm": 31.907312393188477, "learning_rate": 1.796122448979592e-05, "loss": 0.2112, "step": 15710 }, { "epoch": 64.12653061224489, "grad_norm": 39.58564376831055, "learning_rate": 1.7959183673469387e-05, "loss": 1.4078, "step": 15711 }, { "epoch": 64.13061224489796, "grad_norm": 42.35224151611328, "learning_rate": 1.7957142857142858e-05, "loss": 0.5945, "step": 15712 }, { "epoch": 64.13469387755102, "grad_norm": 38.13637161254883, "learning_rate": 1.7955102040816328e-05, "loss": 1.1572, "step": 15713 }, { "epoch": 64.13877551020408, "grad_norm": 30.503345489501953, "learning_rate": 1.79530612244898e-05, "loss": 1.6334, "step": 15714 }, { "epoch": 64.14285714285714, "grad_norm": 29.903844833374023, "learning_rate": 1.7951020408163266e-05, "loss": 0.5181, "step": 15715 }, { "epoch": 64.14693877551021, "grad_norm": 27.278499603271484, "learning_rate": 1.7948979591836733e-05, "loss": 1.8701, "step": 15716 }, { "epoch": 64.15102040816326, "grad_norm": 27.776214599609375, "learning_rate": 1.7946938775510207e-05, "loss": 1.9263, "step": 15717 }, { "epoch": 64.15510204081633, "grad_norm": 38.33640670776367, "learning_rate": 1.7944897959183674e-05, "loss": 1.4714, "step": 15718 }, { "epoch": 64.15918367346939, "grad_norm": 45.06886672973633, "learning_rate": 1.7942857142857144e-05, "loss": 1.1691, "step": 15719 }, { "epoch": 64.16326530612245, "grad_norm": 25.0289306640625, "learning_rate": 1.794081632653061e-05, "loss": 1.8112, "step": 15720 }, { "epoch": 64.16734693877551, "grad_norm": 24.140567779541016, "learning_rate": 1.7938775510204085e-05, "loss": 1.5209, "step": 15721 }, { "epoch": 64.17142857142858, "grad_norm": 24.71238899230957, "learning_rate": 1.7936734693877552e-05, "loss": 1.8431, "step": 15722 }, { "epoch": 64.17551020408163, "grad_norm": 32.25674057006836, "learning_rate": 1.793469387755102e-05, "loss": 0.49, "step": 15723 }, { "epoch": 64.17959183673469, "grad_norm": 32.12632751464844, "learning_rate": 1.793265306122449e-05, "loss": 0.1977, "step": 15724 }, { "epoch": 64.18367346938776, "grad_norm": 44.913246154785156, "learning_rate": 1.793061224489796e-05, "loss": 1.0954, "step": 15725 }, { "epoch": 64.18775510204081, "grad_norm": 22.374435424804688, "learning_rate": 1.792857142857143e-05, "loss": 1.8254, "step": 15726 }, { "epoch": 64.19183673469388, "grad_norm": 19.256717681884766, "learning_rate": 1.7926530612244898e-05, "loss": 2.283, "step": 15727 }, { "epoch": 64.19591836734693, "grad_norm": 33.427635192871094, "learning_rate": 1.792448979591837e-05, "loss": 0.6952, "step": 15728 }, { "epoch": 64.2, "grad_norm": 21.489032745361328, "learning_rate": 1.792244897959184e-05, "loss": 1.9281, "step": 15729 }, { "epoch": 64.20408163265306, "grad_norm": 28.972904205322266, "learning_rate": 1.7920408163265306e-05, "loss": 1.4824, "step": 15730 }, { "epoch": 64.20816326530613, "grad_norm": 34.607383728027344, "learning_rate": 1.7918367346938777e-05, "loss": 0.8722, "step": 15731 }, { "epoch": 64.21224489795918, "grad_norm": 33.67978286743164, "learning_rate": 1.7916326530612244e-05, "loss": 0.8816, "step": 15732 }, { "epoch": 64.21632653061225, "grad_norm": 50.86526107788086, "learning_rate": 1.7914285714285715e-05, "loss": 1.6037, "step": 15733 }, { "epoch": 64.2204081632653, "grad_norm": 31.040010452270508, "learning_rate": 1.7912244897959185e-05, "loss": 1.1792, "step": 15734 }, { "epoch": 64.22448979591837, "grad_norm": 31.630212783813477, "learning_rate": 1.7910204081632652e-05, "loss": 0.4449, "step": 15735 }, { "epoch": 64.22857142857143, "grad_norm": 39.327552795410156, "learning_rate": 1.7908163265306123e-05, "loss": 1.7224, "step": 15736 }, { "epoch": 64.2326530612245, "grad_norm": 25.965885162353516, "learning_rate": 1.7906122448979593e-05, "loss": 0.2879, "step": 15737 }, { "epoch": 64.23673469387755, "grad_norm": 33.12044143676758, "learning_rate": 1.790408163265306e-05, "loss": 1.5064, "step": 15738 }, { "epoch": 64.2408163265306, "grad_norm": 24.844003677368164, "learning_rate": 1.790204081632653e-05, "loss": 2.2587, "step": 15739 }, { "epoch": 64.24489795918367, "grad_norm": 35.10365295410156, "learning_rate": 1.79e-05, "loss": 0.2837, "step": 15740 }, { "epoch": 64.24897959183673, "grad_norm": 24.401052474975586, "learning_rate": 1.7897959183673472e-05, "loss": 1.9179, "step": 15741 }, { "epoch": 64.2530612244898, "grad_norm": 17.51047134399414, "learning_rate": 1.789591836734694e-05, "loss": 2.485, "step": 15742 }, { "epoch": 64.25714285714285, "grad_norm": 39.160091400146484, "learning_rate": 1.7893877551020406e-05, "loss": 1.5942, "step": 15743 }, { "epoch": 64.26122448979592, "grad_norm": 47.889278411865234, "learning_rate": 1.789183673469388e-05, "loss": 0.2966, "step": 15744 }, { "epoch": 64.26530612244898, "grad_norm": 30.319398880004883, "learning_rate": 1.7889795918367347e-05, "loss": 1.5343, "step": 15745 }, { "epoch": 64.26938775510204, "grad_norm": 37.69839096069336, "learning_rate": 1.7887755102040818e-05, "loss": 1.2326, "step": 15746 }, { "epoch": 64.2734693877551, "grad_norm": 33.440303802490234, "learning_rate": 1.7885714285714285e-05, "loss": 1.1933, "step": 15747 }, { "epoch": 64.27755102040817, "grad_norm": 32.75873947143555, "learning_rate": 1.788367346938776e-05, "loss": 1.5917, "step": 15748 }, { "epoch": 64.28163265306122, "grad_norm": 29.696659088134766, "learning_rate": 1.7881632653061226e-05, "loss": 1.4506, "step": 15749 }, { "epoch": 64.28571428571429, "grad_norm": 33.55894470214844, "learning_rate": 1.7879591836734693e-05, "loss": 1.1425, "step": 15750 }, { "epoch": 64.28979591836735, "grad_norm": 36.917205810546875, "learning_rate": 1.7877551020408164e-05, "loss": 0.6241, "step": 15751 }, { "epoch": 64.29387755102042, "grad_norm": 20.614856719970703, "learning_rate": 1.7875510204081634e-05, "loss": 1.8559, "step": 15752 }, { "epoch": 64.29795918367347, "grad_norm": 31.562402725219727, "learning_rate": 1.7873469387755105e-05, "loss": 1.3224, "step": 15753 }, { "epoch": 64.30204081632652, "grad_norm": 28.82643699645996, "learning_rate": 1.787142857142857e-05, "loss": 1.8584, "step": 15754 }, { "epoch": 64.3061224489796, "grad_norm": 40.78805160522461, "learning_rate": 1.7869387755102042e-05, "loss": 1.1305, "step": 15755 }, { "epoch": 64.31020408163265, "grad_norm": 35.1776123046875, "learning_rate": 1.7867346938775513e-05, "loss": 0.6896, "step": 15756 }, { "epoch": 64.31428571428572, "grad_norm": 29.042490005493164, "learning_rate": 1.786530612244898e-05, "loss": 1.5865, "step": 15757 }, { "epoch": 64.31836734693877, "grad_norm": 34.43397903442383, "learning_rate": 1.786326530612245e-05, "loss": 1.4486, "step": 15758 }, { "epoch": 64.32244897959184, "grad_norm": 8.722477912902832, "learning_rate": 1.7861224489795917e-05, "loss": 3.1539, "step": 15759 }, { "epoch": 64.3265306122449, "grad_norm": 27.742015838623047, "learning_rate": 1.7859183673469388e-05, "loss": 1.8883, "step": 15760 }, { "epoch": 64.33061224489796, "grad_norm": 21.320894241333008, "learning_rate": 1.785714285714286e-05, "loss": 2.3979, "step": 15761 }, { "epoch": 64.33469387755102, "grad_norm": 26.781463623046875, "learning_rate": 1.7855102040816326e-05, "loss": 0.2004, "step": 15762 }, { "epoch": 64.33877551020409, "grad_norm": 17.787357330322266, "learning_rate": 1.7853061224489796e-05, "loss": 2.1792, "step": 15763 }, { "epoch": 64.34285714285714, "grad_norm": 27.686695098876953, "learning_rate": 1.7851020408163267e-05, "loss": 1.7513, "step": 15764 }, { "epoch": 64.34693877551021, "grad_norm": 44.040287017822266, "learning_rate": 1.7848979591836734e-05, "loss": 0.7471, "step": 15765 }, { "epoch": 64.35102040816327, "grad_norm": 29.993362426757812, "learning_rate": 1.7846938775510204e-05, "loss": 1.7504, "step": 15766 }, { "epoch": 64.35510204081632, "grad_norm": 30.805328369140625, "learning_rate": 1.7844897959183675e-05, "loss": 1.1661, "step": 15767 }, { "epoch": 64.35918367346939, "grad_norm": 34.06758117675781, "learning_rate": 1.7842857142857145e-05, "loss": 1.4052, "step": 15768 }, { "epoch": 64.36326530612244, "grad_norm": 19.856904983520508, "learning_rate": 1.7840816326530612e-05, "loss": 2.001, "step": 15769 }, { "epoch": 64.36734693877551, "grad_norm": 42.076820373535156, "learning_rate": 1.7838775510204083e-05, "loss": 0.7303, "step": 15770 }, { "epoch": 64.37142857142857, "grad_norm": 18.233816146850586, "learning_rate": 1.7836734693877553e-05, "loss": 1.8956, "step": 15771 }, { "epoch": 64.37551020408164, "grad_norm": 22.69366455078125, "learning_rate": 1.783469387755102e-05, "loss": 1.8056, "step": 15772 }, { "epoch": 64.37959183673469, "grad_norm": 41.124610900878906, "learning_rate": 1.783265306122449e-05, "loss": 1.1428, "step": 15773 }, { "epoch": 64.38367346938776, "grad_norm": 26.16344451904297, "learning_rate": 1.7830612244897958e-05, "loss": 2.2778, "step": 15774 }, { "epoch": 64.38775510204081, "grad_norm": 20.445728302001953, "learning_rate": 1.7828571428571432e-05, "loss": 1.8875, "step": 15775 }, { "epoch": 64.39183673469388, "grad_norm": 36.61841583251953, "learning_rate": 1.78265306122449e-05, "loss": 0.5149, "step": 15776 }, { "epoch": 64.39591836734694, "grad_norm": 23.009552001953125, "learning_rate": 1.7824489795918366e-05, "loss": 1.7785, "step": 15777 }, { "epoch": 64.4, "grad_norm": 28.7829532623291, "learning_rate": 1.7822448979591837e-05, "loss": 1.1345, "step": 15778 }, { "epoch": 64.40408163265306, "grad_norm": 35.07832717895508, "learning_rate": 1.7820408163265307e-05, "loss": 0.9296, "step": 15779 }, { "epoch": 64.40816326530613, "grad_norm": 50.924198150634766, "learning_rate": 1.7818367346938778e-05, "loss": 1.2006, "step": 15780 }, { "epoch": 64.41224489795918, "grad_norm": 23.260210037231445, "learning_rate": 1.7816326530612245e-05, "loss": 1.9695, "step": 15781 }, { "epoch": 64.41632653061224, "grad_norm": 32.69783020019531, "learning_rate": 1.7814285714285716e-05, "loss": 1.7976, "step": 15782 }, { "epoch": 64.42040816326531, "grad_norm": 31.179851531982422, "learning_rate": 1.7812244897959186e-05, "loss": 0.7199, "step": 15783 }, { "epoch": 64.42448979591836, "grad_norm": 33.703330993652344, "learning_rate": 1.7810204081632653e-05, "loss": 1.0363, "step": 15784 }, { "epoch": 64.42857142857143, "grad_norm": 26.315427780151367, "learning_rate": 1.7808163265306124e-05, "loss": 1.643, "step": 15785 }, { "epoch": 64.43265306122449, "grad_norm": 41.8265380859375, "learning_rate": 1.780612244897959e-05, "loss": 0.7075, "step": 15786 }, { "epoch": 64.43673469387755, "grad_norm": 33.168392181396484, "learning_rate": 1.780408163265306e-05, "loss": 0.4261, "step": 15787 }, { "epoch": 64.44081632653061, "grad_norm": 65.81300354003906, "learning_rate": 1.7802040816326532e-05, "loss": 0.6524, "step": 15788 }, { "epoch": 64.44489795918368, "grad_norm": 26.297588348388672, "learning_rate": 1.78e-05, "loss": 0.2509, "step": 15789 }, { "epoch": 64.44897959183673, "grad_norm": 44.19670486450195, "learning_rate": 1.779795918367347e-05, "loss": 0.694, "step": 15790 }, { "epoch": 64.4530612244898, "grad_norm": 15.320012092590332, "learning_rate": 1.779591836734694e-05, "loss": 2.6951, "step": 15791 }, { "epoch": 64.45714285714286, "grad_norm": 31.289024353027344, "learning_rate": 1.779387755102041e-05, "loss": 1.66, "step": 15792 }, { "epoch": 64.46122448979592, "grad_norm": 28.19584083557129, "learning_rate": 1.7791836734693878e-05, "loss": 1.7663, "step": 15793 }, { "epoch": 64.46530612244898, "grad_norm": 86.41629791259766, "learning_rate": 1.7789795918367348e-05, "loss": 1.5893, "step": 15794 }, { "epoch": 64.46938775510205, "grad_norm": 14.72938346862793, "learning_rate": 1.778775510204082e-05, "loss": 2.585, "step": 15795 }, { "epoch": 64.4734693877551, "grad_norm": 19.019742965698242, "learning_rate": 1.7785714285714286e-05, "loss": 2.3562, "step": 15796 }, { "epoch": 64.47755102040816, "grad_norm": 55.8607177734375, "learning_rate": 1.7783673469387756e-05, "loss": 0.5533, "step": 15797 }, { "epoch": 64.48163265306123, "grad_norm": 25.26783561706543, "learning_rate": 1.7781632653061227e-05, "loss": 2.026, "step": 15798 }, { "epoch": 64.48571428571428, "grad_norm": 32.52410125732422, "learning_rate": 1.7779591836734694e-05, "loss": 0.2396, "step": 15799 }, { "epoch": 64.48979591836735, "grad_norm": 33.971336364746094, "learning_rate": 1.7777551020408164e-05, "loss": 0.9912, "step": 15800 }, { "epoch": 64.4938775510204, "grad_norm": 32.1014289855957, "learning_rate": 1.777551020408163e-05, "loss": 1.5307, "step": 15801 }, { "epoch": 64.49795918367347, "grad_norm": 34.93497848510742, "learning_rate": 1.7773469387755105e-05, "loss": 1.2758, "step": 15802 }, { "epoch": 64.50204081632653, "grad_norm": 39.940460205078125, "learning_rate": 1.7771428571428573e-05, "loss": 1.5237, "step": 15803 }, { "epoch": 64.5061224489796, "grad_norm": 48.480289459228516, "learning_rate": 1.776938775510204e-05, "loss": 0.6691, "step": 15804 }, { "epoch": 64.51020408163265, "grad_norm": 36.4782600402832, "learning_rate": 1.776734693877551e-05, "loss": 1.4253, "step": 15805 }, { "epoch": 64.51428571428572, "grad_norm": 30.296897888183594, "learning_rate": 1.776530612244898e-05, "loss": 1.3109, "step": 15806 }, { "epoch": 64.51836734693877, "grad_norm": 38.593284606933594, "learning_rate": 1.776326530612245e-05, "loss": 1.1945, "step": 15807 }, { "epoch": 64.52244897959184, "grad_norm": 19.747480392456055, "learning_rate": 1.776122448979592e-05, "loss": 2.4063, "step": 15808 }, { "epoch": 64.5265306122449, "grad_norm": 34.20207977294922, "learning_rate": 1.775918367346939e-05, "loss": 1.4927, "step": 15809 }, { "epoch": 64.53061224489795, "grad_norm": 23.93340301513672, "learning_rate": 1.775714285714286e-05, "loss": 2.3541, "step": 15810 }, { "epoch": 64.53469387755102, "grad_norm": 34.7695198059082, "learning_rate": 1.7755102040816327e-05, "loss": 0.4026, "step": 15811 }, { "epoch": 64.53877551020408, "grad_norm": 28.141145706176758, "learning_rate": 1.7753061224489797e-05, "loss": 1.7084, "step": 15812 }, { "epoch": 64.54285714285714, "grad_norm": 21.588218688964844, "learning_rate": 1.7751020408163264e-05, "loss": 1.8841, "step": 15813 }, { "epoch": 64.5469387755102, "grad_norm": 28.26136016845703, "learning_rate": 1.7748979591836738e-05, "loss": 0.2509, "step": 15814 }, { "epoch": 64.55102040816327, "grad_norm": 23.93276596069336, "learning_rate": 1.7746938775510205e-05, "loss": 1.9743, "step": 15815 }, { "epoch": 64.55510204081632, "grad_norm": 32.96344757080078, "learning_rate": 1.7744897959183672e-05, "loss": 1.0083, "step": 15816 }, { "epoch": 64.55918367346939, "grad_norm": 19.989694595336914, "learning_rate": 1.7742857142857143e-05, "loss": 2.5802, "step": 15817 }, { "epoch": 64.56326530612245, "grad_norm": 18.732446670532227, "learning_rate": 1.7740816326530613e-05, "loss": 2.3575, "step": 15818 }, { "epoch": 64.56734693877551, "grad_norm": 48.01143264770508, "learning_rate": 1.7738775510204084e-05, "loss": 0.6971, "step": 15819 }, { "epoch": 64.57142857142857, "grad_norm": 29.31395721435547, "learning_rate": 1.773673469387755e-05, "loss": 1.7765, "step": 15820 }, { "epoch": 64.57551020408164, "grad_norm": 24.66527557373047, "learning_rate": 1.773469387755102e-05, "loss": 2.0021, "step": 15821 }, { "epoch": 64.57959183673469, "grad_norm": 16.197233200073242, "learning_rate": 1.7732653061224492e-05, "loss": 2.4242, "step": 15822 }, { "epoch": 64.58367346938776, "grad_norm": 23.051475524902344, "learning_rate": 1.773061224489796e-05, "loss": 1.8873, "step": 15823 }, { "epoch": 64.58775510204082, "grad_norm": 38.4916877746582, "learning_rate": 1.772857142857143e-05, "loss": 1.3058, "step": 15824 }, { "epoch": 64.59183673469387, "grad_norm": 30.74594497680664, "learning_rate": 1.77265306122449e-05, "loss": 1.4141, "step": 15825 }, { "epoch": 64.59591836734694, "grad_norm": 32.74030303955078, "learning_rate": 1.7724489795918367e-05, "loss": 0.4503, "step": 15826 }, { "epoch": 64.6, "grad_norm": 20.721527099609375, "learning_rate": 1.7722448979591838e-05, "loss": 1.9973, "step": 15827 }, { "epoch": 64.60408163265306, "grad_norm": 37.91942596435547, "learning_rate": 1.7720408163265305e-05, "loss": 0.2697, "step": 15828 }, { "epoch": 64.60816326530612, "grad_norm": 27.9691162109375, "learning_rate": 1.771836734693878e-05, "loss": 1.769, "step": 15829 }, { "epoch": 64.61224489795919, "grad_norm": 45.513587951660156, "learning_rate": 1.7716326530612246e-05, "loss": 1.1541, "step": 15830 }, { "epoch": 64.61632653061224, "grad_norm": 40.6718635559082, "learning_rate": 1.7714285714285713e-05, "loss": 0.4864, "step": 15831 }, { "epoch": 64.62040816326531, "grad_norm": 39.43159866333008, "learning_rate": 1.7712244897959184e-05, "loss": 0.9638, "step": 15832 }, { "epoch": 64.62448979591836, "grad_norm": 33.57502746582031, "learning_rate": 1.7710204081632654e-05, "loss": 0.7464, "step": 15833 }, { "epoch": 64.62857142857143, "grad_norm": 36.66575622558594, "learning_rate": 1.7708163265306125e-05, "loss": 1.3938, "step": 15834 }, { "epoch": 64.63265306122449, "grad_norm": 3.380667209625244, "learning_rate": 1.7706122448979592e-05, "loss": 0.0473, "step": 15835 }, { "epoch": 64.63673469387756, "grad_norm": 18.4502010345459, "learning_rate": 1.7704081632653062e-05, "loss": 2.4042, "step": 15836 }, { "epoch": 64.64081632653061, "grad_norm": 30.318443298339844, "learning_rate": 1.7702040816326533e-05, "loss": 1.8719, "step": 15837 }, { "epoch": 64.64489795918368, "grad_norm": 40.43589401245117, "learning_rate": 1.77e-05, "loss": 1.478, "step": 15838 }, { "epoch": 64.64897959183673, "grad_norm": 39.890411376953125, "learning_rate": 1.769795918367347e-05, "loss": 0.9424, "step": 15839 }, { "epoch": 64.65306122448979, "grad_norm": 34.252357482910156, "learning_rate": 1.7695918367346938e-05, "loss": 0.7085, "step": 15840 }, { "epoch": 64.65714285714286, "grad_norm": 36.780242919921875, "learning_rate": 1.769387755102041e-05, "loss": 1.5939, "step": 15841 }, { "epoch": 64.66122448979591, "grad_norm": 39.2567138671875, "learning_rate": 1.769183673469388e-05, "loss": 1.2964, "step": 15842 }, { "epoch": 64.66530612244898, "grad_norm": 33.41519546508789, "learning_rate": 1.7689795918367346e-05, "loss": 0.9995, "step": 15843 }, { "epoch": 64.66938775510204, "grad_norm": 35.31007766723633, "learning_rate": 1.7687755102040816e-05, "loss": 1.088, "step": 15844 }, { "epoch": 64.6734693877551, "grad_norm": 41.8246955871582, "learning_rate": 1.7685714285714287e-05, "loss": 1.0075, "step": 15845 }, { "epoch": 64.67755102040816, "grad_norm": 30.077280044555664, "learning_rate": 1.7683673469387757e-05, "loss": 1.2367, "step": 15846 }, { "epoch": 64.68163265306123, "grad_norm": 18.854965209960938, "learning_rate": 1.7681632653061224e-05, "loss": 1.6971, "step": 15847 }, { "epoch": 64.68571428571428, "grad_norm": 34.52737808227539, "learning_rate": 1.7679591836734695e-05, "loss": 0.6597, "step": 15848 }, { "epoch": 64.68979591836735, "grad_norm": 30.04517936706543, "learning_rate": 1.7677551020408165e-05, "loss": 1.6633, "step": 15849 }, { "epoch": 64.6938775510204, "grad_norm": 47.777587890625, "learning_rate": 1.7675510204081633e-05, "loss": 0.4022, "step": 15850 }, { "epoch": 64.69795918367348, "grad_norm": 36.9525032043457, "learning_rate": 1.7673469387755103e-05, "loss": 0.5735, "step": 15851 }, { "epoch": 64.70204081632653, "grad_norm": 29.29418182373047, "learning_rate": 1.7671428571428574e-05, "loss": 1.6029, "step": 15852 }, { "epoch": 64.70612244897958, "grad_norm": 26.167428970336914, "learning_rate": 1.766938775510204e-05, "loss": 2.0662, "step": 15853 }, { "epoch": 64.71020408163265, "grad_norm": 48.0717658996582, "learning_rate": 1.766734693877551e-05, "loss": 1.0909, "step": 15854 }, { "epoch": 64.71428571428571, "grad_norm": 42.55891036987305, "learning_rate": 1.766530612244898e-05, "loss": 1.5071, "step": 15855 }, { "epoch": 64.71836734693878, "grad_norm": 43.13386535644531, "learning_rate": 1.7663265306122452e-05, "loss": 1.4424, "step": 15856 }, { "epoch": 64.72244897959183, "grad_norm": 37.38569259643555, "learning_rate": 1.766122448979592e-05, "loss": 0.7257, "step": 15857 }, { "epoch": 64.7265306122449, "grad_norm": 35.289554595947266, "learning_rate": 1.765918367346939e-05, "loss": 1.3739, "step": 15858 }, { "epoch": 64.73061224489796, "grad_norm": 37.01198959350586, "learning_rate": 1.7657142857142857e-05, "loss": 0.9341, "step": 15859 }, { "epoch": 64.73469387755102, "grad_norm": 25.273439407348633, "learning_rate": 1.7655102040816328e-05, "loss": 1.8483, "step": 15860 }, { "epoch": 64.73877551020408, "grad_norm": 34.03374481201172, "learning_rate": 1.7653061224489798e-05, "loss": 1.1098, "step": 15861 }, { "epoch": 64.74285714285715, "grad_norm": 30.886802673339844, "learning_rate": 1.7651020408163265e-05, "loss": 1.4311, "step": 15862 }, { "epoch": 64.7469387755102, "grad_norm": 31.68790626525879, "learning_rate": 1.7648979591836736e-05, "loss": 1.0319, "step": 15863 }, { "epoch": 64.75102040816327, "grad_norm": 61.55936813354492, "learning_rate": 1.7646938775510206e-05, "loss": 0.9799, "step": 15864 }, { "epoch": 64.75510204081633, "grad_norm": 28.294879913330078, "learning_rate": 1.7644897959183673e-05, "loss": 0.1935, "step": 15865 }, { "epoch": 64.7591836734694, "grad_norm": 27.71149444580078, "learning_rate": 1.7642857142857144e-05, "loss": 2.2145, "step": 15866 }, { "epoch": 64.76326530612245, "grad_norm": 23.092844009399414, "learning_rate": 1.764081632653061e-05, "loss": 1.9091, "step": 15867 }, { "epoch": 64.7673469387755, "grad_norm": 49.23241424560547, "learning_rate": 1.7638775510204085e-05, "loss": 0.6622, "step": 15868 }, { "epoch": 64.77142857142857, "grad_norm": 43.77025604248047, "learning_rate": 1.7636734693877552e-05, "loss": 0.7424, "step": 15869 }, { "epoch": 64.77551020408163, "grad_norm": 35.00908660888672, "learning_rate": 1.763469387755102e-05, "loss": 1.7191, "step": 15870 }, { "epoch": 64.7795918367347, "grad_norm": 31.86849594116211, "learning_rate": 1.763265306122449e-05, "loss": 0.6565, "step": 15871 }, { "epoch": 64.78367346938775, "grad_norm": 39.59010314941406, "learning_rate": 1.763061224489796e-05, "loss": 0.5178, "step": 15872 }, { "epoch": 64.78775510204082, "grad_norm": 34.90626907348633, "learning_rate": 1.762857142857143e-05, "loss": 1.5908, "step": 15873 }, { "epoch": 64.79183673469387, "grad_norm": 26.585535049438477, "learning_rate": 1.7626530612244898e-05, "loss": 1.5722, "step": 15874 }, { "epoch": 64.79591836734694, "grad_norm": 41.53261947631836, "learning_rate": 1.7624489795918368e-05, "loss": 1.0782, "step": 15875 }, { "epoch": 64.8, "grad_norm": 37.06097412109375, "learning_rate": 1.762244897959184e-05, "loss": 1.4486, "step": 15876 }, { "epoch": 64.80408163265307, "grad_norm": 31.545969009399414, "learning_rate": 1.7620408163265306e-05, "loss": 0.2581, "step": 15877 }, { "epoch": 64.80816326530612, "grad_norm": 30.0693416595459, "learning_rate": 1.7618367346938776e-05, "loss": 1.69, "step": 15878 }, { "epoch": 64.81224489795919, "grad_norm": 28.137056350708008, "learning_rate": 1.7616326530612247e-05, "loss": 1.7831, "step": 15879 }, { "epoch": 64.81632653061224, "grad_norm": 27.450069427490234, "learning_rate": 1.7614285714285717e-05, "loss": 1.2316, "step": 15880 }, { "epoch": 64.82040816326531, "grad_norm": 31.03913116455078, "learning_rate": 1.7612244897959185e-05, "loss": 0.8406, "step": 15881 }, { "epoch": 64.82448979591837, "grad_norm": 33.39091110229492, "learning_rate": 1.7610204081632652e-05, "loss": 0.6512, "step": 15882 }, { "epoch": 64.82857142857142, "grad_norm": 23.594953536987305, "learning_rate": 1.7608163265306126e-05, "loss": 1.7592, "step": 15883 }, { "epoch": 64.83265306122449, "grad_norm": 33.03125, "learning_rate": 1.7606122448979593e-05, "loss": 0.2003, "step": 15884 }, { "epoch": 64.83673469387755, "grad_norm": 33.395179748535156, "learning_rate": 1.7604081632653063e-05, "loss": 0.5474, "step": 15885 }, { "epoch": 64.84081632653061, "grad_norm": 51.60310363769531, "learning_rate": 1.760204081632653e-05, "loss": 0.5033, "step": 15886 }, { "epoch": 64.84489795918367, "grad_norm": 35.90742111206055, "learning_rate": 1.76e-05, "loss": 0.9948, "step": 15887 }, { "epoch": 64.84897959183674, "grad_norm": 24.913982391357422, "learning_rate": 1.759795918367347e-05, "loss": 1.8582, "step": 15888 }, { "epoch": 64.85306122448979, "grad_norm": 25.916156768798828, "learning_rate": 1.759591836734694e-05, "loss": 1.6141, "step": 15889 }, { "epoch": 64.85714285714286, "grad_norm": 43.274532318115234, "learning_rate": 1.759387755102041e-05, "loss": 0.6673, "step": 15890 }, { "epoch": 64.86122448979592, "grad_norm": 10.737529754638672, "learning_rate": 1.759183673469388e-05, "loss": 0.0944, "step": 15891 }, { "epoch": 64.86530612244898, "grad_norm": 40.81372833251953, "learning_rate": 1.7589795918367347e-05, "loss": 1.1589, "step": 15892 }, { "epoch": 64.86938775510204, "grad_norm": 29.175565719604492, "learning_rate": 1.7587755102040817e-05, "loss": 1.923, "step": 15893 }, { "epoch": 64.87346938775511, "grad_norm": 40.5800666809082, "learning_rate": 1.7585714285714284e-05, "loss": 0.9428, "step": 15894 }, { "epoch": 64.87755102040816, "grad_norm": 46.5039176940918, "learning_rate": 1.7583673469387758e-05, "loss": 0.7152, "step": 15895 }, { "epoch": 64.88163265306123, "grad_norm": 38.62510299682617, "learning_rate": 1.7581632653061225e-05, "loss": 1.3162, "step": 15896 }, { "epoch": 64.88571428571429, "grad_norm": 32.98249816894531, "learning_rate": 1.7579591836734692e-05, "loss": 0.8208, "step": 15897 }, { "epoch": 64.88979591836734, "grad_norm": 41.51993179321289, "learning_rate": 1.7577551020408163e-05, "loss": 0.5411, "step": 15898 }, { "epoch": 64.89387755102041, "grad_norm": 52.31233596801758, "learning_rate": 1.7575510204081634e-05, "loss": 0.8991, "step": 15899 }, { "epoch": 64.89795918367346, "grad_norm": 26.93424415588379, "learning_rate": 1.7573469387755104e-05, "loss": 1.7241, "step": 15900 }, { "epoch": 64.90204081632653, "grad_norm": 31.265913009643555, "learning_rate": 1.757142857142857e-05, "loss": 1.508, "step": 15901 }, { "epoch": 64.90612244897959, "grad_norm": 47.79106521606445, "learning_rate": 1.756938775510204e-05, "loss": 1.3189, "step": 15902 }, { "epoch": 64.91020408163266, "grad_norm": 27.18529510498047, "learning_rate": 1.7567346938775512e-05, "loss": 2.0745, "step": 15903 }, { "epoch": 64.91428571428571, "grad_norm": 28.079017639160156, "learning_rate": 1.756530612244898e-05, "loss": 0.2193, "step": 15904 }, { "epoch": 64.91836734693878, "grad_norm": 30.643205642700195, "learning_rate": 1.756326530612245e-05, "loss": 1.7871, "step": 15905 }, { "epoch": 64.92244897959183, "grad_norm": 42.5562744140625, "learning_rate": 1.756122448979592e-05, "loss": 1.3571, "step": 15906 }, { "epoch": 64.9265306122449, "grad_norm": 33.78615188598633, "learning_rate": 1.755918367346939e-05, "loss": 0.8977, "step": 15907 }, { "epoch": 64.93061224489796, "grad_norm": 15.705782890319824, "learning_rate": 1.7557142857142858e-05, "loss": 2.9114, "step": 15908 }, { "epoch": 64.93469387755103, "grad_norm": 52.20525360107422, "learning_rate": 1.7555102040816325e-05, "loss": 0.7546, "step": 15909 }, { "epoch": 64.93877551020408, "grad_norm": 30.885080337524414, "learning_rate": 1.75530612244898e-05, "loss": 1.896, "step": 15910 }, { "epoch": 64.94285714285714, "grad_norm": 37.34254455566406, "learning_rate": 1.7551020408163266e-05, "loss": 1.0515, "step": 15911 }, { "epoch": 64.9469387755102, "grad_norm": 33.40119934082031, "learning_rate": 1.7548979591836737e-05, "loss": 1.2733, "step": 15912 }, { "epoch": 64.95102040816326, "grad_norm": 29.43346405029297, "learning_rate": 1.7546938775510204e-05, "loss": 1.5894, "step": 15913 }, { "epoch": 64.95510204081633, "grad_norm": 33.4727783203125, "learning_rate": 1.7544897959183674e-05, "loss": 1.1083, "step": 15914 }, { "epoch": 64.95918367346938, "grad_norm": 24.083040237426758, "learning_rate": 1.7542857142857145e-05, "loss": 1.9137, "step": 15915 }, { "epoch": 64.96326530612245, "grad_norm": 32.390785217285156, "learning_rate": 1.7540816326530612e-05, "loss": 1.3023, "step": 15916 }, { "epoch": 64.9673469387755, "grad_norm": 22.92085075378418, "learning_rate": 1.7538775510204082e-05, "loss": 2.3953, "step": 15917 }, { "epoch": 64.97142857142858, "grad_norm": 36.25617218017578, "learning_rate": 1.7536734693877553e-05, "loss": 0.6235, "step": 15918 }, { "epoch": 64.97551020408163, "grad_norm": 36.399574279785156, "learning_rate": 1.753469387755102e-05, "loss": 1.5958, "step": 15919 }, { "epoch": 64.9795918367347, "grad_norm": 50.78238296508789, "learning_rate": 1.753265306122449e-05, "loss": 0.5448, "step": 15920 }, { "epoch": 64.98367346938775, "grad_norm": 36.14604949951172, "learning_rate": 1.7530612244897958e-05, "loss": 1.0062, "step": 15921 }, { "epoch": 64.98775510204082, "grad_norm": 43.338653564453125, "learning_rate": 1.752857142857143e-05, "loss": 0.8056, "step": 15922 }, { "epoch": 64.99183673469388, "grad_norm": 28.828174591064453, "learning_rate": 1.75265306122449e-05, "loss": 2.1137, "step": 15923 }, { "epoch": 64.99591836734695, "grad_norm": 43.85586929321289, "learning_rate": 1.7524489795918366e-05, "loss": 1.1129, "step": 15924 }, { "epoch": 65.0, "grad_norm": 27.615964889526367, "learning_rate": 1.7522448979591836e-05, "loss": 1.9036, "step": 15925 }, { "epoch": 65.00408163265305, "grad_norm": 23.27901840209961, "learning_rate": 1.7520408163265307e-05, "loss": 2.2526, "step": 15926 }, { "epoch": 65.00816326530612, "grad_norm": 29.52408790588379, "learning_rate": 1.7518367346938777e-05, "loss": 1.3981, "step": 15927 }, { "epoch": 65.01224489795918, "grad_norm": 58.23821258544922, "learning_rate": 1.7516326530612245e-05, "loss": 0.8479, "step": 15928 }, { "epoch": 65.01632653061225, "grad_norm": 39.96009826660156, "learning_rate": 1.7514285714285715e-05, "loss": 1.3144, "step": 15929 }, { "epoch": 65.0204081632653, "grad_norm": 24.654024124145508, "learning_rate": 1.7512244897959186e-05, "loss": 1.5977, "step": 15930 }, { "epoch": 65.02448979591837, "grad_norm": 38.968170166015625, "learning_rate": 1.7510204081632653e-05, "loss": 0.5724, "step": 15931 }, { "epoch": 65.02857142857142, "grad_norm": 37.637550354003906, "learning_rate": 1.7508163265306123e-05, "loss": 0.6682, "step": 15932 }, { "epoch": 65.0326530612245, "grad_norm": 40.880165100097656, "learning_rate": 1.7506122448979594e-05, "loss": 1.106, "step": 15933 }, { "epoch": 65.03673469387755, "grad_norm": 27.46739387512207, "learning_rate": 1.7504081632653064e-05, "loss": 1.538, "step": 15934 }, { "epoch": 65.04081632653062, "grad_norm": 32.365482330322266, "learning_rate": 1.750204081632653e-05, "loss": 1.1746, "step": 15935 }, { "epoch": 65.04489795918367, "grad_norm": 31.885841369628906, "learning_rate": 1.75e-05, "loss": 1.7586, "step": 15936 }, { "epoch": 65.04897959183674, "grad_norm": 25.369544982910156, "learning_rate": 1.7497959183673472e-05, "loss": 1.8969, "step": 15937 }, { "epoch": 65.0530612244898, "grad_norm": 14.522024154663086, "learning_rate": 1.749591836734694e-05, "loss": 2.5256, "step": 15938 }, { "epoch": 65.05714285714286, "grad_norm": 32.814476013183594, "learning_rate": 1.749387755102041e-05, "loss": 1.3755, "step": 15939 }, { "epoch": 65.06122448979592, "grad_norm": 27.204898834228516, "learning_rate": 1.7491836734693877e-05, "loss": 2.0219, "step": 15940 }, { "epoch": 65.06530612244897, "grad_norm": 31.105867385864258, "learning_rate": 1.7489795918367348e-05, "loss": 1.103, "step": 15941 }, { "epoch": 65.06938775510204, "grad_norm": 51.69480514526367, "learning_rate": 1.7487755102040818e-05, "loss": 1.5253, "step": 15942 }, { "epoch": 65.0734693877551, "grad_norm": 39.489444732666016, "learning_rate": 1.7485714285714285e-05, "loss": 1.1937, "step": 15943 }, { "epoch": 65.07755102040817, "grad_norm": 35.61296463012695, "learning_rate": 1.7483673469387756e-05, "loss": 1.0725, "step": 15944 }, { "epoch": 65.08163265306122, "grad_norm": 36.76515579223633, "learning_rate": 1.7481632653061226e-05, "loss": 0.7005, "step": 15945 }, { "epoch": 65.08571428571429, "grad_norm": 20.080629348754883, "learning_rate": 1.7479591836734693e-05, "loss": 2.3479, "step": 15946 }, { "epoch": 65.08979591836734, "grad_norm": 47.54783630371094, "learning_rate": 1.7477551020408164e-05, "loss": 0.6933, "step": 15947 }, { "epoch": 65.09387755102041, "grad_norm": 31.574167251586914, "learning_rate": 1.747551020408163e-05, "loss": 1.1675, "step": 15948 }, { "epoch": 65.09795918367347, "grad_norm": 35.784061431884766, "learning_rate": 1.7473469387755105e-05, "loss": 1.3045, "step": 15949 }, { "epoch": 65.10204081632654, "grad_norm": 38.50616455078125, "learning_rate": 1.7471428571428572e-05, "loss": 0.8505, "step": 15950 }, { "epoch": 65.10612244897959, "grad_norm": 46.933807373046875, "learning_rate": 1.7469387755102043e-05, "loss": 0.5197, "step": 15951 }, { "epoch": 65.11020408163266, "grad_norm": 37.691871643066406, "learning_rate": 1.746734693877551e-05, "loss": 1.1213, "step": 15952 }, { "epoch": 65.11428571428571, "grad_norm": 26.322500228881836, "learning_rate": 1.746530612244898e-05, "loss": 1.1292, "step": 15953 }, { "epoch": 65.11836734693877, "grad_norm": 24.890430450439453, "learning_rate": 1.746326530612245e-05, "loss": 0.2372, "step": 15954 }, { "epoch": 65.12244897959184, "grad_norm": 35.67173385620117, "learning_rate": 1.7461224489795918e-05, "loss": 1.2681, "step": 15955 }, { "epoch": 65.12653061224489, "grad_norm": 23.326711654663086, "learning_rate": 1.745918367346939e-05, "loss": 1.8031, "step": 15956 }, { "epoch": 65.13061224489796, "grad_norm": 35.82052230834961, "learning_rate": 1.745714285714286e-05, "loss": 1.1975, "step": 15957 }, { "epoch": 65.13469387755102, "grad_norm": 31.743608474731445, "learning_rate": 1.7455102040816326e-05, "loss": 1.7862, "step": 15958 }, { "epoch": 65.13877551020408, "grad_norm": 26.628366470336914, "learning_rate": 1.7453061224489797e-05, "loss": 1.6736, "step": 15959 }, { "epoch": 65.14285714285714, "grad_norm": 24.265281677246094, "learning_rate": 1.7451020408163267e-05, "loss": 1.8303, "step": 15960 }, { "epoch": 65.14693877551021, "grad_norm": 34.206661224365234, "learning_rate": 1.7448979591836738e-05, "loss": 0.2533, "step": 15961 }, { "epoch": 65.15102040816326, "grad_norm": 39.03200149536133, "learning_rate": 1.7446938775510205e-05, "loss": 0.9698, "step": 15962 }, { "epoch": 65.15510204081633, "grad_norm": 21.466678619384766, "learning_rate": 1.7444897959183672e-05, "loss": 2.3927, "step": 15963 }, { "epoch": 65.15918367346939, "grad_norm": 21.714006423950195, "learning_rate": 1.7442857142857146e-05, "loss": 2.223, "step": 15964 }, { "epoch": 65.16326530612245, "grad_norm": 51.4178352355957, "learning_rate": 1.7440816326530613e-05, "loss": 0.5922, "step": 15965 }, { "epoch": 65.16734693877551, "grad_norm": 39.859275817871094, "learning_rate": 1.7438775510204083e-05, "loss": 0.6251, "step": 15966 }, { "epoch": 65.17142857142858, "grad_norm": 42.04410171508789, "learning_rate": 1.743673469387755e-05, "loss": 1.097, "step": 15967 }, { "epoch": 65.17551020408163, "grad_norm": 39.459320068359375, "learning_rate": 1.743469387755102e-05, "loss": 0.6957, "step": 15968 }, { "epoch": 65.17959183673469, "grad_norm": 35.562255859375, "learning_rate": 1.743265306122449e-05, "loss": 1.4355, "step": 15969 }, { "epoch": 65.18367346938776, "grad_norm": 36.32486343383789, "learning_rate": 1.743061224489796e-05, "loss": 0.4394, "step": 15970 }, { "epoch": 65.18775510204081, "grad_norm": 46.153690338134766, "learning_rate": 1.742857142857143e-05, "loss": 0.8971, "step": 15971 }, { "epoch": 65.19183673469388, "grad_norm": 35.37871551513672, "learning_rate": 1.74265306122449e-05, "loss": 0.8565, "step": 15972 }, { "epoch": 65.19591836734693, "grad_norm": 27.018033981323242, "learning_rate": 1.742448979591837e-05, "loss": 0.2128, "step": 15973 }, { "epoch": 65.2, "grad_norm": 34.17823028564453, "learning_rate": 1.7422448979591837e-05, "loss": 1.3067, "step": 15974 }, { "epoch": 65.20408163265306, "grad_norm": 39.30234909057617, "learning_rate": 1.7420408163265304e-05, "loss": 0.4743, "step": 15975 }, { "epoch": 65.20816326530613, "grad_norm": 26.933128356933594, "learning_rate": 1.741836734693878e-05, "loss": 2.3522, "step": 15976 }, { "epoch": 65.21224489795918, "grad_norm": 18.167219161987305, "learning_rate": 1.7416326530612245e-05, "loss": 2.6824, "step": 15977 }, { "epoch": 65.21632653061225, "grad_norm": 31.353958129882812, "learning_rate": 1.7414285714285716e-05, "loss": 1.4741, "step": 15978 }, { "epoch": 65.2204081632653, "grad_norm": 38.54144287109375, "learning_rate": 1.7412244897959183e-05, "loss": 0.8854, "step": 15979 }, { "epoch": 65.22448979591837, "grad_norm": 20.240158081054688, "learning_rate": 1.7410204081632654e-05, "loss": 1.9579, "step": 15980 }, { "epoch": 65.22857142857143, "grad_norm": 22.62639617919922, "learning_rate": 1.7408163265306124e-05, "loss": 2.2443, "step": 15981 }, { "epoch": 65.2326530612245, "grad_norm": 58.92321014404297, "learning_rate": 1.740612244897959e-05, "loss": 0.5713, "step": 15982 }, { "epoch": 65.23673469387755, "grad_norm": 31.992206573486328, "learning_rate": 1.7404081632653062e-05, "loss": 1.1483, "step": 15983 }, { "epoch": 65.2408163265306, "grad_norm": 31.000246047973633, "learning_rate": 1.7402040816326532e-05, "loss": 0.5789, "step": 15984 }, { "epoch": 65.24489795918367, "grad_norm": 39.840911865234375, "learning_rate": 1.74e-05, "loss": 0.5264, "step": 15985 }, { "epoch": 65.24897959183673, "grad_norm": 22.68404769897461, "learning_rate": 1.739795918367347e-05, "loss": 1.8866, "step": 15986 }, { "epoch": 65.2530612244898, "grad_norm": NaN, "learning_rate": 1.739591836734694e-05, "loss": 0.5653, "step": 15987 }, { "epoch": 65.25714285714285, "grad_norm": 41.99832534790039, "learning_rate": 1.739591836734694e-05, "loss": 1.5392, "step": 15988 }, { "epoch": 65.26122448979592, "grad_norm": 33.59791564941406, "learning_rate": 1.739387755102041e-05, "loss": 1.5085, "step": 15989 }, { "epoch": 65.26530612244898, "grad_norm": 21.905820846557617, "learning_rate": 1.7391836734693878e-05, "loss": 1.9301, "step": 15990 }, { "epoch": 65.26938775510204, "grad_norm": 16.205631256103516, "learning_rate": 1.7389795918367345e-05, "loss": 2.369, "step": 15991 }, { "epoch": 65.2734693877551, "grad_norm": 31.242321014404297, "learning_rate": 1.738775510204082e-05, "loss": 1.6098, "step": 15992 }, { "epoch": 65.27755102040817, "grad_norm": 29.742876052856445, "learning_rate": 1.7385714285714286e-05, "loss": 1.8425, "step": 15993 }, { "epoch": 65.28163265306122, "grad_norm": 32.21672058105469, "learning_rate": 1.7383673469387757e-05, "loss": 0.4068, "step": 15994 }, { "epoch": 65.28571428571429, "grad_norm": 37.16942596435547, "learning_rate": 1.7381632653061224e-05, "loss": 0.9037, "step": 15995 }, { "epoch": 65.28979591836735, "grad_norm": 29.059263229370117, "learning_rate": 1.7379591836734694e-05, "loss": 0.1971, "step": 15996 }, { "epoch": 65.29387755102042, "grad_norm": 30.462200164794922, "learning_rate": 1.7377551020408165e-05, "loss": 0.6376, "step": 15997 }, { "epoch": 65.29795918367347, "grad_norm": 24.689363479614258, "learning_rate": 1.7375510204081632e-05, "loss": 2.0003, "step": 15998 }, { "epoch": 65.30204081632652, "grad_norm": 22.276020050048828, "learning_rate": 1.7373469387755103e-05, "loss": 2.3528, "step": 15999 }, { "epoch": 65.3061224489796, "grad_norm": 32.12538146972656, "learning_rate": 1.7371428571428573e-05, "loss": 0.7833, "step": 16000 }, { "epoch": 65.31020408163265, "grad_norm": 31.335552215576172, "learning_rate": 1.7369387755102044e-05, "loss": 1.7327, "step": 16001 }, { "epoch": 65.31428571428572, "grad_norm": 58.678123474121094, "learning_rate": 1.736734693877551e-05, "loss": 1.4272, "step": 16002 }, { "epoch": 65.31836734693877, "grad_norm": 31.561838150024414, "learning_rate": 1.7365306122448978e-05, "loss": 1.1074, "step": 16003 }, { "epoch": 65.32244897959184, "grad_norm": 30.9560546875, "learning_rate": 1.7363265306122452e-05, "loss": 1.7311, "step": 16004 }, { "epoch": 65.3265306122449, "grad_norm": 30.108278274536133, "learning_rate": 1.736122448979592e-05, "loss": 0.9913, "step": 16005 }, { "epoch": 65.33061224489796, "grad_norm": 26.269207000732422, "learning_rate": 1.735918367346939e-05, "loss": 1.6958, "step": 16006 }, { "epoch": 65.33469387755102, "grad_norm": 26.84134292602539, "learning_rate": 1.7357142857142856e-05, "loss": 1.6782, "step": 16007 }, { "epoch": 65.33877551020409, "grad_norm": 35.18817901611328, "learning_rate": 1.7355102040816327e-05, "loss": 0.5188, "step": 16008 }, { "epoch": 65.34285714285714, "grad_norm": 15.00952434539795, "learning_rate": 1.7353061224489798e-05, "loss": 0.0942, "step": 16009 }, { "epoch": 65.34693877551021, "grad_norm": 33.48017120361328, "learning_rate": 1.7351020408163265e-05, "loss": 0.1917, "step": 16010 }, { "epoch": 65.35102040816327, "grad_norm": 30.560949325561523, "learning_rate": 1.7348979591836735e-05, "loss": 1.3399, "step": 16011 }, { "epoch": 65.35510204081632, "grad_norm": 35.92490768432617, "learning_rate": 1.7346938775510206e-05, "loss": 1.6851, "step": 16012 }, { "epoch": 65.35918367346939, "grad_norm": 29.618013381958008, "learning_rate": 1.7344897959183673e-05, "loss": 0.4787, "step": 16013 }, { "epoch": 65.36326530612244, "grad_norm": 28.55333137512207, "learning_rate": 1.7342857142857143e-05, "loss": 0.3141, "step": 16014 }, { "epoch": 65.36734693877551, "grad_norm": 37.32572555541992, "learning_rate": 1.7340816326530614e-05, "loss": 0.9688, "step": 16015 }, { "epoch": 65.37142857142857, "grad_norm": 31.060569763183594, "learning_rate": 1.7338775510204084e-05, "loss": 0.9541, "step": 16016 }, { "epoch": 65.37551020408164, "grad_norm": 32.39963912963867, "learning_rate": 1.733673469387755e-05, "loss": 1.1276, "step": 16017 }, { "epoch": 65.37959183673469, "grad_norm": 33.32369613647461, "learning_rate": 1.7334693877551022e-05, "loss": 1.4302, "step": 16018 }, { "epoch": 65.38367346938776, "grad_norm": 33.33052444458008, "learning_rate": 1.7332653061224492e-05, "loss": 0.4263, "step": 16019 }, { "epoch": 65.38775510204081, "grad_norm": 33.959102630615234, "learning_rate": 1.733061224489796e-05, "loss": 1.5195, "step": 16020 }, { "epoch": 65.39183673469388, "grad_norm": 35.84185028076172, "learning_rate": 1.732857142857143e-05, "loss": 1.4196, "step": 16021 }, { "epoch": 65.39591836734694, "grad_norm": 41.739227294921875, "learning_rate": 1.7326530612244897e-05, "loss": 0.2958, "step": 16022 }, { "epoch": 65.4, "grad_norm": 35.08056640625, "learning_rate": 1.7324489795918368e-05, "loss": 1.0633, "step": 16023 }, { "epoch": 65.40408163265306, "grad_norm": 43.5756950378418, "learning_rate": 1.7322448979591838e-05, "loss": 0.8737, "step": 16024 }, { "epoch": 65.40816326530613, "grad_norm": 29.396242141723633, "learning_rate": 1.7320408163265305e-05, "loss": 1.5955, "step": 16025 }, { "epoch": 65.41224489795918, "grad_norm": 37.112186431884766, "learning_rate": 1.7318367346938776e-05, "loss": 1.4, "step": 16026 }, { "epoch": 65.41632653061224, "grad_norm": 41.547855377197266, "learning_rate": 1.7316326530612246e-05, "loss": 1.0013, "step": 16027 }, { "epoch": 65.42040816326531, "grad_norm": 32.39664840698242, "learning_rate": 1.7314285714285717e-05, "loss": 0.6141, "step": 16028 }, { "epoch": 65.42448979591836, "grad_norm": 35.934425354003906, "learning_rate": 1.7312244897959184e-05, "loss": 0.7008, "step": 16029 }, { "epoch": 65.42857142857143, "grad_norm": 34.77992630004883, "learning_rate": 1.731020408163265e-05, "loss": 1.0694, "step": 16030 }, { "epoch": 65.43265306122449, "grad_norm": 38.11480712890625, "learning_rate": 1.7308163265306125e-05, "loss": 1.0243, "step": 16031 }, { "epoch": 65.43673469387755, "grad_norm": 24.51026725769043, "learning_rate": 1.7306122448979592e-05, "loss": 1.9111, "step": 16032 }, { "epoch": 65.44081632653061, "grad_norm": 29.61878776550293, "learning_rate": 1.7304081632653063e-05, "loss": 2.0558, "step": 16033 }, { "epoch": 65.44489795918368, "grad_norm": 23.29798126220703, "learning_rate": 1.730204081632653e-05, "loss": 1.7841, "step": 16034 }, { "epoch": 65.44897959183673, "grad_norm": 47.970428466796875, "learning_rate": 1.73e-05, "loss": 0.6027, "step": 16035 }, { "epoch": 65.4530612244898, "grad_norm": 46.030670166015625, "learning_rate": 1.729795918367347e-05, "loss": 0.4964, "step": 16036 }, { "epoch": 65.45714285714286, "grad_norm": 67.33411407470703, "learning_rate": 1.7295918367346938e-05, "loss": 0.7159, "step": 16037 }, { "epoch": 65.46122448979592, "grad_norm": 31.201007843017578, "learning_rate": 1.729387755102041e-05, "loss": 1.4574, "step": 16038 }, { "epoch": 65.46530612244898, "grad_norm": 31.34477996826172, "learning_rate": 1.729183673469388e-05, "loss": 1.623, "step": 16039 }, { "epoch": 65.46938775510205, "grad_norm": 31.828340530395508, "learning_rate": 1.728979591836735e-05, "loss": 1.396, "step": 16040 }, { "epoch": 65.4734693877551, "grad_norm": 42.7086296081543, "learning_rate": 1.7287755102040817e-05, "loss": 0.9795, "step": 16041 }, { "epoch": 65.47755102040816, "grad_norm": 49.4123420715332, "learning_rate": 1.7285714285714287e-05, "loss": 1.0808, "step": 16042 }, { "epoch": 65.48163265306123, "grad_norm": 12.3275728225708, "learning_rate": 1.7283673469387758e-05, "loss": 3.1602, "step": 16043 }, { "epoch": 65.48571428571428, "grad_norm": 24.800813674926758, "learning_rate": 1.7281632653061225e-05, "loss": 2.6, "step": 16044 }, { "epoch": 65.48979591836735, "grad_norm": 28.156784057617188, "learning_rate": 1.7279591836734695e-05, "loss": 1.8539, "step": 16045 }, { "epoch": 65.4938775510204, "grad_norm": 28.144092559814453, "learning_rate": 1.7277551020408166e-05, "loss": 1.9489, "step": 16046 }, { "epoch": 65.49795918367347, "grad_norm": 33.81515884399414, "learning_rate": 1.7275510204081633e-05, "loss": 1.3262, "step": 16047 }, { "epoch": 65.50204081632653, "grad_norm": 26.08890724182129, "learning_rate": 1.7273469387755104e-05, "loss": 1.7307, "step": 16048 }, { "epoch": 65.5061224489796, "grad_norm": 32.01657485961914, "learning_rate": 1.727142857142857e-05, "loss": 0.4756, "step": 16049 }, { "epoch": 65.51020408163265, "grad_norm": 39.57065963745117, "learning_rate": 1.726938775510204e-05, "loss": 0.6707, "step": 16050 }, { "epoch": 65.51428571428572, "grad_norm": 24.70673179626465, "learning_rate": 1.726734693877551e-05, "loss": 1.7968, "step": 16051 }, { "epoch": 65.51836734693877, "grad_norm": 5.412691116333008, "learning_rate": 1.726530612244898e-05, "loss": 0.0478, "step": 16052 }, { "epoch": 65.52244897959184, "grad_norm": 35.974266052246094, "learning_rate": 1.726326530612245e-05, "loss": 0.7465, "step": 16053 }, { "epoch": 65.5265306122449, "grad_norm": 32.083213806152344, "learning_rate": 1.726122448979592e-05, "loss": 0.5632, "step": 16054 }, { "epoch": 65.53061224489795, "grad_norm": 16.234596252441406, "learning_rate": 1.725918367346939e-05, "loss": 0.1442, "step": 16055 }, { "epoch": 65.53469387755102, "grad_norm": 19.72518539428711, "learning_rate": 1.7257142857142857e-05, "loss": 2.3223, "step": 16056 }, { "epoch": 65.53877551020408, "grad_norm": 49.6978645324707, "learning_rate": 1.7255102040816325e-05, "loss": 1.2994, "step": 16057 }, { "epoch": 65.54285714285714, "grad_norm": 25.885339736938477, "learning_rate": 1.72530612244898e-05, "loss": 1.6661, "step": 16058 }, { "epoch": 65.5469387755102, "grad_norm": 40.43113708496094, "learning_rate": 1.7251020408163266e-05, "loss": 0.6985, "step": 16059 }, { "epoch": 65.55102040816327, "grad_norm": 45.233428955078125, "learning_rate": 1.7248979591836736e-05, "loss": 0.5594, "step": 16060 }, { "epoch": 65.55510204081632, "grad_norm": 30.38629722595215, "learning_rate": 1.7246938775510203e-05, "loss": 1.6817, "step": 16061 }, { "epoch": 65.55918367346939, "grad_norm": 41.996517181396484, "learning_rate": 1.7244897959183677e-05, "loss": 0.9853, "step": 16062 }, { "epoch": 65.56326530612245, "grad_norm": 33.84551239013672, "learning_rate": 1.7242857142857144e-05, "loss": 1.3091, "step": 16063 }, { "epoch": 65.56734693877551, "grad_norm": 31.629085540771484, "learning_rate": 1.724081632653061e-05, "loss": 1.4602, "step": 16064 }, { "epoch": 65.57142857142857, "grad_norm": 69.07694244384766, "learning_rate": 1.7238775510204082e-05, "loss": 1.4351, "step": 16065 }, { "epoch": 65.57551020408164, "grad_norm": 26.91447639465332, "learning_rate": 1.7236734693877552e-05, "loss": 0.1982, "step": 16066 }, { "epoch": 65.57959183673469, "grad_norm": 35.28718185424805, "learning_rate": 1.7234693877551023e-05, "loss": 1.1331, "step": 16067 }, { "epoch": 65.58367346938776, "grad_norm": 42.66182327270508, "learning_rate": 1.723265306122449e-05, "loss": 0.918, "step": 16068 }, { "epoch": 65.58775510204082, "grad_norm": 29.798952102661133, "learning_rate": 1.723061224489796e-05, "loss": 1.7039, "step": 16069 }, { "epoch": 65.59183673469387, "grad_norm": 37.59183883666992, "learning_rate": 1.722857142857143e-05, "loss": 1.2793, "step": 16070 }, { "epoch": 65.59591836734694, "grad_norm": 35.65628433227539, "learning_rate": 1.7226530612244898e-05, "loss": 1.8456, "step": 16071 }, { "epoch": 65.6, "grad_norm": 25.506006240844727, "learning_rate": 1.722448979591837e-05, "loss": 1.518, "step": 16072 }, { "epoch": 65.60408163265306, "grad_norm": 22.523183822631836, "learning_rate": 1.722244897959184e-05, "loss": 1.8967, "step": 16073 }, { "epoch": 65.60816326530612, "grad_norm": 46.51947021484375, "learning_rate": 1.7220408163265306e-05, "loss": 0.9091, "step": 16074 }, { "epoch": 65.61224489795919, "grad_norm": 44.80797576904297, "learning_rate": 1.7218367346938777e-05, "loss": 0.6582, "step": 16075 }, { "epoch": 65.61632653061224, "grad_norm": 20.811481475830078, "learning_rate": 1.7216326530612244e-05, "loss": 1.7841, "step": 16076 }, { "epoch": 65.62040816326531, "grad_norm": 32.790184020996094, "learning_rate": 1.7214285714285715e-05, "loss": 2.0862, "step": 16077 }, { "epoch": 65.62448979591836, "grad_norm": 34.076805114746094, "learning_rate": 1.7212244897959185e-05, "loss": 1.5347, "step": 16078 }, { "epoch": 65.62857142857143, "grad_norm": 24.79620933532715, "learning_rate": 1.7210204081632652e-05, "loss": 1.6029, "step": 16079 }, { "epoch": 65.63265306122449, "grad_norm": 24.184904098510742, "learning_rate": 1.7208163265306123e-05, "loss": 1.8245, "step": 16080 }, { "epoch": 65.63673469387756, "grad_norm": 35.37692642211914, "learning_rate": 1.7206122448979593e-05, "loss": 1.503, "step": 16081 }, { "epoch": 65.64081632653061, "grad_norm": 34.75566101074219, "learning_rate": 1.7204081632653064e-05, "loss": 0.1866, "step": 16082 }, { "epoch": 65.64489795918368, "grad_norm": 15.676098823547363, "learning_rate": 1.720204081632653e-05, "loss": 0.2252, "step": 16083 }, { "epoch": 65.64897959183673, "grad_norm": 24.86495590209961, "learning_rate": 1.7199999999999998e-05, "loss": 2.0193, "step": 16084 }, { "epoch": 65.65306122448979, "grad_norm": 46.24201202392578, "learning_rate": 1.7197959183673472e-05, "loss": 1.5348, "step": 16085 }, { "epoch": 65.65714285714286, "grad_norm": 33.48666000366211, "learning_rate": 1.719591836734694e-05, "loss": 1.3536, "step": 16086 }, { "epoch": 65.66122448979591, "grad_norm": 24.563581466674805, "learning_rate": 1.719387755102041e-05, "loss": 1.8764, "step": 16087 }, { "epoch": 65.66530612244898, "grad_norm": 35.48283386230469, "learning_rate": 1.7191836734693877e-05, "loss": 0.3306, "step": 16088 }, { "epoch": 65.66938775510204, "grad_norm": 20.422969818115234, "learning_rate": 1.718979591836735e-05, "loss": 2.3709, "step": 16089 }, { "epoch": 65.6734693877551, "grad_norm": 20.351491928100586, "learning_rate": 1.7187755102040818e-05, "loss": 1.6855, "step": 16090 }, { "epoch": 65.67755102040816, "grad_norm": 27.12794303894043, "learning_rate": 1.7185714285714285e-05, "loss": 1.6328, "step": 16091 }, { "epoch": 65.68163265306123, "grad_norm": 52.253074645996094, "learning_rate": 1.7183673469387755e-05, "loss": 1.644, "step": 16092 }, { "epoch": 65.68571428571428, "grad_norm": 21.11101531982422, "learning_rate": 1.7181632653061226e-05, "loss": 1.8613, "step": 16093 }, { "epoch": 65.68979591836735, "grad_norm": 30.607690811157227, "learning_rate": 1.7179591836734696e-05, "loss": 1.2844, "step": 16094 }, { "epoch": 65.6938775510204, "grad_norm": 30.03017234802246, "learning_rate": 1.7177551020408163e-05, "loss": 1.4794, "step": 16095 }, { "epoch": 65.69795918367348, "grad_norm": 31.145395278930664, "learning_rate": 1.7175510204081634e-05, "loss": 0.7982, "step": 16096 }, { "epoch": 65.70204081632653, "grad_norm": 24.40220832824707, "learning_rate": 1.7173469387755104e-05, "loss": 1.8595, "step": 16097 }, { "epoch": 65.70612244897958, "grad_norm": 24.52083396911621, "learning_rate": 1.717142857142857e-05, "loss": 1.896, "step": 16098 }, { "epoch": 65.71020408163265, "grad_norm": 37.262367248535156, "learning_rate": 1.7169387755102042e-05, "loss": 1.0036, "step": 16099 }, { "epoch": 65.71428571428571, "grad_norm": 42.2473258972168, "learning_rate": 1.7167346938775513e-05, "loss": 0.4824, "step": 16100 }, { "epoch": 65.71836734693878, "grad_norm": 36.848724365234375, "learning_rate": 1.716530612244898e-05, "loss": 0.5921, "step": 16101 }, { "epoch": 65.72244897959183, "grad_norm": 20.52656364440918, "learning_rate": 1.716326530612245e-05, "loss": 2.4315, "step": 16102 }, { "epoch": 65.7265306122449, "grad_norm": 34.44209289550781, "learning_rate": 1.7161224489795917e-05, "loss": 1.5877, "step": 16103 }, { "epoch": 65.73061224489796, "grad_norm": 47.92941665649414, "learning_rate": 1.7159183673469388e-05, "loss": 0.7471, "step": 16104 }, { "epoch": 65.73469387755102, "grad_norm": 25.291574478149414, "learning_rate": 1.715714285714286e-05, "loss": 1.5333, "step": 16105 }, { "epoch": 65.73877551020408, "grad_norm": 38.114192962646484, "learning_rate": 1.7155102040816326e-05, "loss": 0.659, "step": 16106 }, { "epoch": 65.74285714285715, "grad_norm": 47.10499954223633, "learning_rate": 1.7153061224489796e-05, "loss": 1.2005, "step": 16107 }, { "epoch": 65.7469387755102, "grad_norm": 18.69598960876465, "learning_rate": 1.7151020408163267e-05, "loss": 2.2145, "step": 16108 }, { "epoch": 65.75102040816327, "grad_norm": 21.87568473815918, "learning_rate": 1.7148979591836737e-05, "loss": 1.8899, "step": 16109 }, { "epoch": 65.75510204081633, "grad_norm": 29.124990463256836, "learning_rate": 1.7146938775510204e-05, "loss": 1.8299, "step": 16110 }, { "epoch": 65.7591836734694, "grad_norm": 40.68014907836914, "learning_rate": 1.7144897959183675e-05, "loss": 0.8109, "step": 16111 }, { "epoch": 65.76326530612245, "grad_norm": 25.960641860961914, "learning_rate": 1.7142857142857145e-05, "loss": 1.5438, "step": 16112 }, { "epoch": 65.7673469387755, "grad_norm": 26.283395767211914, "learning_rate": 1.7140816326530612e-05, "loss": 1.8424, "step": 16113 }, { "epoch": 65.77142857142857, "grad_norm": 35.99317932128906, "learning_rate": 1.7138775510204083e-05, "loss": 0.9085, "step": 16114 }, { "epoch": 65.77551020408163, "grad_norm": 16.8266658782959, "learning_rate": 1.713673469387755e-05, "loss": 2.8874, "step": 16115 }, { "epoch": 65.7795918367347, "grad_norm": 28.409748077392578, "learning_rate": 1.7134693877551024e-05, "loss": 1.6311, "step": 16116 }, { "epoch": 65.78367346938775, "grad_norm": 31.94240951538086, "learning_rate": 1.713265306122449e-05, "loss": 1.0775, "step": 16117 }, { "epoch": 65.78775510204082, "grad_norm": 28.514150619506836, "learning_rate": 1.7130612244897958e-05, "loss": 1.5702, "step": 16118 }, { "epoch": 65.79183673469387, "grad_norm": 34.762664794921875, "learning_rate": 1.712857142857143e-05, "loss": 0.638, "step": 16119 }, { "epoch": 65.79591836734694, "grad_norm": 26.034801483154297, "learning_rate": 1.71265306122449e-05, "loss": 1.1312, "step": 16120 }, { "epoch": 65.8, "grad_norm": 25.341936111450195, "learning_rate": 1.712448979591837e-05, "loss": 1.8368, "step": 16121 }, { "epoch": 65.80408163265307, "grad_norm": 37.289981842041016, "learning_rate": 1.7122448979591837e-05, "loss": 0.8878, "step": 16122 }, { "epoch": 65.80816326530612, "grad_norm": 32.30861282348633, "learning_rate": 1.7120408163265307e-05, "loss": 1.7415, "step": 16123 }, { "epoch": 65.81224489795919, "grad_norm": 34.340206146240234, "learning_rate": 1.7118367346938778e-05, "loss": 0.9654, "step": 16124 }, { "epoch": 65.81632653061224, "grad_norm": 49.62006378173828, "learning_rate": 1.7116326530612245e-05, "loss": 0.7251, "step": 16125 }, { "epoch": 65.82040816326531, "grad_norm": 26.410831451416016, "learning_rate": 1.7114285714285715e-05, "loss": 1.7465, "step": 16126 }, { "epoch": 65.82448979591837, "grad_norm": 36.2193717956543, "learning_rate": 1.7112244897959183e-05, "loss": 0.6841, "step": 16127 }, { "epoch": 65.82857142857142, "grad_norm": 28.81977081298828, "learning_rate": 1.7110204081632653e-05, "loss": 1.8732, "step": 16128 }, { "epoch": 65.83265306122449, "grad_norm": 36.002967834472656, "learning_rate": 1.7108163265306124e-05, "loss": 1.0532, "step": 16129 }, { "epoch": 65.83673469387755, "grad_norm": 23.68140983581543, "learning_rate": 1.710612244897959e-05, "loss": 1.9061, "step": 16130 }, { "epoch": 65.84081632653061, "grad_norm": 31.58525848388672, "learning_rate": 1.710408163265306e-05, "loss": 1.4497, "step": 16131 }, { "epoch": 65.84489795918367, "grad_norm": 25.112163543701172, "learning_rate": 1.7102040816326532e-05, "loss": 1.9707, "step": 16132 }, { "epoch": 65.84897959183674, "grad_norm": 30.840618133544922, "learning_rate": 1.7100000000000002e-05, "loss": 1.0358, "step": 16133 }, { "epoch": 65.85306122448979, "grad_norm": 37.04837417602539, "learning_rate": 1.709795918367347e-05, "loss": 1.5898, "step": 16134 }, { "epoch": 65.85714285714286, "grad_norm": 27.68286895751953, "learning_rate": 1.709591836734694e-05, "loss": 1.5758, "step": 16135 }, { "epoch": 65.86122448979592, "grad_norm": 34.85952377319336, "learning_rate": 1.709387755102041e-05, "loss": 0.2071, "step": 16136 }, { "epoch": 65.86530612244898, "grad_norm": 43.96034622192383, "learning_rate": 1.7091836734693878e-05, "loss": 0.2941, "step": 16137 }, { "epoch": 65.86938775510204, "grad_norm": 35.48070526123047, "learning_rate": 1.7089795918367348e-05, "loss": 0.7111, "step": 16138 }, { "epoch": 65.87346938775511, "grad_norm": 21.481910705566406, "learning_rate": 1.708775510204082e-05, "loss": 2.3075, "step": 16139 }, { "epoch": 65.87755102040816, "grad_norm": 54.36012268066406, "learning_rate": 1.7085714285714286e-05, "loss": 0.747, "step": 16140 }, { "epoch": 65.88163265306123, "grad_norm": 29.653579711914062, "learning_rate": 1.7083673469387756e-05, "loss": 1.2972, "step": 16141 }, { "epoch": 65.88571428571429, "grad_norm": 23.160755157470703, "learning_rate": 1.7081632653061223e-05, "loss": 2.1305, "step": 16142 }, { "epoch": 65.88979591836734, "grad_norm": 34.62844467163086, "learning_rate": 1.7079591836734697e-05, "loss": 1.227, "step": 16143 }, { "epoch": 65.89387755102041, "grad_norm": 36.1847038269043, "learning_rate": 1.7077551020408164e-05, "loss": 1.628, "step": 16144 }, { "epoch": 65.89795918367346, "grad_norm": 23.61248207092285, "learning_rate": 1.707551020408163e-05, "loss": 1.8172, "step": 16145 }, { "epoch": 65.90204081632653, "grad_norm": 33.494998931884766, "learning_rate": 1.7073469387755102e-05, "loss": 1.8004, "step": 16146 }, { "epoch": 65.90612244897959, "grad_norm": 34.18335723876953, "learning_rate": 1.7071428571428573e-05, "loss": 1.6578, "step": 16147 }, { "epoch": 65.91020408163266, "grad_norm": 44.664310455322266, "learning_rate": 1.7069387755102043e-05, "loss": 1.002, "step": 16148 }, { "epoch": 65.91428571428571, "grad_norm": 16.32733154296875, "learning_rate": 1.706734693877551e-05, "loss": 2.4883, "step": 16149 }, { "epoch": 65.91836734693878, "grad_norm": 20.666763305664062, "learning_rate": 1.706530612244898e-05, "loss": 0.1635, "step": 16150 }, { "epoch": 65.92244897959183, "grad_norm": 39.182395935058594, "learning_rate": 1.706326530612245e-05, "loss": 0.5062, "step": 16151 }, { "epoch": 65.9265306122449, "grad_norm": 23.007091522216797, "learning_rate": 1.706122448979592e-05, "loss": 2.318, "step": 16152 }, { "epoch": 65.93061224489796, "grad_norm": 26.537328720092773, "learning_rate": 1.705918367346939e-05, "loss": 1.1219, "step": 16153 }, { "epoch": 65.93469387755103, "grad_norm": 31.920211791992188, "learning_rate": 1.7057142857142856e-05, "loss": 1.1449, "step": 16154 }, { "epoch": 65.93877551020408, "grad_norm": 21.553136825561523, "learning_rate": 1.705510204081633e-05, "loss": 1.8822, "step": 16155 }, { "epoch": 65.94285714285714, "grad_norm": 34.69620895385742, "learning_rate": 1.7053061224489797e-05, "loss": 1.2435, "step": 16156 }, { "epoch": 65.9469387755102, "grad_norm": 30.845827102661133, "learning_rate": 1.7051020408163264e-05, "loss": 0.7794, "step": 16157 }, { "epoch": 65.95102040816326, "grad_norm": 21.1195068359375, "learning_rate": 1.7048979591836735e-05, "loss": 2.2619, "step": 16158 }, { "epoch": 65.95510204081633, "grad_norm": 28.609939575195312, "learning_rate": 1.7046938775510205e-05, "loss": 0.1942, "step": 16159 }, { "epoch": 65.95918367346938, "grad_norm": 39.9207763671875, "learning_rate": 1.7044897959183676e-05, "loss": 0.5761, "step": 16160 }, { "epoch": 65.96326530612245, "grad_norm": 26.37086296081543, "learning_rate": 1.7042857142857143e-05, "loss": 2.0168, "step": 16161 }, { "epoch": 65.9673469387755, "grad_norm": 30.576324462890625, "learning_rate": 1.7040816326530613e-05, "loss": 1.7804, "step": 16162 }, { "epoch": 65.97142857142858, "grad_norm": 49.434913635253906, "learning_rate": 1.7038775510204084e-05, "loss": 0.4308, "step": 16163 }, { "epoch": 65.97551020408163, "grad_norm": 31.650253295898438, "learning_rate": 1.703673469387755e-05, "loss": 1.0395, "step": 16164 }, { "epoch": 65.9795918367347, "grad_norm": 36.75055694580078, "learning_rate": 1.703469387755102e-05, "loss": 1.577, "step": 16165 }, { "epoch": 65.98367346938775, "grad_norm": 27.767120361328125, "learning_rate": 1.7032653061224492e-05, "loss": 2.1897, "step": 16166 }, { "epoch": 65.98775510204082, "grad_norm": 19.853792190551758, "learning_rate": 1.703061224489796e-05, "loss": 1.9925, "step": 16167 }, { "epoch": 65.99183673469388, "grad_norm": 28.733684539794922, "learning_rate": 1.702857142857143e-05, "loss": 0.5734, "step": 16168 }, { "epoch": 65.99591836734695, "grad_norm": 20.906160354614258, "learning_rate": 1.7026530612244897e-05, "loss": 2.4265, "step": 16169 }, { "epoch": 66.0, "grad_norm": 31.78883934020996, "learning_rate": 1.702448979591837e-05, "loss": 1.1486, "step": 16170 }, { "epoch": 66.00408163265305, "grad_norm": 36.5815315246582, "learning_rate": 1.7022448979591838e-05, "loss": 0.4804, "step": 16171 }, { "epoch": 66.00816326530612, "grad_norm": 32.83290481567383, "learning_rate": 1.7020408163265305e-05, "loss": 0.8502, "step": 16172 }, { "epoch": 66.01224489795918, "grad_norm": 31.799835205078125, "learning_rate": 1.7018367346938775e-05, "loss": 0.4019, "step": 16173 }, { "epoch": 66.01632653061225, "grad_norm": 27.157699584960938, "learning_rate": 1.7016326530612246e-05, "loss": 0.2026, "step": 16174 }, { "epoch": 66.0204081632653, "grad_norm": 19.467153549194336, "learning_rate": 1.7014285714285716e-05, "loss": 2.3741, "step": 16175 }, { "epoch": 66.02448979591837, "grad_norm": 25.753664016723633, "learning_rate": 1.7012244897959184e-05, "loss": 2.3116, "step": 16176 }, { "epoch": 66.02857142857142, "grad_norm": 32.1770133972168, "learning_rate": 1.7010204081632654e-05, "loss": 1.1962, "step": 16177 }, { "epoch": 66.0326530612245, "grad_norm": 20.95807456970215, "learning_rate": 1.7008163265306125e-05, "loss": 2.3065, "step": 16178 }, { "epoch": 66.03673469387755, "grad_norm": 35.112083435058594, "learning_rate": 1.7006122448979592e-05, "loss": 0.8219, "step": 16179 }, { "epoch": 66.04081632653062, "grad_norm": 33.08210754394531, "learning_rate": 1.7004081632653062e-05, "loss": 0.1833, "step": 16180 }, { "epoch": 66.04489795918367, "grad_norm": 7.738157272338867, "learning_rate": 1.700204081632653e-05, "loss": 0.0481, "step": 16181 }, { "epoch": 66.04897959183674, "grad_norm": 10.344311714172363, "learning_rate": 1.7000000000000003e-05, "loss": 3.1371, "step": 16182 }, { "epoch": 66.0530612244898, "grad_norm": 27.420406341552734, "learning_rate": 1.699795918367347e-05, "loss": 1.5926, "step": 16183 }, { "epoch": 66.05714285714286, "grad_norm": 42.193851470947266, "learning_rate": 1.6995918367346938e-05, "loss": 0.6566, "step": 16184 }, { "epoch": 66.06122448979592, "grad_norm": 37.194339752197266, "learning_rate": 1.6993877551020408e-05, "loss": 0.4463, "step": 16185 }, { "epoch": 66.06530612244897, "grad_norm": 29.65974235534668, "learning_rate": 1.699183673469388e-05, "loss": 0.4254, "step": 16186 }, { "epoch": 66.06938775510204, "grad_norm": 29.941862106323242, "learning_rate": 1.698979591836735e-05, "loss": 0.6572, "step": 16187 }, { "epoch": 66.0734693877551, "grad_norm": 49.84479904174805, "learning_rate": 1.6987755102040816e-05, "loss": 0.6802, "step": 16188 }, { "epoch": 66.07755102040817, "grad_norm": 36.077049255371094, "learning_rate": 1.6985714285714287e-05, "loss": 1.1597, "step": 16189 }, { "epoch": 66.08163265306122, "grad_norm": 37.11044692993164, "learning_rate": 1.6983673469387757e-05, "loss": 1.3952, "step": 16190 }, { "epoch": 66.08571428571429, "grad_norm": 30.21263885498047, "learning_rate": 1.6981632653061224e-05, "loss": 1.3471, "step": 16191 }, { "epoch": 66.08979591836734, "grad_norm": 28.25009536743164, "learning_rate": 1.6979591836734695e-05, "loss": 1.6084, "step": 16192 }, { "epoch": 66.09387755102041, "grad_norm": 44.64027404785156, "learning_rate": 1.6977551020408165e-05, "loss": 0.96, "step": 16193 }, { "epoch": 66.09795918367347, "grad_norm": 55.35328674316406, "learning_rate": 1.6975510204081632e-05, "loss": 0.5827, "step": 16194 }, { "epoch": 66.10204081632654, "grad_norm": 47.806026458740234, "learning_rate": 1.6973469387755103e-05, "loss": 0.483, "step": 16195 }, { "epoch": 66.10612244897959, "grad_norm": 36.013282775878906, "learning_rate": 1.697142857142857e-05, "loss": 1.1194, "step": 16196 }, { "epoch": 66.11020408163266, "grad_norm": 35.09294128417969, "learning_rate": 1.6969387755102044e-05, "loss": 0.7759, "step": 16197 }, { "epoch": 66.11428571428571, "grad_norm": 22.589570999145508, "learning_rate": 1.696734693877551e-05, "loss": 1.9333, "step": 16198 }, { "epoch": 66.11836734693877, "grad_norm": 24.55405044555664, "learning_rate": 1.696530612244898e-05, "loss": 2.1929, "step": 16199 }, { "epoch": 66.12244897959184, "grad_norm": 23.16253662109375, "learning_rate": 1.696326530612245e-05, "loss": 2.026, "step": 16200 }, { "epoch": 66.12653061224489, "grad_norm": 27.596906661987305, "learning_rate": 1.696122448979592e-05, "loss": 1.5067, "step": 16201 }, { "epoch": 66.13061224489796, "grad_norm": 28.313920974731445, "learning_rate": 1.695918367346939e-05, "loss": 1.269, "step": 16202 }, { "epoch": 66.13469387755102, "grad_norm": 44.46158218383789, "learning_rate": 1.6957142857142857e-05, "loss": 1.2548, "step": 16203 }, { "epoch": 66.13877551020408, "grad_norm": 30.228534698486328, "learning_rate": 1.6955102040816327e-05, "loss": 1.4399, "step": 16204 }, { "epoch": 66.14285714285714, "grad_norm": 28.173503875732422, "learning_rate": 1.6953061224489798e-05, "loss": 1.8036, "step": 16205 }, { "epoch": 66.14693877551021, "grad_norm": 26.156044006347656, "learning_rate": 1.6951020408163265e-05, "loss": 1.9945, "step": 16206 }, { "epoch": 66.15102040816326, "grad_norm": 18.670673370361328, "learning_rate": 1.6948979591836736e-05, "loss": 2.3622, "step": 16207 }, { "epoch": 66.15510204081633, "grad_norm": 22.366331100463867, "learning_rate": 1.6946938775510203e-05, "loss": 1.8359, "step": 16208 }, { "epoch": 66.15918367346939, "grad_norm": 35.65895462036133, "learning_rate": 1.6944897959183677e-05, "loss": 0.878, "step": 16209 }, { "epoch": 66.16326530612245, "grad_norm": 33.980472564697266, "learning_rate": 1.6942857142857144e-05, "loss": 1.0276, "step": 16210 }, { "epoch": 66.16734693877551, "grad_norm": 38.6937141418457, "learning_rate": 1.694081632653061e-05, "loss": 0.8594, "step": 16211 }, { "epoch": 66.17142857142858, "grad_norm": 32.67250442504883, "learning_rate": 1.693877551020408e-05, "loss": 0.6309, "step": 16212 }, { "epoch": 66.17551020408163, "grad_norm": 37.109859466552734, "learning_rate": 1.6936734693877552e-05, "loss": 1.0314, "step": 16213 }, { "epoch": 66.17959183673469, "grad_norm": 44.99198913574219, "learning_rate": 1.6934693877551022e-05, "loss": 0.6209, "step": 16214 }, { "epoch": 66.18367346938776, "grad_norm": 39.25404739379883, "learning_rate": 1.693265306122449e-05, "loss": 1.4804, "step": 16215 }, { "epoch": 66.18775510204081, "grad_norm": 24.051918029785156, "learning_rate": 1.693061224489796e-05, "loss": 2.2509, "step": 16216 }, { "epoch": 66.19183673469388, "grad_norm": 30.010581970214844, "learning_rate": 1.692857142857143e-05, "loss": 1.6326, "step": 16217 }, { "epoch": 66.19591836734693, "grad_norm": 17.900514602661133, "learning_rate": 1.6926530612244898e-05, "loss": 2.3662, "step": 16218 }, { "epoch": 66.2, "grad_norm": 39.8753662109375, "learning_rate": 1.6924489795918368e-05, "loss": 0.6066, "step": 16219 }, { "epoch": 66.20408163265306, "grad_norm": 26.954551696777344, "learning_rate": 1.692244897959184e-05, "loss": 1.0187, "step": 16220 }, { "epoch": 66.20816326530613, "grad_norm": 56.670799255371094, "learning_rate": 1.692040816326531e-05, "loss": 0.6872, "step": 16221 }, { "epoch": 66.21224489795918, "grad_norm": 30.53900146484375, "learning_rate": 1.6918367346938776e-05, "loss": 0.6303, "step": 16222 }, { "epoch": 66.21632653061225, "grad_norm": 37.150390625, "learning_rate": 1.6916326530612243e-05, "loss": 1.0943, "step": 16223 }, { "epoch": 66.2204081632653, "grad_norm": 21.170621871948242, "learning_rate": 1.6914285714285717e-05, "loss": 1.9185, "step": 16224 }, { "epoch": 66.22448979591837, "grad_norm": 34.477027893066406, "learning_rate": 1.6912244897959185e-05, "loss": 1.337, "step": 16225 }, { "epoch": 66.22857142857143, "grad_norm": 46.9816780090332, "learning_rate": 1.6910204081632655e-05, "loss": 1.3836, "step": 16226 }, { "epoch": 66.2326530612245, "grad_norm": 18.658241271972656, "learning_rate": 1.6908163265306122e-05, "loss": 0.1732, "step": 16227 }, { "epoch": 66.23673469387755, "grad_norm": 11.257454872131348, "learning_rate": 1.6906122448979593e-05, "loss": 0.0927, "step": 16228 }, { "epoch": 66.2408163265306, "grad_norm": 35.932979583740234, "learning_rate": 1.6904081632653063e-05, "loss": 1.1272, "step": 16229 }, { "epoch": 66.24489795918367, "grad_norm": 23.604406356811523, "learning_rate": 1.690204081632653e-05, "loss": 1.7597, "step": 16230 }, { "epoch": 66.24897959183673, "grad_norm": 34.739715576171875, "learning_rate": 1.69e-05, "loss": 0.7991, "step": 16231 }, { "epoch": 66.2530612244898, "grad_norm": 24.186676025390625, "learning_rate": 1.689795918367347e-05, "loss": 1.7609, "step": 16232 }, { "epoch": 66.25714285714285, "grad_norm": 26.557098388671875, "learning_rate": 1.689591836734694e-05, "loss": 1.9889, "step": 16233 }, { "epoch": 66.26122448979592, "grad_norm": 29.932172775268555, "learning_rate": 1.689387755102041e-05, "loss": 0.9102, "step": 16234 }, { "epoch": 66.26530612244898, "grad_norm": 28.747798919677734, "learning_rate": 1.6891836734693876e-05, "loss": 0.4773, "step": 16235 }, { "epoch": 66.26938775510204, "grad_norm": 41.09672927856445, "learning_rate": 1.688979591836735e-05, "loss": 0.4674, "step": 16236 }, { "epoch": 66.2734693877551, "grad_norm": 47.50809860229492, "learning_rate": 1.6887755102040817e-05, "loss": 0.4711, "step": 16237 }, { "epoch": 66.27755102040817, "grad_norm": 26.862018585205078, "learning_rate": 1.6885714285714284e-05, "loss": 1.4827, "step": 16238 }, { "epoch": 66.28163265306122, "grad_norm": 52.07120132446289, "learning_rate": 1.6883673469387755e-05, "loss": 0.5594, "step": 16239 }, { "epoch": 66.28571428571429, "grad_norm": 25.630630493164062, "learning_rate": 1.6881632653061225e-05, "loss": 1.9584, "step": 16240 }, { "epoch": 66.28979591836735, "grad_norm": 42.141693115234375, "learning_rate": 1.6879591836734696e-05, "loss": 0.8371, "step": 16241 }, { "epoch": 66.29387755102042, "grad_norm": 50.749813079833984, "learning_rate": 1.6877551020408163e-05, "loss": 1.3447, "step": 16242 }, { "epoch": 66.29795918367347, "grad_norm": 45.060951232910156, "learning_rate": 1.6875510204081633e-05, "loss": 1.1046, "step": 16243 }, { "epoch": 66.30204081632652, "grad_norm": 39.27219772338867, "learning_rate": 1.6873469387755104e-05, "loss": 1.5398, "step": 16244 }, { "epoch": 66.3061224489796, "grad_norm": 50.1828727722168, "learning_rate": 1.687142857142857e-05, "loss": 1.2659, "step": 16245 }, { "epoch": 66.31020408163265, "grad_norm": 23.522119522094727, "learning_rate": 1.686938775510204e-05, "loss": 1.7426, "step": 16246 }, { "epoch": 66.31428571428572, "grad_norm": 19.922508239746094, "learning_rate": 1.6867346938775512e-05, "loss": 1.7192, "step": 16247 }, { "epoch": 66.31836734693877, "grad_norm": 62.141597747802734, "learning_rate": 1.6865306122448983e-05, "loss": 0.889, "step": 16248 }, { "epoch": 66.32244897959184, "grad_norm": 26.54656982421875, "learning_rate": 1.686326530612245e-05, "loss": 1.5641, "step": 16249 }, { "epoch": 66.3265306122449, "grad_norm": 29.498390197753906, "learning_rate": 1.6861224489795917e-05, "loss": 2.0259, "step": 16250 }, { "epoch": 66.33061224489796, "grad_norm": 30.371246337890625, "learning_rate": 1.685918367346939e-05, "loss": 1.3726, "step": 16251 }, { "epoch": 66.33469387755102, "grad_norm": 28.93169593811035, "learning_rate": 1.6857142857142858e-05, "loss": 1.5945, "step": 16252 }, { "epoch": 66.33877551020409, "grad_norm": 22.091394424438477, "learning_rate": 1.685510204081633e-05, "loss": 2.5649, "step": 16253 }, { "epoch": 66.34285714285714, "grad_norm": 31.89313316345215, "learning_rate": 1.6853061224489796e-05, "loss": 1.4143, "step": 16254 }, { "epoch": 66.34693877551021, "grad_norm": 41.09111404418945, "learning_rate": 1.6851020408163266e-05, "loss": 0.6066, "step": 16255 }, { "epoch": 66.35102040816327, "grad_norm": 53.645015716552734, "learning_rate": 1.6848979591836737e-05, "loss": 0.5777, "step": 16256 }, { "epoch": 66.35510204081632, "grad_norm": 34.53325653076172, "learning_rate": 1.6846938775510204e-05, "loss": 0.3953, "step": 16257 }, { "epoch": 66.35918367346939, "grad_norm": 28.18657684326172, "learning_rate": 1.6844897959183674e-05, "loss": 1.8708, "step": 16258 }, { "epoch": 66.36326530612244, "grad_norm": 36.007080078125, "learning_rate": 1.6842857142857145e-05, "loss": 1.6359, "step": 16259 }, { "epoch": 66.36734693877551, "grad_norm": 31.288801193237305, "learning_rate": 1.6840816326530612e-05, "loss": 1.3755, "step": 16260 }, { "epoch": 66.37142857142857, "grad_norm": 32.22489547729492, "learning_rate": 1.6838775510204082e-05, "loss": 0.9581, "step": 16261 }, { "epoch": 66.37551020408164, "grad_norm": 22.39234161376953, "learning_rate": 1.683673469387755e-05, "loss": 1.8364, "step": 16262 }, { "epoch": 66.37959183673469, "grad_norm": 28.001928329467773, "learning_rate": 1.6834693877551023e-05, "loss": 1.8513, "step": 16263 }, { "epoch": 66.38367346938776, "grad_norm": 26.92668914794922, "learning_rate": 1.683265306122449e-05, "loss": 1.4923, "step": 16264 }, { "epoch": 66.38775510204081, "grad_norm": 31.419151306152344, "learning_rate": 1.6830612244897958e-05, "loss": 1.5795, "step": 16265 }, { "epoch": 66.39183673469388, "grad_norm": 26.897579193115234, "learning_rate": 1.6828571428571428e-05, "loss": 1.6909, "step": 16266 }, { "epoch": 66.39591836734694, "grad_norm": 24.858909606933594, "learning_rate": 1.68265306122449e-05, "loss": 1.7628, "step": 16267 }, { "epoch": 66.4, "grad_norm": 19.304540634155273, "learning_rate": 1.682448979591837e-05, "loss": 2.3754, "step": 16268 }, { "epoch": 66.40408163265306, "grad_norm": 33.395294189453125, "learning_rate": 1.6822448979591836e-05, "loss": 1.1122, "step": 16269 }, { "epoch": 66.40816326530613, "grad_norm": 24.577810287475586, "learning_rate": 1.6820408163265307e-05, "loss": 1.7832, "step": 16270 }, { "epoch": 66.41224489795918, "grad_norm": 15.655570983886719, "learning_rate": 1.6818367346938777e-05, "loss": 2.8225, "step": 16271 }, { "epoch": 66.41632653061224, "grad_norm": 37.4971923828125, "learning_rate": 1.6816326530612244e-05, "loss": 1.1659, "step": 16272 }, { "epoch": 66.42040816326531, "grad_norm": 25.599374771118164, "learning_rate": 1.6814285714285715e-05, "loss": 1.7083, "step": 16273 }, { "epoch": 66.42448979591836, "grad_norm": 25.649967193603516, "learning_rate": 1.6812244897959185e-05, "loss": 0.4567, "step": 16274 }, { "epoch": 66.42857142857143, "grad_norm": 29.321796417236328, "learning_rate": 1.6810204081632656e-05, "loss": 1.8023, "step": 16275 }, { "epoch": 66.43265306122449, "grad_norm": 56.1231575012207, "learning_rate": 1.6808163265306123e-05, "loss": 0.8588, "step": 16276 }, { "epoch": 66.43673469387755, "grad_norm": 31.799171447753906, "learning_rate": 1.680612244897959e-05, "loss": 1.6143, "step": 16277 }, { "epoch": 66.44081632653061, "grad_norm": 35.212581634521484, "learning_rate": 1.6804081632653064e-05, "loss": 1.0804, "step": 16278 }, { "epoch": 66.44489795918368, "grad_norm": 29.097457885742188, "learning_rate": 1.680204081632653e-05, "loss": 1.8065, "step": 16279 }, { "epoch": 66.44897959183673, "grad_norm": 26.56881332397461, "learning_rate": 1.6800000000000002e-05, "loss": 1.7947, "step": 16280 }, { "epoch": 66.4530612244898, "grad_norm": 29.322023391723633, "learning_rate": 1.679795918367347e-05, "loss": 1.7077, "step": 16281 }, { "epoch": 66.45714285714286, "grad_norm": 23.13077163696289, "learning_rate": 1.679591836734694e-05, "loss": 2.2416, "step": 16282 }, { "epoch": 66.46122448979592, "grad_norm": 24.753623962402344, "learning_rate": 1.679387755102041e-05, "loss": 1.8648, "step": 16283 }, { "epoch": 66.46530612244898, "grad_norm": 45.79294967651367, "learning_rate": 1.6791836734693877e-05, "loss": 0.9628, "step": 16284 }, { "epoch": 66.46938775510205, "grad_norm": 41.47808837890625, "learning_rate": 1.6789795918367348e-05, "loss": 0.6329, "step": 16285 }, { "epoch": 66.4734693877551, "grad_norm": 27.458049774169922, "learning_rate": 1.6787755102040818e-05, "loss": 1.8568, "step": 16286 }, { "epoch": 66.47755102040816, "grad_norm": 25.142484664916992, "learning_rate": 1.6785714285714285e-05, "loss": 1.8066, "step": 16287 }, { "epoch": 66.48163265306123, "grad_norm": 31.24664878845215, "learning_rate": 1.6783673469387756e-05, "loss": 1.7368, "step": 16288 }, { "epoch": 66.48571428571428, "grad_norm": 33.25042724609375, "learning_rate": 1.6781632653061223e-05, "loss": 0.5885, "step": 16289 }, { "epoch": 66.48979591836735, "grad_norm": 28.08422088623047, "learning_rate": 1.6779591836734697e-05, "loss": 1.7129, "step": 16290 }, { "epoch": 66.4938775510204, "grad_norm": 48.61794662475586, "learning_rate": 1.6777551020408164e-05, "loss": 1.4024, "step": 16291 }, { "epoch": 66.49795918367347, "grad_norm": 18.95726776123047, "learning_rate": 1.6775510204081634e-05, "loss": 2.3373, "step": 16292 }, { "epoch": 66.50204081632653, "grad_norm": 32.10269546508789, "learning_rate": 1.67734693877551e-05, "loss": 1.4939, "step": 16293 }, { "epoch": 66.5061224489796, "grad_norm": 46.97048568725586, "learning_rate": 1.6771428571428572e-05, "loss": 0.4353, "step": 16294 }, { "epoch": 66.51020408163265, "grad_norm": 45.0165901184082, "learning_rate": 1.6769387755102043e-05, "loss": 0.3235, "step": 16295 }, { "epoch": 66.51428571428572, "grad_norm": 37.28779983520508, "learning_rate": 1.676734693877551e-05, "loss": 0.5743, "step": 16296 }, { "epoch": 66.51836734693877, "grad_norm": 13.676560401916504, "learning_rate": 1.676530612244898e-05, "loss": 2.5515, "step": 16297 }, { "epoch": 66.52244897959184, "grad_norm": 31.77375602722168, "learning_rate": 1.676326530612245e-05, "loss": 1.6536, "step": 16298 }, { "epoch": 66.5265306122449, "grad_norm": 27.95705223083496, "learning_rate": 1.6761224489795918e-05, "loss": 0.2369, "step": 16299 }, { "epoch": 66.53061224489795, "grad_norm": 34.8892822265625, "learning_rate": 1.675918367346939e-05, "loss": 0.3908, "step": 16300 }, { "epoch": 66.53469387755102, "grad_norm": 39.663047790527344, "learning_rate": 1.675714285714286e-05, "loss": 1.2211, "step": 16301 }, { "epoch": 66.53877551020408, "grad_norm": 42.03987121582031, "learning_rate": 1.675510204081633e-05, "loss": 1.3512, "step": 16302 }, { "epoch": 66.54285714285714, "grad_norm": 33.72939682006836, "learning_rate": 1.6753061224489796e-05, "loss": 1.5367, "step": 16303 }, { "epoch": 66.5469387755102, "grad_norm": 19.370697021484375, "learning_rate": 1.6751020408163264e-05, "loss": 2.1862, "step": 16304 }, { "epoch": 66.55102040816327, "grad_norm": 26.95726203918457, "learning_rate": 1.6748979591836738e-05, "loss": 2.1142, "step": 16305 }, { "epoch": 66.55510204081632, "grad_norm": 27.341732025146484, "learning_rate": 1.6746938775510205e-05, "loss": 1.7506, "step": 16306 }, { "epoch": 66.55918367346939, "grad_norm": 19.955202102661133, "learning_rate": 1.6744897959183675e-05, "loss": 2.3162, "step": 16307 }, { "epoch": 66.56326530612245, "grad_norm": 33.15121841430664, "learning_rate": 1.6742857142857142e-05, "loss": 0.9968, "step": 16308 }, { "epoch": 66.56734693877551, "grad_norm": 38.414772033691406, "learning_rate": 1.6740816326530613e-05, "loss": 1.0102, "step": 16309 }, { "epoch": 66.57142857142857, "grad_norm": 31.154293060302734, "learning_rate": 1.6738775510204083e-05, "loss": 1.6171, "step": 16310 }, { "epoch": 66.57551020408164, "grad_norm": 33.99270248413086, "learning_rate": 1.673673469387755e-05, "loss": 1.0543, "step": 16311 }, { "epoch": 66.57959183673469, "grad_norm": 21.615400314331055, "learning_rate": 1.673469387755102e-05, "loss": 1.7898, "step": 16312 }, { "epoch": 66.58367346938776, "grad_norm": 57.19232940673828, "learning_rate": 1.673265306122449e-05, "loss": 0.8433, "step": 16313 }, { "epoch": 66.58775510204082, "grad_norm": 30.49323844909668, "learning_rate": 1.6730612244897962e-05, "loss": 1.1883, "step": 16314 }, { "epoch": 66.59183673469387, "grad_norm": 31.886110305786133, "learning_rate": 1.672857142857143e-05, "loss": 1.1098, "step": 16315 }, { "epoch": 66.59591836734694, "grad_norm": 25.64500617980957, "learning_rate": 1.6726530612244896e-05, "loss": 1.8979, "step": 16316 }, { "epoch": 66.6, "grad_norm": 36.4980583190918, "learning_rate": 1.672448979591837e-05, "loss": 0.9284, "step": 16317 }, { "epoch": 66.60408163265306, "grad_norm": 16.618011474609375, "learning_rate": 1.6722448979591837e-05, "loss": 2.4403, "step": 16318 }, { "epoch": 66.60816326530612, "grad_norm": 36.73887252807617, "learning_rate": 1.6720408163265308e-05, "loss": 0.782, "step": 16319 }, { "epoch": 66.61224489795919, "grad_norm": 25.662353515625, "learning_rate": 1.6718367346938775e-05, "loss": 1.0781, "step": 16320 }, { "epoch": 66.61632653061224, "grad_norm": 39.92888259887695, "learning_rate": 1.6716326530612245e-05, "loss": 0.9028, "step": 16321 }, { "epoch": 66.62040816326531, "grad_norm": 49.88111877441406, "learning_rate": 1.6714285714285716e-05, "loss": 1.1673, "step": 16322 }, { "epoch": 66.62448979591836, "grad_norm": 38.950645446777344, "learning_rate": 1.6712244897959183e-05, "loss": 0.9598, "step": 16323 }, { "epoch": 66.62857142857143, "grad_norm": 37.133792877197266, "learning_rate": 1.6710204081632654e-05, "loss": 1.424, "step": 16324 }, { "epoch": 66.63265306122449, "grad_norm": 35.33209228515625, "learning_rate": 1.6708163265306124e-05, "loss": 0.4303, "step": 16325 }, { "epoch": 66.63673469387756, "grad_norm": 42.396446228027344, "learning_rate": 1.670612244897959e-05, "loss": 1.5286, "step": 16326 }, { "epoch": 66.64081632653061, "grad_norm": 43.58113479614258, "learning_rate": 1.6704081632653062e-05, "loss": 0.627, "step": 16327 }, { "epoch": 66.64489795918368, "grad_norm": 31.064119338989258, "learning_rate": 1.6702040816326532e-05, "loss": 1.4368, "step": 16328 }, { "epoch": 66.64897959183673, "grad_norm": 29.825899124145508, "learning_rate": 1.6700000000000003e-05, "loss": 1.0917, "step": 16329 }, { "epoch": 66.65306122448979, "grad_norm": 38.59750747680664, "learning_rate": 1.669795918367347e-05, "loss": 1.3208, "step": 16330 }, { "epoch": 66.65714285714286, "grad_norm": 26.86979103088379, "learning_rate": 1.6695918367346937e-05, "loss": 1.9729, "step": 16331 }, { "epoch": 66.66122448979591, "grad_norm": 34.477149963378906, "learning_rate": 1.669387755102041e-05, "loss": 1.444, "step": 16332 }, { "epoch": 66.66530612244898, "grad_norm": 37.82658386230469, "learning_rate": 1.6691836734693878e-05, "loss": 0.6585, "step": 16333 }, { "epoch": 66.66938775510204, "grad_norm": 38.717899322509766, "learning_rate": 1.668979591836735e-05, "loss": 0.7308, "step": 16334 }, { "epoch": 66.6734693877551, "grad_norm": 39.25587463378906, "learning_rate": 1.6687755102040816e-05, "loss": 1.2171, "step": 16335 }, { "epoch": 66.67755102040816, "grad_norm": 35.15924835205078, "learning_rate": 1.6685714285714286e-05, "loss": 0.542, "step": 16336 }, { "epoch": 66.68163265306123, "grad_norm": 20.83257484436035, "learning_rate": 1.6683673469387757e-05, "loss": 1.6486, "step": 16337 }, { "epoch": 66.68571428571428, "grad_norm": 30.066076278686523, "learning_rate": 1.6681632653061224e-05, "loss": 1.5663, "step": 16338 }, { "epoch": 66.68979591836735, "grad_norm": 37.34454345703125, "learning_rate": 1.6679591836734694e-05, "loss": 1.565, "step": 16339 }, { "epoch": 66.6938775510204, "grad_norm": 33.89433670043945, "learning_rate": 1.6677551020408165e-05, "loss": 1.0408, "step": 16340 }, { "epoch": 66.69795918367348, "grad_norm": 28.520362854003906, "learning_rate": 1.6675510204081635e-05, "loss": 1.6787, "step": 16341 }, { "epoch": 66.70204081632653, "grad_norm": 42.04787063598633, "learning_rate": 1.6673469387755102e-05, "loss": 0.6378, "step": 16342 }, { "epoch": 66.70612244897958, "grad_norm": 36.23528289794922, "learning_rate": 1.667142857142857e-05, "loss": 1.435, "step": 16343 }, { "epoch": 66.71020408163265, "grad_norm": 40.30896759033203, "learning_rate": 1.6669387755102044e-05, "loss": 0.9728, "step": 16344 }, { "epoch": 66.71428571428571, "grad_norm": 29.892398834228516, "learning_rate": 1.666734693877551e-05, "loss": 1.5019, "step": 16345 }, { "epoch": 66.71836734693878, "grad_norm": 20.01759147644043, "learning_rate": 1.666530612244898e-05, "loss": 0.1821, "step": 16346 }, { "epoch": 66.72244897959183, "grad_norm": 27.89605140686035, "learning_rate": 1.6663265306122448e-05, "loss": 2.0019, "step": 16347 }, { "epoch": 66.7265306122449, "grad_norm": 24.413349151611328, "learning_rate": 1.666122448979592e-05, "loss": 1.9921, "step": 16348 }, { "epoch": 66.73061224489796, "grad_norm": 34.7769775390625, "learning_rate": 1.665918367346939e-05, "loss": 0.9088, "step": 16349 }, { "epoch": 66.73469387755102, "grad_norm": 24.052358627319336, "learning_rate": 1.6657142857142856e-05, "loss": 1.9056, "step": 16350 }, { "epoch": 66.73877551020408, "grad_norm": 29.28044319152832, "learning_rate": 1.6655102040816327e-05, "loss": 0.6106, "step": 16351 }, { "epoch": 66.74285714285715, "grad_norm": 41.83922576904297, "learning_rate": 1.6653061224489797e-05, "loss": 0.5977, "step": 16352 }, { "epoch": 66.7469387755102, "grad_norm": 33.085384368896484, "learning_rate": 1.6651020408163265e-05, "loss": 1.5055, "step": 16353 }, { "epoch": 66.75102040816327, "grad_norm": 22.972681045532227, "learning_rate": 1.6648979591836735e-05, "loss": 2.3226, "step": 16354 }, { "epoch": 66.75510204081633, "grad_norm": 27.487943649291992, "learning_rate": 1.6646938775510206e-05, "loss": 1.9803, "step": 16355 }, { "epoch": 66.7591836734694, "grad_norm": 30.10951042175293, "learning_rate": 1.6644897959183676e-05, "loss": 1.5502, "step": 16356 }, { "epoch": 66.76326530612245, "grad_norm": 40.412532806396484, "learning_rate": 1.6642857142857143e-05, "loss": 0.3274, "step": 16357 }, { "epoch": 66.7673469387755, "grad_norm": 30.422443389892578, "learning_rate": 1.6640816326530614e-05, "loss": 1.7357, "step": 16358 }, { "epoch": 66.77142857142857, "grad_norm": 38.710933685302734, "learning_rate": 1.6638775510204084e-05, "loss": 0.6372, "step": 16359 }, { "epoch": 66.77551020408163, "grad_norm": 29.7061710357666, "learning_rate": 1.663673469387755e-05, "loss": 1.3931, "step": 16360 }, { "epoch": 66.7795918367347, "grad_norm": 34.83560562133789, "learning_rate": 1.6634693877551022e-05, "loss": 1.7615, "step": 16361 }, { "epoch": 66.78367346938775, "grad_norm": 24.576818466186523, "learning_rate": 1.663265306122449e-05, "loss": 1.6721, "step": 16362 }, { "epoch": 66.78775510204082, "grad_norm": 34.4704704284668, "learning_rate": 1.663061224489796e-05, "loss": 1.1262, "step": 16363 }, { "epoch": 66.79183673469387, "grad_norm": 41.90065383911133, "learning_rate": 1.662857142857143e-05, "loss": 0.9882, "step": 16364 }, { "epoch": 66.79591836734694, "grad_norm": 21.233293533325195, "learning_rate": 1.6626530612244897e-05, "loss": 0.1901, "step": 16365 }, { "epoch": 66.8, "grad_norm": 26.598875045776367, "learning_rate": 1.6624489795918368e-05, "loss": 1.8767, "step": 16366 }, { "epoch": 66.80408163265307, "grad_norm": 29.1475772857666, "learning_rate": 1.6622448979591838e-05, "loss": 1.2743, "step": 16367 }, { "epoch": 66.80816326530612, "grad_norm": 30.041048049926758, "learning_rate": 1.662040816326531e-05, "loss": 1.6534, "step": 16368 }, { "epoch": 66.81224489795919, "grad_norm": 37.51622009277344, "learning_rate": 1.6618367346938776e-05, "loss": 0.6606, "step": 16369 }, { "epoch": 66.81632653061224, "grad_norm": 39.09479904174805, "learning_rate": 1.6616326530612243e-05, "loss": 1.4282, "step": 16370 }, { "epoch": 66.82040816326531, "grad_norm": 35.485748291015625, "learning_rate": 1.6614285714285717e-05, "loss": 1.3689, "step": 16371 }, { "epoch": 66.82448979591837, "grad_norm": 39.24522018432617, "learning_rate": 1.6612244897959184e-05, "loss": 0.607, "step": 16372 }, { "epoch": 66.82857142857142, "grad_norm": 26.800411224365234, "learning_rate": 1.6610204081632655e-05, "loss": 0.2004, "step": 16373 }, { "epoch": 66.83265306122449, "grad_norm": 31.719209671020508, "learning_rate": 1.660816326530612e-05, "loss": 1.1204, "step": 16374 }, { "epoch": 66.83673469387755, "grad_norm": 36.01780319213867, "learning_rate": 1.6606122448979592e-05, "loss": 1.0379, "step": 16375 }, { "epoch": 66.84081632653061, "grad_norm": 34.87809753417969, "learning_rate": 1.6604081632653063e-05, "loss": 1.0384, "step": 16376 }, { "epoch": 66.84489795918367, "grad_norm": 26.09613609313965, "learning_rate": 1.660204081632653e-05, "loss": 0.2234, "step": 16377 }, { "epoch": 66.84897959183674, "grad_norm": 23.058534622192383, "learning_rate": 1.66e-05, "loss": 0.1972, "step": 16378 }, { "epoch": 66.85306122448979, "grad_norm": 39.61422348022461, "learning_rate": 1.659795918367347e-05, "loss": 1.1588, "step": 16379 }, { "epoch": 66.85714285714286, "grad_norm": 30.574541091918945, "learning_rate": 1.659591836734694e-05, "loss": 1.7075, "step": 16380 }, { "epoch": 66.86122448979592, "grad_norm": 30.416091918945312, "learning_rate": 1.659387755102041e-05, "loss": 0.5971, "step": 16381 }, { "epoch": 66.86530612244898, "grad_norm": 24.15484619140625, "learning_rate": 1.659183673469388e-05, "loss": 2.2337, "step": 16382 }, { "epoch": 66.86938775510204, "grad_norm": 39.31850051879883, "learning_rate": 1.658979591836735e-05, "loss": 0.891, "step": 16383 }, { "epoch": 66.87346938775511, "grad_norm": 25.140926361083984, "learning_rate": 1.6587755102040817e-05, "loss": 0.1458, "step": 16384 }, { "epoch": 66.87755102040816, "grad_norm": 18.95380210876465, "learning_rate": 1.6585714285714287e-05, "loss": 0.2162, "step": 16385 }, { "epoch": 66.88163265306123, "grad_norm": 17.607397079467773, "learning_rate": 1.6583673469387758e-05, "loss": 2.6966, "step": 16386 }, { "epoch": 66.88571428571429, "grad_norm": 42.92282485961914, "learning_rate": 1.6581632653061225e-05, "loss": 0.4715, "step": 16387 }, { "epoch": 66.88979591836734, "grad_norm": 28.374284744262695, "learning_rate": 1.6579591836734695e-05, "loss": 1.6071, "step": 16388 }, { "epoch": 66.89387755102041, "grad_norm": 44.95185470581055, "learning_rate": 1.6577551020408162e-05, "loss": 1.5675, "step": 16389 }, { "epoch": 66.89795918367346, "grad_norm": 26.855602264404297, "learning_rate": 1.6575510204081633e-05, "loss": 0.1772, "step": 16390 }, { "epoch": 66.90204081632653, "grad_norm": 39.0537223815918, "learning_rate": 1.6573469387755103e-05, "loss": 0.6377, "step": 16391 }, { "epoch": 66.90612244897959, "grad_norm": 30.835786819458008, "learning_rate": 1.657142857142857e-05, "loss": 1.9253, "step": 16392 }, { "epoch": 66.91020408163266, "grad_norm": 28.569164276123047, "learning_rate": 1.656938775510204e-05, "loss": 1.974, "step": 16393 }, { "epoch": 66.91428571428571, "grad_norm": 33.851436614990234, "learning_rate": 1.656734693877551e-05, "loss": 1.265, "step": 16394 }, { "epoch": 66.91836734693878, "grad_norm": 33.187347412109375, "learning_rate": 1.6565306122448982e-05, "loss": 0.6925, "step": 16395 }, { "epoch": 66.92244897959183, "grad_norm": 33.52284622192383, "learning_rate": 1.656326530612245e-05, "loss": 1.8488, "step": 16396 }, { "epoch": 66.9265306122449, "grad_norm": 55.76482009887695, "learning_rate": 1.6561224489795916e-05, "loss": 1.6654, "step": 16397 }, { "epoch": 66.93061224489796, "grad_norm": 21.623838424682617, "learning_rate": 1.655918367346939e-05, "loss": 1.852, "step": 16398 }, { "epoch": 66.93469387755103, "grad_norm": 28.963117599487305, "learning_rate": 1.6557142857142857e-05, "loss": 1.0145, "step": 16399 }, { "epoch": 66.93877551020408, "grad_norm": 54.2880859375, "learning_rate": 1.6555102040816328e-05, "loss": 0.7521, "step": 16400 }, { "epoch": 66.94285714285714, "grad_norm": 38.84233856201172, "learning_rate": 1.6553061224489795e-05, "loss": 1.2929, "step": 16401 }, { "epoch": 66.9469387755102, "grad_norm": 25.805402755737305, "learning_rate": 1.6551020408163266e-05, "loss": 2.2681, "step": 16402 }, { "epoch": 66.95102040816326, "grad_norm": 33.542381286621094, "learning_rate": 1.6548979591836736e-05, "loss": 0.8174, "step": 16403 }, { "epoch": 66.95510204081633, "grad_norm": 40.94868087768555, "learning_rate": 1.6546938775510203e-05, "loss": 0.2925, "step": 16404 }, { "epoch": 66.95918367346938, "grad_norm": 33.757965087890625, "learning_rate": 1.6544897959183674e-05, "loss": 1.676, "step": 16405 }, { "epoch": 66.96326530612245, "grad_norm": 30.7613582611084, "learning_rate": 1.6542857142857144e-05, "loss": 1.7304, "step": 16406 }, { "epoch": 66.9673469387755, "grad_norm": 43.72926330566406, "learning_rate": 1.6540816326530615e-05, "loss": 0.7327, "step": 16407 }, { "epoch": 66.97142857142858, "grad_norm": 29.18415069580078, "learning_rate": 1.6538775510204082e-05, "loss": 1.6103, "step": 16408 }, { "epoch": 66.97551020408163, "grad_norm": 35.408809661865234, "learning_rate": 1.6536734693877552e-05, "loss": 1.0082, "step": 16409 }, { "epoch": 66.9795918367347, "grad_norm": 20.201099395751953, "learning_rate": 1.6534693877551023e-05, "loss": 2.2893, "step": 16410 }, { "epoch": 66.98367346938775, "grad_norm": 41.61146545410156, "learning_rate": 1.653265306122449e-05, "loss": 0.3883, "step": 16411 }, { "epoch": 66.98775510204082, "grad_norm": 21.40479278564453, "learning_rate": 1.653061224489796e-05, "loss": 1.8582, "step": 16412 }, { "epoch": 66.99183673469388, "grad_norm": 43.360042572021484, "learning_rate": 1.652857142857143e-05, "loss": 1.3004, "step": 16413 }, { "epoch": 66.99591836734695, "grad_norm": 37.405982971191406, "learning_rate": 1.6526530612244898e-05, "loss": 1.3872, "step": 16414 }, { "epoch": 67.0, "grad_norm": 33.7536506652832, "learning_rate": 1.652448979591837e-05, "loss": 0.6234, "step": 16415 }, { "epoch": 67.00408163265305, "grad_norm": 41.404232025146484, "learning_rate": 1.6522448979591836e-05, "loss": 0.6501, "step": 16416 }, { "epoch": 67.00816326530612, "grad_norm": 43.27492141723633, "learning_rate": 1.6520408163265306e-05, "loss": 0.4411, "step": 16417 }, { "epoch": 67.01224489795918, "grad_norm": 47.955474853515625, "learning_rate": 1.6518367346938777e-05, "loss": 0.5516, "step": 16418 }, { "epoch": 67.01632653061225, "grad_norm": 37.34360122680664, "learning_rate": 1.6516326530612244e-05, "loss": 0.8665, "step": 16419 }, { "epoch": 67.0204081632653, "grad_norm": 34.379425048828125, "learning_rate": 1.6514285714285714e-05, "loss": 1.5738, "step": 16420 }, { "epoch": 67.02448979591837, "grad_norm": 37.563045501708984, "learning_rate": 1.6512244897959185e-05, "loss": 0.8156, "step": 16421 }, { "epoch": 67.02857142857142, "grad_norm": 15.781800270080566, "learning_rate": 1.6510204081632655e-05, "loss": 2.495, "step": 16422 }, { "epoch": 67.0326530612245, "grad_norm": 35.46234893798828, "learning_rate": 1.6508163265306123e-05, "loss": 1.0051, "step": 16423 }, { "epoch": 67.03673469387755, "grad_norm": 33.389102935791016, "learning_rate": 1.650612244897959e-05, "loss": 1.4226, "step": 16424 }, { "epoch": 67.04081632653062, "grad_norm": 23.218034744262695, "learning_rate": 1.6504081632653064e-05, "loss": 1.8781, "step": 16425 }, { "epoch": 67.04489795918367, "grad_norm": 30.45500373840332, "learning_rate": 1.650204081632653e-05, "loss": 0.4057, "step": 16426 }, { "epoch": 67.04897959183674, "grad_norm": 26.565168380737305, "learning_rate": 1.65e-05, "loss": 0.9793, "step": 16427 }, { "epoch": 67.0530612244898, "grad_norm": 25.78362464904785, "learning_rate": 1.649795918367347e-05, "loss": 0.6074, "step": 16428 }, { "epoch": 67.05714285714286, "grad_norm": 41.14421463012695, "learning_rate": 1.6495918367346942e-05, "loss": 0.4973, "step": 16429 }, { "epoch": 67.06122448979592, "grad_norm": 36.56647872924805, "learning_rate": 1.649387755102041e-05, "loss": 1.3285, "step": 16430 }, { "epoch": 67.06530612244897, "grad_norm": 30.535131454467773, "learning_rate": 1.6491836734693877e-05, "loss": 0.7002, "step": 16431 }, { "epoch": 67.06938775510204, "grad_norm": 27.195730209350586, "learning_rate": 1.6489795918367347e-05, "loss": 1.4518, "step": 16432 }, { "epoch": 67.0734693877551, "grad_norm": 30.08440399169922, "learning_rate": 1.6487755102040818e-05, "loss": 1.4812, "step": 16433 }, { "epoch": 67.07755102040817, "grad_norm": 22.79113006591797, "learning_rate": 1.6485714285714288e-05, "loss": 2.1235, "step": 16434 }, { "epoch": 67.08163265306122, "grad_norm": 36.82450485229492, "learning_rate": 1.6483673469387755e-05, "loss": 0.3915, "step": 16435 }, { "epoch": 67.08571428571429, "grad_norm": 23.087238311767578, "learning_rate": 1.6481632653061226e-05, "loss": 0.1718, "step": 16436 }, { "epoch": 67.08979591836734, "grad_norm": 22.0559024810791, "learning_rate": 1.6479591836734696e-05, "loss": 2.2008, "step": 16437 }, { "epoch": 67.09387755102041, "grad_norm": 35.16983413696289, "learning_rate": 1.6477551020408163e-05, "loss": 0.8357, "step": 16438 }, { "epoch": 67.09795918367347, "grad_norm": 29.31447982788086, "learning_rate": 1.6475510204081634e-05, "loss": 1.3391, "step": 16439 }, { "epoch": 67.10204081632654, "grad_norm": 31.068462371826172, "learning_rate": 1.6473469387755104e-05, "loss": 0.8469, "step": 16440 }, { "epoch": 67.10612244897959, "grad_norm": 30.33446502685547, "learning_rate": 1.647142857142857e-05, "loss": 1.7265, "step": 16441 }, { "epoch": 67.11020408163266, "grad_norm": 27.48535919189453, "learning_rate": 1.6469387755102042e-05, "loss": 0.4485, "step": 16442 }, { "epoch": 67.11428571428571, "grad_norm": 22.85016632080078, "learning_rate": 1.646734693877551e-05, "loss": 1.7922, "step": 16443 }, { "epoch": 67.11836734693877, "grad_norm": 18.06471061706543, "learning_rate": 1.646530612244898e-05, "loss": 2.1897, "step": 16444 }, { "epoch": 67.12244897959184, "grad_norm": 33.785316467285156, "learning_rate": 1.646326530612245e-05, "loss": 0.3754, "step": 16445 }, { "epoch": 67.12653061224489, "grad_norm": 35.69735336303711, "learning_rate": 1.6461224489795917e-05, "loss": 0.5901, "step": 16446 }, { "epoch": 67.13061224489796, "grad_norm": 31.84625244140625, "learning_rate": 1.6459183673469388e-05, "loss": 1.0038, "step": 16447 }, { "epoch": 67.13469387755102, "grad_norm": 35.82701110839844, "learning_rate": 1.645714285714286e-05, "loss": 1.0848, "step": 16448 }, { "epoch": 67.13877551020408, "grad_norm": 22.34130859375, "learning_rate": 1.645510204081633e-05, "loss": 2.0042, "step": 16449 }, { "epoch": 67.14285714285714, "grad_norm": 40.51988983154297, "learning_rate": 1.6453061224489796e-05, "loss": 1.3736, "step": 16450 }, { "epoch": 67.14693877551021, "grad_norm": 36.58967971801758, "learning_rate": 1.6451020408163266e-05, "loss": 0.7734, "step": 16451 }, { "epoch": 67.15102040816326, "grad_norm": 23.708568572998047, "learning_rate": 1.6448979591836737e-05, "loss": 2.2226, "step": 16452 }, { "epoch": 67.15510204081633, "grad_norm": 27.280344009399414, "learning_rate": 1.6446938775510204e-05, "loss": 1.0519, "step": 16453 }, { "epoch": 67.15918367346939, "grad_norm": 40.552330017089844, "learning_rate": 1.6444897959183675e-05, "loss": 0.9269, "step": 16454 }, { "epoch": 67.16326530612245, "grad_norm": 28.470714569091797, "learning_rate": 1.6442857142857142e-05, "loss": 0.2213, "step": 16455 }, { "epoch": 67.16734693877551, "grad_norm": 21.276880264282227, "learning_rate": 1.6440816326530616e-05, "loss": 1.932, "step": 16456 }, { "epoch": 67.17142857142858, "grad_norm": 45.459678649902344, "learning_rate": 1.6438775510204083e-05, "loss": 1.0181, "step": 16457 }, { "epoch": 67.17551020408163, "grad_norm": 32.35266876220703, "learning_rate": 1.643673469387755e-05, "loss": 1.6707, "step": 16458 }, { "epoch": 67.17959183673469, "grad_norm": 25.997882843017578, "learning_rate": 1.643469387755102e-05, "loss": 1.8345, "step": 16459 }, { "epoch": 67.18367346938776, "grad_norm": 31.9429931640625, "learning_rate": 1.643265306122449e-05, "loss": 1.3249, "step": 16460 }, { "epoch": 67.18775510204081, "grad_norm": 35.58367919921875, "learning_rate": 1.643061224489796e-05, "loss": 1.1526, "step": 16461 }, { "epoch": 67.19183673469388, "grad_norm": 9.300874710083008, "learning_rate": 1.642857142857143e-05, "loss": 3.1315, "step": 16462 }, { "epoch": 67.19591836734693, "grad_norm": 38.5963134765625, "learning_rate": 1.64265306122449e-05, "loss": 0.7231, "step": 16463 }, { "epoch": 67.2, "grad_norm": 32.21208953857422, "learning_rate": 1.642448979591837e-05, "loss": 1.2601, "step": 16464 }, { "epoch": 67.20408163265306, "grad_norm": 40.031375885009766, "learning_rate": 1.6422448979591837e-05, "loss": 0.5076, "step": 16465 }, { "epoch": 67.20816326530613, "grad_norm": 31.65546989440918, "learning_rate": 1.6420408163265307e-05, "loss": 1.3978, "step": 16466 }, { "epoch": 67.21224489795918, "grad_norm": 27.783451080322266, "learning_rate": 1.6418367346938778e-05, "loss": 1.6942, "step": 16467 }, { "epoch": 67.21632653061225, "grad_norm": 23.34273338317871, "learning_rate": 1.6416326530612245e-05, "loss": 0.1969, "step": 16468 }, { "epoch": 67.2204081632653, "grad_norm": 51.44569778442383, "learning_rate": 1.6414285714285715e-05, "loss": 1.2774, "step": 16469 }, { "epoch": 67.22448979591837, "grad_norm": 52.60663604736328, "learning_rate": 1.6412244897959183e-05, "loss": 1.1506, "step": 16470 }, { "epoch": 67.22857142857143, "grad_norm": 31.982393264770508, "learning_rate": 1.6410204081632653e-05, "loss": 0.3204, "step": 16471 }, { "epoch": 67.2326530612245, "grad_norm": 43.28772735595703, "learning_rate": 1.6408163265306124e-05, "loss": 0.4917, "step": 16472 }, { "epoch": 67.23673469387755, "grad_norm": 44.14601516723633, "learning_rate": 1.6406122448979594e-05, "loss": 0.4492, "step": 16473 }, { "epoch": 67.2408163265306, "grad_norm": 29.781404495239258, "learning_rate": 1.640408163265306e-05, "loss": 0.5491, "step": 16474 }, { "epoch": 67.24489795918367, "grad_norm": 21.833303451538086, "learning_rate": 1.6402040816326532e-05, "loss": 1.939, "step": 16475 }, { "epoch": 67.24897959183673, "grad_norm": 35.0386848449707, "learning_rate": 1.6400000000000002e-05, "loss": 1.4669, "step": 16476 }, { "epoch": 67.2530612244898, "grad_norm": 30.60603904724121, "learning_rate": 1.639795918367347e-05, "loss": 1.3145, "step": 16477 }, { "epoch": 67.25714285714285, "grad_norm": 27.501907348632812, "learning_rate": 1.639591836734694e-05, "loss": 1.6282, "step": 16478 }, { "epoch": 67.26122448979592, "grad_norm": 31.23822021484375, "learning_rate": 1.639387755102041e-05, "loss": 0.5666, "step": 16479 }, { "epoch": 67.26530612244898, "grad_norm": 35.04684066772461, "learning_rate": 1.6391836734693878e-05, "loss": 0.5734, "step": 16480 }, { "epoch": 67.26938775510204, "grad_norm": 45.25940704345703, "learning_rate": 1.6389795918367348e-05, "loss": 0.5982, "step": 16481 }, { "epoch": 67.2734693877551, "grad_norm": 35.7121467590332, "learning_rate": 1.6387755102040815e-05, "loss": 0.7006, "step": 16482 }, { "epoch": 67.27755102040817, "grad_norm": 26.67613983154297, "learning_rate": 1.638571428571429e-05, "loss": 1.7869, "step": 16483 }, { "epoch": 67.28163265306122, "grad_norm": 24.146419525146484, "learning_rate": 1.6383673469387756e-05, "loss": 2.3193, "step": 16484 }, { "epoch": 67.28571428571429, "grad_norm": 24.044536590576172, "learning_rate": 1.6381632653061223e-05, "loss": 1.762, "step": 16485 }, { "epoch": 67.28979591836735, "grad_norm": 33.9591178894043, "learning_rate": 1.6379591836734694e-05, "loss": 1.2196, "step": 16486 }, { "epoch": 67.29387755102042, "grad_norm": 19.965368270874023, "learning_rate": 1.6377551020408164e-05, "loss": 2.3097, "step": 16487 }, { "epoch": 67.29795918367347, "grad_norm": 34.62921905517578, "learning_rate": 1.6375510204081635e-05, "loss": 1.2412, "step": 16488 }, { "epoch": 67.30204081632652, "grad_norm": 47.982383728027344, "learning_rate": 1.6373469387755102e-05, "loss": 1.3336, "step": 16489 }, { "epoch": 67.3061224489796, "grad_norm": 37.57136154174805, "learning_rate": 1.6371428571428572e-05, "loss": 1.4546, "step": 16490 }, { "epoch": 67.31020408163265, "grad_norm": 40.13412094116211, "learning_rate": 1.6369387755102043e-05, "loss": 1.2113, "step": 16491 }, { "epoch": 67.31428571428572, "grad_norm": 17.699121475219727, "learning_rate": 1.636734693877551e-05, "loss": 2.1543, "step": 16492 }, { "epoch": 67.31836734693877, "grad_norm": 38.41446304321289, "learning_rate": 1.636530612244898e-05, "loss": 1.0876, "step": 16493 }, { "epoch": 67.32244897959184, "grad_norm": 28.741348266601562, "learning_rate": 1.636326530612245e-05, "loss": 2.0204, "step": 16494 }, { "epoch": 67.3265306122449, "grad_norm": 27.57111358642578, "learning_rate": 1.636122448979592e-05, "loss": 1.4918, "step": 16495 }, { "epoch": 67.33061224489796, "grad_norm": 38.772586822509766, "learning_rate": 1.635918367346939e-05, "loss": 0.8761, "step": 16496 }, { "epoch": 67.33469387755102, "grad_norm": 32.75253677368164, "learning_rate": 1.6357142857142856e-05, "loss": 1.3825, "step": 16497 }, { "epoch": 67.33877551020409, "grad_norm": 27.158702850341797, "learning_rate": 1.6355102040816326e-05, "loss": 1.9263, "step": 16498 }, { "epoch": 67.34285714285714, "grad_norm": 52.43709945678711, "learning_rate": 1.6353061224489797e-05, "loss": 0.9169, "step": 16499 }, { "epoch": 67.34693877551021, "grad_norm": 29.25128746032715, "learning_rate": 1.6351020408163267e-05, "loss": 1.5258, "step": 16500 }, { "epoch": 67.35102040816327, "grad_norm": 28.69503402709961, "learning_rate": 1.6348979591836735e-05, "loss": 0.2349, "step": 16501 }, { "epoch": 67.35510204081632, "grad_norm": 19.505508422851562, "learning_rate": 1.6346938775510205e-05, "loss": 2.6597, "step": 16502 }, { "epoch": 67.35918367346939, "grad_norm": 33.0374870300293, "learning_rate": 1.6344897959183676e-05, "loss": 0.9245, "step": 16503 }, { "epoch": 67.36326530612244, "grad_norm": 40.057186126708984, "learning_rate": 1.6342857142857143e-05, "loss": 1.0698, "step": 16504 }, { "epoch": 67.36734693877551, "grad_norm": 21.234411239624023, "learning_rate": 1.6340816326530613e-05, "loss": 2.2681, "step": 16505 }, { "epoch": 67.37142857142857, "grad_norm": 27.336742401123047, "learning_rate": 1.6338775510204084e-05, "loss": 1.8814, "step": 16506 }, { "epoch": 67.37551020408164, "grad_norm": 40.822357177734375, "learning_rate": 1.633673469387755e-05, "loss": 0.9673, "step": 16507 }, { "epoch": 67.37959183673469, "grad_norm": 27.59580421447754, "learning_rate": 1.633469387755102e-05, "loss": 1.5732, "step": 16508 }, { "epoch": 67.38367346938776, "grad_norm": 36.71034240722656, "learning_rate": 1.633265306122449e-05, "loss": 0.6026, "step": 16509 }, { "epoch": 67.38775510204081, "grad_norm": 27.377395629882812, "learning_rate": 1.6330612244897962e-05, "loss": 1.675, "step": 16510 }, { "epoch": 67.39183673469388, "grad_norm": 25.14879035949707, "learning_rate": 1.632857142857143e-05, "loss": 1.4376, "step": 16511 }, { "epoch": 67.39591836734694, "grad_norm": 49.80876922607422, "learning_rate": 1.6326530612244897e-05, "loss": 0.7336, "step": 16512 }, { "epoch": 67.4, "grad_norm": 34.78881072998047, "learning_rate": 1.6324489795918367e-05, "loss": 0.9458, "step": 16513 }, { "epoch": 67.40408163265306, "grad_norm": 33.82893371582031, "learning_rate": 1.6322448979591838e-05, "loss": 0.6283, "step": 16514 }, { "epoch": 67.40816326530613, "grad_norm": 36.17111587524414, "learning_rate": 1.6320408163265308e-05, "loss": 1.0012, "step": 16515 }, { "epoch": 67.41224489795918, "grad_norm": 33.96050262451172, "learning_rate": 1.6318367346938775e-05, "loss": 1.7208, "step": 16516 }, { "epoch": 67.41632653061224, "grad_norm": 27.290010452270508, "learning_rate": 1.6316326530612246e-05, "loss": 1.9308, "step": 16517 }, { "epoch": 67.42040816326531, "grad_norm": 32.50722885131836, "learning_rate": 1.6314285714285716e-05, "loss": 1.5014, "step": 16518 }, { "epoch": 67.42448979591836, "grad_norm": 73.56568908691406, "learning_rate": 1.6312244897959183e-05, "loss": 0.8919, "step": 16519 }, { "epoch": 67.42857142857143, "grad_norm": 41.286502838134766, "learning_rate": 1.6310204081632654e-05, "loss": 0.8477, "step": 16520 }, { "epoch": 67.43265306122449, "grad_norm": 28.330106735229492, "learning_rate": 1.630816326530612e-05, "loss": 1.7753, "step": 16521 }, { "epoch": 67.43673469387755, "grad_norm": 27.35361671447754, "learning_rate": 1.6306122448979595e-05, "loss": 2.0042, "step": 16522 }, { "epoch": 67.44081632653061, "grad_norm": 38.52559280395508, "learning_rate": 1.6304081632653062e-05, "loss": 1.2764, "step": 16523 }, { "epoch": 67.44489795918368, "grad_norm": 25.680864334106445, "learning_rate": 1.630204081632653e-05, "loss": 1.932, "step": 16524 }, { "epoch": 67.44897959183673, "grad_norm": 28.922088623046875, "learning_rate": 1.63e-05, "loss": 2.102, "step": 16525 }, { "epoch": 67.4530612244898, "grad_norm": 40.95667266845703, "learning_rate": 1.629795918367347e-05, "loss": 0.6271, "step": 16526 }, { "epoch": 67.45714285714286, "grad_norm": 40.73831558227539, "learning_rate": 1.629591836734694e-05, "loss": 1.5003, "step": 16527 }, { "epoch": 67.46122448979592, "grad_norm": 30.42538070678711, "learning_rate": 1.6293877551020408e-05, "loss": 1.0416, "step": 16528 }, { "epoch": 67.46530612244898, "grad_norm": 34.158329010009766, "learning_rate": 1.629183673469388e-05, "loss": 1.2137, "step": 16529 }, { "epoch": 67.46938775510205, "grad_norm": 19.412294387817383, "learning_rate": 1.628979591836735e-05, "loss": 2.3806, "step": 16530 }, { "epoch": 67.4734693877551, "grad_norm": 31.438976287841797, "learning_rate": 1.6287755102040816e-05, "loss": 1.4348, "step": 16531 }, { "epoch": 67.47755102040816, "grad_norm": 31.13444709777832, "learning_rate": 1.6285714285714287e-05, "loss": 1.6866, "step": 16532 }, { "epoch": 67.48163265306123, "grad_norm": 35.51430130004883, "learning_rate": 1.6283673469387757e-05, "loss": 1.2009, "step": 16533 }, { "epoch": 67.48571428571428, "grad_norm": 34.3328742980957, "learning_rate": 1.6281632653061224e-05, "loss": 1.0133, "step": 16534 }, { "epoch": 67.48979591836735, "grad_norm": 29.674543380737305, "learning_rate": 1.6279591836734695e-05, "loss": 1.5758, "step": 16535 }, { "epoch": 67.4938775510204, "grad_norm": 45.55957794189453, "learning_rate": 1.6277551020408162e-05, "loss": 0.9456, "step": 16536 }, { "epoch": 67.49795918367347, "grad_norm": 32.67649459838867, "learning_rate": 1.6275510204081636e-05, "loss": 0.605, "step": 16537 }, { "epoch": 67.50204081632653, "grad_norm": 23.14171600341797, "learning_rate": 1.6273469387755103e-05, "loss": 1.8488, "step": 16538 }, { "epoch": 67.5061224489796, "grad_norm": 23.403427124023438, "learning_rate": 1.6271428571428573e-05, "loss": 1.8306, "step": 16539 }, { "epoch": 67.51020408163265, "grad_norm": 44.168758392333984, "learning_rate": 1.626938775510204e-05, "loss": 0.5195, "step": 16540 }, { "epoch": 67.51428571428572, "grad_norm": 31.15900421142578, "learning_rate": 1.626734693877551e-05, "loss": 1.6582, "step": 16541 }, { "epoch": 67.51836734693877, "grad_norm": 28.17372703552246, "learning_rate": 1.626530612244898e-05, "loss": 1.5827, "step": 16542 }, { "epoch": 67.52244897959184, "grad_norm": 18.695589065551758, "learning_rate": 1.626326530612245e-05, "loss": 2.413, "step": 16543 }, { "epoch": 67.5265306122449, "grad_norm": 30.9403076171875, "learning_rate": 1.626122448979592e-05, "loss": 1.5283, "step": 16544 }, { "epoch": 67.53061224489795, "grad_norm": 33.132667541503906, "learning_rate": 1.625918367346939e-05, "loss": 1.6773, "step": 16545 }, { "epoch": 67.53469387755102, "grad_norm": 33.7281608581543, "learning_rate": 1.6257142857142857e-05, "loss": 1.47, "step": 16546 }, { "epoch": 67.53877551020408, "grad_norm": 39.071258544921875, "learning_rate": 1.6255102040816327e-05, "loss": 0.9074, "step": 16547 }, { "epoch": 67.54285714285714, "grad_norm": 22.274005889892578, "learning_rate": 1.6253061224489795e-05, "loss": 1.7835, "step": 16548 }, { "epoch": 67.5469387755102, "grad_norm": 38.75920486450195, "learning_rate": 1.625102040816327e-05, "loss": 0.5766, "step": 16549 }, { "epoch": 67.55102040816327, "grad_norm": 16.83675765991211, "learning_rate": 1.6248979591836736e-05, "loss": 0.1444, "step": 16550 }, { "epoch": 67.55510204081632, "grad_norm": 37.01430892944336, "learning_rate": 1.6246938775510203e-05, "loss": 0.4316, "step": 16551 }, { "epoch": 67.55918367346939, "grad_norm": 23.033357620239258, "learning_rate": 1.6244897959183673e-05, "loss": 1.7486, "step": 16552 }, { "epoch": 67.56326530612245, "grad_norm": 70.97877502441406, "learning_rate": 1.6242857142857144e-05, "loss": 0.5277, "step": 16553 }, { "epoch": 67.56734693877551, "grad_norm": 16.131736755371094, "learning_rate": 1.6240816326530614e-05, "loss": 2.353, "step": 16554 }, { "epoch": 67.57142857142857, "grad_norm": 32.27976989746094, "learning_rate": 1.623877551020408e-05, "loss": 1.1515, "step": 16555 }, { "epoch": 67.57551020408164, "grad_norm": 29.41687774658203, "learning_rate": 1.6236734693877552e-05, "loss": 1.6269, "step": 16556 }, { "epoch": 67.57959183673469, "grad_norm": 32.686824798583984, "learning_rate": 1.6234693877551022e-05, "loss": 1.7335, "step": 16557 }, { "epoch": 67.58367346938776, "grad_norm": 30.972490310668945, "learning_rate": 1.623265306122449e-05, "loss": 1.3938, "step": 16558 }, { "epoch": 67.58775510204082, "grad_norm": 30.68335723876953, "learning_rate": 1.623061224489796e-05, "loss": 1.5182, "step": 16559 }, { "epoch": 67.59183673469387, "grad_norm": 25.401182174682617, "learning_rate": 1.622857142857143e-05, "loss": 0.2102, "step": 16560 }, { "epoch": 67.59591836734694, "grad_norm": 23.76298713684082, "learning_rate": 1.62265306122449e-05, "loss": 1.7809, "step": 16561 }, { "epoch": 67.6, "grad_norm": 23.36253547668457, "learning_rate": 1.6224489795918368e-05, "loss": 1.9252, "step": 16562 }, { "epoch": 67.60408163265306, "grad_norm": 34.592769622802734, "learning_rate": 1.6222448979591835e-05, "loss": 1.4759, "step": 16563 }, { "epoch": 67.60816326530612, "grad_norm": 42.29371643066406, "learning_rate": 1.622040816326531e-05, "loss": 0.9325, "step": 16564 }, { "epoch": 67.61224489795919, "grad_norm": 41.17547607421875, "learning_rate": 1.6218367346938776e-05, "loss": 1.2356, "step": 16565 }, { "epoch": 67.61632653061224, "grad_norm": 27.00635528564453, "learning_rate": 1.6216326530612247e-05, "loss": 0.2273, "step": 16566 }, { "epoch": 67.62040816326531, "grad_norm": 36.055564880371094, "learning_rate": 1.6214285714285714e-05, "loss": 1.2391, "step": 16567 }, { "epoch": 67.62448979591836, "grad_norm": 37.9403076171875, "learning_rate": 1.6212244897959184e-05, "loss": 1.0234, "step": 16568 }, { "epoch": 67.62857142857143, "grad_norm": 60.81053924560547, "learning_rate": 1.6210204081632655e-05, "loss": 1.6436, "step": 16569 }, { "epoch": 67.63265306122449, "grad_norm": 45.19614028930664, "learning_rate": 1.6208163265306122e-05, "loss": 0.6511, "step": 16570 }, { "epoch": 67.63673469387756, "grad_norm": 43.33346939086914, "learning_rate": 1.6206122448979593e-05, "loss": 1.0588, "step": 16571 }, { "epoch": 67.64081632653061, "grad_norm": 40.93129348754883, "learning_rate": 1.6204081632653063e-05, "loss": 1.2189, "step": 16572 }, { "epoch": 67.64489795918368, "grad_norm": 42.12298583984375, "learning_rate": 1.620204081632653e-05, "loss": 0.9977, "step": 16573 }, { "epoch": 67.64897959183673, "grad_norm": 32.3116340637207, "learning_rate": 1.62e-05, "loss": 0.4985, "step": 16574 }, { "epoch": 67.65306122448979, "grad_norm": 25.127033233642578, "learning_rate": 1.6197959183673468e-05, "loss": 1.8378, "step": 16575 }, { "epoch": 67.65714285714286, "grad_norm": 27.004396438598633, "learning_rate": 1.6195918367346942e-05, "loss": 1.6407, "step": 16576 }, { "epoch": 67.66122448979591, "grad_norm": 18.859216690063477, "learning_rate": 1.619387755102041e-05, "loss": 2.3869, "step": 16577 }, { "epoch": 67.66530612244898, "grad_norm": 26.792633056640625, "learning_rate": 1.6191836734693876e-05, "loss": 1.8525, "step": 16578 }, { "epoch": 67.66938775510204, "grad_norm": 20.24382781982422, "learning_rate": 1.6189795918367347e-05, "loss": 1.9171, "step": 16579 }, { "epoch": 67.6734693877551, "grad_norm": 24.298845291137695, "learning_rate": 1.6187755102040817e-05, "loss": 1.8912, "step": 16580 }, { "epoch": 67.67755102040816, "grad_norm": 26.831716537475586, "learning_rate": 1.6185714285714288e-05, "loss": 1.9195, "step": 16581 }, { "epoch": 67.68163265306123, "grad_norm": 20.285415649414062, "learning_rate": 1.6183673469387755e-05, "loss": 1.6207, "step": 16582 }, { "epoch": 67.68571428571428, "grad_norm": 35.59468460083008, "learning_rate": 1.6181632653061225e-05, "loss": 1.2534, "step": 16583 }, { "epoch": 67.68979591836735, "grad_norm": 30.66938591003418, "learning_rate": 1.6179591836734696e-05, "loss": 1.0938, "step": 16584 }, { "epoch": 67.6938775510204, "grad_norm": 25.110048294067383, "learning_rate": 1.6177551020408163e-05, "loss": 1.8181, "step": 16585 }, { "epoch": 67.69795918367348, "grad_norm": 45.499691009521484, "learning_rate": 1.6175510204081633e-05, "loss": 0.6622, "step": 16586 }, { "epoch": 67.70204081632653, "grad_norm": 30.847137451171875, "learning_rate": 1.6173469387755104e-05, "loss": 0.379, "step": 16587 }, { "epoch": 67.70612244897958, "grad_norm": 44.633365631103516, "learning_rate": 1.6171428571428574e-05, "loss": 0.6167, "step": 16588 }, { "epoch": 67.71020408163265, "grad_norm": 36.04671096801758, "learning_rate": 1.616938775510204e-05, "loss": 1.3727, "step": 16589 }, { "epoch": 67.71428571428571, "grad_norm": 26.613018035888672, "learning_rate": 1.616734693877551e-05, "loss": 0.1733, "step": 16590 }, { "epoch": 67.71836734693878, "grad_norm": 40.0029182434082, "learning_rate": 1.6165306122448983e-05, "loss": 0.8861, "step": 16591 }, { "epoch": 67.72244897959183, "grad_norm": 19.84801483154297, "learning_rate": 1.616326530612245e-05, "loss": 2.3571, "step": 16592 }, { "epoch": 67.7265306122449, "grad_norm": 21.90494155883789, "learning_rate": 1.616122448979592e-05, "loss": 2.3316, "step": 16593 }, { "epoch": 67.73061224489796, "grad_norm": 27.170093536376953, "learning_rate": 1.6159183673469387e-05, "loss": 2.2299, "step": 16594 }, { "epoch": 67.73469387755102, "grad_norm": 24.128440856933594, "learning_rate": 1.6157142857142858e-05, "loss": 2.2248, "step": 16595 }, { "epoch": 67.73877551020408, "grad_norm": 24.255685806274414, "learning_rate": 1.615510204081633e-05, "loss": 2.5725, "step": 16596 }, { "epoch": 67.74285714285715, "grad_norm": 14.866437911987305, "learning_rate": 1.6153061224489795e-05, "loss": 2.8352, "step": 16597 }, { "epoch": 67.7469387755102, "grad_norm": 26.465227127075195, "learning_rate": 1.6151020408163266e-05, "loss": 0.9073, "step": 16598 }, { "epoch": 67.75102040816327, "grad_norm": 28.911148071289062, "learning_rate": 1.6148979591836736e-05, "loss": 1.6506, "step": 16599 }, { "epoch": 67.75510204081633, "grad_norm": 33.14968490600586, "learning_rate": 1.6146938775510204e-05, "loss": 1.1522, "step": 16600 }, { "epoch": 67.7591836734694, "grad_norm": 2.282926559448242, "learning_rate": 1.6144897959183674e-05, "loss": 0.0512, "step": 16601 }, { "epoch": 67.76326530612245, "grad_norm": 16.296918869018555, "learning_rate": 1.614285714285714e-05, "loss": 0.1622, "step": 16602 }, { "epoch": 67.7673469387755, "grad_norm": 27.076377868652344, "learning_rate": 1.6140816326530615e-05, "loss": 0.1721, "step": 16603 }, { "epoch": 67.77142857142857, "grad_norm": 27.38093376159668, "learning_rate": 1.6138775510204082e-05, "loss": 1.6956, "step": 16604 }, { "epoch": 67.77551020408163, "grad_norm": 34.50217056274414, "learning_rate": 1.613673469387755e-05, "loss": 1.2777, "step": 16605 }, { "epoch": 67.7795918367347, "grad_norm": 54.529388427734375, "learning_rate": 1.613469387755102e-05, "loss": 0.4444, "step": 16606 }, { "epoch": 67.78367346938775, "grad_norm": 24.934083938598633, "learning_rate": 1.613265306122449e-05, "loss": 1.7893, "step": 16607 }, { "epoch": 67.78775510204082, "grad_norm": 33.245365142822266, "learning_rate": 1.613061224489796e-05, "loss": 1.6196, "step": 16608 }, { "epoch": 67.79183673469387, "grad_norm": 11.999431610107422, "learning_rate": 1.6128571428571428e-05, "loss": 0.0918, "step": 16609 }, { "epoch": 67.79591836734694, "grad_norm": 46.04540252685547, "learning_rate": 1.61265306122449e-05, "loss": 1.0606, "step": 16610 }, { "epoch": 67.8, "grad_norm": 41.26716232299805, "learning_rate": 1.612448979591837e-05, "loss": 0.5672, "step": 16611 }, { "epoch": 67.80408163265307, "grad_norm": 37.0981559753418, "learning_rate": 1.6122448979591836e-05, "loss": 1.1, "step": 16612 }, { "epoch": 67.80816326530612, "grad_norm": 32.02262878417969, "learning_rate": 1.6120408163265307e-05, "loss": 1.8386, "step": 16613 }, { "epoch": 67.81224489795919, "grad_norm": 41.217403411865234, "learning_rate": 1.6118367346938777e-05, "loss": 0.8778, "step": 16614 }, { "epoch": 67.81632653061224, "grad_norm": 39.212913513183594, "learning_rate": 1.6116326530612248e-05, "loss": 1.6583, "step": 16615 }, { "epoch": 67.82040816326531, "grad_norm": 35.51722717285156, "learning_rate": 1.6114285714285715e-05, "loss": 1.807, "step": 16616 }, { "epoch": 67.82448979591837, "grad_norm": 35.852195739746094, "learning_rate": 1.6112244897959182e-05, "loss": 1.5472, "step": 16617 }, { "epoch": 67.82857142857142, "grad_norm": 31.557064056396484, "learning_rate": 1.6110204081632656e-05, "loss": 1.043, "step": 16618 }, { "epoch": 67.83265306122449, "grad_norm": 33.45799255371094, "learning_rate": 1.6108163265306123e-05, "loss": 1.3376, "step": 16619 }, { "epoch": 67.83673469387755, "grad_norm": 32.77952575683594, "learning_rate": 1.6106122448979594e-05, "loss": 0.1726, "step": 16620 }, { "epoch": 67.84081632653061, "grad_norm": 20.66318130493164, "learning_rate": 1.610408163265306e-05, "loss": 2.4251, "step": 16621 }, { "epoch": 67.84489795918367, "grad_norm": 48.3336181640625, "learning_rate": 1.610204081632653e-05, "loss": 1.0257, "step": 16622 }, { "epoch": 67.84897959183674, "grad_norm": 28.846839904785156, "learning_rate": 1.6100000000000002e-05, "loss": 0.8852, "step": 16623 }, { "epoch": 67.85306122448979, "grad_norm": 27.209747314453125, "learning_rate": 1.609795918367347e-05, "loss": 1.5754, "step": 16624 }, { "epoch": 67.85714285714286, "grad_norm": 32.15152359008789, "learning_rate": 1.609591836734694e-05, "loss": 1.0241, "step": 16625 }, { "epoch": 67.86122448979592, "grad_norm": 28.549894332885742, "learning_rate": 1.609387755102041e-05, "loss": 1.4432, "step": 16626 }, { "epoch": 67.86530612244898, "grad_norm": 27.76230812072754, "learning_rate": 1.6091836734693877e-05, "loss": 1.6897, "step": 16627 }, { "epoch": 67.86938775510204, "grad_norm": 24.207265853881836, "learning_rate": 1.6089795918367347e-05, "loss": 1.7837, "step": 16628 }, { "epoch": 67.87346938775511, "grad_norm": 32.53315353393555, "learning_rate": 1.6087755102040815e-05, "loss": 1.3421, "step": 16629 }, { "epoch": 67.87755102040816, "grad_norm": 51.713558197021484, "learning_rate": 1.608571428571429e-05, "loss": 0.6341, "step": 16630 }, { "epoch": 67.88163265306123, "grad_norm": 23.77836036682129, "learning_rate": 1.6083673469387756e-05, "loss": 1.8821, "step": 16631 }, { "epoch": 67.88571428571429, "grad_norm": 24.840065002441406, "learning_rate": 1.6081632653061226e-05, "loss": 1.7999, "step": 16632 }, { "epoch": 67.88979591836734, "grad_norm": 20.05219841003418, "learning_rate": 1.6079591836734693e-05, "loss": 0.2211, "step": 16633 }, { "epoch": 67.89387755102041, "grad_norm": 24.925987243652344, "learning_rate": 1.6077551020408164e-05, "loss": 1.7659, "step": 16634 }, { "epoch": 67.89795918367346, "grad_norm": 37.23759460449219, "learning_rate": 1.6075510204081634e-05, "loss": 1.4563, "step": 16635 }, { "epoch": 67.90204081632653, "grad_norm": 34.44620895385742, "learning_rate": 1.60734693877551e-05, "loss": 0.8853, "step": 16636 }, { "epoch": 67.90612244897959, "grad_norm": 36.41120147705078, "learning_rate": 1.6071428571428572e-05, "loss": 0.5617, "step": 16637 }, { "epoch": 67.91020408163266, "grad_norm": 28.698394775390625, "learning_rate": 1.6069387755102042e-05, "loss": 0.6101, "step": 16638 }, { "epoch": 67.91428571428571, "grad_norm": 40.826778411865234, "learning_rate": 1.606734693877551e-05, "loss": 1.306, "step": 16639 }, { "epoch": 67.91836734693878, "grad_norm": 39.37188720703125, "learning_rate": 1.606530612244898e-05, "loss": 1.2221, "step": 16640 }, { "epoch": 67.92244897959183, "grad_norm": 29.32805824279785, "learning_rate": 1.606326530612245e-05, "loss": 0.3013, "step": 16641 }, { "epoch": 67.9265306122449, "grad_norm": 28.78740692138672, "learning_rate": 1.606122448979592e-05, "loss": 1.1395, "step": 16642 }, { "epoch": 67.93061224489796, "grad_norm": 35.111358642578125, "learning_rate": 1.6059183673469388e-05, "loss": 1.3502, "step": 16643 }, { "epoch": 67.93469387755103, "grad_norm": 31.620521545410156, "learning_rate": 1.6057142857142855e-05, "loss": 0.5564, "step": 16644 }, { "epoch": 67.93877551020408, "grad_norm": 37.404876708984375, "learning_rate": 1.605510204081633e-05, "loss": 1.3351, "step": 16645 }, { "epoch": 67.94285714285714, "grad_norm": 26.365737915039062, "learning_rate": 1.6053061224489796e-05, "loss": 1.8401, "step": 16646 }, { "epoch": 67.9469387755102, "grad_norm": 29.41103172302246, "learning_rate": 1.6051020408163267e-05, "loss": 0.1684, "step": 16647 }, { "epoch": 67.95102040816326, "grad_norm": 41.05948257446289, "learning_rate": 1.6048979591836734e-05, "loss": 0.7314, "step": 16648 }, { "epoch": 67.95510204081633, "grad_norm": 33.69940185546875, "learning_rate": 1.6046938775510205e-05, "loss": 0.7496, "step": 16649 }, { "epoch": 67.95918367346938, "grad_norm": 24.524572372436523, "learning_rate": 1.6044897959183675e-05, "loss": 1.7628, "step": 16650 }, { "epoch": 67.96326530612245, "grad_norm": 37.22870635986328, "learning_rate": 1.6042857142857142e-05, "loss": 0.6303, "step": 16651 }, { "epoch": 67.9673469387755, "grad_norm": 32.998870849609375, "learning_rate": 1.6040816326530613e-05, "loss": 0.1853, "step": 16652 }, { "epoch": 67.97142857142858, "grad_norm": 41.1434440612793, "learning_rate": 1.6038775510204083e-05, "loss": 0.4243, "step": 16653 }, { "epoch": 67.97551020408163, "grad_norm": 38.30949401855469, "learning_rate": 1.6036734693877554e-05, "loss": 0.6788, "step": 16654 }, { "epoch": 67.9795918367347, "grad_norm": 27.009716033935547, "learning_rate": 1.603469387755102e-05, "loss": 1.5246, "step": 16655 }, { "epoch": 67.98367346938775, "grad_norm": 34.618492126464844, "learning_rate": 1.6032653061224488e-05, "loss": 0.5772, "step": 16656 }, { "epoch": 67.98775510204082, "grad_norm": 34.622528076171875, "learning_rate": 1.6030612244897962e-05, "loss": 1.4969, "step": 16657 }, { "epoch": 67.99183673469388, "grad_norm": 28.006633758544922, "learning_rate": 1.602857142857143e-05, "loss": 1.7941, "step": 16658 }, { "epoch": 67.99591836734695, "grad_norm": 40.96738052368164, "learning_rate": 1.60265306122449e-05, "loss": 0.5701, "step": 16659 }, { "epoch": 68.0, "grad_norm": 50.03643035888672, "learning_rate": 1.6024489795918367e-05, "loss": 0.6768, "step": 16660 }, { "epoch": 68.00408163265305, "grad_norm": 28.426513671875, "learning_rate": 1.6022448979591837e-05, "loss": 1.7481, "step": 16661 }, { "epoch": 68.00816326530612, "grad_norm": 62.301116943359375, "learning_rate": 1.6020408163265308e-05, "loss": 0.5552, "step": 16662 }, { "epoch": 68.01224489795918, "grad_norm": 29.170513153076172, "learning_rate": 1.6018367346938775e-05, "loss": 1.5201, "step": 16663 }, { "epoch": 68.01632653061225, "grad_norm": 37.96574401855469, "learning_rate": 1.6016326530612245e-05, "loss": 1.4433, "step": 16664 }, { "epoch": 68.0204081632653, "grad_norm": 38.358646392822266, "learning_rate": 1.6014285714285716e-05, "loss": 0.9974, "step": 16665 }, { "epoch": 68.02448979591837, "grad_norm": 30.7314395904541, "learning_rate": 1.6012244897959183e-05, "loss": 0.21, "step": 16666 }, { "epoch": 68.02857142857142, "grad_norm": 28.470840454101562, "learning_rate": 1.6010204081632653e-05, "loss": 1.781, "step": 16667 }, { "epoch": 68.0326530612245, "grad_norm": 37.28578567504883, "learning_rate": 1.6008163265306124e-05, "loss": 1.4871, "step": 16668 }, { "epoch": 68.03673469387755, "grad_norm": 31.702938079833984, "learning_rate": 1.6006122448979595e-05, "loss": 1.6531, "step": 16669 }, { "epoch": 68.04081632653062, "grad_norm": 38.93941879272461, "learning_rate": 1.600408163265306e-05, "loss": 0.8653, "step": 16670 }, { "epoch": 68.04489795918367, "grad_norm": 23.498004913330078, "learning_rate": 1.600204081632653e-05, "loss": 2.2646, "step": 16671 }, { "epoch": 68.04897959183674, "grad_norm": 26.759300231933594, "learning_rate": 1.6000000000000003e-05, "loss": 1.7146, "step": 16672 }, { "epoch": 68.0530612244898, "grad_norm": 43.40742111206055, "learning_rate": 1.599795918367347e-05, "loss": 0.6483, "step": 16673 }, { "epoch": 68.05714285714286, "grad_norm": 27.75428009033203, "learning_rate": 1.599591836734694e-05, "loss": 1.5144, "step": 16674 }, { "epoch": 68.06122448979592, "grad_norm": 31.74277114868164, "learning_rate": 1.5993877551020407e-05, "loss": 1.9888, "step": 16675 }, { "epoch": 68.06530612244897, "grad_norm": 24.057716369628906, "learning_rate": 1.599183673469388e-05, "loss": 1.755, "step": 16676 }, { "epoch": 68.06938775510204, "grad_norm": 52.927974700927734, "learning_rate": 1.598979591836735e-05, "loss": 1.5214, "step": 16677 }, { "epoch": 68.0734693877551, "grad_norm": 38.41705322265625, "learning_rate": 1.5987755102040816e-05, "loss": 1.3364, "step": 16678 }, { "epoch": 68.07755102040817, "grad_norm": 43.3514404296875, "learning_rate": 1.5985714285714286e-05, "loss": 0.4966, "step": 16679 }, { "epoch": 68.08163265306122, "grad_norm": 28.557065963745117, "learning_rate": 1.5983673469387757e-05, "loss": 0.3714, "step": 16680 }, { "epoch": 68.08571428571429, "grad_norm": 44.406455993652344, "learning_rate": 1.5981632653061227e-05, "loss": 0.8078, "step": 16681 }, { "epoch": 68.08979591836734, "grad_norm": 20.378501892089844, "learning_rate": 1.5979591836734694e-05, "loss": 2.2322, "step": 16682 }, { "epoch": 68.09387755102041, "grad_norm": 30.72065544128418, "learning_rate": 1.597755102040816e-05, "loss": 1.4774, "step": 16683 }, { "epoch": 68.09795918367347, "grad_norm": 37.54339599609375, "learning_rate": 1.5975510204081635e-05, "loss": 0.7187, "step": 16684 }, { "epoch": 68.10204081632654, "grad_norm": 35.41201400756836, "learning_rate": 1.5973469387755102e-05, "loss": 1.518, "step": 16685 }, { "epoch": 68.10612244897959, "grad_norm": 45.461360931396484, "learning_rate": 1.5971428571428573e-05, "loss": 0.8687, "step": 16686 }, { "epoch": 68.11020408163266, "grad_norm": 28.504316329956055, "learning_rate": 1.596938775510204e-05, "loss": 1.7247, "step": 16687 }, { "epoch": 68.11428571428571, "grad_norm": 34.626834869384766, "learning_rate": 1.596734693877551e-05, "loss": 1.3212, "step": 16688 }, { "epoch": 68.11836734693877, "grad_norm": 37.23719024658203, "learning_rate": 1.596530612244898e-05, "loss": 1.2689, "step": 16689 }, { "epoch": 68.12244897959184, "grad_norm": 50.20589828491211, "learning_rate": 1.5963265306122448e-05, "loss": 0.4991, "step": 16690 }, { "epoch": 68.12653061224489, "grad_norm": 33.5873908996582, "learning_rate": 1.596122448979592e-05, "loss": 1.1279, "step": 16691 }, { "epoch": 68.13061224489796, "grad_norm": 38.38645553588867, "learning_rate": 1.595918367346939e-05, "loss": 1.744, "step": 16692 }, { "epoch": 68.13469387755102, "grad_norm": 30.622894287109375, "learning_rate": 1.5957142857142856e-05, "loss": 0.8802, "step": 16693 }, { "epoch": 68.13877551020408, "grad_norm": 32.085575103759766, "learning_rate": 1.5955102040816327e-05, "loss": 1.1548, "step": 16694 }, { "epoch": 68.14285714285714, "grad_norm": 24.561813354492188, "learning_rate": 1.5953061224489797e-05, "loss": 1.6992, "step": 16695 }, { "epoch": 68.14693877551021, "grad_norm": 33.4914665222168, "learning_rate": 1.5951020408163268e-05, "loss": 0.1612, "step": 16696 }, { "epoch": 68.15102040816326, "grad_norm": 32.699893951416016, "learning_rate": 1.5948979591836735e-05, "loss": 1.1051, "step": 16697 }, { "epoch": 68.15510204081633, "grad_norm": 31.788755416870117, "learning_rate": 1.5946938775510206e-05, "loss": 0.5754, "step": 16698 }, { "epoch": 68.15918367346939, "grad_norm": 39.76009750366211, "learning_rate": 1.5944897959183676e-05, "loss": 0.8823, "step": 16699 }, { "epoch": 68.16326530612245, "grad_norm": 27.179033279418945, "learning_rate": 1.5942857142857143e-05, "loss": 1.6644, "step": 16700 }, { "epoch": 68.16734693877551, "grad_norm": 40.207462310791016, "learning_rate": 1.5940816326530614e-05, "loss": 1.4281, "step": 16701 }, { "epoch": 68.17142857142858, "grad_norm": 33.23644256591797, "learning_rate": 1.593877551020408e-05, "loss": 1.3985, "step": 16702 }, { "epoch": 68.17551020408163, "grad_norm": 34.4752197265625, "learning_rate": 1.5936734693877555e-05, "loss": 0.9174, "step": 16703 }, { "epoch": 68.17959183673469, "grad_norm": 20.838407516479492, "learning_rate": 1.5934693877551022e-05, "loss": 1.5519, "step": 16704 }, { "epoch": 68.18367346938776, "grad_norm": 31.708065032958984, "learning_rate": 1.593265306122449e-05, "loss": 0.316, "step": 16705 }, { "epoch": 68.18775510204081, "grad_norm": 40.95619201660156, "learning_rate": 1.593061224489796e-05, "loss": 0.5463, "step": 16706 }, { "epoch": 68.19183673469388, "grad_norm": 35.765098571777344, "learning_rate": 1.592857142857143e-05, "loss": 0.8375, "step": 16707 }, { "epoch": 68.19591836734693, "grad_norm": 21.314716339111328, "learning_rate": 1.59265306122449e-05, "loss": 2.2216, "step": 16708 }, { "epoch": 68.2, "grad_norm": 20.987899780273438, "learning_rate": 1.5924489795918368e-05, "loss": 2.4871, "step": 16709 }, { "epoch": 68.20408163265306, "grad_norm": 24.74083137512207, "learning_rate": 1.5922448979591835e-05, "loss": 1.6942, "step": 16710 }, { "epoch": 68.20816326530613, "grad_norm": 39.1597785949707, "learning_rate": 1.592040816326531e-05, "loss": 1.2333, "step": 16711 }, { "epoch": 68.21224489795918, "grad_norm": 30.3078556060791, "learning_rate": 1.5918367346938776e-05, "loss": 0.8064, "step": 16712 }, { "epoch": 68.21632653061225, "grad_norm": 30.817508697509766, "learning_rate": 1.5916326530612246e-05, "loss": 1.0472, "step": 16713 }, { "epoch": 68.2204081632653, "grad_norm": 22.860877990722656, "learning_rate": 1.5914285714285713e-05, "loss": 1.7787, "step": 16714 }, { "epoch": 68.22448979591837, "grad_norm": 22.09587860107422, "learning_rate": 1.5912244897959184e-05, "loss": 0.145, "step": 16715 }, { "epoch": 68.22857142857143, "grad_norm": 37.19276809692383, "learning_rate": 1.5910204081632654e-05, "loss": 0.4812, "step": 16716 }, { "epoch": 68.2326530612245, "grad_norm": 31.392059326171875, "learning_rate": 1.590816326530612e-05, "loss": 1.0648, "step": 16717 }, { "epoch": 68.23673469387755, "grad_norm": 27.05301856994629, "learning_rate": 1.5906122448979592e-05, "loss": 1.6069, "step": 16718 }, { "epoch": 68.2408163265306, "grad_norm": 36.326473236083984, "learning_rate": 1.5904081632653063e-05, "loss": 1.2298, "step": 16719 }, { "epoch": 68.24489795918367, "grad_norm": 29.666061401367188, "learning_rate": 1.5902040816326533e-05, "loss": 1.7298, "step": 16720 }, { "epoch": 68.24897959183673, "grad_norm": 22.336353302001953, "learning_rate": 1.59e-05, "loss": 1.7215, "step": 16721 }, { "epoch": 68.2530612244898, "grad_norm": 33.50864028930664, "learning_rate": 1.589795918367347e-05, "loss": 0.6398, "step": 16722 }, { "epoch": 68.25714285714285, "grad_norm": 28.51801872253418, "learning_rate": 1.589591836734694e-05, "loss": 0.5601, "step": 16723 }, { "epoch": 68.26122448979592, "grad_norm": 34.608970642089844, "learning_rate": 1.589387755102041e-05, "loss": 1.1862, "step": 16724 }, { "epoch": 68.26530612244898, "grad_norm": 10.263607025146484, "learning_rate": 1.589183673469388e-05, "loss": 3.1256, "step": 16725 }, { "epoch": 68.26938775510204, "grad_norm": 39.44347381591797, "learning_rate": 1.588979591836735e-05, "loss": 1.1531, "step": 16726 }, { "epoch": 68.2734693877551, "grad_norm": 42.14219284057617, "learning_rate": 1.5887755102040817e-05, "loss": 0.6354, "step": 16727 }, { "epoch": 68.27755102040817, "grad_norm": 52.09970474243164, "learning_rate": 1.5885714285714287e-05, "loss": 0.7927, "step": 16728 }, { "epoch": 68.28163265306122, "grad_norm": 55.223140716552734, "learning_rate": 1.5883673469387754e-05, "loss": 1.3095, "step": 16729 }, { "epoch": 68.28571428571429, "grad_norm": 37.52398681640625, "learning_rate": 1.5881632653061225e-05, "loss": 0.3671, "step": 16730 }, { "epoch": 68.28979591836735, "grad_norm": 32.847023010253906, "learning_rate": 1.5879591836734695e-05, "loss": 0.5214, "step": 16731 }, { "epoch": 68.29387755102042, "grad_norm": 29.48900604248047, "learning_rate": 1.5877551020408162e-05, "loss": 0.8853, "step": 16732 }, { "epoch": 68.29795918367347, "grad_norm": 38.400489807128906, "learning_rate": 1.5875510204081633e-05, "loss": 0.4371, "step": 16733 }, { "epoch": 68.30204081632652, "grad_norm": 33.34175109863281, "learning_rate": 1.5873469387755103e-05, "loss": 1.2576, "step": 16734 }, { "epoch": 68.3061224489796, "grad_norm": 50.71894836425781, "learning_rate": 1.5871428571428574e-05, "loss": 0.6866, "step": 16735 }, { "epoch": 68.31020408163265, "grad_norm": 26.40328598022461, "learning_rate": 1.586938775510204e-05, "loss": 1.8187, "step": 16736 }, { "epoch": 68.31428571428572, "grad_norm": 37.094200134277344, "learning_rate": 1.5867346938775508e-05, "loss": 0.5547, "step": 16737 }, { "epoch": 68.31836734693877, "grad_norm": 22.73100471496582, "learning_rate": 1.5865306122448982e-05, "loss": 1.9029, "step": 16738 }, { "epoch": 68.32244897959184, "grad_norm": 27.855491638183594, "learning_rate": 1.586326530612245e-05, "loss": 1.0382, "step": 16739 }, { "epoch": 68.3265306122449, "grad_norm": 23.54808235168457, "learning_rate": 1.586122448979592e-05, "loss": 1.8706, "step": 16740 }, { "epoch": 68.33061224489796, "grad_norm": 28.223596572875977, "learning_rate": 1.5859183673469387e-05, "loss": 1.5613, "step": 16741 }, { "epoch": 68.33469387755102, "grad_norm": 30.51740837097168, "learning_rate": 1.5857142857142857e-05, "loss": 0.4209, "step": 16742 }, { "epoch": 68.33877551020409, "grad_norm": 40.269466400146484, "learning_rate": 1.5855102040816328e-05, "loss": 1.1908, "step": 16743 }, { "epoch": 68.34285714285714, "grad_norm": 14.775691986083984, "learning_rate": 1.5853061224489795e-05, "loss": 2.4876, "step": 16744 }, { "epoch": 68.34693877551021, "grad_norm": 27.801860809326172, "learning_rate": 1.5851020408163265e-05, "loss": 1.8031, "step": 16745 }, { "epoch": 68.35102040816327, "grad_norm": 33.58091354370117, "learning_rate": 1.5848979591836736e-05, "loss": 1.6595, "step": 16746 }, { "epoch": 68.35510204081632, "grad_norm": 31.46868896484375, "learning_rate": 1.5846938775510206e-05, "loss": 0.7136, "step": 16747 }, { "epoch": 68.35918367346939, "grad_norm": 27.855253219604492, "learning_rate": 1.5844897959183674e-05, "loss": 1.4682, "step": 16748 }, { "epoch": 68.36326530612244, "grad_norm": 33.62777328491211, "learning_rate": 1.5842857142857144e-05, "loss": 0.7966, "step": 16749 }, { "epoch": 68.36734693877551, "grad_norm": 32.74257278442383, "learning_rate": 1.5840816326530615e-05, "loss": 0.813, "step": 16750 }, { "epoch": 68.37142857142857, "grad_norm": 25.410093307495117, "learning_rate": 1.5838775510204082e-05, "loss": 1.7728, "step": 16751 }, { "epoch": 68.37551020408164, "grad_norm": 37.832218170166016, "learning_rate": 1.5836734693877552e-05, "loss": 1.0606, "step": 16752 }, { "epoch": 68.37959183673469, "grad_norm": 45.001949310302734, "learning_rate": 1.5834693877551023e-05, "loss": 1.2568, "step": 16753 }, { "epoch": 68.38367346938776, "grad_norm": 25.442392349243164, "learning_rate": 1.583265306122449e-05, "loss": 1.9371, "step": 16754 }, { "epoch": 68.38775510204081, "grad_norm": 19.84187889099121, "learning_rate": 1.583061224489796e-05, "loss": 0.1539, "step": 16755 }, { "epoch": 68.39183673469388, "grad_norm": 40.129520416259766, "learning_rate": 1.5828571428571428e-05, "loss": 1.412, "step": 16756 }, { "epoch": 68.39591836734694, "grad_norm": 16.861278533935547, "learning_rate": 1.5826530612244898e-05, "loss": 2.3956, "step": 16757 }, { "epoch": 68.4, "grad_norm": 24.431217193603516, "learning_rate": 1.582448979591837e-05, "loss": 1.8094, "step": 16758 }, { "epoch": 68.40408163265306, "grad_norm": 33.27436065673828, "learning_rate": 1.5822448979591836e-05, "loss": 0.5011, "step": 16759 }, { "epoch": 68.40816326530613, "grad_norm": 25.40311622619629, "learning_rate": 1.5820408163265306e-05, "loss": 1.7558, "step": 16760 }, { "epoch": 68.41224489795918, "grad_norm": 2.3341946601867676, "learning_rate": 1.5818367346938777e-05, "loss": 0.0373, "step": 16761 }, { "epoch": 68.41632653061224, "grad_norm": 25.923288345336914, "learning_rate": 1.5816326530612247e-05, "loss": 1.728, "step": 16762 }, { "epoch": 68.42040816326531, "grad_norm": 42.04526138305664, "learning_rate": 1.5814285714285714e-05, "loss": 1.643, "step": 16763 }, { "epoch": 68.42448979591836, "grad_norm": 26.99928092956543, "learning_rate": 1.581224489795918e-05, "loss": 1.553, "step": 16764 }, { "epoch": 68.42857142857143, "grad_norm": 30.37605857849121, "learning_rate": 1.5810204081632655e-05, "loss": 1.3114, "step": 16765 }, { "epoch": 68.43265306122449, "grad_norm": 36.685455322265625, "learning_rate": 1.5808163265306123e-05, "loss": 1.145, "step": 16766 }, { "epoch": 68.43673469387755, "grad_norm": 29.58286476135254, "learning_rate": 1.5806122448979593e-05, "loss": 1.6687, "step": 16767 }, { "epoch": 68.44081632653061, "grad_norm": 25.547332763671875, "learning_rate": 1.580408163265306e-05, "loss": 1.92, "step": 16768 }, { "epoch": 68.44489795918368, "grad_norm": 16.034605026245117, "learning_rate": 1.5802040816326534e-05, "loss": 2.7841, "step": 16769 }, { "epoch": 68.44897959183673, "grad_norm": 27.675609588623047, "learning_rate": 1.58e-05, "loss": 1.638, "step": 16770 }, { "epoch": 68.4530612244898, "grad_norm": 36.22566604614258, "learning_rate": 1.579795918367347e-05, "loss": 0.915, "step": 16771 }, { "epoch": 68.45714285714286, "grad_norm": 30.167688369750977, "learning_rate": 1.579591836734694e-05, "loss": 1.348, "step": 16772 }, { "epoch": 68.46122448979592, "grad_norm": 46.0196418762207, "learning_rate": 1.579387755102041e-05, "loss": 0.4314, "step": 16773 }, { "epoch": 68.46530612244898, "grad_norm": 17.488874435424805, "learning_rate": 1.579183673469388e-05, "loss": 2.3025, "step": 16774 }, { "epoch": 68.46938775510205, "grad_norm": 38.26922607421875, "learning_rate": 1.5789795918367347e-05, "loss": 1.461, "step": 16775 }, { "epoch": 68.4734693877551, "grad_norm": 30.012348175048828, "learning_rate": 1.5787755102040817e-05, "loss": 1.2783, "step": 16776 }, { "epoch": 68.47755102040816, "grad_norm": 34.099334716796875, "learning_rate": 1.5785714285714288e-05, "loss": 1.3925, "step": 16777 }, { "epoch": 68.48163265306123, "grad_norm": 40.115638732910156, "learning_rate": 1.5783673469387755e-05, "loss": 0.9303, "step": 16778 }, { "epoch": 68.48571428571428, "grad_norm": 30.861684799194336, "learning_rate": 1.5781632653061226e-05, "loss": 1.3743, "step": 16779 }, { "epoch": 68.48979591836735, "grad_norm": 45.80706024169922, "learning_rate": 1.5779591836734696e-05, "loss": 0.8664, "step": 16780 }, { "epoch": 68.4938775510204, "grad_norm": 31.19017219543457, "learning_rate": 1.5777551020408163e-05, "loss": 0.5939, "step": 16781 }, { "epoch": 68.49795918367347, "grad_norm": 31.29634666442871, "learning_rate": 1.5775510204081634e-05, "loss": 1.1595, "step": 16782 }, { "epoch": 68.50204081632653, "grad_norm": 40.01115036010742, "learning_rate": 1.57734693877551e-05, "loss": 1.0264, "step": 16783 }, { "epoch": 68.5061224489796, "grad_norm": 45.007442474365234, "learning_rate": 1.577142857142857e-05, "loss": 0.5822, "step": 16784 }, { "epoch": 68.51020408163265, "grad_norm": 27.527767181396484, "learning_rate": 1.5769387755102042e-05, "loss": 1.5558, "step": 16785 }, { "epoch": 68.51428571428572, "grad_norm": 36.097408294677734, "learning_rate": 1.576734693877551e-05, "loss": 1.152, "step": 16786 }, { "epoch": 68.51836734693877, "grad_norm": 38.71522903442383, "learning_rate": 1.576530612244898e-05, "loss": 0.8619, "step": 16787 }, { "epoch": 68.52244897959184, "grad_norm": 37.64604187011719, "learning_rate": 1.576326530612245e-05, "loss": 0.4313, "step": 16788 }, { "epoch": 68.5265306122449, "grad_norm": 19.948341369628906, "learning_rate": 1.576122448979592e-05, "loss": 2.2501, "step": 16789 }, { "epoch": 68.53061224489795, "grad_norm": 36.923316955566406, "learning_rate": 1.5759183673469388e-05, "loss": 0.4346, "step": 16790 }, { "epoch": 68.53469387755102, "grad_norm": 25.495359420776367, "learning_rate": 1.5757142857142858e-05, "loss": 2.1808, "step": 16791 }, { "epoch": 68.53877551020408, "grad_norm": 32.47824478149414, "learning_rate": 1.575510204081633e-05, "loss": 0.6488, "step": 16792 }, { "epoch": 68.54285714285714, "grad_norm": 36.583709716796875, "learning_rate": 1.5753061224489796e-05, "loss": 0.7731, "step": 16793 }, { "epoch": 68.5469387755102, "grad_norm": 38.3253288269043, "learning_rate": 1.5751020408163266e-05, "loss": 1.3455, "step": 16794 }, { "epoch": 68.55102040816327, "grad_norm": 37.397464752197266, "learning_rate": 1.5748979591836734e-05, "loss": 1.0982, "step": 16795 }, { "epoch": 68.55510204081632, "grad_norm": 28.478307723999023, "learning_rate": 1.5746938775510207e-05, "loss": 0.9973, "step": 16796 }, { "epoch": 68.55918367346939, "grad_norm": 27.821727752685547, "learning_rate": 1.5744897959183675e-05, "loss": 1.5522, "step": 16797 }, { "epoch": 68.56326530612245, "grad_norm": 36.97102355957031, "learning_rate": 1.574285714285714e-05, "loss": 1.1798, "step": 16798 }, { "epoch": 68.56734693877551, "grad_norm": 27.592199325561523, "learning_rate": 1.5740816326530612e-05, "loss": 1.502, "step": 16799 }, { "epoch": 68.57142857142857, "grad_norm": 19.0131778717041, "learning_rate": 1.5738775510204083e-05, "loss": 0.1459, "step": 16800 }, { "epoch": 68.57551020408164, "grad_norm": 22.05293083190918, "learning_rate": 1.5736734693877553e-05, "loss": 0.2129, "step": 16801 }, { "epoch": 68.57959183673469, "grad_norm": 34.96438980102539, "learning_rate": 1.573469387755102e-05, "loss": 1.0291, "step": 16802 }, { "epoch": 68.58367346938776, "grad_norm": 23.429283142089844, "learning_rate": 1.573265306122449e-05, "loss": 1.7881, "step": 16803 }, { "epoch": 68.58775510204082, "grad_norm": 32.21641159057617, "learning_rate": 1.573061224489796e-05, "loss": 1.0584, "step": 16804 }, { "epoch": 68.59183673469387, "grad_norm": 28.496490478515625, "learning_rate": 1.572857142857143e-05, "loss": 1.8508, "step": 16805 }, { "epoch": 68.59591836734694, "grad_norm": 45.408294677734375, "learning_rate": 1.57265306122449e-05, "loss": 0.6434, "step": 16806 }, { "epoch": 68.6, "grad_norm": 23.794200897216797, "learning_rate": 1.572448979591837e-05, "loss": 1.4013, "step": 16807 }, { "epoch": 68.60408163265306, "grad_norm": 44.01459503173828, "learning_rate": 1.5722448979591837e-05, "loss": 0.9605, "step": 16808 }, { "epoch": 68.60816326530612, "grad_norm": 28.515384674072266, "learning_rate": 1.5720408163265307e-05, "loss": 1.9195, "step": 16809 }, { "epoch": 68.61224489795919, "grad_norm": 39.46370315551758, "learning_rate": 1.5718367346938774e-05, "loss": 0.6177, "step": 16810 }, { "epoch": 68.61632653061224, "grad_norm": 29.424217224121094, "learning_rate": 1.5716326530612245e-05, "loss": 0.4636, "step": 16811 }, { "epoch": 68.62040816326531, "grad_norm": 42.625328063964844, "learning_rate": 1.5714285714285715e-05, "loss": 0.5629, "step": 16812 }, { "epoch": 68.62448979591836, "grad_norm": 41.626277923583984, "learning_rate": 1.5712244897959186e-05, "loss": 1.0911, "step": 16813 }, { "epoch": 68.62857142857143, "grad_norm": 12.571871757507324, "learning_rate": 1.5710204081632653e-05, "loss": 0.0904, "step": 16814 }, { "epoch": 68.63265306122449, "grad_norm": 31.651866912841797, "learning_rate": 1.5708163265306123e-05, "loss": 1.0493, "step": 16815 }, { "epoch": 68.63673469387756, "grad_norm": 50.21076202392578, "learning_rate": 1.5706122448979594e-05, "loss": 0.6183, "step": 16816 }, { "epoch": 68.64081632653061, "grad_norm": 34.28121566772461, "learning_rate": 1.570408163265306e-05, "loss": 1.5775, "step": 16817 }, { "epoch": 68.64489795918368, "grad_norm": 29.927064895629883, "learning_rate": 1.570204081632653e-05, "loss": 1.925, "step": 16818 }, { "epoch": 68.64897959183673, "grad_norm": 31.89764404296875, "learning_rate": 1.5700000000000002e-05, "loss": 1.3622, "step": 16819 }, { "epoch": 68.65306122448979, "grad_norm": 22.11626434326172, "learning_rate": 1.569795918367347e-05, "loss": 2.1687, "step": 16820 }, { "epoch": 68.65714285714286, "grad_norm": 22.202234268188477, "learning_rate": 1.569591836734694e-05, "loss": 2.1871, "step": 16821 }, { "epoch": 68.66122448979591, "grad_norm": 28.717121124267578, "learning_rate": 1.5693877551020407e-05, "loss": 0.3004, "step": 16822 }, { "epoch": 68.66530612244898, "grad_norm": 18.382719039916992, "learning_rate": 1.569183673469388e-05, "loss": 2.3372, "step": 16823 }, { "epoch": 68.66938775510204, "grad_norm": 41.43412780761719, "learning_rate": 1.5689795918367348e-05, "loss": 0.7908, "step": 16824 }, { "epoch": 68.6734693877551, "grad_norm": 28.868547439575195, "learning_rate": 1.5687755102040815e-05, "loss": 0.3632, "step": 16825 }, { "epoch": 68.67755102040816, "grad_norm": 40.126285552978516, "learning_rate": 1.5685714285714286e-05, "loss": 0.1739, "step": 16826 }, { "epoch": 68.68163265306123, "grad_norm": 35.200199127197266, "learning_rate": 1.5683673469387756e-05, "loss": 1.5414, "step": 16827 }, { "epoch": 68.68571428571428, "grad_norm": 39.68636703491211, "learning_rate": 1.5681632653061227e-05, "loss": 1.0222, "step": 16828 }, { "epoch": 68.68979591836735, "grad_norm": 40.94073486328125, "learning_rate": 1.5679591836734694e-05, "loss": 1.1034, "step": 16829 }, { "epoch": 68.6938775510204, "grad_norm": 36.70625305175781, "learning_rate": 1.5677551020408164e-05, "loss": 0.5297, "step": 16830 }, { "epoch": 68.69795918367348, "grad_norm": 25.259401321411133, "learning_rate": 1.5675510204081635e-05, "loss": 1.9898, "step": 16831 }, { "epoch": 68.70204081632653, "grad_norm": 31.598827362060547, "learning_rate": 1.5673469387755102e-05, "loss": 1.4264, "step": 16832 }, { "epoch": 68.70612244897958, "grad_norm": 17.147098541259766, "learning_rate": 1.5671428571428572e-05, "loss": 2.6404, "step": 16833 }, { "epoch": 68.71020408163265, "grad_norm": 37.148536682128906, "learning_rate": 1.5669387755102043e-05, "loss": 1.1923, "step": 16834 }, { "epoch": 68.71428571428571, "grad_norm": 32.47890090942383, "learning_rate": 1.5667346938775513e-05, "loss": 1.2091, "step": 16835 }, { "epoch": 68.71836734693878, "grad_norm": 32.05794143676758, "learning_rate": 1.566530612244898e-05, "loss": 1.2733, "step": 16836 }, { "epoch": 68.72244897959183, "grad_norm": 31.903636932373047, "learning_rate": 1.5663265306122448e-05, "loss": 1.431, "step": 16837 }, { "epoch": 68.7265306122449, "grad_norm": 27.93972396850586, "learning_rate": 1.5661224489795918e-05, "loss": 1.7397, "step": 16838 }, { "epoch": 68.73061224489796, "grad_norm": 34.23174285888672, "learning_rate": 1.565918367346939e-05, "loss": 1.4567, "step": 16839 }, { "epoch": 68.73469387755102, "grad_norm": 24.444631576538086, "learning_rate": 1.565714285714286e-05, "loss": 0.1611, "step": 16840 }, { "epoch": 68.73877551020408, "grad_norm": 29.17037582397461, "learning_rate": 1.5655102040816326e-05, "loss": 1.5257, "step": 16841 }, { "epoch": 68.74285714285715, "grad_norm": 40.02454376220703, "learning_rate": 1.5653061224489797e-05, "loss": 1.0053, "step": 16842 }, { "epoch": 68.7469387755102, "grad_norm": 28.349124908447266, "learning_rate": 1.5651020408163267e-05, "loss": 0.9006, "step": 16843 }, { "epoch": 68.75102040816327, "grad_norm": 18.133296966552734, "learning_rate": 1.5648979591836734e-05, "loss": 0.1596, "step": 16844 }, { "epoch": 68.75510204081633, "grad_norm": 49.234188079833984, "learning_rate": 1.5646938775510205e-05, "loss": 1.2704, "step": 16845 }, { "epoch": 68.7591836734694, "grad_norm": 32.5816764831543, "learning_rate": 1.5644897959183676e-05, "loss": 1.4582, "step": 16846 }, { "epoch": 68.76326530612245, "grad_norm": 40.91721725463867, "learning_rate": 1.5642857142857143e-05, "loss": 1.3265, "step": 16847 }, { "epoch": 68.7673469387755, "grad_norm": 25.547266006469727, "learning_rate": 1.5640816326530613e-05, "loss": 2.2036, "step": 16848 }, { "epoch": 68.77142857142857, "grad_norm": 41.75456237792969, "learning_rate": 1.563877551020408e-05, "loss": 0.5897, "step": 16849 }, { "epoch": 68.77551020408163, "grad_norm": 38.52173614501953, "learning_rate": 1.5636734693877554e-05, "loss": 0.8687, "step": 16850 }, { "epoch": 68.7795918367347, "grad_norm": 28.877836227416992, "learning_rate": 1.563469387755102e-05, "loss": 1.6889, "step": 16851 }, { "epoch": 68.78367346938775, "grad_norm": 35.16359329223633, "learning_rate": 1.563265306122449e-05, "loss": 0.204, "step": 16852 }, { "epoch": 68.78775510204082, "grad_norm": 22.62456512451172, "learning_rate": 1.563061224489796e-05, "loss": 2.3721, "step": 16853 }, { "epoch": 68.79183673469387, "grad_norm": 44.9021110534668, "learning_rate": 1.562857142857143e-05, "loss": 0.5221, "step": 16854 }, { "epoch": 68.79591836734694, "grad_norm": 21.295063018798828, "learning_rate": 1.56265306122449e-05, "loss": 2.0129, "step": 16855 }, { "epoch": 68.8, "grad_norm": 32.56651306152344, "learning_rate": 1.5624489795918367e-05, "loss": 1.5852, "step": 16856 }, { "epoch": 68.80408163265307, "grad_norm": 38.056827545166016, "learning_rate": 1.5622448979591838e-05, "loss": 1.1097, "step": 16857 }, { "epoch": 68.80816326530612, "grad_norm": 32.499271392822266, "learning_rate": 1.5620408163265308e-05, "loss": 1.558, "step": 16858 }, { "epoch": 68.81224489795919, "grad_norm": 38.21442413330078, "learning_rate": 1.5618367346938775e-05, "loss": 0.414, "step": 16859 }, { "epoch": 68.81632653061224, "grad_norm": 31.2703914642334, "learning_rate": 1.5616326530612246e-05, "loss": 0.2477, "step": 16860 }, { "epoch": 68.82040816326531, "grad_norm": 19.156843185424805, "learning_rate": 1.5614285714285716e-05, "loss": 2.3543, "step": 16861 }, { "epoch": 68.82448979591837, "grad_norm": 30.89906120300293, "learning_rate": 1.5612244897959187e-05, "loss": 0.2336, "step": 16862 }, { "epoch": 68.82857142857142, "grad_norm": 31.104496002197266, "learning_rate": 1.5610204081632654e-05, "loss": 1.4549, "step": 16863 }, { "epoch": 68.83265306122449, "grad_norm": 46.243980407714844, "learning_rate": 1.560816326530612e-05, "loss": 0.6692, "step": 16864 }, { "epoch": 68.83673469387755, "grad_norm": 31.384244918823242, "learning_rate": 1.560612244897959e-05, "loss": 0.4688, "step": 16865 }, { "epoch": 68.84081632653061, "grad_norm": 37.11700439453125, "learning_rate": 1.5604081632653062e-05, "loss": 0.4632, "step": 16866 }, { "epoch": 68.84489795918367, "grad_norm": 39.562049865722656, "learning_rate": 1.5602040816326533e-05, "loss": 0.8373, "step": 16867 }, { "epoch": 68.84897959183674, "grad_norm": 42.357662200927734, "learning_rate": 1.56e-05, "loss": 1.0652, "step": 16868 }, { "epoch": 68.85306122448979, "grad_norm": 41.364830017089844, "learning_rate": 1.559795918367347e-05, "loss": 0.8914, "step": 16869 }, { "epoch": 68.85714285714286, "grad_norm": 46.38412094116211, "learning_rate": 1.559591836734694e-05, "loss": 0.7077, "step": 16870 }, { "epoch": 68.86122448979592, "grad_norm": 20.04840087890625, "learning_rate": 1.5593877551020408e-05, "loss": 1.916, "step": 16871 }, { "epoch": 68.86530612244898, "grad_norm": 31.376708984375, "learning_rate": 1.559183673469388e-05, "loss": 1.6861, "step": 16872 }, { "epoch": 68.86938775510204, "grad_norm": 23.392934799194336, "learning_rate": 1.558979591836735e-05, "loss": 2.2419, "step": 16873 }, { "epoch": 68.87346938775511, "grad_norm": 16.218063354492188, "learning_rate": 1.5587755102040816e-05, "loss": 2.3272, "step": 16874 }, { "epoch": 68.87755102040816, "grad_norm": 22.136016845703125, "learning_rate": 1.5585714285714287e-05, "loss": 1.8317, "step": 16875 }, { "epoch": 68.88163265306123, "grad_norm": 48.222267150878906, "learning_rate": 1.5583673469387754e-05, "loss": 0.5969, "step": 16876 }, { "epoch": 68.88571428571429, "grad_norm": 32.570308685302734, "learning_rate": 1.5581632653061228e-05, "loss": 0.5132, "step": 16877 }, { "epoch": 68.88979591836734, "grad_norm": 23.110074996948242, "learning_rate": 1.5579591836734695e-05, "loss": 1.8058, "step": 16878 }, { "epoch": 68.89387755102041, "grad_norm": 27.631120681762695, "learning_rate": 1.5577551020408165e-05, "loss": 1.8712, "step": 16879 }, { "epoch": 68.89795918367346, "grad_norm": 36.20842361450195, "learning_rate": 1.5575510204081632e-05, "loss": 1.7067, "step": 16880 }, { "epoch": 68.90204081632653, "grad_norm": 22.812528610229492, "learning_rate": 1.5573469387755103e-05, "loss": 1.7515, "step": 16881 }, { "epoch": 68.90612244897959, "grad_norm": 47.884273529052734, "learning_rate": 1.5571428571428573e-05, "loss": 0.6314, "step": 16882 }, { "epoch": 68.91020408163266, "grad_norm": 39.2562370300293, "learning_rate": 1.556938775510204e-05, "loss": 0.5956, "step": 16883 }, { "epoch": 68.91428571428571, "grad_norm": 24.525842666625977, "learning_rate": 1.556734693877551e-05, "loss": 2.2452, "step": 16884 }, { "epoch": 68.91836734693878, "grad_norm": 30.187049865722656, "learning_rate": 1.556530612244898e-05, "loss": 0.2508, "step": 16885 }, { "epoch": 68.92244897959183, "grad_norm": 35.599056243896484, "learning_rate": 1.556326530612245e-05, "loss": 0.6111, "step": 16886 }, { "epoch": 68.9265306122449, "grad_norm": 26.33669662475586, "learning_rate": 1.556122448979592e-05, "loss": 1.9241, "step": 16887 }, { "epoch": 68.93061224489796, "grad_norm": 22.99280548095703, "learning_rate": 1.555918367346939e-05, "loss": 1.7909, "step": 16888 }, { "epoch": 68.93469387755103, "grad_norm": 38.574344635009766, "learning_rate": 1.555714285714286e-05, "loss": 1.6464, "step": 16889 }, { "epoch": 68.93877551020408, "grad_norm": 40.421878814697266, "learning_rate": 1.5555102040816327e-05, "loss": 0.9476, "step": 16890 }, { "epoch": 68.94285714285714, "grad_norm": 33.10022735595703, "learning_rate": 1.5553061224489794e-05, "loss": 1.7198, "step": 16891 }, { "epoch": 68.9469387755102, "grad_norm": 40.7602424621582, "learning_rate": 1.5551020408163265e-05, "loss": 0.622, "step": 16892 }, { "epoch": 68.95102040816326, "grad_norm": 44.609352111816406, "learning_rate": 1.5548979591836735e-05, "loss": 0.7206, "step": 16893 }, { "epoch": 68.95510204081633, "grad_norm": 30.408586502075195, "learning_rate": 1.5546938775510206e-05, "loss": 1.5086, "step": 16894 }, { "epoch": 68.95918367346938, "grad_norm": 22.047420501708984, "learning_rate": 1.5544897959183673e-05, "loss": 0.2051, "step": 16895 }, { "epoch": 68.96326530612245, "grad_norm": 20.37716293334961, "learning_rate": 1.5542857142857144e-05, "loss": 2.3223, "step": 16896 }, { "epoch": 68.9673469387755, "grad_norm": 30.884639739990234, "learning_rate": 1.5540816326530614e-05, "loss": 1.7502, "step": 16897 }, { "epoch": 68.97142857142858, "grad_norm": 25.387741088867188, "learning_rate": 1.553877551020408e-05, "loss": 1.5004, "step": 16898 }, { "epoch": 68.97551020408163, "grad_norm": 30.940032958984375, "learning_rate": 1.5536734693877552e-05, "loss": 0.9698, "step": 16899 }, { "epoch": 68.9795918367347, "grad_norm": 29.12400245666504, "learning_rate": 1.5534693877551022e-05, "loss": 1.6626, "step": 16900 }, { "epoch": 68.98367346938775, "grad_norm": 30.580480575561523, "learning_rate": 1.553265306122449e-05, "loss": 1.9655, "step": 16901 }, { "epoch": 68.98775510204082, "grad_norm": 25.94437026977539, "learning_rate": 1.553061224489796e-05, "loss": 2.0973, "step": 16902 }, { "epoch": 68.99183673469388, "grad_norm": 31.34593391418457, "learning_rate": 1.5528571428571427e-05, "loss": 0.9904, "step": 16903 }, { "epoch": 68.99591836734695, "grad_norm": 50.76107406616211, "learning_rate": 1.55265306122449e-05, "loss": 1.104, "step": 16904 }, { "epoch": 69.0, "grad_norm": 33.42913055419922, "learning_rate": 1.5524489795918368e-05, "loss": 1.1746, "step": 16905 }, { "epoch": 69.00408163265305, "grad_norm": 34.74681091308594, "learning_rate": 1.552244897959184e-05, "loss": 0.8741, "step": 16906 }, { "epoch": 69.00816326530612, "grad_norm": 36.00743865966797, "learning_rate": 1.5520408163265306e-05, "loss": 0.3671, "step": 16907 }, { "epoch": 69.01224489795918, "grad_norm": 19.32042694091797, "learning_rate": 1.5518367346938776e-05, "loss": 2.187, "step": 16908 }, { "epoch": 69.01632653061225, "grad_norm": 21.26051139831543, "learning_rate": 1.5516326530612247e-05, "loss": 1.6939, "step": 16909 }, { "epoch": 69.0204081632653, "grad_norm": 28.18136215209961, "learning_rate": 1.5514285714285714e-05, "loss": 0.8416, "step": 16910 }, { "epoch": 69.02448979591837, "grad_norm": 32.59894561767578, "learning_rate": 1.5512244897959184e-05, "loss": 1.28, "step": 16911 }, { "epoch": 69.02857142857142, "grad_norm": 25.152891159057617, "learning_rate": 1.5510204081632655e-05, "loss": 0.4803, "step": 16912 }, { "epoch": 69.0326530612245, "grad_norm": 28.98687171936035, "learning_rate": 1.5508163265306122e-05, "loss": 1.3229, "step": 16913 }, { "epoch": 69.03673469387755, "grad_norm": 23.654821395874023, "learning_rate": 1.5506122448979593e-05, "loss": 1.7706, "step": 16914 }, { "epoch": 69.04081632653062, "grad_norm": 40.97252655029297, "learning_rate": 1.5504081632653063e-05, "loss": 0.9803, "step": 16915 }, { "epoch": 69.04489795918367, "grad_norm": 33.591434478759766, "learning_rate": 1.5502040816326534e-05, "loss": 0.7731, "step": 16916 }, { "epoch": 69.04897959183674, "grad_norm": 28.083724975585938, "learning_rate": 1.55e-05, "loss": 0.3954, "step": 16917 }, { "epoch": 69.0530612244898, "grad_norm": 25.9542293548584, "learning_rate": 1.5497959183673468e-05, "loss": 1.7091, "step": 16918 }, { "epoch": 69.05714285714286, "grad_norm": 25.453187942504883, "learning_rate": 1.549591836734694e-05, "loss": 0.1796, "step": 16919 }, { "epoch": 69.06122448979592, "grad_norm": 32.3824348449707, "learning_rate": 1.549387755102041e-05, "loss": 0.2393, "step": 16920 }, { "epoch": 69.06530612244897, "grad_norm": 32.11861801147461, "learning_rate": 1.549183673469388e-05, "loss": 1.1003, "step": 16921 }, { "epoch": 69.06938775510204, "grad_norm": 23.161352157592773, "learning_rate": 1.5489795918367346e-05, "loss": 2.1357, "step": 16922 }, { "epoch": 69.0734693877551, "grad_norm": 28.99536895751953, "learning_rate": 1.5487755102040817e-05, "loss": 0.5418, "step": 16923 }, { "epoch": 69.07755102040817, "grad_norm": 25.25592803955078, "learning_rate": 1.5485714285714287e-05, "loss": 2.1284, "step": 16924 }, { "epoch": 69.08163265306122, "grad_norm": 37.43533706665039, "learning_rate": 1.5483673469387755e-05, "loss": 1.2436, "step": 16925 }, { "epoch": 69.08571428571429, "grad_norm": 31.077735900878906, "learning_rate": 1.5481632653061225e-05, "loss": 1.2082, "step": 16926 }, { "epoch": 69.08979591836734, "grad_norm": 49.182716369628906, "learning_rate": 1.5479591836734696e-05, "loss": 0.9499, "step": 16927 }, { "epoch": 69.09387755102041, "grad_norm": 39.52920913696289, "learning_rate": 1.5477551020408166e-05, "loss": 1.4191, "step": 16928 }, { "epoch": 69.09795918367347, "grad_norm": 28.62662124633789, "learning_rate": 1.5475510204081633e-05, "loss": 0.3602, "step": 16929 }, { "epoch": 69.10204081632654, "grad_norm": 28.57332992553711, "learning_rate": 1.54734693877551e-05, "loss": 0.3996, "step": 16930 }, { "epoch": 69.10612244897959, "grad_norm": 35.98414993286133, "learning_rate": 1.5471428571428574e-05, "loss": 0.9226, "step": 16931 }, { "epoch": 69.11020408163266, "grad_norm": 24.759695053100586, "learning_rate": 1.546938775510204e-05, "loss": 2.101, "step": 16932 }, { "epoch": 69.11428571428571, "grad_norm": 42.46974563598633, "learning_rate": 1.5467346938775512e-05, "loss": 0.7903, "step": 16933 }, { "epoch": 69.11836734693877, "grad_norm": 39.01450729370117, "learning_rate": 1.546530612244898e-05, "loss": 0.5451, "step": 16934 }, { "epoch": 69.12244897959184, "grad_norm": 26.349040985107422, "learning_rate": 1.546326530612245e-05, "loss": 1.8114, "step": 16935 }, { "epoch": 69.12653061224489, "grad_norm": 23.35406494140625, "learning_rate": 1.546122448979592e-05, "loss": 1.632, "step": 16936 }, { "epoch": 69.13061224489796, "grad_norm": 32.04746627807617, "learning_rate": 1.5459183673469387e-05, "loss": 1.6139, "step": 16937 }, { "epoch": 69.13469387755102, "grad_norm": 42.975608825683594, "learning_rate": 1.5457142857142858e-05, "loss": 1.0192, "step": 16938 }, { "epoch": 69.13877551020408, "grad_norm": 37.233306884765625, "learning_rate": 1.5455102040816328e-05, "loss": 0.8524, "step": 16939 }, { "epoch": 69.14285714285714, "grad_norm": 38.02800750732422, "learning_rate": 1.5453061224489795e-05, "loss": 1.4521, "step": 16940 }, { "epoch": 69.14693877551021, "grad_norm": 51.54077911376953, "learning_rate": 1.5451020408163266e-05, "loss": 0.215, "step": 16941 }, { "epoch": 69.15102040816326, "grad_norm": 39.85616683959961, "learning_rate": 1.5448979591836733e-05, "loss": 1.0259, "step": 16942 }, { "epoch": 69.15510204081633, "grad_norm": 36.82011795043945, "learning_rate": 1.5446938775510207e-05, "loss": 0.7373, "step": 16943 }, { "epoch": 69.15918367346939, "grad_norm": 41.019752502441406, "learning_rate": 1.5444897959183674e-05, "loss": 1.4294, "step": 16944 }, { "epoch": 69.16326530612245, "grad_norm": 19.74885368347168, "learning_rate": 1.544285714285714e-05, "loss": 2.3768, "step": 16945 }, { "epoch": 69.16734693877551, "grad_norm": 30.917516708374023, "learning_rate": 1.544081632653061e-05, "loss": 1.486, "step": 16946 }, { "epoch": 69.17142857142858, "grad_norm": 30.30552101135254, "learning_rate": 1.5438775510204082e-05, "loss": 1.6276, "step": 16947 }, { "epoch": 69.17551020408163, "grad_norm": 35.014793395996094, "learning_rate": 1.5436734693877553e-05, "loss": 1.2529, "step": 16948 }, { "epoch": 69.17959183673469, "grad_norm": 35.5482063293457, "learning_rate": 1.543469387755102e-05, "loss": 1.3811, "step": 16949 }, { "epoch": 69.18367346938776, "grad_norm": 27.664793014526367, "learning_rate": 1.543265306122449e-05, "loss": 1.4271, "step": 16950 }, { "epoch": 69.18775510204081, "grad_norm": 38.73875045776367, "learning_rate": 1.543061224489796e-05, "loss": 0.5644, "step": 16951 }, { "epoch": 69.19183673469388, "grad_norm": 18.11257553100586, "learning_rate": 1.5428571428571428e-05, "loss": 0.1538, "step": 16952 }, { "epoch": 69.19591836734693, "grad_norm": 47.82450485229492, "learning_rate": 1.54265306122449e-05, "loss": 1.343, "step": 16953 }, { "epoch": 69.2, "grad_norm": 26.72228240966797, "learning_rate": 1.542448979591837e-05, "loss": 1.5245, "step": 16954 }, { "epoch": 69.20408163265306, "grad_norm": 30.2856388092041, "learning_rate": 1.542244897959184e-05, "loss": 1.4554, "step": 16955 }, { "epoch": 69.20816326530613, "grad_norm": 20.253114700317383, "learning_rate": 1.5420408163265307e-05, "loss": 2.1337, "step": 16956 }, { "epoch": 69.21224489795918, "grad_norm": 43.4993782043457, "learning_rate": 1.5418367346938774e-05, "loss": 1.1754, "step": 16957 }, { "epoch": 69.21632653061225, "grad_norm": 35.25931930541992, "learning_rate": 1.5416326530612248e-05, "loss": 0.8894, "step": 16958 }, { "epoch": 69.2204081632653, "grad_norm": 27.74984359741211, "learning_rate": 1.5414285714285715e-05, "loss": 1.445, "step": 16959 }, { "epoch": 69.22448979591837, "grad_norm": 26.659791946411133, "learning_rate": 1.5412244897959185e-05, "loss": 2.1464, "step": 16960 }, { "epoch": 69.22857142857143, "grad_norm": 20.871753692626953, "learning_rate": 1.5410204081632652e-05, "loss": 2.2055, "step": 16961 }, { "epoch": 69.2326530612245, "grad_norm": 30.6510009765625, "learning_rate": 1.5408163265306123e-05, "loss": 1.4557, "step": 16962 }, { "epoch": 69.23673469387755, "grad_norm": 11.351922988891602, "learning_rate": 1.5406122448979593e-05, "loss": 0.0802, "step": 16963 }, { "epoch": 69.2408163265306, "grad_norm": 29.7138671875, "learning_rate": 1.540408163265306e-05, "loss": 1.3106, "step": 16964 }, { "epoch": 69.24489795918367, "grad_norm": 27.884305953979492, "learning_rate": 1.540204081632653e-05, "loss": 1.8416, "step": 16965 }, { "epoch": 69.24897959183673, "grad_norm": 26.152280807495117, "learning_rate": 1.54e-05, "loss": 1.9164, "step": 16966 }, { "epoch": 69.2530612244898, "grad_norm": 34.624046325683594, "learning_rate": 1.539795918367347e-05, "loss": 1.2698, "step": 16967 }, { "epoch": 69.25714285714285, "grad_norm": 28.130603790283203, "learning_rate": 1.539591836734694e-05, "loss": 1.0614, "step": 16968 }, { "epoch": 69.26122448979592, "grad_norm": 32.69374465942383, "learning_rate": 1.5393877551020406e-05, "loss": 0.1787, "step": 16969 }, { "epoch": 69.26530612244898, "grad_norm": 33.975059509277344, "learning_rate": 1.539183673469388e-05, "loss": 0.8873, "step": 16970 }, { "epoch": 69.26938775510204, "grad_norm": 51.853797912597656, "learning_rate": 1.5389795918367347e-05, "loss": 0.5123, "step": 16971 }, { "epoch": 69.2734693877551, "grad_norm": 25.130603790283203, "learning_rate": 1.5387755102040818e-05, "loss": 1.6687, "step": 16972 }, { "epoch": 69.27755102040817, "grad_norm": 33.73101806640625, "learning_rate": 1.5385714285714285e-05, "loss": 1.0914, "step": 16973 }, { "epoch": 69.28163265306122, "grad_norm": 32.23471450805664, "learning_rate": 1.5383673469387756e-05, "loss": 0.2117, "step": 16974 }, { "epoch": 69.28571428571429, "grad_norm": 30.408145904541016, "learning_rate": 1.5381632653061226e-05, "loss": 1.6072, "step": 16975 }, { "epoch": 69.28979591836735, "grad_norm": 28.715232849121094, "learning_rate": 1.5379591836734693e-05, "loss": 1.4256, "step": 16976 }, { "epoch": 69.29387755102042, "grad_norm": 24.849885940551758, "learning_rate": 1.5377551020408164e-05, "loss": 1.6588, "step": 16977 }, { "epoch": 69.29795918367347, "grad_norm": 38.601993560791016, "learning_rate": 1.5375510204081634e-05, "loss": 0.522, "step": 16978 }, { "epoch": 69.30204081632652, "grad_norm": 22.131450653076172, "learning_rate": 1.53734693877551e-05, "loss": 1.741, "step": 16979 }, { "epoch": 69.3061224489796, "grad_norm": 24.631698608398438, "learning_rate": 1.5371428571428572e-05, "loss": 0.1618, "step": 16980 }, { "epoch": 69.31020408163265, "grad_norm": 40.929786682128906, "learning_rate": 1.5369387755102042e-05, "loss": 0.801, "step": 16981 }, { "epoch": 69.31428571428572, "grad_norm": 22.470304489135742, "learning_rate": 1.5367346938775513e-05, "loss": 1.9064, "step": 16982 }, { "epoch": 69.31836734693877, "grad_norm": 52.41322326660156, "learning_rate": 1.536530612244898e-05, "loss": 0.6673, "step": 16983 }, { "epoch": 69.32244897959184, "grad_norm": 30.917879104614258, "learning_rate": 1.5363265306122447e-05, "loss": 1.6208, "step": 16984 }, { "epoch": 69.3265306122449, "grad_norm": 27.786258697509766, "learning_rate": 1.536122448979592e-05, "loss": 1.85, "step": 16985 }, { "epoch": 69.33061224489796, "grad_norm": 26.772174835205078, "learning_rate": 1.5359183673469388e-05, "loss": 1.8844, "step": 16986 }, { "epoch": 69.33469387755102, "grad_norm": 32.240055084228516, "learning_rate": 1.535714285714286e-05, "loss": 0.4728, "step": 16987 }, { "epoch": 69.33877551020409, "grad_norm": 33.04594039916992, "learning_rate": 1.5355102040816326e-05, "loss": 0.9928, "step": 16988 }, { "epoch": 69.34285714285714, "grad_norm": 21.345277786254883, "learning_rate": 1.5353061224489796e-05, "loss": 1.8502, "step": 16989 }, { "epoch": 69.34693877551021, "grad_norm": 26.95929718017578, "learning_rate": 1.5351020408163267e-05, "loss": 1.6315, "step": 16990 }, { "epoch": 69.35102040816327, "grad_norm": 33.20344161987305, "learning_rate": 1.5348979591836734e-05, "loss": 1.2294, "step": 16991 }, { "epoch": 69.35510204081632, "grad_norm": 36.74123764038086, "learning_rate": 1.5346938775510204e-05, "loss": 1.2096, "step": 16992 }, { "epoch": 69.35918367346939, "grad_norm": 27.73166847229004, "learning_rate": 1.5344897959183675e-05, "loss": 1.555, "step": 16993 }, { "epoch": 69.36326530612244, "grad_norm": 29.256221771240234, "learning_rate": 1.5342857142857146e-05, "loss": 1.7561, "step": 16994 }, { "epoch": 69.36734693877551, "grad_norm": 17.240222930908203, "learning_rate": 1.5340816326530613e-05, "loss": 2.7786, "step": 16995 }, { "epoch": 69.37142857142857, "grad_norm": 46.762062072753906, "learning_rate": 1.533877551020408e-05, "loss": 1.0632, "step": 16996 }, { "epoch": 69.37551020408164, "grad_norm": 27.861974716186523, "learning_rate": 1.5336734693877554e-05, "loss": 2.0294, "step": 16997 }, { "epoch": 69.37959183673469, "grad_norm": 36.68195724487305, "learning_rate": 1.533469387755102e-05, "loss": 1.2724, "step": 16998 }, { "epoch": 69.38367346938776, "grad_norm": 18.79231071472168, "learning_rate": 1.533265306122449e-05, "loss": 2.605, "step": 16999 }, { "epoch": 69.38775510204081, "grad_norm": 31.02102279663086, "learning_rate": 1.533061224489796e-05, "loss": 1.9592, "step": 17000 }, { "epoch": 69.39183673469388, "grad_norm": 37.94318389892578, "learning_rate": 1.532857142857143e-05, "loss": 1.6672, "step": 17001 }, { "epoch": 69.39591836734694, "grad_norm": 32.27475357055664, "learning_rate": 1.53265306122449e-05, "loss": 1.0268, "step": 17002 }, { "epoch": 69.4, "grad_norm": 34.661766052246094, "learning_rate": 1.5324489795918367e-05, "loss": 0.6463, "step": 17003 }, { "epoch": 69.40408163265306, "grad_norm": 20.628177642822266, "learning_rate": 1.5322448979591837e-05, "loss": 1.5397, "step": 17004 }, { "epoch": 69.40816326530613, "grad_norm": 20.180965423583984, "learning_rate": 1.5320408163265308e-05, "loss": 1.8729, "step": 17005 }, { "epoch": 69.41224489795918, "grad_norm": 31.46177101135254, "learning_rate": 1.5318367346938775e-05, "loss": 0.3504, "step": 17006 }, { "epoch": 69.41632653061224, "grad_norm": 21.552824020385742, "learning_rate": 1.5316326530612245e-05, "loss": 1.7343, "step": 17007 }, { "epoch": 69.42040816326531, "grad_norm": 30.86651039123535, "learning_rate": 1.5314285714285716e-05, "loss": 1.2726, "step": 17008 }, { "epoch": 69.42448979591836, "grad_norm": 34.4414176940918, "learning_rate": 1.5312244897959186e-05, "loss": 1.0557, "step": 17009 }, { "epoch": 69.42857142857143, "grad_norm": 42.40556335449219, "learning_rate": 1.5310204081632653e-05, "loss": 0.7609, "step": 17010 }, { "epoch": 69.43265306122449, "grad_norm": 31.70318603515625, "learning_rate": 1.530816326530612e-05, "loss": 1.533, "step": 17011 }, { "epoch": 69.43673469387755, "grad_norm": 28.039560317993164, "learning_rate": 1.5306122448979594e-05, "loss": 1.7445, "step": 17012 }, { "epoch": 69.44081632653061, "grad_norm": 30.58522605895996, "learning_rate": 1.530408163265306e-05, "loss": 0.6881, "step": 17013 }, { "epoch": 69.44489795918368, "grad_norm": 44.78434371948242, "learning_rate": 1.5302040816326532e-05, "loss": 0.4359, "step": 17014 }, { "epoch": 69.44897959183673, "grad_norm": 29.16668701171875, "learning_rate": 1.53e-05, "loss": 1.4495, "step": 17015 }, { "epoch": 69.4530612244898, "grad_norm": 35.451560974121094, "learning_rate": 1.5297959183673473e-05, "loss": 1.738, "step": 17016 }, { "epoch": 69.45714285714286, "grad_norm": 15.076822280883789, "learning_rate": 1.529591836734694e-05, "loss": 2.4889, "step": 17017 }, { "epoch": 69.46122448979592, "grad_norm": 37.5242919921875, "learning_rate": 1.5293877551020407e-05, "loss": 1.0775, "step": 17018 }, { "epoch": 69.46530612244898, "grad_norm": 38.612037658691406, "learning_rate": 1.5291836734693878e-05, "loss": 0.8304, "step": 17019 }, { "epoch": 69.46938775510205, "grad_norm": 36.80158996582031, "learning_rate": 1.528979591836735e-05, "loss": 1.579, "step": 17020 }, { "epoch": 69.4734693877551, "grad_norm": 29.0889835357666, "learning_rate": 1.528775510204082e-05, "loss": 0.6156, "step": 17021 }, { "epoch": 69.47755102040816, "grad_norm": 27.77280044555664, "learning_rate": 1.5285714285714286e-05, "loss": 1.6088, "step": 17022 }, { "epoch": 69.48163265306123, "grad_norm": 24.567644119262695, "learning_rate": 1.5283673469387753e-05, "loss": 2.3243, "step": 17023 }, { "epoch": 69.48571428571428, "grad_norm": 30.86904525756836, "learning_rate": 1.5281632653061227e-05, "loss": 1.664, "step": 17024 }, { "epoch": 69.48979591836735, "grad_norm": 25.021045684814453, "learning_rate": 1.5279591836734694e-05, "loss": 1.7099, "step": 17025 }, { "epoch": 69.4938775510204, "grad_norm": 25.287099838256836, "learning_rate": 1.5277551020408165e-05, "loss": 1.5473, "step": 17026 }, { "epoch": 69.49795918367347, "grad_norm": 42.581539154052734, "learning_rate": 1.5275510204081632e-05, "loss": 0.4636, "step": 17027 }, { "epoch": 69.50204081632653, "grad_norm": 32.15688705444336, "learning_rate": 1.5273469387755102e-05, "loss": 1.5427, "step": 17028 }, { "epoch": 69.5061224489796, "grad_norm": 34.56651306152344, "learning_rate": 1.5271428571428573e-05, "loss": 0.5313, "step": 17029 }, { "epoch": 69.51020408163265, "grad_norm": 35.417015075683594, "learning_rate": 1.526938775510204e-05, "loss": 1.3967, "step": 17030 }, { "epoch": 69.51428571428572, "grad_norm": 39.90692138671875, "learning_rate": 1.526734693877551e-05, "loss": 0.5584, "step": 17031 }, { "epoch": 69.51836734693877, "grad_norm": 18.20357894897461, "learning_rate": 1.526530612244898e-05, "loss": 2.2721, "step": 17032 }, { "epoch": 69.52244897959184, "grad_norm": 23.79338264465332, "learning_rate": 1.5263265306122448e-05, "loss": 1.8641, "step": 17033 }, { "epoch": 69.5265306122449, "grad_norm": 38.24806213378906, "learning_rate": 1.526122448979592e-05, "loss": 0.8508, "step": 17034 }, { "epoch": 69.53061224489795, "grad_norm": 36.34651184082031, "learning_rate": 1.525918367346939e-05, "loss": 0.4915, "step": 17035 }, { "epoch": 69.53469387755102, "grad_norm": 48.233272552490234, "learning_rate": 1.5257142857142858e-05, "loss": 0.21, "step": 17036 }, { "epoch": 69.53877551020408, "grad_norm": 37.00489044189453, "learning_rate": 1.5255102040816327e-05, "loss": 0.7111, "step": 17037 }, { "epoch": 69.54285714285714, "grad_norm": 36.63606262207031, "learning_rate": 1.5253061224489797e-05, "loss": 0.5632, "step": 17038 }, { "epoch": 69.5469387755102, "grad_norm": 24.106245040893555, "learning_rate": 1.5251020408163266e-05, "loss": 1.8733, "step": 17039 }, { "epoch": 69.55102040816327, "grad_norm": 40.67109680175781, "learning_rate": 1.5248979591836735e-05, "loss": 0.522, "step": 17040 }, { "epoch": 69.55510204081632, "grad_norm": 24.161176681518555, "learning_rate": 1.5246938775510205e-05, "loss": 2.2037, "step": 17041 }, { "epoch": 69.55918367346939, "grad_norm": 20.811140060424805, "learning_rate": 1.5244897959183674e-05, "loss": 2.2221, "step": 17042 }, { "epoch": 69.56326530612245, "grad_norm": 23.85619354248047, "learning_rate": 1.5242857142857145e-05, "loss": 1.7077, "step": 17043 }, { "epoch": 69.56734693877551, "grad_norm": 36.67019271850586, "learning_rate": 1.5240816326530614e-05, "loss": 1.4796, "step": 17044 }, { "epoch": 69.57142857142857, "grad_norm": 51.888343811035156, "learning_rate": 1.523877551020408e-05, "loss": 1.2901, "step": 17045 }, { "epoch": 69.57551020408164, "grad_norm": 40.9709587097168, "learning_rate": 1.5236734693877553e-05, "loss": 0.6393, "step": 17046 }, { "epoch": 69.57959183673469, "grad_norm": 39.897464752197266, "learning_rate": 1.523469387755102e-05, "loss": 0.6202, "step": 17047 }, { "epoch": 69.58367346938776, "grad_norm": 41.88505935668945, "learning_rate": 1.5232653061224492e-05, "loss": 0.8392, "step": 17048 }, { "epoch": 69.58775510204082, "grad_norm": 34.539608001708984, "learning_rate": 1.523061224489796e-05, "loss": 1.1297, "step": 17049 }, { "epoch": 69.59183673469387, "grad_norm": 26.48904800415039, "learning_rate": 1.5228571428571428e-05, "loss": 1.7864, "step": 17050 }, { "epoch": 69.59591836734694, "grad_norm": 34.38051223754883, "learning_rate": 1.5226530612244899e-05, "loss": 0.5301, "step": 17051 }, { "epoch": 69.6, "grad_norm": 32.65380859375, "learning_rate": 1.5224489795918368e-05, "loss": 1.7375, "step": 17052 }, { "epoch": 69.60408163265306, "grad_norm": 37.40161895751953, "learning_rate": 1.5222448979591838e-05, "loss": 1.1937, "step": 17053 }, { "epoch": 69.60816326530612, "grad_norm": 48.425655364990234, "learning_rate": 1.5220408163265307e-05, "loss": 0.9761, "step": 17054 }, { "epoch": 69.61224489795919, "grad_norm": 42.93960952758789, "learning_rate": 1.5218367346938776e-05, "loss": 0.6513, "step": 17055 }, { "epoch": 69.61632653061224, "grad_norm": 32.0814323425293, "learning_rate": 1.5216326530612246e-05, "loss": 0.9088, "step": 17056 }, { "epoch": 69.62040816326531, "grad_norm": 29.416744232177734, "learning_rate": 1.5214285714285715e-05, "loss": 1.0254, "step": 17057 }, { "epoch": 69.62448979591836, "grad_norm": 35.473655700683594, "learning_rate": 1.5212244897959186e-05, "loss": 0.9478, "step": 17058 }, { "epoch": 69.62857142857143, "grad_norm": 30.444307327270508, "learning_rate": 1.5210204081632654e-05, "loss": 0.499, "step": 17059 }, { "epoch": 69.63265306122449, "grad_norm": 42.83610916137695, "learning_rate": 1.5208163265306125e-05, "loss": 0.3665, "step": 17060 }, { "epoch": 69.63673469387756, "grad_norm": 33.433013916015625, "learning_rate": 1.5206122448979592e-05, "loss": 0.9479, "step": 17061 }, { "epoch": 69.64081632653061, "grad_norm": 26.95926856994629, "learning_rate": 1.520408163265306e-05, "loss": 0.4256, "step": 17062 }, { "epoch": 69.64489795918368, "grad_norm": 34.73473358154297, "learning_rate": 1.5202040816326531e-05, "loss": 0.8081, "step": 17063 }, { "epoch": 69.64897959183673, "grad_norm": 40.798095703125, "learning_rate": 1.52e-05, "loss": 0.5493, "step": 17064 }, { "epoch": 69.65306122448979, "grad_norm": 31.057737350463867, "learning_rate": 1.519795918367347e-05, "loss": 1.5212, "step": 17065 }, { "epoch": 69.65714285714286, "grad_norm": 41.60005187988281, "learning_rate": 1.519591836734694e-05, "loss": 1.0703, "step": 17066 }, { "epoch": 69.66122448979591, "grad_norm": 30.922054290771484, "learning_rate": 1.5193877551020408e-05, "loss": 1.5351, "step": 17067 }, { "epoch": 69.66530612244898, "grad_norm": 37.42589569091797, "learning_rate": 1.5191836734693879e-05, "loss": 0.4262, "step": 17068 }, { "epoch": 69.66938775510204, "grad_norm": 38.68827438354492, "learning_rate": 1.5189795918367348e-05, "loss": 0.8549, "step": 17069 }, { "epoch": 69.6734693877551, "grad_norm": 53.039066314697266, "learning_rate": 1.5187755102040818e-05, "loss": 0.7594, "step": 17070 }, { "epoch": 69.67755102040816, "grad_norm": 19.3259220123291, "learning_rate": 1.5185714285714287e-05, "loss": 2.3125, "step": 17071 }, { "epoch": 69.68163265306123, "grad_norm": 26.15662384033203, "learning_rate": 1.5183673469387754e-05, "loss": 0.1699, "step": 17072 }, { "epoch": 69.68571428571428, "grad_norm": 27.082992553710938, "learning_rate": 1.5181632653061226e-05, "loss": 1.8244, "step": 17073 }, { "epoch": 69.68979591836735, "grad_norm": 40.003639221191406, "learning_rate": 1.5179591836734693e-05, "loss": 1.2167, "step": 17074 }, { "epoch": 69.6938775510204, "grad_norm": 40.762454986572266, "learning_rate": 1.5177551020408166e-05, "loss": 0.2668, "step": 17075 }, { "epoch": 69.69795918367348, "grad_norm": 37.03373336791992, "learning_rate": 1.5175510204081633e-05, "loss": 1.1077, "step": 17076 }, { "epoch": 69.70204081632653, "grad_norm": 40.44328308105469, "learning_rate": 1.5173469387755102e-05, "loss": 0.5073, "step": 17077 }, { "epoch": 69.70612244897958, "grad_norm": 32.53894805908203, "learning_rate": 1.5171428571428572e-05, "loss": 1.4109, "step": 17078 }, { "epoch": 69.71020408163265, "grad_norm": 40.377777099609375, "learning_rate": 1.5169387755102041e-05, "loss": 1.2945, "step": 17079 }, { "epoch": 69.71428571428571, "grad_norm": 46.26609802246094, "learning_rate": 1.5167346938775511e-05, "loss": 0.4966, "step": 17080 }, { "epoch": 69.71836734693878, "grad_norm": 38.41294860839844, "learning_rate": 1.516530612244898e-05, "loss": 0.458, "step": 17081 }, { "epoch": 69.72244897959183, "grad_norm": 22.17340660095215, "learning_rate": 1.5163265306122449e-05, "loss": 2.3655, "step": 17082 }, { "epoch": 69.7265306122449, "grad_norm": 33.273712158203125, "learning_rate": 1.516122448979592e-05, "loss": 1.0105, "step": 17083 }, { "epoch": 69.73061224489796, "grad_norm": 33.184608459472656, "learning_rate": 1.5159183673469388e-05, "loss": 0.5088, "step": 17084 }, { "epoch": 69.73469387755102, "grad_norm": 46.1157112121582, "learning_rate": 1.5157142857142859e-05, "loss": 0.6301, "step": 17085 }, { "epoch": 69.73877551020408, "grad_norm": 33.66093826293945, "learning_rate": 1.5155102040816328e-05, "loss": 1.7137, "step": 17086 }, { "epoch": 69.74285714285715, "grad_norm": 34.420475006103516, "learning_rate": 1.5153061224489798e-05, "loss": 1.332, "step": 17087 }, { "epoch": 69.7469387755102, "grad_norm": 42.38199234008789, "learning_rate": 1.5151020408163265e-05, "loss": 1.0787, "step": 17088 }, { "epoch": 69.75102040816327, "grad_norm": 31.5273380279541, "learning_rate": 1.5148979591836734e-05, "loss": 1.0329, "step": 17089 }, { "epoch": 69.75510204081633, "grad_norm": 23.11697006225586, "learning_rate": 1.5146938775510205e-05, "loss": 2.5267, "step": 17090 }, { "epoch": 69.7591836734694, "grad_norm": 41.16091537475586, "learning_rate": 1.5144897959183674e-05, "loss": 0.8558, "step": 17091 }, { "epoch": 69.76326530612245, "grad_norm": 40.49562072753906, "learning_rate": 1.5142857142857144e-05, "loss": 1.2261, "step": 17092 }, { "epoch": 69.7673469387755, "grad_norm": 26.205718994140625, "learning_rate": 1.5140816326530613e-05, "loss": 1.7644, "step": 17093 }, { "epoch": 69.77142857142857, "grad_norm": 2.7264466285705566, "learning_rate": 1.5138775510204082e-05, "loss": 0.0388, "step": 17094 }, { "epoch": 69.77551020408163, "grad_norm": 38.231143951416016, "learning_rate": 1.5136734693877552e-05, "loss": 1.0663, "step": 17095 }, { "epoch": 69.7795918367347, "grad_norm": 32.41590118408203, "learning_rate": 1.5134693877551021e-05, "loss": 0.5969, "step": 17096 }, { "epoch": 69.78367346938775, "grad_norm": 24.077817916870117, "learning_rate": 1.5132653061224492e-05, "loss": 2.0095, "step": 17097 }, { "epoch": 69.78775510204082, "grad_norm": 56.91233444213867, "learning_rate": 1.513061224489796e-05, "loss": 1.5916, "step": 17098 }, { "epoch": 69.79183673469387, "grad_norm": 26.108972549438477, "learning_rate": 1.5128571428571427e-05, "loss": 0.2604, "step": 17099 }, { "epoch": 69.79591836734694, "grad_norm": 31.1258487701416, "learning_rate": 1.51265306122449e-05, "loss": 1.6445, "step": 17100 }, { "epoch": 69.8, "grad_norm": 30.410573959350586, "learning_rate": 1.5124489795918367e-05, "loss": 1.0983, "step": 17101 }, { "epoch": 69.80408163265307, "grad_norm": 39.41795349121094, "learning_rate": 1.5122448979591839e-05, "loss": 1.4764, "step": 17102 }, { "epoch": 69.80816326530612, "grad_norm": 39.45162582397461, "learning_rate": 1.5120408163265306e-05, "loss": 0.4112, "step": 17103 }, { "epoch": 69.81224489795919, "grad_norm": 18.98820686340332, "learning_rate": 1.5118367346938775e-05, "loss": 2.295, "step": 17104 }, { "epoch": 69.81632653061224, "grad_norm": 25.79557228088379, "learning_rate": 1.5116326530612245e-05, "loss": 1.7402, "step": 17105 }, { "epoch": 69.82040816326531, "grad_norm": 39.26114273071289, "learning_rate": 1.5114285714285714e-05, "loss": 1.3631, "step": 17106 }, { "epoch": 69.82448979591837, "grad_norm": 24.66720199584961, "learning_rate": 1.5112244897959185e-05, "loss": 1.3607, "step": 17107 }, { "epoch": 69.82857142857142, "grad_norm": 40.72393035888672, "learning_rate": 1.5110204081632654e-05, "loss": 0.5785, "step": 17108 }, { "epoch": 69.83265306122449, "grad_norm": 28.335018157958984, "learning_rate": 1.5108163265306124e-05, "loss": 1.7321, "step": 17109 }, { "epoch": 69.83673469387755, "grad_norm": 29.94685935974121, "learning_rate": 1.5106122448979593e-05, "loss": 0.8451, "step": 17110 }, { "epoch": 69.84081632653061, "grad_norm": 40.01189041137695, "learning_rate": 1.5104081632653062e-05, "loss": 0.2665, "step": 17111 }, { "epoch": 69.84489795918367, "grad_norm": 29.208105087280273, "learning_rate": 1.5102040816326532e-05, "loss": 0.2703, "step": 17112 }, { "epoch": 69.84897959183674, "grad_norm": 38.45145797729492, "learning_rate": 1.51e-05, "loss": 0.9118, "step": 17113 }, { "epoch": 69.85306122448979, "grad_norm": 20.509464263916016, "learning_rate": 1.5097959183673472e-05, "loss": 0.1643, "step": 17114 }, { "epoch": 69.85714285714286, "grad_norm": 37.69308090209961, "learning_rate": 1.5095918367346939e-05, "loss": 0.9333, "step": 17115 }, { "epoch": 69.86122448979592, "grad_norm": 26.29144859313965, "learning_rate": 1.5093877551020408e-05, "loss": 0.1831, "step": 17116 }, { "epoch": 69.86530612244898, "grad_norm": 36.76993179321289, "learning_rate": 1.5091836734693878e-05, "loss": 1.3254, "step": 17117 }, { "epoch": 69.86938775510204, "grad_norm": 34.029361724853516, "learning_rate": 1.5089795918367347e-05, "loss": 1.1224, "step": 17118 }, { "epoch": 69.87346938775511, "grad_norm": 41.66801071166992, "learning_rate": 1.5087755102040817e-05, "loss": 0.8457, "step": 17119 }, { "epoch": 69.87755102040816, "grad_norm": 26.518836975097656, "learning_rate": 1.5085714285714286e-05, "loss": 1.7929, "step": 17120 }, { "epoch": 69.88163265306123, "grad_norm": 36.390769958496094, "learning_rate": 1.5083673469387755e-05, "loss": 1.4617, "step": 17121 }, { "epoch": 69.88571428571429, "grad_norm": 27.25318145751953, "learning_rate": 1.5081632653061226e-05, "loss": 0.1524, "step": 17122 }, { "epoch": 69.88979591836734, "grad_norm": 45.05335235595703, "learning_rate": 1.5079591836734694e-05, "loss": 0.4493, "step": 17123 }, { "epoch": 69.89387755102041, "grad_norm": 39.079185485839844, "learning_rate": 1.5077551020408165e-05, "loss": 1.428, "step": 17124 }, { "epoch": 69.89795918367346, "grad_norm": 27.484752655029297, "learning_rate": 1.5075510204081634e-05, "loss": 1.5564, "step": 17125 }, { "epoch": 69.90204081632653, "grad_norm": 42.80178451538086, "learning_rate": 1.5073469387755101e-05, "loss": 0.4579, "step": 17126 }, { "epoch": 69.90612244897959, "grad_norm": 22.79384994506836, "learning_rate": 1.5071428571428573e-05, "loss": 1.8345, "step": 17127 }, { "epoch": 69.91020408163266, "grad_norm": 10.627203941345215, "learning_rate": 1.506938775510204e-05, "loss": 3.1229, "step": 17128 }, { "epoch": 69.91428571428571, "grad_norm": 52.868125915527344, "learning_rate": 1.5067346938775512e-05, "loss": 0.5732, "step": 17129 }, { "epoch": 69.91836734693878, "grad_norm": 25.31075096130371, "learning_rate": 1.506530612244898e-05, "loss": 1.7143, "step": 17130 }, { "epoch": 69.92244897959183, "grad_norm": 38.17009353637695, "learning_rate": 1.5063265306122452e-05, "loss": 1.2239, "step": 17131 }, { "epoch": 69.9265306122449, "grad_norm": 35.97665786743164, "learning_rate": 1.5061224489795919e-05, "loss": 0.6066, "step": 17132 }, { "epoch": 69.93061224489796, "grad_norm": 26.952014923095703, "learning_rate": 1.5059183673469388e-05, "loss": 1.5013, "step": 17133 }, { "epoch": 69.93469387755103, "grad_norm": 23.630346298217773, "learning_rate": 1.5057142857142858e-05, "loss": 2.2551, "step": 17134 }, { "epoch": 69.93877551020408, "grad_norm": 21.26060676574707, "learning_rate": 1.5055102040816327e-05, "loss": 1.6831, "step": 17135 }, { "epoch": 69.94285714285714, "grad_norm": 28.680932998657227, "learning_rate": 1.5053061224489798e-05, "loss": 1.8473, "step": 17136 }, { "epoch": 69.9469387755102, "grad_norm": 39.14133834838867, "learning_rate": 1.5051020408163266e-05, "loss": 1.0001, "step": 17137 }, { "epoch": 69.95102040816326, "grad_norm": 33.13338851928711, "learning_rate": 1.5048979591836735e-05, "loss": 0.5511, "step": 17138 }, { "epoch": 69.95510204081633, "grad_norm": 21.071640014648438, "learning_rate": 1.5046938775510206e-05, "loss": 2.2788, "step": 17139 }, { "epoch": 69.95918367346938, "grad_norm": 24.443344116210938, "learning_rate": 1.5044897959183673e-05, "loss": 1.7466, "step": 17140 }, { "epoch": 69.96326530612245, "grad_norm": 28.922443389892578, "learning_rate": 1.5042857142857145e-05, "loss": 1.9361, "step": 17141 }, { "epoch": 69.9673469387755, "grad_norm": 25.5602970123291, "learning_rate": 1.5040816326530612e-05, "loss": 1.7984, "step": 17142 }, { "epoch": 69.97142857142858, "grad_norm": 40.61439895629883, "learning_rate": 1.5038775510204081e-05, "loss": 0.8533, "step": 17143 }, { "epoch": 69.97551020408163, "grad_norm": 40.69118881225586, "learning_rate": 1.5036734693877551e-05, "loss": 1.2146, "step": 17144 }, { "epoch": 69.9795918367347, "grad_norm": 34.138919830322266, "learning_rate": 1.503469387755102e-05, "loss": 0.5878, "step": 17145 }, { "epoch": 69.98367346938775, "grad_norm": 33.05386734008789, "learning_rate": 1.503265306122449e-05, "loss": 1.5742, "step": 17146 }, { "epoch": 69.98775510204082, "grad_norm": 35.037818908691406, "learning_rate": 1.503061224489796e-05, "loss": 0.7409, "step": 17147 }, { "epoch": 69.99183673469388, "grad_norm": 36.04401779174805, "learning_rate": 1.5028571428571428e-05, "loss": 1.3815, "step": 17148 }, { "epoch": 69.99591836734695, "grad_norm": 23.796537399291992, "learning_rate": 1.5026530612244899e-05, "loss": 1.3686, "step": 17149 }, { "epoch": 70.0, "grad_norm": 23.764469146728516, "learning_rate": 1.5024489795918368e-05, "loss": 1.0115, "step": 17150 }, { "epoch": 70.00408163265305, "grad_norm": 41.485321044921875, "learning_rate": 1.5022448979591838e-05, "loss": 1.4627, "step": 17151 }, { "epoch": 70.00816326530612, "grad_norm": 39.399757385253906, "learning_rate": 1.5020408163265307e-05, "loss": 1.4584, "step": 17152 }, { "epoch": 70.01224489795918, "grad_norm": 28.127483367919922, "learning_rate": 1.5018367346938778e-05, "loss": 0.8355, "step": 17153 }, { "epoch": 70.01632653061225, "grad_norm": 28.645517349243164, "learning_rate": 1.5016326530612246e-05, "loss": 0.9632, "step": 17154 }, { "epoch": 70.0204081632653, "grad_norm": 27.75954818725586, "learning_rate": 1.5014285714285714e-05, "loss": 1.9058, "step": 17155 }, { "epoch": 70.02448979591837, "grad_norm": 19.268404006958008, "learning_rate": 1.5012244897959186e-05, "loss": 2.2535, "step": 17156 }, { "epoch": 70.02857142857142, "grad_norm": 19.898351669311523, "learning_rate": 1.5010204081632653e-05, "loss": 2.1592, "step": 17157 }, { "epoch": 70.0326530612245, "grad_norm": 33.47561264038086, "learning_rate": 1.5008163265306125e-05, "loss": 1.0636, "step": 17158 }, { "epoch": 70.03673469387755, "grad_norm": 32.55570602416992, "learning_rate": 1.5006122448979592e-05, "loss": 0.9385, "step": 17159 }, { "epoch": 70.04081632653062, "grad_norm": 42.08280563354492, "learning_rate": 1.5004081632653061e-05, "loss": 0.2028, "step": 17160 }, { "epoch": 70.04489795918367, "grad_norm": 23.235137939453125, "learning_rate": 1.5002040816326532e-05, "loss": 2.147, "step": 17161 }, { "epoch": 70.04897959183674, "grad_norm": 35.2984619140625, "learning_rate": 1.5e-05, "loss": 1.3396, "step": 17162 }, { "epoch": 70.0530612244898, "grad_norm": 47.36145782470703, "learning_rate": 1.4997959183673471e-05, "loss": 0.8108, "step": 17163 }, { "epoch": 70.05714285714286, "grad_norm": 26.28082275390625, "learning_rate": 1.499591836734694e-05, "loss": 1.7144, "step": 17164 }, { "epoch": 70.06122448979592, "grad_norm": 30.253337860107422, "learning_rate": 1.4993877551020409e-05, "loss": 1.4522, "step": 17165 }, { "epoch": 70.06530612244897, "grad_norm": 12.202483177185059, "learning_rate": 1.4991836734693879e-05, "loss": 3.1026, "step": 17166 }, { "epoch": 70.06938775510204, "grad_norm": 51.63188552856445, "learning_rate": 1.4989795918367346e-05, "loss": 1.4647, "step": 17167 }, { "epoch": 70.0734693877551, "grad_norm": 38.05074691772461, "learning_rate": 1.4987755102040818e-05, "loss": 0.7279, "step": 17168 }, { "epoch": 70.07755102040817, "grad_norm": 38.95751953125, "learning_rate": 1.4985714285714286e-05, "loss": 0.5367, "step": 17169 }, { "epoch": 70.08163265306122, "grad_norm": 29.86664581298828, "learning_rate": 1.4983673469387754e-05, "loss": 1.6517, "step": 17170 }, { "epoch": 70.08571428571429, "grad_norm": 33.26858139038086, "learning_rate": 1.4981632653061225e-05, "loss": 1.0919, "step": 17171 }, { "epoch": 70.08979591836734, "grad_norm": 32.77311706542969, "learning_rate": 1.4979591836734694e-05, "loss": 0.3723, "step": 17172 }, { "epoch": 70.09387755102041, "grad_norm": 27.517148971557617, "learning_rate": 1.4977551020408164e-05, "loss": 1.4644, "step": 17173 }, { "epoch": 70.09795918367347, "grad_norm": 24.45290756225586, "learning_rate": 1.4975510204081633e-05, "loss": 1.4044, "step": 17174 }, { "epoch": 70.10204081632654, "grad_norm": 17.952667236328125, "learning_rate": 1.4973469387755104e-05, "loss": 2.1051, "step": 17175 }, { "epoch": 70.10612244897959, "grad_norm": 25.728334426879883, "learning_rate": 1.4971428571428572e-05, "loss": 0.3943, "step": 17176 }, { "epoch": 70.11020408163266, "grad_norm": 30.056060791015625, "learning_rate": 1.4969387755102041e-05, "loss": 1.62, "step": 17177 }, { "epoch": 70.11428571428571, "grad_norm": 23.22675895690918, "learning_rate": 1.4967346938775512e-05, "loss": 1.7234, "step": 17178 }, { "epoch": 70.11836734693877, "grad_norm": 27.097187042236328, "learning_rate": 1.496530612244898e-05, "loss": 0.1519, "step": 17179 }, { "epoch": 70.12244897959184, "grad_norm": 26.401607513427734, "learning_rate": 1.4963265306122451e-05, "loss": 1.291, "step": 17180 }, { "epoch": 70.12653061224489, "grad_norm": 38.16592025756836, "learning_rate": 1.496122448979592e-05, "loss": 0.5239, "step": 17181 }, { "epoch": 70.13061224489796, "grad_norm": 30.76477813720703, "learning_rate": 1.4959183673469387e-05, "loss": 1.6385, "step": 17182 }, { "epoch": 70.13469387755102, "grad_norm": 19.7767391204834, "learning_rate": 1.4957142857142859e-05, "loss": 1.7101, "step": 17183 }, { "epoch": 70.13877551020408, "grad_norm": 24.10320281982422, "learning_rate": 1.4955102040816326e-05, "loss": 2.3154, "step": 17184 }, { "epoch": 70.14285714285714, "grad_norm": 22.247995376586914, "learning_rate": 1.4953061224489798e-05, "loss": 2.184, "step": 17185 }, { "epoch": 70.14693877551021, "grad_norm": 35.231197357177734, "learning_rate": 1.4951020408163266e-05, "loss": 1.1848, "step": 17186 }, { "epoch": 70.15102040816326, "grad_norm": 38.8507080078125, "learning_rate": 1.4948979591836734e-05, "loss": 0.5708, "step": 17187 }, { "epoch": 70.15510204081633, "grad_norm": 24.82635498046875, "learning_rate": 1.4946938775510205e-05, "loss": 0.1948, "step": 17188 }, { "epoch": 70.15918367346939, "grad_norm": 33.226253509521484, "learning_rate": 1.4944897959183674e-05, "loss": 1.113, "step": 17189 }, { "epoch": 70.16326530612245, "grad_norm": 32.62678527832031, "learning_rate": 1.4942857142857144e-05, "loss": 1.8417, "step": 17190 }, { "epoch": 70.16734693877551, "grad_norm": 31.69379234313965, "learning_rate": 1.4940816326530613e-05, "loss": 0.3449, "step": 17191 }, { "epoch": 70.17142857142858, "grad_norm": 22.786144256591797, "learning_rate": 1.493877551020408e-05, "loss": 1.8267, "step": 17192 }, { "epoch": 70.17551020408163, "grad_norm": 17.185382843017578, "learning_rate": 1.4936734693877552e-05, "loss": 0.1416, "step": 17193 }, { "epoch": 70.17959183673469, "grad_norm": 25.5208797454834, "learning_rate": 1.493469387755102e-05, "loss": 1.665, "step": 17194 }, { "epoch": 70.18367346938776, "grad_norm": 37.34540939331055, "learning_rate": 1.4932653061224492e-05, "loss": 0.9665, "step": 17195 }, { "epoch": 70.18775510204081, "grad_norm": 34.93558120727539, "learning_rate": 1.4930612244897959e-05, "loss": 0.5185, "step": 17196 }, { "epoch": 70.19183673469388, "grad_norm": 33.18937683105469, "learning_rate": 1.4928571428571431e-05, "loss": 0.5234, "step": 17197 }, { "epoch": 70.19591836734693, "grad_norm": 30.68553352355957, "learning_rate": 1.4926530612244898e-05, "loss": 0.5032, "step": 17198 }, { "epoch": 70.2, "grad_norm": 32.58151626586914, "learning_rate": 1.4924489795918367e-05, "loss": 0.6551, "step": 17199 }, { "epoch": 70.20408163265306, "grad_norm": 23.226974487304688, "learning_rate": 1.4922448979591838e-05, "loss": 1.7042, "step": 17200 }, { "epoch": 70.20816326530613, "grad_norm": 36.434303283691406, "learning_rate": 1.4920408163265306e-05, "loss": 0.4802, "step": 17201 }, { "epoch": 70.21224489795918, "grad_norm": 35.70451354980469, "learning_rate": 1.4918367346938777e-05, "loss": 1.6488, "step": 17202 }, { "epoch": 70.21632653061225, "grad_norm": 36.525630950927734, "learning_rate": 1.4916326530612246e-05, "loss": 1.2434, "step": 17203 }, { "epoch": 70.2204081632653, "grad_norm": 42.171016693115234, "learning_rate": 1.4914285714285715e-05, "loss": 0.4266, "step": 17204 }, { "epoch": 70.22448979591837, "grad_norm": 43.1866340637207, "learning_rate": 1.4912244897959185e-05, "loss": 0.8887, "step": 17205 }, { "epoch": 70.22857142857143, "grad_norm": 34.66291046142578, "learning_rate": 1.4910204081632654e-05, "loss": 0.5455, "step": 17206 }, { "epoch": 70.2326530612245, "grad_norm": 11.341763496398926, "learning_rate": 1.4908163265306124e-05, "loss": 0.0876, "step": 17207 }, { "epoch": 70.23673469387755, "grad_norm": 26.72129249572754, "learning_rate": 1.4906122448979593e-05, "loss": 1.8916, "step": 17208 }, { "epoch": 70.2408163265306, "grad_norm": 34.755958557128906, "learning_rate": 1.490408163265306e-05, "loss": 0.9327, "step": 17209 }, { "epoch": 70.24489795918367, "grad_norm": 30.266571044921875, "learning_rate": 1.4902040816326533e-05, "loss": 0.9345, "step": 17210 }, { "epoch": 70.24897959183673, "grad_norm": 32.19240951538086, "learning_rate": 1.49e-05, "loss": 0.7436, "step": 17211 }, { "epoch": 70.2530612244898, "grad_norm": 26.31810760498047, "learning_rate": 1.4897959183673472e-05, "loss": 1.686, "step": 17212 }, { "epoch": 70.25714285714285, "grad_norm": 33.545433044433594, "learning_rate": 1.4895918367346939e-05, "loss": 0.7785, "step": 17213 }, { "epoch": 70.26122448979592, "grad_norm": 46.905517578125, "learning_rate": 1.4893877551020408e-05, "loss": 0.3993, "step": 17214 }, { "epoch": 70.26530612244898, "grad_norm": 25.810636520385742, "learning_rate": 1.4891836734693878e-05, "loss": 1.8138, "step": 17215 }, { "epoch": 70.26938775510204, "grad_norm": 36.82067108154297, "learning_rate": 1.4889795918367347e-05, "loss": 0.4277, "step": 17216 }, { "epoch": 70.2734693877551, "grad_norm": 26.522621154785156, "learning_rate": 1.4887755102040818e-05, "loss": 2.007, "step": 17217 }, { "epoch": 70.27755102040817, "grad_norm": 40.67826461791992, "learning_rate": 1.4885714285714286e-05, "loss": 1.1424, "step": 17218 }, { "epoch": 70.28163265306122, "grad_norm": 32.456947326660156, "learning_rate": 1.4883673469387757e-05, "loss": 0.8584, "step": 17219 }, { "epoch": 70.28571428571429, "grad_norm": 52.94303894042969, "learning_rate": 1.4881632653061226e-05, "loss": 0.7376, "step": 17220 }, { "epoch": 70.28979591836735, "grad_norm": 42.19325256347656, "learning_rate": 1.4879591836734693e-05, "loss": 0.4945, "step": 17221 }, { "epoch": 70.29387755102042, "grad_norm": 22.10004425048828, "learning_rate": 1.4877551020408165e-05, "loss": 2.2183, "step": 17222 }, { "epoch": 70.29795918367347, "grad_norm": 32.199100494384766, "learning_rate": 1.4875510204081632e-05, "loss": 0.3899, "step": 17223 }, { "epoch": 70.30204081632652, "grad_norm": 24.564558029174805, "learning_rate": 1.4873469387755104e-05, "loss": 2.2205, "step": 17224 }, { "epoch": 70.3061224489796, "grad_norm": 29.900991439819336, "learning_rate": 1.4871428571428572e-05, "loss": 1.4891, "step": 17225 }, { "epoch": 70.31020408163265, "grad_norm": 32.2248649597168, "learning_rate": 1.486938775510204e-05, "loss": 0.1621, "step": 17226 }, { "epoch": 70.31428571428572, "grad_norm": 35.60438537597656, "learning_rate": 1.4867346938775511e-05, "loss": 1.7856, "step": 17227 }, { "epoch": 70.31836734693877, "grad_norm": 44.547035217285156, "learning_rate": 1.486530612244898e-05, "loss": 0.918, "step": 17228 }, { "epoch": 70.32244897959184, "grad_norm": 37.38201141357422, "learning_rate": 1.486326530612245e-05, "loss": 0.8488, "step": 17229 }, { "epoch": 70.3265306122449, "grad_norm": 31.405908584594727, "learning_rate": 1.4861224489795919e-05, "loss": 1.6717, "step": 17230 }, { "epoch": 70.33061224489796, "grad_norm": 32.393043518066406, "learning_rate": 1.4859183673469388e-05, "loss": 1.0797, "step": 17231 }, { "epoch": 70.33469387755102, "grad_norm": 29.79138946533203, "learning_rate": 1.4857142857142858e-05, "loss": 1.7735, "step": 17232 }, { "epoch": 70.33877551020409, "grad_norm": 35.783843994140625, "learning_rate": 1.4855102040816327e-05, "loss": 0.9778, "step": 17233 }, { "epoch": 70.34285714285714, "grad_norm": 16.367753982543945, "learning_rate": 1.4853061224489798e-05, "loss": 2.767, "step": 17234 }, { "epoch": 70.34693877551021, "grad_norm": 33.428653717041016, "learning_rate": 1.4851020408163267e-05, "loss": 1.337, "step": 17235 }, { "epoch": 70.35102040816327, "grad_norm": 33.40390396118164, "learning_rate": 1.4848979591836734e-05, "loss": 1.2892, "step": 17236 }, { "epoch": 70.35510204081632, "grad_norm": 18.193965911865234, "learning_rate": 1.4846938775510206e-05, "loss": 2.2661, "step": 17237 }, { "epoch": 70.35918367346939, "grad_norm": 26.629152297973633, "learning_rate": 1.4844897959183673e-05, "loss": 2.1238, "step": 17238 }, { "epoch": 70.36326530612244, "grad_norm": 24.449108123779297, "learning_rate": 1.4842857142857145e-05, "loss": 1.5598, "step": 17239 }, { "epoch": 70.36734693877551, "grad_norm": 25.473957061767578, "learning_rate": 1.4840816326530612e-05, "loss": 1.8064, "step": 17240 }, { "epoch": 70.37142857142857, "grad_norm": 19.31134033203125, "learning_rate": 1.4838775510204081e-05, "loss": 2.1535, "step": 17241 }, { "epoch": 70.37551020408164, "grad_norm": 37.988155364990234, "learning_rate": 1.4836734693877552e-05, "loss": 1.2103, "step": 17242 }, { "epoch": 70.37959183673469, "grad_norm": 37.320098876953125, "learning_rate": 1.483469387755102e-05, "loss": 0.5614, "step": 17243 }, { "epoch": 70.38367346938776, "grad_norm": 33.197242736816406, "learning_rate": 1.4832653061224491e-05, "loss": 1.3519, "step": 17244 }, { "epoch": 70.38775510204081, "grad_norm": 38.69630432128906, "learning_rate": 1.483061224489796e-05, "loss": 0.4778, "step": 17245 }, { "epoch": 70.39183673469388, "grad_norm": 35.846641540527344, "learning_rate": 1.482857142857143e-05, "loss": 1.3854, "step": 17246 }, { "epoch": 70.39591836734694, "grad_norm": 26.217395782470703, "learning_rate": 1.48265306122449e-05, "loss": 1.8728, "step": 17247 }, { "epoch": 70.4, "grad_norm": 27.4871883392334, "learning_rate": 1.4824489795918366e-05, "loss": 0.2117, "step": 17248 }, { "epoch": 70.40408163265306, "grad_norm": 35.23627853393555, "learning_rate": 1.4822448979591839e-05, "loss": 1.3911, "step": 17249 }, { "epoch": 70.40816326530613, "grad_norm": 36.023216247558594, "learning_rate": 1.4820408163265306e-05, "loss": 0.7047, "step": 17250 }, { "epoch": 70.41224489795918, "grad_norm": 21.46963882446289, "learning_rate": 1.4818367346938778e-05, "loss": 1.6362, "step": 17251 }, { "epoch": 70.41632653061224, "grad_norm": 24.630769729614258, "learning_rate": 1.4816326530612245e-05, "loss": 1.7072, "step": 17252 }, { "epoch": 70.42040816326531, "grad_norm": 45.4348258972168, "learning_rate": 1.4814285714285714e-05, "loss": 0.9852, "step": 17253 }, { "epoch": 70.42448979591836, "grad_norm": 30.14504623413086, "learning_rate": 1.4812244897959184e-05, "loss": 1.4685, "step": 17254 }, { "epoch": 70.42857142857143, "grad_norm": 37.284629821777344, "learning_rate": 1.4810204081632653e-05, "loss": 0.4392, "step": 17255 }, { "epoch": 70.43265306122449, "grad_norm": 23.88129425048828, "learning_rate": 1.4808163265306124e-05, "loss": 0.1865, "step": 17256 }, { "epoch": 70.43673469387755, "grad_norm": 39.588539123535156, "learning_rate": 1.4806122448979592e-05, "loss": 0.673, "step": 17257 }, { "epoch": 70.44081632653061, "grad_norm": 30.07608985900879, "learning_rate": 1.4804081632653061e-05, "loss": 1.4463, "step": 17258 }, { "epoch": 70.44489795918368, "grad_norm": 25.350210189819336, "learning_rate": 1.4802040816326532e-05, "loss": 0.1382, "step": 17259 }, { "epoch": 70.44897959183673, "grad_norm": 30.935476303100586, "learning_rate": 1.48e-05, "loss": 1.5948, "step": 17260 }, { "epoch": 70.4530612244898, "grad_norm": 24.209651947021484, "learning_rate": 1.4797959183673471e-05, "loss": 1.9628, "step": 17261 }, { "epoch": 70.45714285714286, "grad_norm": 44.98806381225586, "learning_rate": 1.479591836734694e-05, "loss": 0.3672, "step": 17262 }, { "epoch": 70.46122448979592, "grad_norm": 27.7656192779541, "learning_rate": 1.4793877551020407e-05, "loss": 1.5189, "step": 17263 }, { "epoch": 70.46530612244898, "grad_norm": 46.33258819580078, "learning_rate": 1.479183673469388e-05, "loss": 0.5334, "step": 17264 }, { "epoch": 70.46938775510205, "grad_norm": 45.09586715698242, "learning_rate": 1.4789795918367346e-05, "loss": 0.533, "step": 17265 }, { "epoch": 70.4734693877551, "grad_norm": 19.711647033691406, "learning_rate": 1.4787755102040819e-05, "loss": 2.2753, "step": 17266 }, { "epoch": 70.47755102040816, "grad_norm": 26.93730926513672, "learning_rate": 1.4785714285714286e-05, "loss": 1.7491, "step": 17267 }, { "epoch": 70.48163265306123, "grad_norm": 31.145526885986328, "learning_rate": 1.4783673469387758e-05, "loss": 1.03, "step": 17268 }, { "epoch": 70.48571428571428, "grad_norm": 34.096527099609375, "learning_rate": 1.4781632653061225e-05, "loss": 0.363, "step": 17269 }, { "epoch": 70.48979591836735, "grad_norm": 34.424827575683594, "learning_rate": 1.4779591836734694e-05, "loss": 0.4294, "step": 17270 }, { "epoch": 70.4938775510204, "grad_norm": 16.664804458618164, "learning_rate": 1.4777551020408164e-05, "loss": 2.5798, "step": 17271 }, { "epoch": 70.49795918367347, "grad_norm": 38.49586486816406, "learning_rate": 1.4775510204081633e-05, "loss": 0.383, "step": 17272 }, { "epoch": 70.50204081632653, "grad_norm": 33.57794189453125, "learning_rate": 1.4773469387755104e-05, "loss": 0.949, "step": 17273 }, { "epoch": 70.5061224489796, "grad_norm": 27.893962860107422, "learning_rate": 1.4771428571428573e-05, "loss": 1.8984, "step": 17274 }, { "epoch": 70.51020408163265, "grad_norm": 22.973878860473633, "learning_rate": 1.476938775510204e-05, "loss": 2.1791, "step": 17275 }, { "epoch": 70.51428571428572, "grad_norm": 44.627952575683594, "learning_rate": 1.4767346938775512e-05, "loss": 0.6546, "step": 17276 }, { "epoch": 70.51836734693877, "grad_norm": 33.921504974365234, "learning_rate": 1.4765306122448979e-05, "loss": 1.1549, "step": 17277 }, { "epoch": 70.52244897959184, "grad_norm": 23.610153198242188, "learning_rate": 1.4763265306122451e-05, "loss": 1.6596, "step": 17278 }, { "epoch": 70.5265306122449, "grad_norm": 42.84794998168945, "learning_rate": 1.4761224489795918e-05, "loss": 0.8109, "step": 17279 }, { "epoch": 70.53061224489795, "grad_norm": 26.05294418334961, "learning_rate": 1.4759183673469387e-05, "loss": 1.8554, "step": 17280 }, { "epoch": 70.53469387755102, "grad_norm": 39.831485748291016, "learning_rate": 1.4757142857142858e-05, "loss": 0.461, "step": 17281 }, { "epoch": 70.53877551020408, "grad_norm": 27.341999053955078, "learning_rate": 1.4755102040816326e-05, "loss": 2.1587, "step": 17282 }, { "epoch": 70.54285714285714, "grad_norm": 37.5146369934082, "learning_rate": 1.4753061224489797e-05, "loss": 0.7529, "step": 17283 }, { "epoch": 70.5469387755102, "grad_norm": 27.36954116821289, "learning_rate": 1.4751020408163266e-05, "loss": 0.9579, "step": 17284 }, { "epoch": 70.55102040816327, "grad_norm": 36.16246032714844, "learning_rate": 1.4748979591836735e-05, "loss": 0.6368, "step": 17285 }, { "epoch": 70.55510204081632, "grad_norm": 29.305946350097656, "learning_rate": 1.4746938775510205e-05, "loss": 1.0223, "step": 17286 }, { "epoch": 70.55918367346939, "grad_norm": 17.12957763671875, "learning_rate": 1.4744897959183674e-05, "loss": 2.483, "step": 17287 }, { "epoch": 70.56326530612245, "grad_norm": 26.010488510131836, "learning_rate": 1.4742857142857144e-05, "loss": 1.5338, "step": 17288 }, { "epoch": 70.56734693877551, "grad_norm": 34.027679443359375, "learning_rate": 1.4740816326530613e-05, "loss": 0.9231, "step": 17289 }, { "epoch": 70.57142857142857, "grad_norm": 41.675960540771484, "learning_rate": 1.4738775510204084e-05, "loss": 0.5328, "step": 17290 }, { "epoch": 70.57551020408164, "grad_norm": 46.008384704589844, "learning_rate": 1.4736734693877553e-05, "loss": 0.6172, "step": 17291 }, { "epoch": 70.57959183673469, "grad_norm": 27.281599044799805, "learning_rate": 1.473469387755102e-05, "loss": 1.3384, "step": 17292 }, { "epoch": 70.58367346938776, "grad_norm": 36.869144439697266, "learning_rate": 1.4732653061224492e-05, "loss": 1.0519, "step": 17293 }, { "epoch": 70.58775510204082, "grad_norm": 22.452890396118164, "learning_rate": 1.4730612244897959e-05, "loss": 1.677, "step": 17294 }, { "epoch": 70.59183673469387, "grad_norm": 35.74454879760742, "learning_rate": 1.4728571428571431e-05, "loss": 1.5212, "step": 17295 }, { "epoch": 70.59591836734694, "grad_norm": 39.46576690673828, "learning_rate": 1.4726530612244898e-05, "loss": 0.5544, "step": 17296 }, { "epoch": 70.6, "grad_norm": 35.013519287109375, "learning_rate": 1.4724489795918367e-05, "loss": 1.0821, "step": 17297 }, { "epoch": 70.60408163265306, "grad_norm": 33.21413040161133, "learning_rate": 1.4722448979591838e-05, "loss": 1.6723, "step": 17298 }, { "epoch": 70.60816326530612, "grad_norm": 33.25130844116211, "learning_rate": 1.4720408163265307e-05, "loss": 1.0524, "step": 17299 }, { "epoch": 70.61224489795919, "grad_norm": 31.26102066040039, "learning_rate": 1.4718367346938777e-05, "loss": 0.5725, "step": 17300 }, { "epoch": 70.61632653061224, "grad_norm": 49.82088851928711, "learning_rate": 1.4716326530612246e-05, "loss": 1.2325, "step": 17301 }, { "epoch": 70.62040816326531, "grad_norm": 18.988569259643555, "learning_rate": 1.4714285714285713e-05, "loss": 0.2061, "step": 17302 }, { "epoch": 70.62448979591836, "grad_norm": 25.510391235351562, "learning_rate": 1.4712244897959185e-05, "loss": 1.7553, "step": 17303 }, { "epoch": 70.62857142857143, "grad_norm": 46.87622833251953, "learning_rate": 1.4710204081632652e-05, "loss": 0.4351, "step": 17304 }, { "epoch": 70.63265306122449, "grad_norm": 35.19879913330078, "learning_rate": 1.4708163265306125e-05, "loss": 1.5431, "step": 17305 }, { "epoch": 70.63673469387756, "grad_norm": 26.34351921081543, "learning_rate": 1.4706122448979592e-05, "loss": 1.7351, "step": 17306 }, { "epoch": 70.64081632653061, "grad_norm": 19.35721206665039, "learning_rate": 1.470408163265306e-05, "loss": 2.279, "step": 17307 }, { "epoch": 70.64489795918368, "grad_norm": 34.14616775512695, "learning_rate": 1.4702040816326531e-05, "loss": 0.2261, "step": 17308 }, { "epoch": 70.64897959183673, "grad_norm": 37.89195251464844, "learning_rate": 1.47e-05, "loss": 0.823, "step": 17309 }, { "epoch": 70.65306122448979, "grad_norm": 33.47444534301758, "learning_rate": 1.469795918367347e-05, "loss": 1.4749, "step": 17310 }, { "epoch": 70.65714285714286, "grad_norm": 32.9401741027832, "learning_rate": 1.469591836734694e-05, "loss": 1.2435, "step": 17311 }, { "epoch": 70.66122448979591, "grad_norm": 36.031646728515625, "learning_rate": 1.469387755102041e-05, "loss": 1.1029, "step": 17312 }, { "epoch": 70.66530612244898, "grad_norm": 34.095584869384766, "learning_rate": 1.4691836734693879e-05, "loss": 1.3189, "step": 17313 }, { "epoch": 70.66938775510204, "grad_norm": 21.183387756347656, "learning_rate": 1.4689795918367347e-05, "loss": 1.5571, "step": 17314 }, { "epoch": 70.6734693877551, "grad_norm": 40.57634735107422, "learning_rate": 1.4687755102040818e-05, "loss": 0.8151, "step": 17315 }, { "epoch": 70.67755102040816, "grad_norm": 28.1850528717041, "learning_rate": 1.4685714285714287e-05, "loss": 1.3677, "step": 17316 }, { "epoch": 70.68163265306123, "grad_norm": 37.89150619506836, "learning_rate": 1.4683673469387757e-05, "loss": 0.2074, "step": 17317 }, { "epoch": 70.68571428571428, "grad_norm": 34.15216827392578, "learning_rate": 1.4681632653061226e-05, "loss": 0.6988, "step": 17318 }, { "epoch": 70.68979591836735, "grad_norm": 41.83366394042969, "learning_rate": 1.4679591836734693e-05, "loss": 0.8902, "step": 17319 }, { "epoch": 70.6938775510204, "grad_norm": 37.555084228515625, "learning_rate": 1.4677551020408165e-05, "loss": 0.7754, "step": 17320 }, { "epoch": 70.69795918367348, "grad_norm": 36.27506637573242, "learning_rate": 1.4675510204081632e-05, "loss": 0.579, "step": 17321 }, { "epoch": 70.70204081632653, "grad_norm": 45.728004455566406, "learning_rate": 1.4673469387755103e-05, "loss": 0.7727, "step": 17322 }, { "epoch": 70.70612244897958, "grad_norm": 34.275115966796875, "learning_rate": 1.4671428571428572e-05, "loss": 1.1743, "step": 17323 }, { "epoch": 70.71020408163265, "grad_norm": 38.53797149658203, "learning_rate": 1.466938775510204e-05, "loss": 1.3083, "step": 17324 }, { "epoch": 70.71428571428571, "grad_norm": 37.511165618896484, "learning_rate": 1.4667346938775511e-05, "loss": 1.268, "step": 17325 }, { "epoch": 70.71836734693878, "grad_norm": 35.67203140258789, "learning_rate": 1.466530612244898e-05, "loss": 0.577, "step": 17326 }, { "epoch": 70.72244897959183, "grad_norm": 42.41581726074219, "learning_rate": 1.466326530612245e-05, "loss": 1.0842, "step": 17327 }, { "epoch": 70.7265306122449, "grad_norm": 32.340919494628906, "learning_rate": 1.466122448979592e-05, "loss": 1.5081, "step": 17328 }, { "epoch": 70.73061224489796, "grad_norm": 35.17994689941406, "learning_rate": 1.4659183673469386e-05, "loss": 1.0142, "step": 17329 }, { "epoch": 70.73469387755102, "grad_norm": 36.208797454833984, "learning_rate": 1.4657142857142859e-05, "loss": 0.9666, "step": 17330 }, { "epoch": 70.73877551020408, "grad_norm": 32.669498443603516, "learning_rate": 1.4655102040816326e-05, "loss": 0.5101, "step": 17331 }, { "epoch": 70.74285714285715, "grad_norm": 33.31403732299805, "learning_rate": 1.4653061224489798e-05, "loss": 1.2972, "step": 17332 }, { "epoch": 70.7469387755102, "grad_norm": 32.079952239990234, "learning_rate": 1.4651020408163265e-05, "loss": 0.847, "step": 17333 }, { "epoch": 70.75102040816327, "grad_norm": 28.63580322265625, "learning_rate": 1.4648979591836737e-05, "loss": 1.5875, "step": 17334 }, { "epoch": 70.75510204081633, "grad_norm": 32.06485366821289, "learning_rate": 1.4646938775510204e-05, "loss": 0.9115, "step": 17335 }, { "epoch": 70.7591836734694, "grad_norm": 39.9699592590332, "learning_rate": 1.4644897959183673e-05, "loss": 1.1643, "step": 17336 }, { "epoch": 70.76326530612245, "grad_norm": 33.49398422241211, "learning_rate": 1.4642857142857144e-05, "loss": 1.4333, "step": 17337 }, { "epoch": 70.7673469387755, "grad_norm": 33.35249328613281, "learning_rate": 1.4640816326530613e-05, "loss": 1.6699, "step": 17338 }, { "epoch": 70.77142857142857, "grad_norm": 46.3990592956543, "learning_rate": 1.4638775510204083e-05, "loss": 0.6447, "step": 17339 }, { "epoch": 70.77551020408163, "grad_norm": 26.51271629333496, "learning_rate": 1.4636734693877552e-05, "loss": 0.3371, "step": 17340 }, { "epoch": 70.7795918367347, "grad_norm": 38.227622985839844, "learning_rate": 1.463469387755102e-05, "loss": 0.4897, "step": 17341 }, { "epoch": 70.78367346938775, "grad_norm": 33.17401885986328, "learning_rate": 1.4632653061224491e-05, "loss": 1.2692, "step": 17342 }, { "epoch": 70.78775510204082, "grad_norm": 8.032557487487793, "learning_rate": 1.463061224489796e-05, "loss": 0.0438, "step": 17343 }, { "epoch": 70.79183673469387, "grad_norm": 24.545534133911133, "learning_rate": 1.462857142857143e-05, "loss": 2.1971, "step": 17344 }, { "epoch": 70.79591836734694, "grad_norm": 16.761714935302734, "learning_rate": 1.46265306122449e-05, "loss": 0.1528, "step": 17345 }, { "epoch": 70.8, "grad_norm": 41.070552825927734, "learning_rate": 1.4624489795918367e-05, "loss": 1.3167, "step": 17346 }, { "epoch": 70.80408163265307, "grad_norm": 31.077091217041016, "learning_rate": 1.4622448979591839e-05, "loss": 1.623, "step": 17347 }, { "epoch": 70.80816326530612, "grad_norm": 22.945798873901367, "learning_rate": 1.4620408163265306e-05, "loss": 1.8517, "step": 17348 }, { "epoch": 70.81224489795919, "grad_norm": 34.27431106567383, "learning_rate": 1.4618367346938776e-05, "loss": 1.4178, "step": 17349 }, { "epoch": 70.81632653061224, "grad_norm": 26.689186096191406, "learning_rate": 1.4616326530612245e-05, "loss": 1.7681, "step": 17350 }, { "epoch": 70.82040816326531, "grad_norm": 23.627363204956055, "learning_rate": 1.4614285714285714e-05, "loss": 1.7801, "step": 17351 }, { "epoch": 70.82448979591837, "grad_norm": 42.53693771362305, "learning_rate": 1.4612244897959185e-05, "loss": 0.9142, "step": 17352 }, { "epoch": 70.82857142857142, "grad_norm": 34.08234405517578, "learning_rate": 1.4610204081632653e-05, "loss": 1.5584, "step": 17353 }, { "epoch": 70.83265306122449, "grad_norm": 36.7604866027832, "learning_rate": 1.4608163265306124e-05, "loss": 1.2947, "step": 17354 }, { "epoch": 70.83673469387755, "grad_norm": 35.031620025634766, "learning_rate": 1.4606122448979593e-05, "loss": 1.1368, "step": 17355 }, { "epoch": 70.84081632653061, "grad_norm": 33.4099235534668, "learning_rate": 1.4604081632653063e-05, "loss": 0.8667, "step": 17356 }, { "epoch": 70.84489795918367, "grad_norm": 30.561864852905273, "learning_rate": 1.4602040816326532e-05, "loss": 0.845, "step": 17357 }, { "epoch": 70.84897959183674, "grad_norm": 28.406143188476562, "learning_rate": 1.4599999999999999e-05, "loss": 1.4747, "step": 17358 }, { "epoch": 70.85306122448979, "grad_norm": 27.295761108398438, "learning_rate": 1.4597959183673471e-05, "loss": 1.5133, "step": 17359 }, { "epoch": 70.85714285714286, "grad_norm": 33.734256744384766, "learning_rate": 1.4595918367346938e-05, "loss": 0.5782, "step": 17360 }, { "epoch": 70.86122448979592, "grad_norm": 35.48737716674805, "learning_rate": 1.459387755102041e-05, "loss": 0.8867, "step": 17361 }, { "epoch": 70.86530612244898, "grad_norm": 24.85130500793457, "learning_rate": 1.4591836734693878e-05, "loss": 0.2561, "step": 17362 }, { "epoch": 70.86938775510204, "grad_norm": 22.67203712463379, "learning_rate": 1.4589795918367347e-05, "loss": 0.1709, "step": 17363 }, { "epoch": 70.87346938775511, "grad_norm": 38.58220672607422, "learning_rate": 1.4587755102040817e-05, "loss": 1.1395, "step": 17364 }, { "epoch": 70.87755102040816, "grad_norm": 41.28150177001953, "learning_rate": 1.4585714285714286e-05, "loss": 0.863, "step": 17365 }, { "epoch": 70.88163265306123, "grad_norm": 43.8071403503418, "learning_rate": 1.4583673469387756e-05, "loss": 0.9039, "step": 17366 }, { "epoch": 70.88571428571429, "grad_norm": 35.2177848815918, "learning_rate": 1.4581632653061225e-05, "loss": 1.1888, "step": 17367 }, { "epoch": 70.88979591836734, "grad_norm": 28.945205688476562, "learning_rate": 1.4579591836734694e-05, "loss": 1.7009, "step": 17368 }, { "epoch": 70.89387755102041, "grad_norm": 37.613224029541016, "learning_rate": 1.4577551020408165e-05, "loss": 0.4958, "step": 17369 }, { "epoch": 70.89795918367346, "grad_norm": 23.04979133605957, "learning_rate": 1.4575510204081633e-05, "loss": 1.7445, "step": 17370 }, { "epoch": 70.90204081632653, "grad_norm": 32.191104888916016, "learning_rate": 1.4573469387755104e-05, "loss": 1.4858, "step": 17371 }, { "epoch": 70.90612244897959, "grad_norm": 39.38079833984375, "learning_rate": 1.4571428571428573e-05, "loss": 1.2558, "step": 17372 }, { "epoch": 70.91020408163266, "grad_norm": 31.305692672729492, "learning_rate": 1.456938775510204e-05, "loss": 1.9775, "step": 17373 }, { "epoch": 70.91428571428571, "grad_norm": 34.753265380859375, "learning_rate": 1.4567346938775512e-05, "loss": 1.7208, "step": 17374 }, { "epoch": 70.91836734693878, "grad_norm": 33.29091262817383, "learning_rate": 1.456530612244898e-05, "loss": 0.2962, "step": 17375 }, { "epoch": 70.92244897959183, "grad_norm": 29.935327529907227, "learning_rate": 1.456326530612245e-05, "loss": 1.556, "step": 17376 }, { "epoch": 70.9265306122449, "grad_norm": 24.738300323486328, "learning_rate": 1.4561224489795919e-05, "loss": 1.6434, "step": 17377 }, { "epoch": 70.93061224489796, "grad_norm": 62.98869323730469, "learning_rate": 1.4559183673469389e-05, "loss": 0.5761, "step": 17378 }, { "epoch": 70.93469387755103, "grad_norm": 36.32917022705078, "learning_rate": 1.4557142857142858e-05, "loss": 1.3889, "step": 17379 }, { "epoch": 70.93877551020408, "grad_norm": 36.1886100769043, "learning_rate": 1.4555102040816327e-05, "loss": 0.6042, "step": 17380 }, { "epoch": 70.94285714285714, "grad_norm": 23.176334381103516, "learning_rate": 1.4553061224489797e-05, "loss": 2.5011, "step": 17381 }, { "epoch": 70.9469387755102, "grad_norm": 36.945587158203125, "learning_rate": 1.4551020408163266e-05, "loss": 1.2718, "step": 17382 }, { "epoch": 70.95102040816326, "grad_norm": 37.45643997192383, "learning_rate": 1.4548979591836737e-05, "loss": 0.4981, "step": 17383 }, { "epoch": 70.95510204081633, "grad_norm": 32.556827545166016, "learning_rate": 1.4546938775510205e-05, "loss": 1.6383, "step": 17384 }, { "epoch": 70.95918367346938, "grad_norm": 19.720550537109375, "learning_rate": 1.4544897959183673e-05, "loss": 2.4069, "step": 17385 }, { "epoch": 70.96326530612245, "grad_norm": 26.419906616210938, "learning_rate": 1.4542857142857145e-05, "loss": 1.8025, "step": 17386 }, { "epoch": 70.9673469387755, "grad_norm": 30.11876106262207, "learning_rate": 1.4540816326530612e-05, "loss": 1.4755, "step": 17387 }, { "epoch": 70.97142857142858, "grad_norm": 44.614646911621094, "learning_rate": 1.4538775510204084e-05, "loss": 1.1765, "step": 17388 }, { "epoch": 70.97551020408163, "grad_norm": 37.15829849243164, "learning_rate": 1.4536734693877551e-05, "loss": 1.4501, "step": 17389 }, { "epoch": 70.9795918367347, "grad_norm": 47.34270477294922, "learning_rate": 1.453469387755102e-05, "loss": 1.0304, "step": 17390 }, { "epoch": 70.98367346938775, "grad_norm": 29.08310890197754, "learning_rate": 1.453265306122449e-05, "loss": 0.2142, "step": 17391 }, { "epoch": 70.98775510204082, "grad_norm": 21.296998977661133, "learning_rate": 1.453061224489796e-05, "loss": 1.9056, "step": 17392 }, { "epoch": 70.99183673469388, "grad_norm": 37.249366760253906, "learning_rate": 1.452857142857143e-05, "loss": 0.7864, "step": 17393 }, { "epoch": 70.99591836734695, "grad_norm": 26.97173500061035, "learning_rate": 1.4526530612244899e-05, "loss": 1.7804, "step": 17394 }, { "epoch": 71.0, "grad_norm": 31.672130584716797, "learning_rate": 1.4524489795918367e-05, "loss": 1.6287, "step": 17395 }, { "epoch": 71.00408163265305, "grad_norm": 33.92504119873047, "learning_rate": 1.4522448979591838e-05, "loss": 1.2954, "step": 17396 }, { "epoch": 71.00816326530612, "grad_norm": 27.6010799407959, "learning_rate": 1.4520408163265307e-05, "loss": 0.1492, "step": 17397 }, { "epoch": 71.01224489795918, "grad_norm": 25.44630241394043, "learning_rate": 1.4518367346938777e-05, "loss": 1.3958, "step": 17398 }, { "epoch": 71.01632653061225, "grad_norm": 28.341169357299805, "learning_rate": 1.4516326530612246e-05, "loss": 1.5116, "step": 17399 }, { "epoch": 71.0204081632653, "grad_norm": 34.12006378173828, "learning_rate": 1.4514285714285713e-05, "loss": 0.2702, "step": 17400 }, { "epoch": 71.02448979591837, "grad_norm": 32.963157653808594, "learning_rate": 1.4512244897959185e-05, "loss": 1.0821, "step": 17401 }, { "epoch": 71.02857142857142, "grad_norm": 37.368473052978516, "learning_rate": 1.4510204081632653e-05, "loss": 0.5076, "step": 17402 }, { "epoch": 71.0326530612245, "grad_norm": 33.35099792480469, "learning_rate": 1.4508163265306123e-05, "loss": 0.4867, "step": 17403 }, { "epoch": 71.03673469387755, "grad_norm": 44.09389877319336, "learning_rate": 1.4506122448979592e-05, "loss": 0.5458, "step": 17404 }, { "epoch": 71.04081632653062, "grad_norm": 19.50520896911621, "learning_rate": 1.4504081632653062e-05, "loss": 2.065, "step": 17405 }, { "epoch": 71.04489795918367, "grad_norm": 25.72402572631836, "learning_rate": 1.4502040816326531e-05, "loss": 1.7996, "step": 17406 }, { "epoch": 71.04897959183674, "grad_norm": 34.88530349731445, "learning_rate": 1.45e-05, "loss": 0.8195, "step": 17407 }, { "epoch": 71.0530612244898, "grad_norm": 34.79682922363281, "learning_rate": 1.449795918367347e-05, "loss": 0.9026, "step": 17408 }, { "epoch": 71.05714285714286, "grad_norm": 29.395009994506836, "learning_rate": 1.449591836734694e-05, "loss": 1.4358, "step": 17409 }, { "epoch": 71.06122448979592, "grad_norm": 35.4732551574707, "learning_rate": 1.449387755102041e-05, "loss": 1.3975, "step": 17410 }, { "epoch": 71.06530612244897, "grad_norm": 59.03767013549805, "learning_rate": 1.4491836734693879e-05, "loss": 1.1879, "step": 17411 }, { "epoch": 71.06938775510204, "grad_norm": 29.162445068359375, "learning_rate": 1.4489795918367346e-05, "loss": 0.7735, "step": 17412 }, { "epoch": 71.0734693877551, "grad_norm": 31.92576026916504, "learning_rate": 1.4487755102040818e-05, "loss": 0.3366, "step": 17413 }, { "epoch": 71.07755102040817, "grad_norm": 30.74854850769043, "learning_rate": 1.4485714285714285e-05, "loss": 1.4594, "step": 17414 }, { "epoch": 71.08163265306122, "grad_norm": 30.56386947631836, "learning_rate": 1.4483673469387757e-05, "loss": 1.5205, "step": 17415 }, { "epoch": 71.08571428571429, "grad_norm": 26.83991813659668, "learning_rate": 1.4481632653061225e-05, "loss": 1.6455, "step": 17416 }, { "epoch": 71.08979591836734, "grad_norm": 37.5759162902832, "learning_rate": 1.4479591836734693e-05, "loss": 0.9938, "step": 17417 }, { "epoch": 71.09387755102041, "grad_norm": 38.564456939697266, "learning_rate": 1.4477551020408164e-05, "loss": 0.2037, "step": 17418 }, { "epoch": 71.09795918367347, "grad_norm": 34.058250427246094, "learning_rate": 1.4475510204081633e-05, "loss": 0.9134, "step": 17419 }, { "epoch": 71.10204081632654, "grad_norm": 18.562910079956055, "learning_rate": 1.4473469387755103e-05, "loss": 0.1393, "step": 17420 }, { "epoch": 71.10612244897959, "grad_norm": 39.77659225463867, "learning_rate": 1.4471428571428572e-05, "loss": 1.4074, "step": 17421 }, { "epoch": 71.11020408163266, "grad_norm": 34.90956497192383, "learning_rate": 1.4469387755102041e-05, "loss": 0.8792, "step": 17422 }, { "epoch": 71.11428571428571, "grad_norm": 27.190460205078125, "learning_rate": 1.4467346938775511e-05, "loss": 0.1878, "step": 17423 }, { "epoch": 71.11836734693877, "grad_norm": 31.647905349731445, "learning_rate": 1.446530612244898e-05, "loss": 1.6545, "step": 17424 }, { "epoch": 71.12244897959184, "grad_norm": 23.233896255493164, "learning_rate": 1.446326530612245e-05, "loss": 2.2464, "step": 17425 }, { "epoch": 71.12653061224489, "grad_norm": 26.764026641845703, "learning_rate": 1.446122448979592e-05, "loss": 1.6268, "step": 17426 }, { "epoch": 71.13061224489796, "grad_norm": 36.20952224731445, "learning_rate": 1.445918367346939e-05, "loss": 0.5857, "step": 17427 }, { "epoch": 71.13469387755102, "grad_norm": 43.424034118652344, "learning_rate": 1.4457142857142857e-05, "loss": 0.7994, "step": 17428 }, { "epoch": 71.13877551020408, "grad_norm": 45.14561080932617, "learning_rate": 1.4455102040816326e-05, "loss": 0.5439, "step": 17429 }, { "epoch": 71.14285714285714, "grad_norm": 49.61396408081055, "learning_rate": 1.4453061224489796e-05, "loss": 0.9745, "step": 17430 }, { "epoch": 71.14693877551021, "grad_norm": 34.0402946472168, "learning_rate": 1.4451020408163265e-05, "loss": 0.4903, "step": 17431 }, { "epoch": 71.15102040816326, "grad_norm": 59.663570404052734, "learning_rate": 1.4448979591836736e-05, "loss": 1.1874, "step": 17432 }, { "epoch": 71.15510204081633, "grad_norm": 23.192378997802734, "learning_rate": 1.4446938775510205e-05, "loss": 1.4985, "step": 17433 }, { "epoch": 71.15918367346939, "grad_norm": 30.62580108642578, "learning_rate": 1.4444897959183673e-05, "loss": 1.7478, "step": 17434 }, { "epoch": 71.16326530612245, "grad_norm": 35.53620529174805, "learning_rate": 1.4442857142857144e-05, "loss": 0.4462, "step": 17435 }, { "epoch": 71.16734693877551, "grad_norm": 33.244510650634766, "learning_rate": 1.4440816326530613e-05, "loss": 1.6216, "step": 17436 }, { "epoch": 71.17142857142858, "grad_norm": 41.745018005371094, "learning_rate": 1.4438775510204083e-05, "loss": 0.8485, "step": 17437 }, { "epoch": 71.17551020408163, "grad_norm": 41.982852935791016, "learning_rate": 1.4436734693877552e-05, "loss": 1.2519, "step": 17438 }, { "epoch": 71.17959183673469, "grad_norm": 74.15123748779297, "learning_rate": 1.443469387755102e-05, "loss": 0.3245, "step": 17439 }, { "epoch": 71.18367346938776, "grad_norm": 28.723073959350586, "learning_rate": 1.4432653061224491e-05, "loss": 0.9114, "step": 17440 }, { "epoch": 71.18775510204081, "grad_norm": 49.98759460449219, "learning_rate": 1.4430612244897959e-05, "loss": 1.103, "step": 17441 }, { "epoch": 71.19183673469388, "grad_norm": 30.893421173095703, "learning_rate": 1.442857142857143e-05, "loss": 1.9044, "step": 17442 }, { "epoch": 71.19591836734693, "grad_norm": 35.54208755493164, "learning_rate": 1.4426530612244898e-05, "loss": 1.286, "step": 17443 }, { "epoch": 71.2, "grad_norm": 32.21017837524414, "learning_rate": 1.4424489795918367e-05, "loss": 1.4403, "step": 17444 }, { "epoch": 71.20408163265306, "grad_norm": 32.516563415527344, "learning_rate": 1.4422448979591837e-05, "loss": 1.077, "step": 17445 }, { "epoch": 71.20816326530613, "grad_norm": 25.1561336517334, "learning_rate": 1.4420408163265306e-05, "loss": 1.6546, "step": 17446 }, { "epoch": 71.21224489795918, "grad_norm": 31.641599655151367, "learning_rate": 1.4418367346938777e-05, "loss": 1.2412, "step": 17447 }, { "epoch": 71.21632653061225, "grad_norm": 26.20082664489746, "learning_rate": 1.4416326530612245e-05, "loss": 1.7493, "step": 17448 }, { "epoch": 71.2204081632653, "grad_norm": 23.072959899902344, "learning_rate": 1.4414285714285716e-05, "loss": 2.097, "step": 17449 }, { "epoch": 71.22448979591837, "grad_norm": 27.627906799316406, "learning_rate": 1.4412244897959185e-05, "loss": 0.508, "step": 17450 }, { "epoch": 71.22857142857143, "grad_norm": 33.237060546875, "learning_rate": 1.4410204081632654e-05, "loss": 1.3385, "step": 17451 }, { "epoch": 71.2326530612245, "grad_norm": 15.865534782409668, "learning_rate": 1.4408163265306124e-05, "loss": 2.7376, "step": 17452 }, { "epoch": 71.23673469387755, "grad_norm": 22.576597213745117, "learning_rate": 1.4406122448979593e-05, "loss": 0.1315, "step": 17453 }, { "epoch": 71.2408163265306, "grad_norm": 42.515655517578125, "learning_rate": 1.4404081632653063e-05, "loss": 0.4616, "step": 17454 }, { "epoch": 71.24489795918367, "grad_norm": 32.261356353759766, "learning_rate": 1.440204081632653e-05, "loss": 0.7795, "step": 17455 }, { "epoch": 71.24897959183673, "grad_norm": 44.759559631347656, "learning_rate": 1.44e-05, "loss": 0.7307, "step": 17456 }, { "epoch": 71.2530612244898, "grad_norm": 43.495361328125, "learning_rate": 1.439795918367347e-05, "loss": 0.5939, "step": 17457 }, { "epoch": 71.25714285714285, "grad_norm": 14.874250411987305, "learning_rate": 1.4395918367346939e-05, "loss": 2.4477, "step": 17458 }, { "epoch": 71.26122448979592, "grad_norm": 28.1774959564209, "learning_rate": 1.439387755102041e-05, "loss": 0.364, "step": 17459 }, { "epoch": 71.26530612244898, "grad_norm": 25.940170288085938, "learning_rate": 1.4391836734693878e-05, "loss": 1.7937, "step": 17460 }, { "epoch": 71.26938775510204, "grad_norm": 30.154882431030273, "learning_rate": 1.4389795918367347e-05, "loss": 1.5406, "step": 17461 }, { "epoch": 71.2734693877551, "grad_norm": 29.811796188354492, "learning_rate": 1.4387755102040817e-05, "loss": 1.5578, "step": 17462 }, { "epoch": 71.27755102040817, "grad_norm": 38.72909164428711, "learning_rate": 1.4385714285714286e-05, "loss": 0.7498, "step": 17463 }, { "epoch": 71.28163265306122, "grad_norm": 36.38433837890625, "learning_rate": 1.4383673469387757e-05, "loss": 1.088, "step": 17464 }, { "epoch": 71.28571428571429, "grad_norm": 26.327802658081055, "learning_rate": 1.4381632653061226e-05, "loss": 1.8374, "step": 17465 }, { "epoch": 71.28979591836735, "grad_norm": 33.81923294067383, "learning_rate": 1.4379591836734693e-05, "loss": 1.2162, "step": 17466 }, { "epoch": 71.29387755102042, "grad_norm": 30.25436782836914, "learning_rate": 1.4377551020408165e-05, "loss": 1.4559, "step": 17467 }, { "epoch": 71.29795918367347, "grad_norm": 25.778949737548828, "learning_rate": 1.4375510204081632e-05, "loss": 1.4889, "step": 17468 }, { "epoch": 71.30204081632652, "grad_norm": 23.161457061767578, "learning_rate": 1.4373469387755104e-05, "loss": 1.6763, "step": 17469 }, { "epoch": 71.3061224489796, "grad_norm": 36.11173629760742, "learning_rate": 1.4371428571428571e-05, "loss": 0.7167, "step": 17470 }, { "epoch": 71.31020408163265, "grad_norm": 23.089582443237305, "learning_rate": 1.4369387755102044e-05, "loss": 0.1673, "step": 17471 }, { "epoch": 71.31428571428572, "grad_norm": 23.16563606262207, "learning_rate": 1.436734693877551e-05, "loss": 2.2116, "step": 17472 }, { "epoch": 71.31836734693877, "grad_norm": 37.87847900390625, "learning_rate": 1.436530612244898e-05, "loss": 0.3838, "step": 17473 }, { "epoch": 71.32244897959184, "grad_norm": 24.370723724365234, "learning_rate": 1.436326530612245e-05, "loss": 1.681, "step": 17474 }, { "epoch": 71.3265306122449, "grad_norm": 26.27596092224121, "learning_rate": 1.4361224489795919e-05, "loss": 1.851, "step": 17475 }, { "epoch": 71.33061224489796, "grad_norm": 25.776161193847656, "learning_rate": 1.435918367346939e-05, "loss": 1.7083, "step": 17476 }, { "epoch": 71.33469387755102, "grad_norm": 32.2031364440918, "learning_rate": 1.4357142857142858e-05, "loss": 1.0236, "step": 17477 }, { "epoch": 71.33877551020409, "grad_norm": 21.26482582092285, "learning_rate": 1.4355102040816327e-05, "loss": 1.816, "step": 17478 }, { "epoch": 71.34285714285714, "grad_norm": 32.78057861328125, "learning_rate": 1.4353061224489797e-05, "loss": 0.6695, "step": 17479 }, { "epoch": 71.34693877551021, "grad_norm": 37.69892883300781, "learning_rate": 1.4351020408163266e-05, "loss": 1.0646, "step": 17480 }, { "epoch": 71.35102040816327, "grad_norm": 33.90829849243164, "learning_rate": 1.4348979591836737e-05, "loss": 0.801, "step": 17481 }, { "epoch": 71.35510204081632, "grad_norm": 42.130985260009766, "learning_rate": 1.4346938775510204e-05, "loss": 0.5786, "step": 17482 }, { "epoch": 71.35918367346939, "grad_norm": 48.12641906738281, "learning_rate": 1.4344897959183673e-05, "loss": 0.4268, "step": 17483 }, { "epoch": 71.36326530612244, "grad_norm": 29.2788143157959, "learning_rate": 1.4342857142857143e-05, "loss": 0.4075, "step": 17484 }, { "epoch": 71.36734693877551, "grad_norm": 26.75771713256836, "learning_rate": 1.4340816326530612e-05, "loss": 0.4446, "step": 17485 }, { "epoch": 71.37142857142857, "grad_norm": 27.60970687866211, "learning_rate": 1.4338775510204083e-05, "loss": 1.3086, "step": 17486 }, { "epoch": 71.37551020408164, "grad_norm": 29.208826065063477, "learning_rate": 1.4336734693877551e-05, "loss": 1.6841, "step": 17487 }, { "epoch": 71.37959183673469, "grad_norm": 23.290613174438477, "learning_rate": 1.433469387755102e-05, "loss": 1.8801, "step": 17488 }, { "epoch": 71.38367346938776, "grad_norm": 48.14360427856445, "learning_rate": 1.433265306122449e-05, "loss": 0.784, "step": 17489 }, { "epoch": 71.38775510204081, "grad_norm": 30.459674835205078, "learning_rate": 1.433061224489796e-05, "loss": 1.4827, "step": 17490 }, { "epoch": 71.39183673469388, "grad_norm": 28.30599021911621, "learning_rate": 1.432857142857143e-05, "loss": 0.9654, "step": 17491 }, { "epoch": 71.39591836734694, "grad_norm": 37.741485595703125, "learning_rate": 1.4326530612244899e-05, "loss": 0.6539, "step": 17492 }, { "epoch": 71.4, "grad_norm": 22.364377975463867, "learning_rate": 1.432448979591837e-05, "loss": 0.1504, "step": 17493 }, { "epoch": 71.40408163265306, "grad_norm": 68.47409057617188, "learning_rate": 1.4322448979591838e-05, "loss": 1.5496, "step": 17494 }, { "epoch": 71.40816326530613, "grad_norm": 28.025794982910156, "learning_rate": 1.4320408163265305e-05, "loss": 2.1117, "step": 17495 }, { "epoch": 71.41224489795918, "grad_norm": 27.109241485595703, "learning_rate": 1.4318367346938778e-05, "loss": 1.333, "step": 17496 }, { "epoch": 71.41632653061224, "grad_norm": 36.26441192626953, "learning_rate": 1.4316326530612245e-05, "loss": 0.5292, "step": 17497 }, { "epoch": 71.42040816326531, "grad_norm": 35.83395767211914, "learning_rate": 1.4314285714285717e-05, "loss": 1.2183, "step": 17498 }, { "epoch": 71.42448979591836, "grad_norm": 28.38265609741211, "learning_rate": 1.4312244897959184e-05, "loss": 1.4504, "step": 17499 }, { "epoch": 71.42857142857143, "grad_norm": 32.175682067871094, "learning_rate": 1.4310204081632653e-05, "loss": 1.3325, "step": 17500 }, { "epoch": 71.43265306122449, "grad_norm": 35.3746452331543, "learning_rate": 1.4308163265306123e-05, "loss": 0.5283, "step": 17501 }, { "epoch": 71.43673469387755, "grad_norm": 40.58602523803711, "learning_rate": 1.4306122448979592e-05, "loss": 0.7987, "step": 17502 }, { "epoch": 71.44081632653061, "grad_norm": 41.89470672607422, "learning_rate": 1.4304081632653063e-05, "loss": 1.2009, "step": 17503 }, { "epoch": 71.44489795918368, "grad_norm": 23.762319564819336, "learning_rate": 1.4302040816326531e-05, "loss": 1.9327, "step": 17504 }, { "epoch": 71.44897959183673, "grad_norm": 22.088043212890625, "learning_rate": 1.43e-05, "loss": 1.6966, "step": 17505 }, { "epoch": 71.4530612244898, "grad_norm": 41.623321533203125, "learning_rate": 1.429795918367347e-05, "loss": 1.326, "step": 17506 }, { "epoch": 71.45714285714286, "grad_norm": 31.297468185424805, "learning_rate": 1.4295918367346938e-05, "loss": 1.5769, "step": 17507 }, { "epoch": 71.46122448979592, "grad_norm": 21.333683013916016, "learning_rate": 1.429387755102041e-05, "loss": 1.724, "step": 17508 }, { "epoch": 71.46530612244898, "grad_norm": 32.78299331665039, "learning_rate": 1.4291836734693877e-05, "loss": 0.501, "step": 17509 }, { "epoch": 71.46938775510205, "grad_norm": 49.09074783325195, "learning_rate": 1.4289795918367346e-05, "loss": 0.9661, "step": 17510 }, { "epoch": 71.4734693877551, "grad_norm": 24.59444808959961, "learning_rate": 1.4287755102040817e-05, "loss": 2.4685, "step": 17511 }, { "epoch": 71.47755102040816, "grad_norm": 34.40309524536133, "learning_rate": 1.4285714285714285e-05, "loss": 0.3725, "step": 17512 }, { "epoch": 71.48163265306123, "grad_norm": 44.509010314941406, "learning_rate": 1.4283673469387756e-05, "loss": 0.5898, "step": 17513 }, { "epoch": 71.48571428571428, "grad_norm": 41.25103759765625, "learning_rate": 1.4281632653061225e-05, "loss": 0.5272, "step": 17514 }, { "epoch": 71.48979591836735, "grad_norm": 43.95425033569336, "learning_rate": 1.4279591836734695e-05, "loss": 0.8764, "step": 17515 }, { "epoch": 71.4938775510204, "grad_norm": 36.31156539916992, "learning_rate": 1.4277551020408164e-05, "loss": 0.3637, "step": 17516 }, { "epoch": 71.49795918367347, "grad_norm": 45.2584228515625, "learning_rate": 1.4275510204081633e-05, "loss": 0.5805, "step": 17517 }, { "epoch": 71.50204081632653, "grad_norm": 21.38178825378418, "learning_rate": 1.4273469387755103e-05, "loss": 2.2135, "step": 17518 }, { "epoch": 71.5061224489796, "grad_norm": 38.549781799316406, "learning_rate": 1.4271428571428572e-05, "loss": 0.6955, "step": 17519 }, { "epoch": 71.51020408163265, "grad_norm": 36.10201644897461, "learning_rate": 1.4269387755102043e-05, "loss": 1.712, "step": 17520 }, { "epoch": 71.51428571428572, "grad_norm": 35.31621170043945, "learning_rate": 1.4267346938775512e-05, "loss": 1.2124, "step": 17521 }, { "epoch": 71.51836734693877, "grad_norm": 25.893203735351562, "learning_rate": 1.4265306122448979e-05, "loss": 1.7391, "step": 17522 }, { "epoch": 71.52244897959184, "grad_norm": 51.853355407714844, "learning_rate": 1.4263265306122451e-05, "loss": 0.6954, "step": 17523 }, { "epoch": 71.5265306122449, "grad_norm": 31.36744499206543, "learning_rate": 1.4261224489795918e-05, "loss": 1.4928, "step": 17524 }, { "epoch": 71.53061224489795, "grad_norm": 25.999052047729492, "learning_rate": 1.425918367346939e-05, "loss": 2.2246, "step": 17525 }, { "epoch": 71.53469387755102, "grad_norm": 24.292863845825195, "learning_rate": 1.4257142857142857e-05, "loss": 1.6628, "step": 17526 }, { "epoch": 71.53877551020408, "grad_norm": 27.9376163482666, "learning_rate": 1.4255102040816326e-05, "loss": 1.8686, "step": 17527 }, { "epoch": 71.54285714285714, "grad_norm": 37.84221267700195, "learning_rate": 1.4253061224489797e-05, "loss": 0.7321, "step": 17528 }, { "epoch": 71.5469387755102, "grad_norm": 38.382423400878906, "learning_rate": 1.4251020408163266e-05, "loss": 1.4479, "step": 17529 }, { "epoch": 71.55102040816327, "grad_norm": 29.373401641845703, "learning_rate": 1.4248979591836736e-05, "loss": 0.3334, "step": 17530 }, { "epoch": 71.55510204081632, "grad_norm": 49.79681396484375, "learning_rate": 1.4246938775510205e-05, "loss": 0.2182, "step": 17531 }, { "epoch": 71.55918367346939, "grad_norm": 10.437447547912598, "learning_rate": 1.4244897959183674e-05, "loss": 0.0786, "step": 17532 }, { "epoch": 71.56326530612245, "grad_norm": 33.07132339477539, "learning_rate": 1.4242857142857144e-05, "loss": 0.4421, "step": 17533 }, { "epoch": 71.56734693877551, "grad_norm": 26.966548919677734, "learning_rate": 1.4240816326530611e-05, "loss": 1.7258, "step": 17534 }, { "epoch": 71.57142857142857, "grad_norm": 22.96752166748047, "learning_rate": 1.4238775510204084e-05, "loss": 0.2027, "step": 17535 }, { "epoch": 71.57551020408164, "grad_norm": 40.836673736572266, "learning_rate": 1.423673469387755e-05, "loss": 1.0541, "step": 17536 }, { "epoch": 71.57959183673469, "grad_norm": 38.58992385864258, "learning_rate": 1.4234693877551023e-05, "loss": 0.8425, "step": 17537 }, { "epoch": 71.58367346938776, "grad_norm": 25.86297035217285, "learning_rate": 1.423265306122449e-05, "loss": 1.4489, "step": 17538 }, { "epoch": 71.58775510204082, "grad_norm": 29.63428497314453, "learning_rate": 1.4230612244897959e-05, "loss": 0.9479, "step": 17539 }, { "epoch": 71.59183673469387, "grad_norm": 10.06913948059082, "learning_rate": 1.422857142857143e-05, "loss": 3.1066, "step": 17540 }, { "epoch": 71.59591836734694, "grad_norm": 22.20474624633789, "learning_rate": 1.4226530612244898e-05, "loss": 2.1953, "step": 17541 }, { "epoch": 71.6, "grad_norm": 18.423254013061523, "learning_rate": 1.4224489795918369e-05, "loss": 2.2703, "step": 17542 }, { "epoch": 71.60408163265306, "grad_norm": 27.760635375976562, "learning_rate": 1.4222448979591837e-05, "loss": 1.2277, "step": 17543 }, { "epoch": 71.60816326530612, "grad_norm": 38.21486282348633, "learning_rate": 1.4220408163265306e-05, "loss": 1.4973, "step": 17544 }, { "epoch": 71.61224489795919, "grad_norm": 36.605682373046875, "learning_rate": 1.4218367346938777e-05, "loss": 0.348, "step": 17545 }, { "epoch": 71.61632653061224, "grad_norm": 25.25324821472168, "learning_rate": 1.4216326530612246e-05, "loss": 1.8459, "step": 17546 }, { "epoch": 71.62040816326531, "grad_norm": 29.56826400756836, "learning_rate": 1.4214285714285716e-05, "loss": 1.6657, "step": 17547 }, { "epoch": 71.62448979591836, "grad_norm": 37.0842399597168, "learning_rate": 1.4212244897959185e-05, "loss": 0.2959, "step": 17548 }, { "epoch": 71.62857142857143, "grad_norm": 20.38844871520996, "learning_rate": 1.4210204081632652e-05, "loss": 2.2492, "step": 17549 }, { "epoch": 71.63265306122449, "grad_norm": 45.39198303222656, "learning_rate": 1.4208163265306124e-05, "loss": 1.2152, "step": 17550 }, { "epoch": 71.63673469387756, "grad_norm": 35.61827087402344, "learning_rate": 1.4206122448979591e-05, "loss": 0.6954, "step": 17551 }, { "epoch": 71.64081632653061, "grad_norm": 27.372722625732422, "learning_rate": 1.4204081632653064e-05, "loss": 0.3849, "step": 17552 }, { "epoch": 71.64489795918368, "grad_norm": 20.067691802978516, "learning_rate": 1.420204081632653e-05, "loss": 2.1364, "step": 17553 }, { "epoch": 71.64897959183673, "grad_norm": 32.97804260253906, "learning_rate": 1.42e-05, "loss": 1.598, "step": 17554 }, { "epoch": 71.65306122448979, "grad_norm": 29.55084991455078, "learning_rate": 1.419795918367347e-05, "loss": 0.8368, "step": 17555 }, { "epoch": 71.65714285714286, "grad_norm": 40.83935546875, "learning_rate": 1.4195918367346939e-05, "loss": 0.6341, "step": 17556 }, { "epoch": 71.66122448979591, "grad_norm": 35.029117584228516, "learning_rate": 1.419387755102041e-05, "loss": 0.931, "step": 17557 }, { "epoch": 71.66530612244898, "grad_norm": 22.73387908935547, "learning_rate": 1.4191836734693878e-05, "loss": 1.6314, "step": 17558 }, { "epoch": 71.66938775510204, "grad_norm": 23.04544448852539, "learning_rate": 1.4189795918367349e-05, "loss": 1.6716, "step": 17559 }, { "epoch": 71.6734693877551, "grad_norm": 26.63194465637207, "learning_rate": 1.4187755102040818e-05, "loss": 1.7519, "step": 17560 }, { "epoch": 71.67755102040816, "grad_norm": 33.22182083129883, "learning_rate": 1.4185714285714285e-05, "loss": 1.2255, "step": 17561 }, { "epoch": 71.68163265306123, "grad_norm": 37.16228103637695, "learning_rate": 1.4183673469387757e-05, "loss": 1.1121, "step": 17562 }, { "epoch": 71.68571428571428, "grad_norm": 35.55284118652344, "learning_rate": 1.4181632653061224e-05, "loss": 1.0799, "step": 17563 }, { "epoch": 71.68979591836735, "grad_norm": 32.65415954589844, "learning_rate": 1.4179591836734696e-05, "loss": 0.9537, "step": 17564 }, { "epoch": 71.6938775510204, "grad_norm": 28.962799072265625, "learning_rate": 1.4177551020408163e-05, "loss": 2.0447, "step": 17565 }, { "epoch": 71.69795918367348, "grad_norm": 31.90474510192871, "learning_rate": 1.4175510204081632e-05, "loss": 1.5012, "step": 17566 }, { "epoch": 71.70204081632653, "grad_norm": 33.48033905029297, "learning_rate": 1.4173469387755103e-05, "loss": 1.6153, "step": 17567 }, { "epoch": 71.70612244897958, "grad_norm": 52.07389450073242, "learning_rate": 1.4171428571428572e-05, "loss": 0.5249, "step": 17568 }, { "epoch": 71.71020408163265, "grad_norm": 21.198163986206055, "learning_rate": 1.4169387755102042e-05, "loss": 0.127, "step": 17569 }, { "epoch": 71.71428571428571, "grad_norm": 21.88442039489746, "learning_rate": 1.416734693877551e-05, "loss": 2.3191, "step": 17570 }, { "epoch": 71.71836734693878, "grad_norm": 32.82267761230469, "learning_rate": 1.416530612244898e-05, "loss": 0.995, "step": 17571 }, { "epoch": 71.72244897959183, "grad_norm": 23.4742488861084, "learning_rate": 1.416326530612245e-05, "loss": 0.2207, "step": 17572 }, { "epoch": 71.7265306122449, "grad_norm": 28.140748977661133, "learning_rate": 1.4161224489795919e-05, "loss": 1.4251, "step": 17573 }, { "epoch": 71.73061224489796, "grad_norm": 34.93598937988281, "learning_rate": 1.415918367346939e-05, "loss": 1.3639, "step": 17574 }, { "epoch": 71.73469387755102, "grad_norm": 37.38443374633789, "learning_rate": 1.4157142857142858e-05, "loss": 1.545, "step": 17575 }, { "epoch": 71.73877551020408, "grad_norm": 33.51789093017578, "learning_rate": 1.4155102040816325e-05, "loss": 1.513, "step": 17576 }, { "epoch": 71.74285714285715, "grad_norm": 42.38975524902344, "learning_rate": 1.4153061224489798e-05, "loss": 0.6138, "step": 17577 }, { "epoch": 71.7469387755102, "grad_norm": 45.23508834838867, "learning_rate": 1.4151020408163265e-05, "loss": 0.4511, "step": 17578 }, { "epoch": 71.75102040816327, "grad_norm": 36.808326721191406, "learning_rate": 1.4148979591836737e-05, "loss": 0.89, "step": 17579 }, { "epoch": 71.75510204081633, "grad_norm": 25.415328979492188, "learning_rate": 1.4146938775510204e-05, "loss": 2.1479, "step": 17580 }, { "epoch": 71.7591836734694, "grad_norm": 28.931596755981445, "learning_rate": 1.4144897959183673e-05, "loss": 1.1835, "step": 17581 }, { "epoch": 71.76326530612245, "grad_norm": 41.56619644165039, "learning_rate": 1.4142857142857143e-05, "loss": 1.6372, "step": 17582 }, { "epoch": 71.7673469387755, "grad_norm": 16.876102447509766, "learning_rate": 1.4140816326530612e-05, "loss": 2.5885, "step": 17583 }, { "epoch": 71.77142857142857, "grad_norm": 15.094793319702148, "learning_rate": 1.4138775510204083e-05, "loss": 0.0558, "step": 17584 }, { "epoch": 71.77551020408163, "grad_norm": 41.550148010253906, "learning_rate": 1.4136734693877552e-05, "loss": 0.602, "step": 17585 }, { "epoch": 71.7795918367347, "grad_norm": 15.713393211364746, "learning_rate": 1.4134693877551022e-05, "loss": 0.1275, "step": 17586 }, { "epoch": 71.78367346938775, "grad_norm": 31.47442054748535, "learning_rate": 1.4132653061224491e-05, "loss": 0.443, "step": 17587 }, { "epoch": 71.78775510204082, "grad_norm": 34.71648406982422, "learning_rate": 1.4130612244897958e-05, "loss": 0.8688, "step": 17588 }, { "epoch": 71.79183673469387, "grad_norm": 44.71626281738281, "learning_rate": 1.412857142857143e-05, "loss": 0.3917, "step": 17589 }, { "epoch": 71.79591836734694, "grad_norm": 32.092105865478516, "learning_rate": 1.4126530612244897e-05, "loss": 1.2983, "step": 17590 }, { "epoch": 71.8, "grad_norm": 46.11088180541992, "learning_rate": 1.412448979591837e-05, "loss": 0.5167, "step": 17591 }, { "epoch": 71.80408163265307, "grad_norm": 22.773826599121094, "learning_rate": 1.4122448979591837e-05, "loss": 1.7134, "step": 17592 }, { "epoch": 71.80816326530612, "grad_norm": 22.85352325439453, "learning_rate": 1.4120408163265306e-05, "loss": 0.1513, "step": 17593 }, { "epoch": 71.81224489795919, "grad_norm": 31.917966842651367, "learning_rate": 1.4118367346938776e-05, "loss": 1.3028, "step": 17594 }, { "epoch": 71.81632653061224, "grad_norm": 36.135467529296875, "learning_rate": 1.4116326530612245e-05, "loss": 1.71, "step": 17595 }, { "epoch": 71.82040816326531, "grad_norm": 34.377899169921875, "learning_rate": 1.4114285714285715e-05, "loss": 0.7401, "step": 17596 }, { "epoch": 71.82448979591837, "grad_norm": 36.41447448730469, "learning_rate": 1.4112244897959184e-05, "loss": 1.8129, "step": 17597 }, { "epoch": 71.82857142857142, "grad_norm": 27.556699752807617, "learning_rate": 1.4110204081632653e-05, "loss": 1.8904, "step": 17598 }, { "epoch": 71.83265306122449, "grad_norm": 25.4722843170166, "learning_rate": 1.4108163265306124e-05, "loss": 1.7223, "step": 17599 }, { "epoch": 71.83673469387755, "grad_norm": 32.07802963256836, "learning_rate": 1.4106122448979592e-05, "loss": 1.1225, "step": 17600 }, { "epoch": 71.84081632653061, "grad_norm": 32.19136047363281, "learning_rate": 1.4104081632653063e-05, "loss": 1.1433, "step": 17601 }, { "epoch": 71.84489795918367, "grad_norm": 36.599388122558594, "learning_rate": 1.4102040816326532e-05, "loss": 0.8176, "step": 17602 }, { "epoch": 71.84897959183674, "grad_norm": 34.21379470825195, "learning_rate": 1.4099999999999999e-05, "loss": 0.4766, "step": 17603 }, { "epoch": 71.85306122448979, "grad_norm": 30.15479278564453, "learning_rate": 1.4097959183673471e-05, "loss": 0.9811, "step": 17604 }, { "epoch": 71.85714285714286, "grad_norm": 36.735740661621094, "learning_rate": 1.4095918367346938e-05, "loss": 1.1603, "step": 17605 }, { "epoch": 71.86122448979592, "grad_norm": 52.97211456298828, "learning_rate": 1.409387755102041e-05, "loss": 1.0133, "step": 17606 }, { "epoch": 71.86530612244898, "grad_norm": 27.019880294799805, "learning_rate": 1.4091836734693877e-05, "loss": 1.5893, "step": 17607 }, { "epoch": 71.86938775510204, "grad_norm": 24.29104995727539, "learning_rate": 1.408979591836735e-05, "loss": 1.7842, "step": 17608 }, { "epoch": 71.87346938775511, "grad_norm": 35.72347640991211, "learning_rate": 1.4087755102040817e-05, "loss": 1.2129, "step": 17609 }, { "epoch": 71.87755102040816, "grad_norm": 45.6962890625, "learning_rate": 1.4085714285714286e-05, "loss": 0.493, "step": 17610 }, { "epoch": 71.88163265306123, "grad_norm": 25.87833595275879, "learning_rate": 1.4083673469387756e-05, "loss": 1.7751, "step": 17611 }, { "epoch": 71.88571428571429, "grad_norm": 50.62788772583008, "learning_rate": 1.4081632653061225e-05, "loss": 0.5866, "step": 17612 }, { "epoch": 71.88979591836734, "grad_norm": 30.793424606323242, "learning_rate": 1.4079591836734695e-05, "loss": 1.4326, "step": 17613 }, { "epoch": 71.89387755102041, "grad_norm": 31.069929122924805, "learning_rate": 1.4077551020408164e-05, "loss": 1.3255, "step": 17614 }, { "epoch": 71.89795918367346, "grad_norm": 34.1274528503418, "learning_rate": 1.4075510204081631e-05, "loss": 1.267, "step": 17615 }, { "epoch": 71.90204081632653, "grad_norm": 37.11832809448242, "learning_rate": 1.4073469387755104e-05, "loss": 0.5076, "step": 17616 }, { "epoch": 71.90612244897959, "grad_norm": 28.491052627563477, "learning_rate": 1.407142857142857e-05, "loss": 1.6301, "step": 17617 }, { "epoch": 71.91020408163266, "grad_norm": 38.77592086791992, "learning_rate": 1.4069387755102043e-05, "loss": 0.8973, "step": 17618 }, { "epoch": 71.91428571428571, "grad_norm": 35.05384063720703, "learning_rate": 1.406734693877551e-05, "loss": 1.4847, "step": 17619 }, { "epoch": 71.91836734693878, "grad_norm": 31.255298614501953, "learning_rate": 1.4065306122448979e-05, "loss": 0.9596, "step": 17620 }, { "epoch": 71.92244897959183, "grad_norm": 34.294010162353516, "learning_rate": 1.406326530612245e-05, "loss": 1.3073, "step": 17621 }, { "epoch": 71.9265306122449, "grad_norm": 42.88719940185547, "learning_rate": 1.4061224489795918e-05, "loss": 0.5612, "step": 17622 }, { "epoch": 71.93061224489796, "grad_norm": 33.636451721191406, "learning_rate": 1.4059183673469389e-05, "loss": 0.6127, "step": 17623 }, { "epoch": 71.93469387755103, "grad_norm": 44.26923370361328, "learning_rate": 1.4057142857142858e-05, "loss": 0.3744, "step": 17624 }, { "epoch": 71.93877551020408, "grad_norm": 29.481678009033203, "learning_rate": 1.4055102040816326e-05, "loss": 0.5004, "step": 17625 }, { "epoch": 71.94285714285714, "grad_norm": 33.9607048034668, "learning_rate": 1.4053061224489797e-05, "loss": 1.052, "step": 17626 }, { "epoch": 71.9469387755102, "grad_norm": 36.936317443847656, "learning_rate": 1.4051020408163266e-05, "loss": 0.9153, "step": 17627 }, { "epoch": 71.95102040816326, "grad_norm": 40.72119140625, "learning_rate": 1.4048979591836736e-05, "loss": 0.7679, "step": 17628 }, { "epoch": 71.95510204081633, "grad_norm": 26.205602645874023, "learning_rate": 1.4046938775510205e-05, "loss": 2.1615, "step": 17629 }, { "epoch": 71.95918367346938, "grad_norm": 25.634910583496094, "learning_rate": 1.4044897959183676e-05, "loss": 2.1769, "step": 17630 }, { "epoch": 71.96326530612245, "grad_norm": 39.20718002319336, "learning_rate": 1.4042857142857144e-05, "loss": 0.8634, "step": 17631 }, { "epoch": 71.9673469387755, "grad_norm": 43.67211151123047, "learning_rate": 1.4040816326530612e-05, "loss": 1.0045, "step": 17632 }, { "epoch": 71.97142857142858, "grad_norm": 24.75200080871582, "learning_rate": 1.4038775510204084e-05, "loss": 1.8653, "step": 17633 }, { "epoch": 71.97551020408163, "grad_norm": 20.114322662353516, "learning_rate": 1.4036734693877551e-05, "loss": 2.3255, "step": 17634 }, { "epoch": 71.9795918367347, "grad_norm": 31.531213760375977, "learning_rate": 1.4034693877551023e-05, "loss": 1.1017, "step": 17635 }, { "epoch": 71.98367346938775, "grad_norm": 36.32338333129883, "learning_rate": 1.403265306122449e-05, "loss": 1.3785, "step": 17636 }, { "epoch": 71.98775510204082, "grad_norm": 18.282426834106445, "learning_rate": 1.4030612244897959e-05, "loss": 2.3655, "step": 17637 }, { "epoch": 71.99183673469388, "grad_norm": 37.00395202636719, "learning_rate": 1.402857142857143e-05, "loss": 1.1527, "step": 17638 }, { "epoch": 71.99591836734695, "grad_norm": 30.818376541137695, "learning_rate": 1.4026530612244898e-05, "loss": 1.4527, "step": 17639 }, { "epoch": 72.0, "grad_norm": 33.6529426574707, "learning_rate": 1.4024489795918369e-05, "loss": 0.9309, "step": 17640 }, { "epoch": 72.00408163265305, "grad_norm": 14.91042709350586, "learning_rate": 1.4022448979591838e-05, "loss": 2.4024, "step": 17641 }, { "epoch": 72.00816326530612, "grad_norm": 34.37447738647461, "learning_rate": 1.4020408163265305e-05, "loss": 0.7022, "step": 17642 }, { "epoch": 72.01224489795918, "grad_norm": 18.786056518554688, "learning_rate": 1.4018367346938777e-05, "loss": 2.2421, "step": 17643 }, { "epoch": 72.01632653061225, "grad_norm": 51.559757232666016, "learning_rate": 1.4016326530612244e-05, "loss": 1.4134, "step": 17644 }, { "epoch": 72.0204081632653, "grad_norm": 33.74325180053711, "learning_rate": 1.4014285714285716e-05, "loss": 0.4441, "step": 17645 }, { "epoch": 72.02448979591837, "grad_norm": 29.389076232910156, "learning_rate": 1.4012244897959183e-05, "loss": 1.0102, "step": 17646 }, { "epoch": 72.02857142857142, "grad_norm": 38.46343231201172, "learning_rate": 1.4010204081632652e-05, "loss": 0.6688, "step": 17647 }, { "epoch": 72.0326530612245, "grad_norm": 51.01160430908203, "learning_rate": 1.4008163265306123e-05, "loss": 0.9738, "step": 17648 }, { "epoch": 72.03673469387755, "grad_norm": 37.32111740112305, "learning_rate": 1.4006122448979592e-05, "loss": 1.1888, "step": 17649 }, { "epoch": 72.04081632653062, "grad_norm": 37.167625427246094, "learning_rate": 1.4004081632653062e-05, "loss": 0.4938, "step": 17650 }, { "epoch": 72.04489795918367, "grad_norm": 29.698863983154297, "learning_rate": 1.4002040816326531e-05, "loss": 1.8243, "step": 17651 }, { "epoch": 72.04897959183674, "grad_norm": 40.891990661621094, "learning_rate": 1.4000000000000001e-05, "loss": 1.0343, "step": 17652 }, { "epoch": 72.0530612244898, "grad_norm": 43.571083068847656, "learning_rate": 1.399795918367347e-05, "loss": 0.8245, "step": 17653 }, { "epoch": 72.05714285714286, "grad_norm": 24.73324203491211, "learning_rate": 1.3995918367346939e-05, "loss": 1.6358, "step": 17654 }, { "epoch": 72.06122448979592, "grad_norm": 22.0317325592041, "learning_rate": 1.399387755102041e-05, "loss": 0.1632, "step": 17655 }, { "epoch": 72.06530612244897, "grad_norm": 34.01750183105469, "learning_rate": 1.3991836734693878e-05, "loss": 1.2591, "step": 17656 }, { "epoch": 72.06938775510204, "grad_norm": 51.66968536376953, "learning_rate": 1.3989795918367349e-05, "loss": 1.1179, "step": 17657 }, { "epoch": 72.0734693877551, "grad_norm": 20.292652130126953, "learning_rate": 1.3987755102040818e-05, "loss": 2.1092, "step": 17658 }, { "epoch": 72.07755102040817, "grad_norm": 26.791297912597656, "learning_rate": 1.3985714285714285e-05, "loss": 1.6125, "step": 17659 }, { "epoch": 72.08163265306122, "grad_norm": 28.643016815185547, "learning_rate": 1.3983673469387757e-05, "loss": 1.5321, "step": 17660 }, { "epoch": 72.08571428571429, "grad_norm": 46.098907470703125, "learning_rate": 1.3981632653061224e-05, "loss": 0.8289, "step": 17661 }, { "epoch": 72.08979591836734, "grad_norm": 28.407438278198242, "learning_rate": 1.3979591836734696e-05, "loss": 1.3806, "step": 17662 }, { "epoch": 72.09387755102041, "grad_norm": 31.116098403930664, "learning_rate": 1.3977551020408164e-05, "loss": 1.4773, "step": 17663 }, { "epoch": 72.09795918367347, "grad_norm": 31.104480743408203, "learning_rate": 1.3975510204081632e-05, "loss": 0.2199, "step": 17664 }, { "epoch": 72.10204081632654, "grad_norm": 28.86467933654785, "learning_rate": 1.3973469387755103e-05, "loss": 0.9109, "step": 17665 }, { "epoch": 72.10612244897959, "grad_norm": 47.44987487792969, "learning_rate": 1.3971428571428572e-05, "loss": 0.7295, "step": 17666 }, { "epoch": 72.11020408163266, "grad_norm": 28.738088607788086, "learning_rate": 1.3969387755102042e-05, "loss": 0.4708, "step": 17667 }, { "epoch": 72.11428571428571, "grad_norm": 25.022167205810547, "learning_rate": 1.3967346938775511e-05, "loss": 1.6851, "step": 17668 }, { "epoch": 72.11836734693877, "grad_norm": 31.63542938232422, "learning_rate": 1.3965306122448978e-05, "loss": 1.1431, "step": 17669 }, { "epoch": 72.12244897959184, "grad_norm": 27.81113624572754, "learning_rate": 1.396326530612245e-05, "loss": 1.8653, "step": 17670 }, { "epoch": 72.12653061224489, "grad_norm": 35.975215911865234, "learning_rate": 1.3961224489795918e-05, "loss": 0.7342, "step": 17671 }, { "epoch": 72.13061224489796, "grad_norm": 24.560544967651367, "learning_rate": 1.395918367346939e-05, "loss": 2.4312, "step": 17672 }, { "epoch": 72.13469387755102, "grad_norm": 45.367774963378906, "learning_rate": 1.3957142857142857e-05, "loss": 1.5113, "step": 17673 }, { "epoch": 72.13877551020408, "grad_norm": 30.6473445892334, "learning_rate": 1.3955102040816329e-05, "loss": 1.8193, "step": 17674 }, { "epoch": 72.14285714285714, "grad_norm": 43.6734733581543, "learning_rate": 1.3953061224489796e-05, "loss": 1.1862, "step": 17675 }, { "epoch": 72.14693877551021, "grad_norm": 17.81019401550293, "learning_rate": 1.3951020408163265e-05, "loss": 2.2234, "step": 17676 }, { "epoch": 72.15102040816326, "grad_norm": 22.980846405029297, "learning_rate": 1.3948979591836736e-05, "loss": 1.7066, "step": 17677 }, { "epoch": 72.15510204081633, "grad_norm": 11.404090881347656, "learning_rate": 1.3946938775510204e-05, "loss": 3.0964, "step": 17678 }, { "epoch": 72.15918367346939, "grad_norm": 27.667274475097656, "learning_rate": 1.3944897959183675e-05, "loss": 1.6541, "step": 17679 }, { "epoch": 72.16326530612245, "grad_norm": 32.754852294921875, "learning_rate": 1.3942857142857144e-05, "loss": 1.0918, "step": 17680 }, { "epoch": 72.16734693877551, "grad_norm": 32.1446418762207, "learning_rate": 1.3940816326530612e-05, "loss": 1.4531, "step": 17681 }, { "epoch": 72.17142857142858, "grad_norm": 21.191194534301758, "learning_rate": 1.3938775510204083e-05, "loss": 0.139, "step": 17682 }, { "epoch": 72.17551020408163, "grad_norm": 2.1336145401000977, "learning_rate": 1.3936734693877552e-05, "loss": 0.0406, "step": 17683 }, { "epoch": 72.17959183673469, "grad_norm": 36.8455924987793, "learning_rate": 1.3934693877551022e-05, "loss": 1.5994, "step": 17684 }, { "epoch": 72.18367346938776, "grad_norm": 37.34503936767578, "learning_rate": 1.3932653061224491e-05, "loss": 2.0356, "step": 17685 }, { "epoch": 72.18775510204081, "grad_norm": 45.58113098144531, "learning_rate": 1.3930612244897958e-05, "loss": 0.382, "step": 17686 }, { "epoch": 72.19183673469388, "grad_norm": 42.13269805908203, "learning_rate": 1.392857142857143e-05, "loss": 0.8776, "step": 17687 }, { "epoch": 72.19591836734693, "grad_norm": 29.701709747314453, "learning_rate": 1.3926530612244898e-05, "loss": 1.8854, "step": 17688 }, { "epoch": 72.2, "grad_norm": 30.296524047851562, "learning_rate": 1.392448979591837e-05, "loss": 0.9288, "step": 17689 }, { "epoch": 72.20408163265306, "grad_norm": 41.09574508666992, "learning_rate": 1.3922448979591837e-05, "loss": 1.2586, "step": 17690 }, { "epoch": 72.20816326530613, "grad_norm": 23.74885368347168, "learning_rate": 1.3920408163265306e-05, "loss": 2.105, "step": 17691 }, { "epoch": 72.21224489795918, "grad_norm": 25.287757873535156, "learning_rate": 1.3918367346938776e-05, "loss": 2.1362, "step": 17692 }, { "epoch": 72.21632653061225, "grad_norm": 38.30463790893555, "learning_rate": 1.3916326530612245e-05, "loss": 0.5309, "step": 17693 }, { "epoch": 72.2204081632653, "grad_norm": 27.588031768798828, "learning_rate": 1.3914285714285716e-05, "loss": 1.6699, "step": 17694 }, { "epoch": 72.22448979591837, "grad_norm": 23.281320571899414, "learning_rate": 1.3912244897959184e-05, "loss": 2.0583, "step": 17695 }, { "epoch": 72.22857142857143, "grad_norm": 41.22287368774414, "learning_rate": 1.3910204081632655e-05, "loss": 0.9001, "step": 17696 }, { "epoch": 72.2326530612245, "grad_norm": 27.28838348388672, "learning_rate": 1.3908163265306124e-05, "loss": 1.4785, "step": 17697 }, { "epoch": 72.23673469387755, "grad_norm": 22.828859329223633, "learning_rate": 1.3906122448979591e-05, "loss": 1.6025, "step": 17698 }, { "epoch": 72.2408163265306, "grad_norm": 35.16033172607422, "learning_rate": 1.3904081632653063e-05, "loss": 0.4731, "step": 17699 }, { "epoch": 72.24489795918367, "grad_norm": 34.29200744628906, "learning_rate": 1.390204081632653e-05, "loss": 0.786, "step": 17700 }, { "epoch": 72.24897959183673, "grad_norm": 31.54082489013672, "learning_rate": 1.3900000000000002e-05, "loss": 1.4218, "step": 17701 }, { "epoch": 72.2530612244898, "grad_norm": 24.61200523376465, "learning_rate": 1.389795918367347e-05, "loss": 1.8382, "step": 17702 }, { "epoch": 72.25714285714285, "grad_norm": 21.13283348083496, "learning_rate": 1.3895918367346938e-05, "loss": 1.6351, "step": 17703 }, { "epoch": 72.26122448979592, "grad_norm": 37.206844329833984, "learning_rate": 1.3893877551020409e-05, "loss": 1.3877, "step": 17704 }, { "epoch": 72.26530612244898, "grad_norm": 37.824615478515625, "learning_rate": 1.3891836734693878e-05, "loss": 1.5871, "step": 17705 }, { "epoch": 72.26938775510204, "grad_norm": 25.51947784423828, "learning_rate": 1.3889795918367348e-05, "loss": 1.8166, "step": 17706 }, { "epoch": 72.2734693877551, "grad_norm": 24.309188842773438, "learning_rate": 1.3887755102040817e-05, "loss": 1.6192, "step": 17707 }, { "epoch": 72.27755102040817, "grad_norm": 37.95889663696289, "learning_rate": 1.3885714285714286e-05, "loss": 0.3387, "step": 17708 }, { "epoch": 72.28163265306122, "grad_norm": 37.970367431640625, "learning_rate": 1.3883673469387756e-05, "loss": 1.3613, "step": 17709 }, { "epoch": 72.28571428571429, "grad_norm": 38.24419403076172, "learning_rate": 1.3881632653061225e-05, "loss": 1.2365, "step": 17710 }, { "epoch": 72.28979591836735, "grad_norm": 37.549495697021484, "learning_rate": 1.3879591836734696e-05, "loss": 0.54, "step": 17711 }, { "epoch": 72.29387755102042, "grad_norm": 27.097301483154297, "learning_rate": 1.3877551020408165e-05, "loss": 1.1637, "step": 17712 }, { "epoch": 72.29795918367347, "grad_norm": 42.943599700927734, "learning_rate": 1.3875510204081632e-05, "loss": 1.2514, "step": 17713 }, { "epoch": 72.30204081632652, "grad_norm": 32.97138595581055, "learning_rate": 1.3873469387755104e-05, "loss": 0.4569, "step": 17714 }, { "epoch": 72.3061224489796, "grad_norm": 26.49961280822754, "learning_rate": 1.3871428571428571e-05, "loss": 1.7218, "step": 17715 }, { "epoch": 72.31020408163265, "grad_norm": 32.68117904663086, "learning_rate": 1.3869387755102042e-05, "loss": 1.081, "step": 17716 }, { "epoch": 72.31428571428572, "grad_norm": 28.687864303588867, "learning_rate": 1.386734693877551e-05, "loss": 1.2266, "step": 17717 }, { "epoch": 72.31836734693877, "grad_norm": 34.904518127441406, "learning_rate": 1.386530612244898e-05, "loss": 0.8022, "step": 17718 }, { "epoch": 72.32244897959184, "grad_norm": 37.83548355102539, "learning_rate": 1.386326530612245e-05, "loss": 0.4891, "step": 17719 }, { "epoch": 72.3265306122449, "grad_norm": 33.70672607421875, "learning_rate": 1.3861224489795918e-05, "loss": 1.0912, "step": 17720 }, { "epoch": 72.33061224489796, "grad_norm": 22.70732879638672, "learning_rate": 1.3859183673469389e-05, "loss": 2.1779, "step": 17721 }, { "epoch": 72.33469387755102, "grad_norm": 24.58376693725586, "learning_rate": 1.3857142857142858e-05, "loss": 0.1477, "step": 17722 }, { "epoch": 72.33877551020409, "grad_norm": 36.33856964111328, "learning_rate": 1.3855102040816328e-05, "loss": 0.5467, "step": 17723 }, { "epoch": 72.34285714285714, "grad_norm": 24.09872055053711, "learning_rate": 1.3853061224489797e-05, "loss": 2.2547, "step": 17724 }, { "epoch": 72.34693877551021, "grad_norm": 27.007808685302734, "learning_rate": 1.3851020408163264e-05, "loss": 1.6757, "step": 17725 }, { "epoch": 72.35102040816327, "grad_norm": 41.71842575073242, "learning_rate": 1.3848979591836736e-05, "loss": 0.4524, "step": 17726 }, { "epoch": 72.35510204081632, "grad_norm": 33.973880767822266, "learning_rate": 1.3846938775510204e-05, "loss": 1.4444, "step": 17727 }, { "epoch": 72.35918367346939, "grad_norm": 31.76700782775879, "learning_rate": 1.3844897959183676e-05, "loss": 0.6331, "step": 17728 }, { "epoch": 72.36326530612244, "grad_norm": 26.039228439331055, "learning_rate": 1.3842857142857143e-05, "loss": 0.878, "step": 17729 }, { "epoch": 72.36734693877551, "grad_norm": 36.2745361328125, "learning_rate": 1.3840816326530612e-05, "loss": 0.5614, "step": 17730 }, { "epoch": 72.37142857142857, "grad_norm": 28.867979049682617, "learning_rate": 1.3838775510204082e-05, "loss": 0.9603, "step": 17731 }, { "epoch": 72.37551020408164, "grad_norm": 24.558616638183594, "learning_rate": 1.3836734693877551e-05, "loss": 1.6726, "step": 17732 }, { "epoch": 72.37959183673469, "grad_norm": 34.37812805175781, "learning_rate": 1.3834693877551022e-05, "loss": 0.9014, "step": 17733 }, { "epoch": 72.38367346938776, "grad_norm": 23.302778244018555, "learning_rate": 1.383265306122449e-05, "loss": 1.7769, "step": 17734 }, { "epoch": 72.38775510204081, "grad_norm": 31.34446144104004, "learning_rate": 1.383061224489796e-05, "loss": 1.6389, "step": 17735 }, { "epoch": 72.39183673469388, "grad_norm": 49.74403381347656, "learning_rate": 1.382857142857143e-05, "loss": 0.9767, "step": 17736 }, { "epoch": 72.39591836734694, "grad_norm": 34.8144645690918, "learning_rate": 1.3826530612244899e-05, "loss": 1.6018, "step": 17737 }, { "epoch": 72.4, "grad_norm": 26.88292694091797, "learning_rate": 1.3824489795918369e-05, "loss": 0.3716, "step": 17738 }, { "epoch": 72.40408163265306, "grad_norm": 37.484676361083984, "learning_rate": 1.3822448979591838e-05, "loss": 0.1969, "step": 17739 }, { "epoch": 72.40816326530613, "grad_norm": 24.651151657104492, "learning_rate": 1.3820408163265305e-05, "loss": 1.5123, "step": 17740 }, { "epoch": 72.41224489795918, "grad_norm": 26.727561950683594, "learning_rate": 1.3818367346938777e-05, "loss": 1.2742, "step": 17741 }, { "epoch": 72.41632653061224, "grad_norm": 25.543743133544922, "learning_rate": 1.3816326530612244e-05, "loss": 1.792, "step": 17742 }, { "epoch": 72.42040816326531, "grad_norm": 32.84389114379883, "learning_rate": 1.3814285714285715e-05, "loss": 0.5579, "step": 17743 }, { "epoch": 72.42448979591836, "grad_norm": 18.12797737121582, "learning_rate": 1.3812244897959184e-05, "loss": 2.7467, "step": 17744 }, { "epoch": 72.42857142857143, "grad_norm": 26.6365909576416, "learning_rate": 1.3810204081632654e-05, "loss": 1.4008, "step": 17745 }, { "epoch": 72.43265306122449, "grad_norm": 27.88610076904297, "learning_rate": 1.3808163265306123e-05, "loss": 1.5563, "step": 17746 }, { "epoch": 72.43673469387755, "grad_norm": 18.28870391845703, "learning_rate": 1.3806122448979592e-05, "loss": 0.1345, "step": 17747 }, { "epoch": 72.44081632653061, "grad_norm": 18.461423873901367, "learning_rate": 1.3804081632653062e-05, "loss": 0.1973, "step": 17748 }, { "epoch": 72.44489795918368, "grad_norm": 28.25582504272461, "learning_rate": 1.3802040816326531e-05, "loss": 1.3967, "step": 17749 }, { "epoch": 72.44897959183673, "grad_norm": 21.276403427124023, "learning_rate": 1.3800000000000002e-05, "loss": 1.7072, "step": 17750 }, { "epoch": 72.4530612244898, "grad_norm": 36.432247161865234, "learning_rate": 1.379795918367347e-05, "loss": 0.7001, "step": 17751 }, { "epoch": 72.45714285714286, "grad_norm": 20.226430892944336, "learning_rate": 1.3795918367346938e-05, "loss": 1.8117, "step": 17752 }, { "epoch": 72.46122448979592, "grad_norm": 29.712860107421875, "learning_rate": 1.379387755102041e-05, "loss": 1.3931, "step": 17753 }, { "epoch": 72.46530612244898, "grad_norm": 20.403545379638672, "learning_rate": 1.3791836734693877e-05, "loss": 2.1655, "step": 17754 }, { "epoch": 72.46938775510205, "grad_norm": 31.82986831665039, "learning_rate": 1.378979591836735e-05, "loss": 1.5944, "step": 17755 }, { "epoch": 72.4734693877551, "grad_norm": 25.19063949584961, "learning_rate": 1.3787755102040816e-05, "loss": 1.3121, "step": 17756 }, { "epoch": 72.47755102040816, "grad_norm": 34.869022369384766, "learning_rate": 1.3785714285714285e-05, "loss": 1.1079, "step": 17757 }, { "epoch": 72.48163265306123, "grad_norm": 28.95627784729004, "learning_rate": 1.3783673469387756e-05, "loss": 1.3921, "step": 17758 }, { "epoch": 72.48571428571428, "grad_norm": 36.28879928588867, "learning_rate": 1.3781632653061224e-05, "loss": 0.79, "step": 17759 }, { "epoch": 72.48979591836735, "grad_norm": 30.752059936523438, "learning_rate": 1.3779591836734695e-05, "loss": 0.3803, "step": 17760 }, { "epoch": 72.4938775510204, "grad_norm": 30.7022705078125, "learning_rate": 1.3777551020408164e-05, "loss": 1.0097, "step": 17761 }, { "epoch": 72.49795918367347, "grad_norm": 24.062036514282227, "learning_rate": 1.3775510204081633e-05, "loss": 0.3591, "step": 17762 }, { "epoch": 72.50204081632653, "grad_norm": 26.484058380126953, "learning_rate": 1.3773469387755103e-05, "loss": 1.6807, "step": 17763 }, { "epoch": 72.5061224489796, "grad_norm": 30.177038192749023, "learning_rate": 1.3771428571428572e-05, "loss": 1.4092, "step": 17764 }, { "epoch": 72.51020408163265, "grad_norm": 24.318958282470703, "learning_rate": 1.3769387755102042e-05, "loss": 1.6211, "step": 17765 }, { "epoch": 72.51428571428572, "grad_norm": 24.437591552734375, "learning_rate": 1.3767346938775511e-05, "loss": 1.6991, "step": 17766 }, { "epoch": 72.51836734693877, "grad_norm": 22.679100036621094, "learning_rate": 1.3765306122448982e-05, "loss": 1.9085, "step": 17767 }, { "epoch": 72.52244897959184, "grad_norm": 32.138702392578125, "learning_rate": 1.376326530612245e-05, "loss": 0.8512, "step": 17768 }, { "epoch": 72.5265306122449, "grad_norm": 40.00104522705078, "learning_rate": 1.3761224489795918e-05, "loss": 0.4913, "step": 17769 }, { "epoch": 72.53061224489795, "grad_norm": 36.86896896362305, "learning_rate": 1.3759183673469388e-05, "loss": 1.6911, "step": 17770 }, { "epoch": 72.53469387755102, "grad_norm": 17.269283294677734, "learning_rate": 1.3757142857142857e-05, "loss": 0.1398, "step": 17771 }, { "epoch": 72.53877551020408, "grad_norm": 32.674072265625, "learning_rate": 1.3755102040816328e-05, "loss": 1.1981, "step": 17772 }, { "epoch": 72.54285714285714, "grad_norm": 29.529682159423828, "learning_rate": 1.3753061224489796e-05, "loss": 1.4079, "step": 17773 }, { "epoch": 72.5469387755102, "grad_norm": 40.34035110473633, "learning_rate": 1.3751020408163265e-05, "loss": 1.3234, "step": 17774 }, { "epoch": 72.55102040816327, "grad_norm": 34.34831237792969, "learning_rate": 1.3748979591836736e-05, "loss": 0.9056, "step": 17775 }, { "epoch": 72.55510204081632, "grad_norm": 28.228351593017578, "learning_rate": 1.3746938775510205e-05, "loss": 1.6568, "step": 17776 }, { "epoch": 72.55918367346939, "grad_norm": 37.223243713378906, "learning_rate": 1.3744897959183675e-05, "loss": 0.5369, "step": 17777 }, { "epoch": 72.56326530612245, "grad_norm": 51.49028015136719, "learning_rate": 1.3742857142857144e-05, "loss": 0.8909, "step": 17778 }, { "epoch": 72.56734693877551, "grad_norm": 38.02440643310547, "learning_rate": 1.3740816326530611e-05, "loss": 0.9761, "step": 17779 }, { "epoch": 72.57142857142857, "grad_norm": 23.04787826538086, "learning_rate": 1.3738775510204083e-05, "loss": 2.1016, "step": 17780 }, { "epoch": 72.57551020408164, "grad_norm": 25.971378326416016, "learning_rate": 1.373673469387755e-05, "loss": 1.4343, "step": 17781 }, { "epoch": 72.57959183673469, "grad_norm": 22.506071090698242, "learning_rate": 1.3734693877551023e-05, "loss": 2.0706, "step": 17782 }, { "epoch": 72.58367346938776, "grad_norm": 34.19980239868164, "learning_rate": 1.373265306122449e-05, "loss": 0.5098, "step": 17783 }, { "epoch": 72.58775510204082, "grad_norm": 39.5840950012207, "learning_rate": 1.3730612244897959e-05, "loss": 1.0253, "step": 17784 }, { "epoch": 72.59183673469387, "grad_norm": 26.808475494384766, "learning_rate": 1.3728571428571429e-05, "loss": 2.1515, "step": 17785 }, { "epoch": 72.59591836734694, "grad_norm": 32.488121032714844, "learning_rate": 1.3726530612244898e-05, "loss": 0.8413, "step": 17786 }, { "epoch": 72.6, "grad_norm": 39.74887466430664, "learning_rate": 1.3724489795918368e-05, "loss": 0.9061, "step": 17787 }, { "epoch": 72.60408163265306, "grad_norm": 27.260196685791016, "learning_rate": 1.3722448979591837e-05, "loss": 1.813, "step": 17788 }, { "epoch": 72.60816326530612, "grad_norm": 23.585098266601562, "learning_rate": 1.3720408163265308e-05, "loss": 1.8212, "step": 17789 }, { "epoch": 72.61224489795919, "grad_norm": 42.42730712890625, "learning_rate": 1.3718367346938777e-05, "loss": 0.4432, "step": 17790 }, { "epoch": 72.61632653061224, "grad_norm": 35.03060531616211, "learning_rate": 1.3716326530612245e-05, "loss": 1.3082, "step": 17791 }, { "epoch": 72.62040816326531, "grad_norm": 24.5199031829834, "learning_rate": 1.3714285714285716e-05, "loss": 0.1592, "step": 17792 }, { "epoch": 72.62448979591836, "grad_norm": 23.5184383392334, "learning_rate": 1.3712244897959185e-05, "loss": 0.1475, "step": 17793 }, { "epoch": 72.62857142857143, "grad_norm": 41.19882583618164, "learning_rate": 1.3710204081632655e-05, "loss": 0.5354, "step": 17794 }, { "epoch": 72.63265306122449, "grad_norm": 43.62185287475586, "learning_rate": 1.3708163265306124e-05, "loss": 0.7027, "step": 17795 }, { "epoch": 72.63673469387756, "grad_norm": 35.24570846557617, "learning_rate": 1.3706122448979591e-05, "loss": 1.0123, "step": 17796 }, { "epoch": 72.64081632653061, "grad_norm": 41.72402572631836, "learning_rate": 1.3704081632653062e-05, "loss": 0.5012, "step": 17797 }, { "epoch": 72.64489795918368, "grad_norm": 36.861942291259766, "learning_rate": 1.370204081632653e-05, "loss": 0.7973, "step": 17798 }, { "epoch": 72.64897959183673, "grad_norm": 38.14605712890625, "learning_rate": 1.3700000000000001e-05, "loss": 0.6935, "step": 17799 }, { "epoch": 72.65306122448979, "grad_norm": 35.595924377441406, "learning_rate": 1.369795918367347e-05, "loss": 0.6598, "step": 17800 }, { "epoch": 72.65714285714286, "grad_norm": 36.849769592285156, "learning_rate": 1.3695918367346939e-05, "loss": 1.2856, "step": 17801 }, { "epoch": 72.66122448979591, "grad_norm": 31.00726318359375, "learning_rate": 1.3693877551020409e-05, "loss": 0.5326, "step": 17802 }, { "epoch": 72.66530612244898, "grad_norm": 33.59033966064453, "learning_rate": 1.3691836734693878e-05, "loss": 1.4289, "step": 17803 }, { "epoch": 72.66938775510204, "grad_norm": 36.891239166259766, "learning_rate": 1.3689795918367348e-05, "loss": 0.5571, "step": 17804 }, { "epoch": 72.6734693877551, "grad_norm": 28.657079696655273, "learning_rate": 1.3687755102040817e-05, "loss": 0.3174, "step": 17805 }, { "epoch": 72.67755102040816, "grad_norm": 31.621784210205078, "learning_rate": 1.3685714285714284e-05, "loss": 0.3216, "step": 17806 }, { "epoch": 72.68163265306123, "grad_norm": 24.687156677246094, "learning_rate": 1.3683673469387757e-05, "loss": 1.5653, "step": 17807 }, { "epoch": 72.68571428571428, "grad_norm": 33.086605072021484, "learning_rate": 1.3681632653061224e-05, "loss": 1.5881, "step": 17808 }, { "epoch": 72.68979591836735, "grad_norm": 41.14884567260742, "learning_rate": 1.3679591836734696e-05, "loss": 1.23, "step": 17809 }, { "epoch": 72.6938775510204, "grad_norm": 50.657169342041016, "learning_rate": 1.3677551020408163e-05, "loss": 0.367, "step": 17810 }, { "epoch": 72.69795918367348, "grad_norm": 41.72218322753906, "learning_rate": 1.3675510204081635e-05, "loss": 1.1624, "step": 17811 }, { "epoch": 72.70204081632653, "grad_norm": 31.024473190307617, "learning_rate": 1.3673469387755102e-05, "loss": 1.0415, "step": 17812 }, { "epoch": 72.70612244897958, "grad_norm": 45.02285385131836, "learning_rate": 1.3671428571428571e-05, "loss": 0.6134, "step": 17813 }, { "epoch": 72.71020408163265, "grad_norm": 44.00933837890625, "learning_rate": 1.3669387755102042e-05, "loss": 0.4695, "step": 17814 }, { "epoch": 72.71428571428571, "grad_norm": 28.6466121673584, "learning_rate": 1.366734693877551e-05, "loss": 1.5782, "step": 17815 }, { "epoch": 72.71836734693878, "grad_norm": 23.064062118530273, "learning_rate": 1.3665306122448981e-05, "loss": 1.6982, "step": 17816 }, { "epoch": 72.72244897959183, "grad_norm": 36.522605895996094, "learning_rate": 1.366326530612245e-05, "loss": 0.6204, "step": 17817 }, { "epoch": 72.7265306122449, "grad_norm": 32.197261810302734, "learning_rate": 1.3661224489795919e-05, "loss": 1.2087, "step": 17818 }, { "epoch": 72.73061224489796, "grad_norm": 32.37971496582031, "learning_rate": 1.365918367346939e-05, "loss": 1.3222, "step": 17819 }, { "epoch": 72.73469387755102, "grad_norm": 22.510971069335938, "learning_rate": 1.3657142857142858e-05, "loss": 0.1991, "step": 17820 }, { "epoch": 72.73877551020408, "grad_norm": 31.31910514831543, "learning_rate": 1.3655102040816329e-05, "loss": 1.0376, "step": 17821 }, { "epoch": 72.74285714285715, "grad_norm": 20.901973724365234, "learning_rate": 1.3653061224489796e-05, "loss": 2.2825, "step": 17822 }, { "epoch": 72.7469387755102, "grad_norm": 37.51643371582031, "learning_rate": 1.3651020408163264e-05, "loss": 0.9522, "step": 17823 }, { "epoch": 72.75102040816327, "grad_norm": 14.464999198913574, "learning_rate": 1.3648979591836735e-05, "loss": 0.0897, "step": 17824 }, { "epoch": 72.75510204081633, "grad_norm": 28.733266830444336, "learning_rate": 1.3646938775510204e-05, "loss": 0.2679, "step": 17825 }, { "epoch": 72.7591836734694, "grad_norm": 34.78225326538086, "learning_rate": 1.3644897959183674e-05, "loss": 0.729, "step": 17826 }, { "epoch": 72.76326530612245, "grad_norm": 26.08211326599121, "learning_rate": 1.3642857142857143e-05, "loss": 1.7624, "step": 17827 }, { "epoch": 72.7673469387755, "grad_norm": 36.0191764831543, "learning_rate": 1.3640816326530612e-05, "loss": 0.2062, "step": 17828 }, { "epoch": 72.77142857142857, "grad_norm": 38.79636764526367, "learning_rate": 1.3638775510204082e-05, "loss": 0.9101, "step": 17829 }, { "epoch": 72.77551020408163, "grad_norm": 35.376556396484375, "learning_rate": 1.3636734693877551e-05, "loss": 0.6151, "step": 17830 }, { "epoch": 72.7795918367347, "grad_norm": 39.42985153198242, "learning_rate": 1.3634693877551022e-05, "loss": 0.8974, "step": 17831 }, { "epoch": 72.78367346938775, "grad_norm": 42.61201095581055, "learning_rate": 1.363265306122449e-05, "loss": 0.4265, "step": 17832 }, { "epoch": 72.78775510204082, "grad_norm": 35.47927474975586, "learning_rate": 1.3630612244897961e-05, "loss": 0.972, "step": 17833 }, { "epoch": 72.79183673469387, "grad_norm": 25.869911193847656, "learning_rate": 1.362857142857143e-05, "loss": 1.6622, "step": 17834 }, { "epoch": 72.79591836734694, "grad_norm": 36.725975036621094, "learning_rate": 1.3626530612244897e-05, "loss": 0.3673, "step": 17835 }, { "epoch": 72.8, "grad_norm": 24.662179946899414, "learning_rate": 1.362448979591837e-05, "loss": 1.7657, "step": 17836 }, { "epoch": 72.80408163265307, "grad_norm": 36.656402587890625, "learning_rate": 1.3622448979591836e-05, "loss": 1.392, "step": 17837 }, { "epoch": 72.80816326530612, "grad_norm": 46.59343719482422, "learning_rate": 1.3620408163265309e-05, "loss": 1.1009, "step": 17838 }, { "epoch": 72.81224489795919, "grad_norm": 28.28110122680664, "learning_rate": 1.3618367346938776e-05, "loss": 1.4984, "step": 17839 }, { "epoch": 72.81632653061224, "grad_norm": 38.68058776855469, "learning_rate": 1.3616326530612245e-05, "loss": 0.7751, "step": 17840 }, { "epoch": 72.82040816326531, "grad_norm": 48.206825256347656, "learning_rate": 1.3614285714285715e-05, "loss": 1.456, "step": 17841 }, { "epoch": 72.82448979591837, "grad_norm": 32.65537643432617, "learning_rate": 1.3612244897959184e-05, "loss": 0.5055, "step": 17842 }, { "epoch": 72.82857142857142, "grad_norm": 30.21169090270996, "learning_rate": 1.3610204081632654e-05, "loss": 1.5664, "step": 17843 }, { "epoch": 72.83265306122449, "grad_norm": 30.041107177734375, "learning_rate": 1.3608163265306123e-05, "loss": 1.0131, "step": 17844 }, { "epoch": 72.83673469387755, "grad_norm": 17.025714874267578, "learning_rate": 1.3606122448979592e-05, "loss": 2.554, "step": 17845 }, { "epoch": 72.84081632653061, "grad_norm": 45.27341079711914, "learning_rate": 1.3604081632653063e-05, "loss": 0.4799, "step": 17846 }, { "epoch": 72.84489795918367, "grad_norm": 20.807086944580078, "learning_rate": 1.3602040816326531e-05, "loss": 2.177, "step": 17847 }, { "epoch": 72.84897959183674, "grad_norm": 20.301048278808594, "learning_rate": 1.3600000000000002e-05, "loss": 2.2465, "step": 17848 }, { "epoch": 72.85306122448979, "grad_norm": 39.65583419799805, "learning_rate": 1.3597959183673469e-05, "loss": 0.3981, "step": 17849 }, { "epoch": 72.85714285714286, "grad_norm": 31.428199768066406, "learning_rate": 1.3595918367346938e-05, "loss": 0.4223, "step": 17850 }, { "epoch": 72.86122448979592, "grad_norm": 31.9637393951416, "learning_rate": 1.3593877551020408e-05, "loss": 1.5029, "step": 17851 }, { "epoch": 72.86530612244898, "grad_norm": 40.553245544433594, "learning_rate": 1.3591836734693877e-05, "loss": 1.0714, "step": 17852 }, { "epoch": 72.86938775510204, "grad_norm": 39.96726989746094, "learning_rate": 1.3589795918367348e-05, "loss": 1.3681, "step": 17853 }, { "epoch": 72.87346938775511, "grad_norm": 40.900333404541016, "learning_rate": 1.3587755102040817e-05, "loss": 0.5339, "step": 17854 }, { "epoch": 72.87755102040816, "grad_norm": 33.133453369140625, "learning_rate": 1.3585714285714287e-05, "loss": 0.6803, "step": 17855 }, { "epoch": 72.88163265306123, "grad_norm": 56.97482681274414, "learning_rate": 1.3583673469387756e-05, "loss": 0.7302, "step": 17856 }, { "epoch": 72.88571428571429, "grad_norm": 51.01908874511719, "learning_rate": 1.3581632653061225e-05, "loss": 0.4066, "step": 17857 }, { "epoch": 72.88979591836734, "grad_norm": 39.99040222167969, "learning_rate": 1.3579591836734695e-05, "loss": 0.7804, "step": 17858 }, { "epoch": 72.89387755102041, "grad_norm": 44.884483337402344, "learning_rate": 1.3577551020408164e-05, "loss": 0.5554, "step": 17859 }, { "epoch": 72.89795918367346, "grad_norm": 30.94364356994629, "learning_rate": 1.3575510204081635e-05, "loss": 1.7674, "step": 17860 }, { "epoch": 72.90204081632653, "grad_norm": 35.48088073730469, "learning_rate": 1.3573469387755103e-05, "loss": 1.622, "step": 17861 }, { "epoch": 72.90612244897959, "grad_norm": 25.242517471313477, "learning_rate": 1.357142857142857e-05, "loss": 0.1365, "step": 17862 }, { "epoch": 72.91020408163266, "grad_norm": 32.01761245727539, "learning_rate": 1.3569387755102043e-05, "loss": 1.4293, "step": 17863 }, { "epoch": 72.91428571428571, "grad_norm": 32.103660583496094, "learning_rate": 1.356734693877551e-05, "loss": 0.7619, "step": 17864 }, { "epoch": 72.91836734693878, "grad_norm": 19.899545669555664, "learning_rate": 1.3565306122448982e-05, "loss": 0.1317, "step": 17865 }, { "epoch": 72.92244897959183, "grad_norm": 45.62821578979492, "learning_rate": 1.3563265306122449e-05, "loss": 0.5069, "step": 17866 }, { "epoch": 72.9265306122449, "grad_norm": 38.55633544921875, "learning_rate": 1.3561224489795918e-05, "loss": 0.753, "step": 17867 }, { "epoch": 72.93061224489796, "grad_norm": 34.3126106262207, "learning_rate": 1.3559183673469388e-05, "loss": 1.187, "step": 17868 }, { "epoch": 72.93469387755103, "grad_norm": 47.166961669921875, "learning_rate": 1.3557142857142857e-05, "loss": 1.355, "step": 17869 }, { "epoch": 72.93877551020408, "grad_norm": 44.208641052246094, "learning_rate": 1.3555102040816328e-05, "loss": 1.133, "step": 17870 }, { "epoch": 72.94285714285714, "grad_norm": 26.2178955078125, "learning_rate": 1.3553061224489797e-05, "loss": 1.801, "step": 17871 }, { "epoch": 72.9469387755102, "grad_norm": 43.014183044433594, "learning_rate": 1.3551020408163265e-05, "loss": 0.9196, "step": 17872 }, { "epoch": 72.95102040816326, "grad_norm": 31.851951599121094, "learning_rate": 1.3548979591836736e-05, "loss": 0.7887, "step": 17873 }, { "epoch": 72.95510204081633, "grad_norm": 43.20406723022461, "learning_rate": 1.3546938775510205e-05, "loss": 1.3453, "step": 17874 }, { "epoch": 72.95918367346938, "grad_norm": 17.404434204101562, "learning_rate": 1.3544897959183675e-05, "loss": 2.3446, "step": 17875 }, { "epoch": 72.96326530612245, "grad_norm": 45.119197845458984, "learning_rate": 1.3542857142857142e-05, "loss": 0.5759, "step": 17876 }, { "epoch": 72.9673469387755, "grad_norm": 26.652315139770508, "learning_rate": 1.3540816326530615e-05, "loss": 2.2122, "step": 17877 }, { "epoch": 72.97142857142858, "grad_norm": 34.346092224121094, "learning_rate": 1.3538775510204082e-05, "loss": 0.9494, "step": 17878 }, { "epoch": 72.97551020408163, "grad_norm": 38.117652893066406, "learning_rate": 1.353673469387755e-05, "loss": 1.1424, "step": 17879 }, { "epoch": 72.9795918367347, "grad_norm": 24.562715530395508, "learning_rate": 1.3534693877551021e-05, "loss": 0.2603, "step": 17880 }, { "epoch": 72.98367346938775, "grad_norm": 42.96906661987305, "learning_rate": 1.353265306122449e-05, "loss": 0.5219, "step": 17881 }, { "epoch": 72.98775510204082, "grad_norm": 35.27711868286133, "learning_rate": 1.353061224489796e-05, "loss": 1.2186, "step": 17882 }, { "epoch": 72.99183673469388, "grad_norm": 34.72586441040039, "learning_rate": 1.352857142857143e-05, "loss": 1.3253, "step": 17883 }, { "epoch": 72.99591836734695, "grad_norm": 37.95924758911133, "learning_rate": 1.3526530612244898e-05, "loss": 0.4129, "step": 17884 }, { "epoch": 73.0, "grad_norm": 25.951751708984375, "learning_rate": 1.3524489795918369e-05, "loss": 1.6409, "step": 17885 }, { "epoch": 73.00408163265305, "grad_norm": 26.604557037353516, "learning_rate": 1.3522448979591837e-05, "loss": 0.7819, "step": 17886 }, { "epoch": 73.00816326530612, "grad_norm": 26.531978607177734, "learning_rate": 1.3520408163265308e-05, "loss": 1.694, "step": 17887 }, { "epoch": 73.01224489795918, "grad_norm": 21.394807815551758, "learning_rate": 1.3518367346938777e-05, "loss": 2.096, "step": 17888 }, { "epoch": 73.01632653061225, "grad_norm": 45.079566955566406, "learning_rate": 1.3516326530612244e-05, "loss": 0.7034, "step": 17889 }, { "epoch": 73.0204081632653, "grad_norm": 27.68168067932129, "learning_rate": 1.3514285714285716e-05, "loss": 0.9024, "step": 17890 }, { "epoch": 73.02448979591837, "grad_norm": 33.350860595703125, "learning_rate": 1.3512244897959183e-05, "loss": 0.3406, "step": 17891 }, { "epoch": 73.02857142857142, "grad_norm": 33.215553283691406, "learning_rate": 1.3510204081632655e-05, "loss": 0.8484, "step": 17892 }, { "epoch": 73.0326530612245, "grad_norm": 37.85312271118164, "learning_rate": 1.3508163265306123e-05, "loss": 0.7993, "step": 17893 }, { "epoch": 73.03673469387755, "grad_norm": 40.776405334472656, "learning_rate": 1.3506122448979591e-05, "loss": 0.913, "step": 17894 }, { "epoch": 73.04081632653062, "grad_norm": 33.10970687866211, "learning_rate": 1.3504081632653062e-05, "loss": 1.2483, "step": 17895 }, { "epoch": 73.04489795918367, "grad_norm": 28.738040924072266, "learning_rate": 1.350204081632653e-05, "loss": 1.3547, "step": 17896 }, { "epoch": 73.04897959183674, "grad_norm": 27.75623893737793, "learning_rate": 1.3500000000000001e-05, "loss": 1.6824, "step": 17897 }, { "epoch": 73.0530612244898, "grad_norm": 23.611400604248047, "learning_rate": 1.349795918367347e-05, "loss": 0.1589, "step": 17898 }, { "epoch": 73.05714285714286, "grad_norm": 30.383119583129883, "learning_rate": 1.3495918367346939e-05, "loss": 1.4506, "step": 17899 }, { "epoch": 73.06122448979592, "grad_norm": 36.94198226928711, "learning_rate": 1.349387755102041e-05, "loss": 0.6835, "step": 17900 }, { "epoch": 73.06530612244897, "grad_norm": 39.5736198425293, "learning_rate": 1.3491836734693878e-05, "loss": 0.8614, "step": 17901 }, { "epoch": 73.06938775510204, "grad_norm": 24.42780303955078, "learning_rate": 1.3489795918367349e-05, "loss": 1.6357, "step": 17902 }, { "epoch": 73.0734693877551, "grad_norm": 64.28211212158203, "learning_rate": 1.3487755102040816e-05, "loss": 0.523, "step": 17903 }, { "epoch": 73.07755102040817, "grad_norm": 24.715478897094727, "learning_rate": 1.3485714285714288e-05, "loss": 1.6339, "step": 17904 }, { "epoch": 73.08163265306122, "grad_norm": 28.595256805419922, "learning_rate": 1.3483673469387755e-05, "loss": 1.5081, "step": 17905 }, { "epoch": 73.08571428571429, "grad_norm": 31.352130889892578, "learning_rate": 1.3481632653061224e-05, "loss": 0.5039, "step": 17906 }, { "epoch": 73.08979591836734, "grad_norm": 32.83536911010742, "learning_rate": 1.3479591836734694e-05, "loss": 1.0867, "step": 17907 }, { "epoch": 73.09387755102041, "grad_norm": 28.615604400634766, "learning_rate": 1.3477551020408163e-05, "loss": 1.5316, "step": 17908 }, { "epoch": 73.09795918367347, "grad_norm": 38.37514114379883, "learning_rate": 1.3475510204081634e-05, "loss": 0.5351, "step": 17909 }, { "epoch": 73.10204081632654, "grad_norm": 40.37834167480469, "learning_rate": 1.3473469387755103e-05, "loss": 0.4144, "step": 17910 }, { "epoch": 73.10612244897959, "grad_norm": 31.701129913330078, "learning_rate": 1.3471428571428571e-05, "loss": 1.1894, "step": 17911 }, { "epoch": 73.11020408163266, "grad_norm": 35.7054443359375, "learning_rate": 1.3469387755102042e-05, "loss": 0.3995, "step": 17912 }, { "epoch": 73.11428571428571, "grad_norm": 35.59941101074219, "learning_rate": 1.346734693877551e-05, "loss": 0.3434, "step": 17913 }, { "epoch": 73.11836734693877, "grad_norm": 19.0274658203125, "learning_rate": 1.3465306122448981e-05, "loss": 0.1931, "step": 17914 }, { "epoch": 73.12244897959184, "grad_norm": 53.50324630737305, "learning_rate": 1.346326530612245e-05, "loss": 1.0303, "step": 17915 }, { "epoch": 73.12653061224489, "grad_norm": 30.81330108642578, "learning_rate": 1.3461224489795917e-05, "loss": 0.2525, "step": 17916 }, { "epoch": 73.13061224489796, "grad_norm": 26.947919845581055, "learning_rate": 1.345918367346939e-05, "loss": 1.8291, "step": 17917 }, { "epoch": 73.13469387755102, "grad_norm": 30.438358306884766, "learning_rate": 1.3457142857142857e-05, "loss": 0.8962, "step": 17918 }, { "epoch": 73.13877551020408, "grad_norm": 45.40069580078125, "learning_rate": 1.3455102040816329e-05, "loss": 1.1716, "step": 17919 }, { "epoch": 73.14285714285714, "grad_norm": 25.69312858581543, "learning_rate": 1.3453061224489796e-05, "loss": 1.5859, "step": 17920 }, { "epoch": 73.14693877551021, "grad_norm": 34.7464485168457, "learning_rate": 1.3451020408163265e-05, "loss": 1.6444, "step": 17921 }, { "epoch": 73.15102040816326, "grad_norm": 17.84786033630371, "learning_rate": 1.3448979591836735e-05, "loss": 2.7268, "step": 17922 }, { "epoch": 73.15510204081633, "grad_norm": 39.51325988769531, "learning_rate": 1.3446938775510204e-05, "loss": 0.4707, "step": 17923 }, { "epoch": 73.15918367346939, "grad_norm": 32.685611724853516, "learning_rate": 1.3444897959183675e-05, "loss": 0.6054, "step": 17924 }, { "epoch": 73.16326530612245, "grad_norm": 52.31137466430664, "learning_rate": 1.3442857142857143e-05, "loss": 0.5769, "step": 17925 }, { "epoch": 73.16734693877551, "grad_norm": 29.35309600830078, "learning_rate": 1.3440816326530614e-05, "loss": 0.7361, "step": 17926 }, { "epoch": 73.17142857142858, "grad_norm": 35.19041442871094, "learning_rate": 1.3438775510204083e-05, "loss": 0.3978, "step": 17927 }, { "epoch": 73.17551020408163, "grad_norm": 32.01993179321289, "learning_rate": 1.343673469387755e-05, "loss": 0.4691, "step": 17928 }, { "epoch": 73.17959183673469, "grad_norm": 31.443235397338867, "learning_rate": 1.3434693877551022e-05, "loss": 0.8155, "step": 17929 }, { "epoch": 73.18367346938776, "grad_norm": 26.197628021240234, "learning_rate": 1.343265306122449e-05, "loss": 0.8806, "step": 17930 }, { "epoch": 73.18775510204081, "grad_norm": 35.200443267822266, "learning_rate": 1.3430612244897961e-05, "loss": 0.7931, "step": 17931 }, { "epoch": 73.19183673469388, "grad_norm": 38.802452087402344, "learning_rate": 1.3428571428571429e-05, "loss": 0.455, "step": 17932 }, { "epoch": 73.19591836734693, "grad_norm": 44.0900993347168, "learning_rate": 1.3426530612244897e-05, "loss": 0.8234, "step": 17933 }, { "epoch": 73.2, "grad_norm": 38.07278060913086, "learning_rate": 1.3424489795918368e-05, "loss": 1.0592, "step": 17934 }, { "epoch": 73.20408163265306, "grad_norm": 14.287765502929688, "learning_rate": 1.3422448979591837e-05, "loss": 0.0829, "step": 17935 }, { "epoch": 73.20816326530613, "grad_norm": 23.863086700439453, "learning_rate": 1.3420408163265307e-05, "loss": 1.7755, "step": 17936 }, { "epoch": 73.21224489795918, "grad_norm": 33.52842330932617, "learning_rate": 1.3418367346938776e-05, "loss": 0.5077, "step": 17937 }, { "epoch": 73.21632653061225, "grad_norm": 27.948368072509766, "learning_rate": 1.3416326530612245e-05, "loss": 0.4173, "step": 17938 }, { "epoch": 73.2204081632653, "grad_norm": 25.003141403198242, "learning_rate": 1.3414285714285715e-05, "loss": 1.817, "step": 17939 }, { "epoch": 73.22448979591837, "grad_norm": 29.40753936767578, "learning_rate": 1.3412244897959184e-05, "loss": 1.3545, "step": 17940 }, { "epoch": 73.22857142857143, "grad_norm": 50.38881301879883, "learning_rate": 1.3410204081632655e-05, "loss": 0.4316, "step": 17941 }, { "epoch": 73.2326530612245, "grad_norm": 26.90229034423828, "learning_rate": 1.3408163265306123e-05, "loss": 1.6702, "step": 17942 }, { "epoch": 73.23673469387755, "grad_norm": 37.14543533325195, "learning_rate": 1.340612244897959e-05, "loss": 0.4778, "step": 17943 }, { "epoch": 73.2408163265306, "grad_norm": 36.87237548828125, "learning_rate": 1.3404081632653063e-05, "loss": 1.081, "step": 17944 }, { "epoch": 73.24489795918367, "grad_norm": 41.105255126953125, "learning_rate": 1.340204081632653e-05, "loss": 0.3967, "step": 17945 }, { "epoch": 73.24897959183673, "grad_norm": 26.119197845458984, "learning_rate": 1.3400000000000002e-05, "loss": 1.6692, "step": 17946 }, { "epoch": 73.2530612244898, "grad_norm": 30.910058975219727, "learning_rate": 1.339795918367347e-05, "loss": 1.22, "step": 17947 }, { "epoch": 73.25714285714285, "grad_norm": 30.231523513793945, "learning_rate": 1.3395918367346941e-05, "loss": 1.361, "step": 17948 }, { "epoch": 73.26122448979592, "grad_norm": 26.5549259185791, "learning_rate": 1.3393877551020409e-05, "loss": 1.4295, "step": 17949 }, { "epoch": 73.26530612244898, "grad_norm": 34.144378662109375, "learning_rate": 1.3391836734693877e-05, "loss": 1.262, "step": 17950 }, { "epoch": 73.26938775510204, "grad_norm": 33.64292907714844, "learning_rate": 1.3389795918367348e-05, "loss": 0.9911, "step": 17951 }, { "epoch": 73.2734693877551, "grad_norm": 33.955665588378906, "learning_rate": 1.3387755102040817e-05, "loss": 0.9571, "step": 17952 }, { "epoch": 73.27755102040817, "grad_norm": 23.8374080657959, "learning_rate": 1.3385714285714287e-05, "loss": 1.6762, "step": 17953 }, { "epoch": 73.28163265306122, "grad_norm": 40.80317306518555, "learning_rate": 1.3383673469387756e-05, "loss": 0.4873, "step": 17954 }, { "epoch": 73.28571428571429, "grad_norm": 23.59691047668457, "learning_rate": 1.3381632653061223e-05, "loss": 2.1185, "step": 17955 }, { "epoch": 73.28979591836735, "grad_norm": 45.70146560668945, "learning_rate": 1.3379591836734695e-05, "loss": 0.7007, "step": 17956 }, { "epoch": 73.29387755102042, "grad_norm": 30.41753578186035, "learning_rate": 1.3377551020408163e-05, "loss": 0.608, "step": 17957 }, { "epoch": 73.29795918367347, "grad_norm": 42.3511962890625, "learning_rate": 1.3375510204081635e-05, "loss": 1.3025, "step": 17958 }, { "epoch": 73.30204081632652, "grad_norm": 46.66192626953125, "learning_rate": 1.3373469387755102e-05, "loss": 0.5854, "step": 17959 }, { "epoch": 73.3061224489796, "grad_norm": 28.98726463317871, "learning_rate": 1.337142857142857e-05, "loss": 1.6668, "step": 17960 }, { "epoch": 73.31020408163265, "grad_norm": 31.967744827270508, "learning_rate": 1.3369387755102041e-05, "loss": 0.3472, "step": 17961 }, { "epoch": 73.31428571428572, "grad_norm": 28.47153091430664, "learning_rate": 1.336734693877551e-05, "loss": 1.4167, "step": 17962 }, { "epoch": 73.31836734693877, "grad_norm": 22.489791870117188, "learning_rate": 1.336530612244898e-05, "loss": 1.6006, "step": 17963 }, { "epoch": 73.32244897959184, "grad_norm": 40.16399002075195, "learning_rate": 1.336326530612245e-05, "loss": 1.3596, "step": 17964 }, { "epoch": 73.3265306122449, "grad_norm": 38.24876022338867, "learning_rate": 1.3361224489795918e-05, "loss": 0.5692, "step": 17965 }, { "epoch": 73.33061224489796, "grad_norm": 39.919219970703125, "learning_rate": 1.3359183673469389e-05, "loss": 0.4507, "step": 17966 }, { "epoch": 73.33469387755102, "grad_norm": 29.899927139282227, "learning_rate": 1.3357142857142858e-05, "loss": 1.2709, "step": 17967 }, { "epoch": 73.33877551020409, "grad_norm": 19.890832901000977, "learning_rate": 1.3355102040816328e-05, "loss": 0.1554, "step": 17968 }, { "epoch": 73.34285714285714, "grad_norm": 23.77206802368164, "learning_rate": 1.3353061224489797e-05, "loss": 1.8755, "step": 17969 }, { "epoch": 73.34693877551021, "grad_norm": 35.744667053222656, "learning_rate": 1.3351020408163267e-05, "loss": 1.3132, "step": 17970 }, { "epoch": 73.35102040816327, "grad_norm": 40.87100601196289, "learning_rate": 1.3348979591836736e-05, "loss": 0.591, "step": 17971 }, { "epoch": 73.35510204081632, "grad_norm": 30.63364601135254, "learning_rate": 1.3346938775510203e-05, "loss": 0.4006, "step": 17972 }, { "epoch": 73.35918367346939, "grad_norm": 35.33991241455078, "learning_rate": 1.3344897959183676e-05, "loss": 1.1779, "step": 17973 }, { "epoch": 73.36326530612244, "grad_norm": 33.184295654296875, "learning_rate": 1.3342857142857143e-05, "loss": 1.0907, "step": 17974 }, { "epoch": 73.36734693877551, "grad_norm": 48.22415542602539, "learning_rate": 1.3340816326530615e-05, "loss": 0.4575, "step": 17975 }, { "epoch": 73.37142857142857, "grad_norm": 31.307125091552734, "learning_rate": 1.3338775510204082e-05, "loss": 0.4798, "step": 17976 }, { "epoch": 73.37551020408164, "grad_norm": 35.52760696411133, "learning_rate": 1.333673469387755e-05, "loss": 0.553, "step": 17977 }, { "epoch": 73.37959183673469, "grad_norm": 55.4232177734375, "learning_rate": 1.3334693877551021e-05, "loss": 1.1557, "step": 17978 }, { "epoch": 73.38367346938776, "grad_norm": 32.078182220458984, "learning_rate": 1.333265306122449e-05, "loss": 0.388, "step": 17979 }, { "epoch": 73.38775510204081, "grad_norm": 34.58544921875, "learning_rate": 1.333061224489796e-05, "loss": 0.8593, "step": 17980 }, { "epoch": 73.39183673469388, "grad_norm": 31.921581268310547, "learning_rate": 1.332857142857143e-05, "loss": 1.5464, "step": 17981 }, { "epoch": 73.39591836734694, "grad_norm": 41.894981384277344, "learning_rate": 1.3326530612244897e-05, "loss": 1.4378, "step": 17982 }, { "epoch": 73.4, "grad_norm": 45.500728607177734, "learning_rate": 1.3324489795918369e-05, "loss": 0.3682, "step": 17983 }, { "epoch": 73.40408163265306, "grad_norm": 21.65711212158203, "learning_rate": 1.3322448979591836e-05, "loss": 2.1729, "step": 17984 }, { "epoch": 73.40816326530613, "grad_norm": 23.268505096435547, "learning_rate": 1.3320408163265308e-05, "loss": 0.1847, "step": 17985 }, { "epoch": 73.41224489795918, "grad_norm": 32.897865295410156, "learning_rate": 1.3318367346938775e-05, "loss": 1.4589, "step": 17986 }, { "epoch": 73.41632653061224, "grad_norm": 23.824682235717773, "learning_rate": 1.3316326530612244e-05, "loss": 2.4385, "step": 17987 }, { "epoch": 73.42040816326531, "grad_norm": 23.262874603271484, "learning_rate": 1.3314285714285715e-05, "loss": 2.0536, "step": 17988 }, { "epoch": 73.42448979591836, "grad_norm": 52.98782730102539, "learning_rate": 1.3312244897959183e-05, "loss": 0.5778, "step": 17989 }, { "epoch": 73.42857142857143, "grad_norm": 36.814945220947266, "learning_rate": 1.3310204081632654e-05, "loss": 0.9226, "step": 17990 }, { "epoch": 73.43265306122449, "grad_norm": 33.27399826049805, "learning_rate": 1.3308163265306123e-05, "loss": 0.9081, "step": 17991 }, { "epoch": 73.43673469387755, "grad_norm": 28.142807006835938, "learning_rate": 1.3306122448979593e-05, "loss": 0.9008, "step": 17992 }, { "epoch": 73.44081632653061, "grad_norm": 25.554594039916992, "learning_rate": 1.3304081632653062e-05, "loss": 1.5066, "step": 17993 }, { "epoch": 73.44489795918368, "grad_norm": 32.03127670288086, "learning_rate": 1.3302040816326531e-05, "loss": 1.4034, "step": 17994 }, { "epoch": 73.44897959183673, "grad_norm": 28.95673370361328, "learning_rate": 1.3300000000000001e-05, "loss": 1.4612, "step": 17995 }, { "epoch": 73.4530612244898, "grad_norm": 38.30625915527344, "learning_rate": 1.329795918367347e-05, "loss": 0.6597, "step": 17996 }, { "epoch": 73.45714285714286, "grad_norm": 27.998546600341797, "learning_rate": 1.329591836734694e-05, "loss": 1.7426, "step": 17997 }, { "epoch": 73.46122448979592, "grad_norm": 26.111738204956055, "learning_rate": 1.329387755102041e-05, "loss": 1.5421, "step": 17998 }, { "epoch": 73.46530612244898, "grad_norm": 21.29060173034668, "learning_rate": 1.3291836734693877e-05, "loss": 2.1705, "step": 17999 }, { "epoch": 73.46938775510205, "grad_norm": 15.723959922790527, "learning_rate": 1.3289795918367349e-05, "loss": 2.4149, "step": 18000 }, { "epoch": 73.4734693877551, "grad_norm": 22.234004974365234, "learning_rate": 1.3287755102040816e-05, "loss": 1.5874, "step": 18001 }, { "epoch": 73.47755102040816, "grad_norm": 17.714141845703125, "learning_rate": 1.3285714285714288e-05, "loss": 2.2141, "step": 18002 }, { "epoch": 73.48163265306123, "grad_norm": 50.42155838012695, "learning_rate": 1.3283673469387755e-05, "loss": 1.4368, "step": 18003 }, { "epoch": 73.48571428571428, "grad_norm": 29.846202850341797, "learning_rate": 1.3281632653061224e-05, "loss": 1.6499, "step": 18004 }, { "epoch": 73.48979591836735, "grad_norm": 24.06521224975586, "learning_rate": 1.3279591836734695e-05, "loss": 1.6801, "step": 18005 }, { "epoch": 73.4938775510204, "grad_norm": 28.779361724853516, "learning_rate": 1.3277551020408164e-05, "loss": 1.221, "step": 18006 }, { "epoch": 73.49795918367347, "grad_norm": 40.86833190917969, "learning_rate": 1.3275510204081634e-05, "loss": 1.0962, "step": 18007 }, { "epoch": 73.50204081632653, "grad_norm": 35.83506393432617, "learning_rate": 1.3273469387755103e-05, "loss": 0.9532, "step": 18008 }, { "epoch": 73.5061224489796, "grad_norm": 39.56917190551758, "learning_rate": 1.327142857142857e-05, "loss": 0.7499, "step": 18009 }, { "epoch": 73.51020408163265, "grad_norm": 29.00887107849121, "learning_rate": 1.3269387755102042e-05, "loss": 0.1618, "step": 18010 }, { "epoch": 73.51428571428572, "grad_norm": 9.370219230651855, "learning_rate": 1.326734693877551e-05, "loss": 3.0812, "step": 18011 }, { "epoch": 73.51836734693877, "grad_norm": 40.92613220214844, "learning_rate": 1.3265306122448982e-05, "loss": 0.7105, "step": 18012 }, { "epoch": 73.52244897959184, "grad_norm": 35.48265838623047, "learning_rate": 1.3263265306122449e-05, "loss": 0.9842, "step": 18013 }, { "epoch": 73.5265306122449, "grad_norm": 35.765830993652344, "learning_rate": 1.326122448979592e-05, "loss": 0.6295, "step": 18014 }, { "epoch": 73.53061224489795, "grad_norm": 22.56346893310547, "learning_rate": 1.3259183673469388e-05, "loss": 1.6781, "step": 18015 }, { "epoch": 73.53469387755102, "grad_norm": 23.933767318725586, "learning_rate": 1.3257142857142857e-05, "loss": 1.6922, "step": 18016 }, { "epoch": 73.53877551020408, "grad_norm": 29.429996490478516, "learning_rate": 1.3255102040816327e-05, "loss": 1.7603, "step": 18017 }, { "epoch": 73.54285714285714, "grad_norm": 49.092288970947266, "learning_rate": 1.3253061224489796e-05, "loss": 0.7883, "step": 18018 }, { "epoch": 73.5469387755102, "grad_norm": 35.295841217041016, "learning_rate": 1.3251020408163267e-05, "loss": 0.31, "step": 18019 }, { "epoch": 73.55102040816327, "grad_norm": 30.697967529296875, "learning_rate": 1.3248979591836735e-05, "loss": 1.0052, "step": 18020 }, { "epoch": 73.55510204081632, "grad_norm": 29.70427703857422, "learning_rate": 1.3246938775510204e-05, "loss": 1.4259, "step": 18021 }, { "epoch": 73.55918367346939, "grad_norm": 25.303115844726562, "learning_rate": 1.3244897959183675e-05, "loss": 1.8034, "step": 18022 }, { "epoch": 73.56326530612245, "grad_norm": 19.051513671875, "learning_rate": 1.3242857142857144e-05, "loss": 2.2994, "step": 18023 }, { "epoch": 73.56734693877551, "grad_norm": 29.65814208984375, "learning_rate": 1.3240816326530614e-05, "loss": 0.2088, "step": 18024 }, { "epoch": 73.57142857142857, "grad_norm": 20.154455184936523, "learning_rate": 1.3238775510204083e-05, "loss": 1.8019, "step": 18025 }, { "epoch": 73.57551020408164, "grad_norm": 40.35548782348633, "learning_rate": 1.323673469387755e-05, "loss": 0.8422, "step": 18026 }, { "epoch": 73.57959183673469, "grad_norm": 34.25481414794922, "learning_rate": 1.3234693877551022e-05, "loss": 1.2687, "step": 18027 }, { "epoch": 73.58367346938776, "grad_norm": 33.91179656982422, "learning_rate": 1.323265306122449e-05, "loss": 1.2392, "step": 18028 }, { "epoch": 73.58775510204082, "grad_norm": 42.1729736328125, "learning_rate": 1.3230612244897962e-05, "loss": 1.07, "step": 18029 }, { "epoch": 73.59183673469387, "grad_norm": 45.30398178100586, "learning_rate": 1.3228571428571429e-05, "loss": 1.0876, "step": 18030 }, { "epoch": 73.59591836734694, "grad_norm": 18.800180435180664, "learning_rate": 1.3226530612244898e-05, "loss": 2.189, "step": 18031 }, { "epoch": 73.6, "grad_norm": 42.460243225097656, "learning_rate": 1.3224489795918368e-05, "loss": 1.5175, "step": 18032 }, { "epoch": 73.60408163265306, "grad_norm": 48.614524841308594, "learning_rate": 1.3222448979591837e-05, "loss": 0.4862, "step": 18033 }, { "epoch": 73.60816326530612, "grad_norm": 32.18144226074219, "learning_rate": 1.3220408163265307e-05, "loss": 0.7011, "step": 18034 }, { "epoch": 73.61224489795919, "grad_norm": 23.633258819580078, "learning_rate": 1.3218367346938776e-05, "loss": 0.3496, "step": 18035 }, { "epoch": 73.61632653061224, "grad_norm": 24.894668579101562, "learning_rate": 1.3216326530612247e-05, "loss": 1.5621, "step": 18036 }, { "epoch": 73.62040816326531, "grad_norm": 40.74708557128906, "learning_rate": 1.3214285714285716e-05, "loss": 0.5028, "step": 18037 }, { "epoch": 73.62448979591836, "grad_norm": 20.40390968322754, "learning_rate": 1.3212244897959183e-05, "loss": 2.2375, "step": 18038 }, { "epoch": 73.62857142857143, "grad_norm": 47.639556884765625, "learning_rate": 1.3210204081632655e-05, "loss": 0.647, "step": 18039 }, { "epoch": 73.63265306122449, "grad_norm": 27.411561965942383, "learning_rate": 1.3208163265306122e-05, "loss": 0.3461, "step": 18040 }, { "epoch": 73.63673469387756, "grad_norm": 34.09207534790039, "learning_rate": 1.3206122448979594e-05, "loss": 1.219, "step": 18041 }, { "epoch": 73.64081632653061, "grad_norm": 34.337039947509766, "learning_rate": 1.3204081632653061e-05, "loss": 1.455, "step": 18042 }, { "epoch": 73.64489795918368, "grad_norm": 45.24274444580078, "learning_rate": 1.320204081632653e-05, "loss": 1.3052, "step": 18043 }, { "epoch": 73.64897959183673, "grad_norm": 21.42826271057129, "learning_rate": 1.32e-05, "loss": 2.5555, "step": 18044 }, { "epoch": 73.65306122448979, "grad_norm": 34.13368606567383, "learning_rate": 1.319795918367347e-05, "loss": 1.4559, "step": 18045 }, { "epoch": 73.65714285714286, "grad_norm": 40.81700897216797, "learning_rate": 1.319591836734694e-05, "loss": 0.665, "step": 18046 }, { "epoch": 73.66122448979591, "grad_norm": 39.823638916015625, "learning_rate": 1.3193877551020409e-05, "loss": 1.2426, "step": 18047 }, { "epoch": 73.66530612244898, "grad_norm": 24.6901798248291, "learning_rate": 1.3191836734693878e-05, "loss": 1.622, "step": 18048 }, { "epoch": 73.66938775510204, "grad_norm": 40.07171630859375, "learning_rate": 1.3189795918367348e-05, "loss": 0.5609, "step": 18049 }, { "epoch": 73.6734693877551, "grad_norm": 34.072776794433594, "learning_rate": 1.3187755102040817e-05, "loss": 0.9318, "step": 18050 }, { "epoch": 73.67755102040816, "grad_norm": 55.027442932128906, "learning_rate": 1.3185714285714287e-05, "loss": 1.2198, "step": 18051 }, { "epoch": 73.68163265306123, "grad_norm": 30.342214584350586, "learning_rate": 1.3183673469387756e-05, "loss": 1.4368, "step": 18052 }, { "epoch": 73.68571428571428, "grad_norm": 32.29597473144531, "learning_rate": 1.3181632653061223e-05, "loss": 1.9022, "step": 18053 }, { "epoch": 73.68979591836735, "grad_norm": 40.4734001159668, "learning_rate": 1.3179591836734696e-05, "loss": 0.8503, "step": 18054 }, { "epoch": 73.6938775510204, "grad_norm": 37.090213775634766, "learning_rate": 1.3177551020408163e-05, "loss": 0.4543, "step": 18055 }, { "epoch": 73.69795918367348, "grad_norm": 27.609493255615234, "learning_rate": 1.3175510204081635e-05, "loss": 2.0372, "step": 18056 }, { "epoch": 73.70204081632653, "grad_norm": 35.72185134887695, "learning_rate": 1.3173469387755102e-05, "loss": 1.3206, "step": 18057 }, { "epoch": 73.70612244897958, "grad_norm": 23.42815399169922, "learning_rate": 1.3171428571428571e-05, "loss": 2.1554, "step": 18058 }, { "epoch": 73.71020408163265, "grad_norm": 34.4061164855957, "learning_rate": 1.3169387755102041e-05, "loss": 0.7505, "step": 18059 }, { "epoch": 73.71428571428571, "grad_norm": 31.039209365844727, "learning_rate": 1.316734693877551e-05, "loss": 1.2419, "step": 18060 }, { "epoch": 73.71836734693878, "grad_norm": 27.52886199951172, "learning_rate": 1.316530612244898e-05, "loss": 1.2906, "step": 18061 }, { "epoch": 73.72244897959183, "grad_norm": 32.47056198120117, "learning_rate": 1.316326530612245e-05, "loss": 0.8687, "step": 18062 }, { "epoch": 73.7265306122449, "grad_norm": 28.3742733001709, "learning_rate": 1.316122448979592e-05, "loss": 1.6705, "step": 18063 }, { "epoch": 73.73061224489796, "grad_norm": 24.6632080078125, "learning_rate": 1.3159183673469389e-05, "loss": 1.6618, "step": 18064 }, { "epoch": 73.73469387755102, "grad_norm": 34.54072189331055, "learning_rate": 1.3157142857142856e-05, "loss": 0.2194, "step": 18065 }, { "epoch": 73.73877551020408, "grad_norm": 28.00118064880371, "learning_rate": 1.3155102040816328e-05, "loss": 2.1089, "step": 18066 }, { "epoch": 73.74285714285715, "grad_norm": 36.50355911254883, "learning_rate": 1.3153061224489795e-05, "loss": 1.2081, "step": 18067 }, { "epoch": 73.7469387755102, "grad_norm": 24.024097442626953, "learning_rate": 1.3151020408163268e-05, "loss": 1.7523, "step": 18068 }, { "epoch": 73.75102040816327, "grad_norm": 41.577880859375, "learning_rate": 1.3148979591836735e-05, "loss": 0.3848, "step": 18069 }, { "epoch": 73.75510204081633, "grad_norm": 36.41632080078125, "learning_rate": 1.3146938775510204e-05, "loss": 1.3053, "step": 18070 }, { "epoch": 73.7591836734694, "grad_norm": 38.0585823059082, "learning_rate": 1.3144897959183674e-05, "loss": 0.7875, "step": 18071 }, { "epoch": 73.76326530612245, "grad_norm": 27.908544540405273, "learning_rate": 1.3142857142857143e-05, "loss": 1.5624, "step": 18072 }, { "epoch": 73.7673469387755, "grad_norm": 19.294403076171875, "learning_rate": 1.3140816326530613e-05, "loss": 2.2476, "step": 18073 }, { "epoch": 73.77142857142857, "grad_norm": 20.418357849121094, "learning_rate": 1.3138775510204082e-05, "loss": 0.134, "step": 18074 }, { "epoch": 73.77551020408163, "grad_norm": 39.05662536621094, "learning_rate": 1.3136734693877551e-05, "loss": 0.4943, "step": 18075 }, { "epoch": 73.7795918367347, "grad_norm": 21.661109924316406, "learning_rate": 1.3134693877551022e-05, "loss": 0.1476, "step": 18076 }, { "epoch": 73.78367346938775, "grad_norm": 35.411888122558594, "learning_rate": 1.313265306122449e-05, "loss": 1.3122, "step": 18077 }, { "epoch": 73.78775510204082, "grad_norm": 2.127340078353882, "learning_rate": 1.3130612244897961e-05, "loss": 0.0398, "step": 18078 }, { "epoch": 73.79183673469387, "grad_norm": 30.194067001342773, "learning_rate": 1.312857142857143e-05, "loss": 1.6727, "step": 18079 }, { "epoch": 73.79591836734694, "grad_norm": 22.588275909423828, "learning_rate": 1.3126530612244897e-05, "loss": 0.1252, "step": 18080 }, { "epoch": 73.8, "grad_norm": 23.517730712890625, "learning_rate": 1.3124489795918369e-05, "loss": 2.1085, "step": 18081 }, { "epoch": 73.80408163265307, "grad_norm": 36.26360321044922, "learning_rate": 1.3122448979591836e-05, "loss": 0.742, "step": 18082 }, { "epoch": 73.80816326530612, "grad_norm": 40.25083541870117, "learning_rate": 1.3120408163265308e-05, "loss": 1.391, "step": 18083 }, { "epoch": 73.81224489795919, "grad_norm": 32.867435455322266, "learning_rate": 1.3118367346938775e-05, "loss": 1.0593, "step": 18084 }, { "epoch": 73.81632653061224, "grad_norm": 32.53237533569336, "learning_rate": 1.3116326530612246e-05, "loss": 0.3297, "step": 18085 }, { "epoch": 73.82040816326531, "grad_norm": 18.19441795349121, "learning_rate": 1.3114285714285715e-05, "loss": 0.131, "step": 18086 }, { "epoch": 73.82448979591837, "grad_norm": 26.11942481994629, "learning_rate": 1.3112244897959184e-05, "loss": 1.3488, "step": 18087 }, { "epoch": 73.82857142857142, "grad_norm": 33.79995346069336, "learning_rate": 1.3110204081632654e-05, "loss": 0.8699, "step": 18088 }, { "epoch": 73.83265306122449, "grad_norm": 31.781612396240234, "learning_rate": 1.3108163265306123e-05, "loss": 1.3926, "step": 18089 }, { "epoch": 73.83673469387755, "grad_norm": 28.5769100189209, "learning_rate": 1.3106122448979593e-05, "loss": 1.826, "step": 18090 }, { "epoch": 73.84081632653061, "grad_norm": 36.07743453979492, "learning_rate": 1.3104081632653062e-05, "loss": 1.2048, "step": 18091 }, { "epoch": 73.84489795918367, "grad_norm": 28.42498779296875, "learning_rate": 1.310204081632653e-05, "loss": 0.441, "step": 18092 }, { "epoch": 73.84897959183674, "grad_norm": 32.82954788208008, "learning_rate": 1.3100000000000002e-05, "loss": 0.9739, "step": 18093 }, { "epoch": 73.85306122448979, "grad_norm": 20.56001091003418, "learning_rate": 1.3097959183673469e-05, "loss": 2.1482, "step": 18094 }, { "epoch": 73.85714285714286, "grad_norm": 42.1739387512207, "learning_rate": 1.3095918367346941e-05, "loss": 0.8742, "step": 18095 }, { "epoch": 73.86122448979592, "grad_norm": 29.751779556274414, "learning_rate": 1.3093877551020408e-05, "loss": 1.5533, "step": 18096 }, { "epoch": 73.86530612244898, "grad_norm": 23.470123291015625, "learning_rate": 1.3091836734693877e-05, "loss": 0.2015, "step": 18097 }, { "epoch": 73.86938775510204, "grad_norm": 23.91524887084961, "learning_rate": 1.3089795918367347e-05, "loss": 1.7137, "step": 18098 }, { "epoch": 73.87346938775511, "grad_norm": 29.605224609375, "learning_rate": 1.3087755102040816e-05, "loss": 1.4047, "step": 18099 }, { "epoch": 73.87755102040816, "grad_norm": 33.55498123168945, "learning_rate": 1.3085714285714287e-05, "loss": 1.593, "step": 18100 }, { "epoch": 73.88163265306123, "grad_norm": 21.085926055908203, "learning_rate": 1.3083673469387756e-05, "loss": 2.2535, "step": 18101 }, { "epoch": 73.88571428571429, "grad_norm": 33.846248626708984, "learning_rate": 1.3081632653061224e-05, "loss": 1.572, "step": 18102 }, { "epoch": 73.88979591836734, "grad_norm": 27.639326095581055, "learning_rate": 1.3079591836734695e-05, "loss": 1.845, "step": 18103 }, { "epoch": 73.89387755102041, "grad_norm": 21.9427547454834, "learning_rate": 1.3077551020408164e-05, "loss": 0.1368, "step": 18104 }, { "epoch": 73.89795918367346, "grad_norm": 37.96583938598633, "learning_rate": 1.3075510204081634e-05, "loss": 1.0577, "step": 18105 }, { "epoch": 73.90204081632653, "grad_norm": 19.569265365600586, "learning_rate": 1.3073469387755103e-05, "loss": 2.0682, "step": 18106 }, { "epoch": 73.90612244897959, "grad_norm": 34.69205093383789, "learning_rate": 1.3071428571428574e-05, "loss": 1.1285, "step": 18107 }, { "epoch": 73.91020408163266, "grad_norm": 33.9083251953125, "learning_rate": 1.3069387755102042e-05, "loss": 0.5357, "step": 18108 }, { "epoch": 73.91428571428571, "grad_norm": 18.49054718017578, "learning_rate": 1.306734693877551e-05, "loss": 0.1406, "step": 18109 }, { "epoch": 73.91836734693878, "grad_norm": 27.09832763671875, "learning_rate": 1.3065306122448982e-05, "loss": 1.6529, "step": 18110 }, { "epoch": 73.92244897959183, "grad_norm": 36.49867630004883, "learning_rate": 1.3063265306122449e-05, "loss": 1.1759, "step": 18111 }, { "epoch": 73.9265306122449, "grad_norm": 31.187562942504883, "learning_rate": 1.306122448979592e-05, "loss": 0.7741, "step": 18112 }, { "epoch": 73.93061224489796, "grad_norm": 36.1087760925293, "learning_rate": 1.3059183673469388e-05, "loss": 1.0157, "step": 18113 }, { "epoch": 73.93469387755103, "grad_norm": 39.29718780517578, "learning_rate": 1.3057142857142857e-05, "loss": 0.2063, "step": 18114 }, { "epoch": 73.93877551020408, "grad_norm": 31.502046585083008, "learning_rate": 1.3055102040816328e-05, "loss": 1.5738, "step": 18115 }, { "epoch": 73.94285714285714, "grad_norm": 25.489418029785156, "learning_rate": 1.3053061224489796e-05, "loss": 2.1076, "step": 18116 }, { "epoch": 73.9469387755102, "grad_norm": 32.273624420166016, "learning_rate": 1.3051020408163267e-05, "loss": 0.8987, "step": 18117 }, { "epoch": 73.95102040816326, "grad_norm": 46.26582336425781, "learning_rate": 1.3048979591836736e-05, "loss": 0.9164, "step": 18118 }, { "epoch": 73.95510204081633, "grad_norm": 34.37712860107422, "learning_rate": 1.3046938775510203e-05, "loss": 0.4487, "step": 18119 }, { "epoch": 73.95918367346938, "grad_norm": 25.29599380493164, "learning_rate": 1.3044897959183675e-05, "loss": 1.413, "step": 18120 }, { "epoch": 73.96326530612245, "grad_norm": 39.32146072387695, "learning_rate": 1.3042857142857142e-05, "loss": 1.6872, "step": 18121 }, { "epoch": 73.9673469387755, "grad_norm": 24.81873321533203, "learning_rate": 1.3040816326530614e-05, "loss": 1.8221, "step": 18122 }, { "epoch": 73.97142857142858, "grad_norm": 29.329973220825195, "learning_rate": 1.3038775510204081e-05, "loss": 1.5031, "step": 18123 }, { "epoch": 73.97551020408163, "grad_norm": 38.733585357666016, "learning_rate": 1.303673469387755e-05, "loss": 0.783, "step": 18124 }, { "epoch": 73.9795918367347, "grad_norm": 35.872928619384766, "learning_rate": 1.303469387755102e-05, "loss": 1.0157, "step": 18125 }, { "epoch": 73.98367346938775, "grad_norm": 39.61609649658203, "learning_rate": 1.303265306122449e-05, "loss": 1.1739, "step": 18126 }, { "epoch": 73.98775510204082, "grad_norm": 31.453947067260742, "learning_rate": 1.303061224489796e-05, "loss": 0.6128, "step": 18127 }, { "epoch": 73.99183673469388, "grad_norm": 25.124122619628906, "learning_rate": 1.3028571428571429e-05, "loss": 1.6208, "step": 18128 }, { "epoch": 73.99591836734695, "grad_norm": 32.38993453979492, "learning_rate": 1.30265306122449e-05, "loss": 0.4793, "step": 18129 }, { "epoch": 74.0, "grad_norm": 38.468868255615234, "learning_rate": 1.3024489795918368e-05, "loss": 1.632, "step": 18130 }, { "epoch": 74.00408163265305, "grad_norm": 32.347530364990234, "learning_rate": 1.3022448979591837e-05, "loss": 0.9858, "step": 18131 }, { "epoch": 74.00816326530612, "grad_norm": 29.309995651245117, "learning_rate": 1.3020408163265308e-05, "loss": 0.9342, "step": 18132 }, { "epoch": 74.01224489795918, "grad_norm": 53.756282806396484, "learning_rate": 1.3018367346938776e-05, "loss": 0.1905, "step": 18133 }, { "epoch": 74.01632653061225, "grad_norm": 21.277956008911133, "learning_rate": 1.3016326530612247e-05, "loss": 1.7713, "step": 18134 }, { "epoch": 74.0204081632653, "grad_norm": 33.165184020996094, "learning_rate": 1.3014285714285716e-05, "loss": 1.2338, "step": 18135 }, { "epoch": 74.02448979591837, "grad_norm": 36.29977798461914, "learning_rate": 1.3012244897959183e-05, "loss": 0.4689, "step": 18136 }, { "epoch": 74.02857142857142, "grad_norm": 34.700599670410156, "learning_rate": 1.3010204081632653e-05, "loss": 1.3432, "step": 18137 }, { "epoch": 74.0326530612245, "grad_norm": 36.669681549072266, "learning_rate": 1.3008163265306122e-05, "loss": 0.346, "step": 18138 }, { "epoch": 74.03673469387755, "grad_norm": 28.07687759399414, "learning_rate": 1.3006122448979593e-05, "loss": 1.294, "step": 18139 }, { "epoch": 74.04081632653062, "grad_norm": 41.41702651977539, "learning_rate": 1.3004081632653062e-05, "loss": 1.1024, "step": 18140 }, { "epoch": 74.04489795918367, "grad_norm": 29.636762619018555, "learning_rate": 1.300204081632653e-05, "loss": 0.4471, "step": 18141 }, { "epoch": 74.04897959183674, "grad_norm": 48.78947830200195, "learning_rate": 1.3000000000000001e-05, "loss": 0.3091, "step": 18142 }, { "epoch": 74.0530612244898, "grad_norm": 29.709054946899414, "learning_rate": 1.299795918367347e-05, "loss": 0.5855, "step": 18143 }, { "epoch": 74.05714285714286, "grad_norm": 48.55655288696289, "learning_rate": 1.299591836734694e-05, "loss": 0.4681, "step": 18144 }, { "epoch": 74.06122448979592, "grad_norm": 22.628026962280273, "learning_rate": 1.2993877551020409e-05, "loss": 1.4659, "step": 18145 }, { "epoch": 74.06530612244897, "grad_norm": 32.192161560058594, "learning_rate": 1.2991836734693876e-05, "loss": 0.4414, "step": 18146 }, { "epoch": 74.06938775510204, "grad_norm": 27.023157119750977, "learning_rate": 1.2989795918367348e-05, "loss": 0.8535, "step": 18147 }, { "epoch": 74.0734693877551, "grad_norm": 30.536916732788086, "learning_rate": 1.2987755102040816e-05, "loss": 0.3062, "step": 18148 }, { "epoch": 74.07755102040817, "grad_norm": 39.70927047729492, "learning_rate": 1.2985714285714288e-05, "loss": 0.6788, "step": 18149 }, { "epoch": 74.08163265306122, "grad_norm": 34.48149871826172, "learning_rate": 1.2983673469387755e-05, "loss": 0.502, "step": 18150 }, { "epoch": 74.08571428571429, "grad_norm": 28.609100341796875, "learning_rate": 1.2981632653061227e-05, "loss": 1.3614, "step": 18151 }, { "epoch": 74.08979591836734, "grad_norm": 31.454740524291992, "learning_rate": 1.2979591836734694e-05, "loss": 1.0325, "step": 18152 }, { "epoch": 74.09387755102041, "grad_norm": 26.337339401245117, "learning_rate": 1.2977551020408163e-05, "loss": 1.6526, "step": 18153 }, { "epoch": 74.09795918367347, "grad_norm": 31.3582706451416, "learning_rate": 1.2975510204081634e-05, "loss": 0.1942, "step": 18154 }, { "epoch": 74.10204081632654, "grad_norm": 39.70932388305664, "learning_rate": 1.2973469387755102e-05, "loss": 0.8152, "step": 18155 }, { "epoch": 74.10612244897959, "grad_norm": 40.96529769897461, "learning_rate": 1.2971428571428573e-05, "loss": 0.3487, "step": 18156 }, { "epoch": 74.11020408163266, "grad_norm": 35.40553665161133, "learning_rate": 1.2969387755102042e-05, "loss": 0.5128, "step": 18157 }, { "epoch": 74.11428571428571, "grad_norm": 25.189889907836914, "learning_rate": 1.296734693877551e-05, "loss": 2.4032, "step": 18158 }, { "epoch": 74.11836734693877, "grad_norm": 29.105697631835938, "learning_rate": 1.2965306122448981e-05, "loss": 1.2051, "step": 18159 }, { "epoch": 74.12244897959184, "grad_norm": 38.09334945678711, "learning_rate": 1.296326530612245e-05, "loss": 1.073, "step": 18160 }, { "epoch": 74.12653061224489, "grad_norm": 38.5652961730957, "learning_rate": 1.296122448979592e-05, "loss": 0.4529, "step": 18161 }, { "epoch": 74.13061224489796, "grad_norm": 26.183765411376953, "learning_rate": 1.2959183673469389e-05, "loss": 1.4953, "step": 18162 }, { "epoch": 74.13469387755102, "grad_norm": 25.665607452392578, "learning_rate": 1.2957142857142856e-05, "loss": 1.7664, "step": 18163 }, { "epoch": 74.13877551020408, "grad_norm": 28.025238037109375, "learning_rate": 1.2955102040816327e-05, "loss": 1.7337, "step": 18164 }, { "epoch": 74.14285714285714, "grad_norm": 38.219913482666016, "learning_rate": 1.2953061224489796e-05, "loss": 1.1533, "step": 18165 }, { "epoch": 74.14693877551021, "grad_norm": 38.71417999267578, "learning_rate": 1.2951020408163266e-05, "loss": 0.7049, "step": 18166 }, { "epoch": 74.15102040816326, "grad_norm": 32.06975173950195, "learning_rate": 1.2948979591836735e-05, "loss": 1.3768, "step": 18167 }, { "epoch": 74.15510204081633, "grad_norm": 26.00064468383789, "learning_rate": 1.2946938775510204e-05, "loss": 1.5396, "step": 18168 }, { "epoch": 74.15918367346939, "grad_norm": 26.468955993652344, "learning_rate": 1.2944897959183674e-05, "loss": 0.3604, "step": 18169 }, { "epoch": 74.16326530612245, "grad_norm": 32.31467819213867, "learning_rate": 1.2942857142857143e-05, "loss": 0.8025, "step": 18170 }, { "epoch": 74.16734693877551, "grad_norm": 32.29081344604492, "learning_rate": 1.2940816326530614e-05, "loss": 1.4777, "step": 18171 }, { "epoch": 74.17142857142858, "grad_norm": 32.24236297607422, "learning_rate": 1.2938775510204082e-05, "loss": 1.3818, "step": 18172 }, { "epoch": 74.17551020408163, "grad_norm": 41.66331100463867, "learning_rate": 1.2936734693877553e-05, "loss": 0.5291, "step": 18173 }, { "epoch": 74.17959183673469, "grad_norm": 20.245235443115234, "learning_rate": 1.2934693877551022e-05, "loss": 2.084, "step": 18174 }, { "epoch": 74.18367346938776, "grad_norm": 43.19109344482422, "learning_rate": 1.2932653061224489e-05, "loss": 1.4732, "step": 18175 }, { "epoch": 74.18775510204081, "grad_norm": 35.01222610473633, "learning_rate": 1.2930612244897961e-05, "loss": 0.9784, "step": 18176 }, { "epoch": 74.19183673469388, "grad_norm": 31.6375675201416, "learning_rate": 1.2928571428571428e-05, "loss": 1.4969, "step": 18177 }, { "epoch": 74.19591836734693, "grad_norm": 41.541099548339844, "learning_rate": 1.29265306122449e-05, "loss": 1.2599, "step": 18178 }, { "epoch": 74.2, "grad_norm": 22.040660858154297, "learning_rate": 1.2924489795918368e-05, "loss": 1.8548, "step": 18179 }, { "epoch": 74.20408163265306, "grad_norm": 37.1032600402832, "learning_rate": 1.2922448979591836e-05, "loss": 0.4256, "step": 18180 }, { "epoch": 74.20816326530613, "grad_norm": 15.657506942749023, "learning_rate": 1.2920408163265307e-05, "loss": 2.4052, "step": 18181 }, { "epoch": 74.21224489795918, "grad_norm": 35.331058502197266, "learning_rate": 1.2918367346938776e-05, "loss": 0.7423, "step": 18182 }, { "epoch": 74.21632653061225, "grad_norm": 28.048376083374023, "learning_rate": 1.2916326530612246e-05, "loss": 1.6038, "step": 18183 }, { "epoch": 74.2204081632653, "grad_norm": 33.003299713134766, "learning_rate": 1.2914285714285715e-05, "loss": 0.6012, "step": 18184 }, { "epoch": 74.22448979591837, "grad_norm": 22.720861434936523, "learning_rate": 1.2912244897959184e-05, "loss": 1.7341, "step": 18185 }, { "epoch": 74.22857142857143, "grad_norm": 43.6999626159668, "learning_rate": 1.2910204081632654e-05, "loss": 0.744, "step": 18186 }, { "epoch": 74.2326530612245, "grad_norm": 41.8906364440918, "learning_rate": 1.2908163265306123e-05, "loss": 1.2317, "step": 18187 }, { "epoch": 74.23673469387755, "grad_norm": 35.370750427246094, "learning_rate": 1.2906122448979594e-05, "loss": 1.6093, "step": 18188 }, { "epoch": 74.2408163265306, "grad_norm": 50.22892761230469, "learning_rate": 1.2904081632653063e-05, "loss": 0.4869, "step": 18189 }, { "epoch": 74.24489795918367, "grad_norm": 35.32278060913086, "learning_rate": 1.290204081632653e-05, "loss": 1.2547, "step": 18190 }, { "epoch": 74.24897959183673, "grad_norm": 37.56561279296875, "learning_rate": 1.29e-05, "loss": 0.2435, "step": 18191 }, { "epoch": 74.2530612244898, "grad_norm": 26.527124404907227, "learning_rate": 1.2897959183673469e-05, "loss": 2.0266, "step": 18192 }, { "epoch": 74.25714285714285, "grad_norm": 28.900197982788086, "learning_rate": 1.289591836734694e-05, "loss": 0.7958, "step": 18193 }, { "epoch": 74.26122448979592, "grad_norm": 30.415006637573242, "learning_rate": 1.2893877551020408e-05, "loss": 0.157, "step": 18194 }, { "epoch": 74.26530612244898, "grad_norm": 12.43770694732666, "learning_rate": 1.2891836734693879e-05, "loss": 0.0776, "step": 18195 }, { "epoch": 74.26938775510204, "grad_norm": 32.287017822265625, "learning_rate": 1.2889795918367348e-05, "loss": 0.9805, "step": 18196 }, { "epoch": 74.2734693877551, "grad_norm": 37.166236877441406, "learning_rate": 1.2887755102040816e-05, "loss": 0.6557, "step": 18197 }, { "epoch": 74.27755102040817, "grad_norm": 38.12360382080078, "learning_rate": 1.2885714285714287e-05, "loss": 0.4112, "step": 18198 }, { "epoch": 74.28163265306122, "grad_norm": 32.78750991821289, "learning_rate": 1.2883673469387756e-05, "loss": 1.395, "step": 18199 }, { "epoch": 74.28571428571429, "grad_norm": 24.148117065429688, "learning_rate": 1.2881632653061226e-05, "loss": 1.551, "step": 18200 }, { "epoch": 74.28979591836735, "grad_norm": 26.29322624206543, "learning_rate": 1.2879591836734695e-05, "loss": 1.48, "step": 18201 }, { "epoch": 74.29387755102042, "grad_norm": 25.933032989501953, "learning_rate": 1.2877551020408162e-05, "loss": 1.6578, "step": 18202 }, { "epoch": 74.29795918367347, "grad_norm": 25.539016723632812, "learning_rate": 1.2875510204081634e-05, "loss": 1.6609, "step": 18203 }, { "epoch": 74.30204081632652, "grad_norm": 32.07863998413086, "learning_rate": 1.2873469387755102e-05, "loss": 0.7102, "step": 18204 }, { "epoch": 74.3061224489796, "grad_norm": 34.93537902832031, "learning_rate": 1.2871428571428574e-05, "loss": 0.4719, "step": 18205 }, { "epoch": 74.31020408163265, "grad_norm": 35.562774658203125, "learning_rate": 1.2869387755102041e-05, "loss": 0.6185, "step": 18206 }, { "epoch": 74.31428571428572, "grad_norm": 28.82865333557129, "learning_rate": 1.286734693877551e-05, "loss": 1.3132, "step": 18207 }, { "epoch": 74.31836734693877, "grad_norm": 31.408653259277344, "learning_rate": 1.286530612244898e-05, "loss": 0.4831, "step": 18208 }, { "epoch": 74.32244897959184, "grad_norm": 39.047149658203125, "learning_rate": 1.2863265306122449e-05, "loss": 1.3108, "step": 18209 }, { "epoch": 74.3265306122449, "grad_norm": 38.645484924316406, "learning_rate": 1.286122448979592e-05, "loss": 0.5131, "step": 18210 }, { "epoch": 74.33061224489796, "grad_norm": 19.279531478881836, "learning_rate": 1.2859183673469388e-05, "loss": 0.1723, "step": 18211 }, { "epoch": 74.33469387755102, "grad_norm": 40.102970123291016, "learning_rate": 1.2857142857142857e-05, "loss": 1.4207, "step": 18212 }, { "epoch": 74.33877551020409, "grad_norm": 31.737131118774414, "learning_rate": 1.2855102040816328e-05, "loss": 0.3433, "step": 18213 }, { "epoch": 74.34285714285714, "grad_norm": 28.75909423828125, "learning_rate": 1.2853061224489797e-05, "loss": 1.7277, "step": 18214 }, { "epoch": 74.34693877551021, "grad_norm": 26.191162109375, "learning_rate": 1.2851020408163267e-05, "loss": 2.1549, "step": 18215 }, { "epoch": 74.35102040816327, "grad_norm": 43.789241790771484, "learning_rate": 1.2848979591836736e-05, "loss": 0.772, "step": 18216 }, { "epoch": 74.35510204081632, "grad_norm": 42.84164047241211, "learning_rate": 1.2846938775510206e-05, "loss": 1.1658, "step": 18217 }, { "epoch": 74.35918367346939, "grad_norm": 26.251670837402344, "learning_rate": 1.2844897959183674e-05, "loss": 1.2678, "step": 18218 }, { "epoch": 74.36326530612244, "grad_norm": 36.999267578125, "learning_rate": 1.2842857142857142e-05, "loss": 1.0908, "step": 18219 }, { "epoch": 74.36734693877551, "grad_norm": 25.373851776123047, "learning_rate": 1.2840816326530613e-05, "loss": 2.0812, "step": 18220 }, { "epoch": 74.37142857142857, "grad_norm": 20.86971664428711, "learning_rate": 1.2838775510204082e-05, "loss": 2.1637, "step": 18221 }, { "epoch": 74.37551020408164, "grad_norm": 17.303401947021484, "learning_rate": 1.2836734693877552e-05, "loss": 2.1928, "step": 18222 }, { "epoch": 74.37959183673469, "grad_norm": 25.233078002929688, "learning_rate": 1.2834693877551021e-05, "loss": 1.6231, "step": 18223 }, { "epoch": 74.38367346938776, "grad_norm": 28.891845703125, "learning_rate": 1.283265306122449e-05, "loss": 1.6961, "step": 18224 }, { "epoch": 74.38775510204081, "grad_norm": 33.472530364990234, "learning_rate": 1.283061224489796e-05, "loss": 0.903, "step": 18225 }, { "epoch": 74.39183673469388, "grad_norm": 25.735267639160156, "learning_rate": 1.282857142857143e-05, "loss": 0.6999, "step": 18226 }, { "epoch": 74.39591836734694, "grad_norm": 27.263521194458008, "learning_rate": 1.28265306122449e-05, "loss": 1.4311, "step": 18227 }, { "epoch": 74.4, "grad_norm": 36.892539978027344, "learning_rate": 1.2824489795918369e-05, "loss": 0.5452, "step": 18228 }, { "epoch": 74.40408163265306, "grad_norm": 34.065711975097656, "learning_rate": 1.2822448979591836e-05, "loss": 0.3192, "step": 18229 }, { "epoch": 74.40816326530613, "grad_norm": 26.733652114868164, "learning_rate": 1.2820408163265308e-05, "loss": 1.5922, "step": 18230 }, { "epoch": 74.41224489795918, "grad_norm": 20.044645309448242, "learning_rate": 1.2818367346938775e-05, "loss": 2.097, "step": 18231 }, { "epoch": 74.41632653061224, "grad_norm": 27.30939292907715, "learning_rate": 1.2816326530612247e-05, "loss": 1.5108, "step": 18232 }, { "epoch": 74.42040816326531, "grad_norm": 33.459903717041016, "learning_rate": 1.2814285714285714e-05, "loss": 0.5981, "step": 18233 }, { "epoch": 74.42448979591836, "grad_norm": 38.63322830200195, "learning_rate": 1.2812244897959183e-05, "loss": 0.4234, "step": 18234 }, { "epoch": 74.42857142857143, "grad_norm": 54.980411529541016, "learning_rate": 1.2810204081632654e-05, "loss": 0.6132, "step": 18235 }, { "epoch": 74.43265306122449, "grad_norm": 41.36488342285156, "learning_rate": 1.2808163265306122e-05, "loss": 1.2131, "step": 18236 }, { "epoch": 74.43673469387755, "grad_norm": 37.073760986328125, "learning_rate": 1.2806122448979593e-05, "loss": 0.5156, "step": 18237 }, { "epoch": 74.44081632653061, "grad_norm": 31.938634872436523, "learning_rate": 1.2804081632653062e-05, "loss": 1.3867, "step": 18238 }, { "epoch": 74.44489795918368, "grad_norm": 27.991758346557617, "learning_rate": 1.280204081632653e-05, "loss": 1.8098, "step": 18239 }, { "epoch": 74.44897959183673, "grad_norm": 34.39806365966797, "learning_rate": 1.2800000000000001e-05, "loss": 1.417, "step": 18240 }, { "epoch": 74.4530612244898, "grad_norm": 48.623382568359375, "learning_rate": 1.279795918367347e-05, "loss": 1.1007, "step": 18241 }, { "epoch": 74.45714285714286, "grad_norm": 46.58073425292969, "learning_rate": 1.279591836734694e-05, "loss": 1.4196, "step": 18242 }, { "epoch": 74.46122448979592, "grad_norm": 32.708953857421875, "learning_rate": 1.2793877551020408e-05, "loss": 1.6715, "step": 18243 }, { "epoch": 74.46530612244898, "grad_norm": 36.34534454345703, "learning_rate": 1.279183673469388e-05, "loss": 0.4093, "step": 18244 }, { "epoch": 74.46938775510205, "grad_norm": 36.76115036010742, "learning_rate": 1.2789795918367347e-05, "loss": 0.6832, "step": 18245 }, { "epoch": 74.4734693877551, "grad_norm": 24.439348220825195, "learning_rate": 1.2787755102040816e-05, "loss": 1.7528, "step": 18246 }, { "epoch": 74.47755102040816, "grad_norm": 54.184730529785156, "learning_rate": 1.2785714285714286e-05, "loss": 1.4172, "step": 18247 }, { "epoch": 74.48163265306123, "grad_norm": 35.42074966430664, "learning_rate": 1.2783673469387755e-05, "loss": 0.8685, "step": 18248 }, { "epoch": 74.48571428571428, "grad_norm": 41.95248794555664, "learning_rate": 1.2781632653061226e-05, "loss": 0.798, "step": 18249 }, { "epoch": 74.48979591836735, "grad_norm": 31.173524856567383, "learning_rate": 1.2779591836734694e-05, "loss": 1.1363, "step": 18250 }, { "epoch": 74.4938775510204, "grad_norm": 15.444681167602539, "learning_rate": 1.2777551020408163e-05, "loss": 0.1161, "step": 18251 }, { "epoch": 74.49795918367347, "grad_norm": 39.9539680480957, "learning_rate": 1.2775510204081634e-05, "loss": 0.8358, "step": 18252 }, { "epoch": 74.50204081632653, "grad_norm": 35.42667007446289, "learning_rate": 1.2773469387755103e-05, "loss": 1.5623, "step": 18253 }, { "epoch": 74.5061224489796, "grad_norm": 18.15620994567871, "learning_rate": 1.2771428571428573e-05, "loss": 0.1293, "step": 18254 }, { "epoch": 74.51020408163265, "grad_norm": 33.186405181884766, "learning_rate": 1.2769387755102042e-05, "loss": 1.8642, "step": 18255 }, { "epoch": 74.51428571428572, "grad_norm": 33.74794006347656, "learning_rate": 1.2767346938775509e-05, "loss": 1.0391, "step": 18256 }, { "epoch": 74.51836734693877, "grad_norm": 33.119876861572266, "learning_rate": 1.2765306122448981e-05, "loss": 0.1861, "step": 18257 }, { "epoch": 74.52244897959184, "grad_norm": 36.89698791503906, "learning_rate": 1.2763265306122448e-05, "loss": 1.272, "step": 18258 }, { "epoch": 74.5265306122449, "grad_norm": 38.905765533447266, "learning_rate": 1.276122448979592e-05, "loss": 0.9248, "step": 18259 }, { "epoch": 74.53061224489795, "grad_norm": 34.16682815551758, "learning_rate": 1.2759183673469388e-05, "loss": 0.3166, "step": 18260 }, { "epoch": 74.53469387755102, "grad_norm": 44.786529541015625, "learning_rate": 1.2757142857142856e-05, "loss": 0.5438, "step": 18261 }, { "epoch": 74.53877551020408, "grad_norm": 36.867332458496094, "learning_rate": 1.2755102040816327e-05, "loss": 1.0394, "step": 18262 }, { "epoch": 74.54285714285714, "grad_norm": 41.80188751220703, "learning_rate": 1.2753061224489796e-05, "loss": 0.7037, "step": 18263 }, { "epoch": 74.5469387755102, "grad_norm": 38.96897888183594, "learning_rate": 1.2751020408163266e-05, "loss": 0.7066, "step": 18264 }, { "epoch": 74.55102040816327, "grad_norm": 58.98237228393555, "learning_rate": 1.2748979591836735e-05, "loss": 1.0513, "step": 18265 }, { "epoch": 74.55510204081632, "grad_norm": 17.23326873779297, "learning_rate": 1.2746938775510206e-05, "loss": 2.4969, "step": 18266 }, { "epoch": 74.55918367346939, "grad_norm": 36.22200393676758, "learning_rate": 1.2744897959183674e-05, "loss": 0.3093, "step": 18267 }, { "epoch": 74.56326530612245, "grad_norm": 23.711633682250977, "learning_rate": 1.2742857142857143e-05, "loss": 0.3399, "step": 18268 }, { "epoch": 74.56734693877551, "grad_norm": 40.601661682128906, "learning_rate": 1.2740816326530614e-05, "loss": 1.0405, "step": 18269 }, { "epoch": 74.57142857142857, "grad_norm": 11.64771842956543, "learning_rate": 1.2738775510204081e-05, "loss": 3.0732, "step": 18270 }, { "epoch": 74.57551020408164, "grad_norm": 33.47275161743164, "learning_rate": 1.2736734693877553e-05, "loss": 1.3848, "step": 18271 }, { "epoch": 74.57959183673469, "grad_norm": 25.564712524414062, "learning_rate": 1.273469387755102e-05, "loss": 1.6323, "step": 18272 }, { "epoch": 74.58367346938776, "grad_norm": 22.458799362182617, "learning_rate": 1.2732653061224489e-05, "loss": 0.1985, "step": 18273 }, { "epoch": 74.58775510204082, "grad_norm": 34.274024963378906, "learning_rate": 1.273061224489796e-05, "loss": 1.4825, "step": 18274 }, { "epoch": 74.59183673469387, "grad_norm": 34.873565673828125, "learning_rate": 1.2728571428571428e-05, "loss": 0.4786, "step": 18275 }, { "epoch": 74.59591836734694, "grad_norm": 17.24724006652832, "learning_rate": 1.2726530612244899e-05, "loss": 0.1269, "step": 18276 }, { "epoch": 74.6, "grad_norm": 31.125469207763672, "learning_rate": 1.2724489795918368e-05, "loss": 1.004, "step": 18277 }, { "epoch": 74.60408163265306, "grad_norm": 18.95606231689453, "learning_rate": 1.2722448979591837e-05, "loss": 0.131, "step": 18278 }, { "epoch": 74.60816326530612, "grad_norm": 31.62232208251953, "learning_rate": 1.2720408163265307e-05, "loss": 0.9715, "step": 18279 }, { "epoch": 74.61224489795919, "grad_norm": 38.38346862792969, "learning_rate": 1.2718367346938776e-05, "loss": 0.9185, "step": 18280 }, { "epoch": 74.61632653061224, "grad_norm": 30.59050750732422, "learning_rate": 1.2716326530612246e-05, "loss": 1.346, "step": 18281 }, { "epoch": 74.62040816326531, "grad_norm": 44.21078109741211, "learning_rate": 1.2714285714285715e-05, "loss": 1.5207, "step": 18282 }, { "epoch": 74.62448979591836, "grad_norm": 26.108234405517578, "learning_rate": 1.2712244897959182e-05, "loss": 1.6971, "step": 18283 }, { "epoch": 74.62857142857143, "grad_norm": 41.65818786621094, "learning_rate": 1.2710204081632655e-05, "loss": 0.8816, "step": 18284 }, { "epoch": 74.63265306122449, "grad_norm": 26.50481414794922, "learning_rate": 1.2708163265306122e-05, "loss": 1.4111, "step": 18285 }, { "epoch": 74.63673469387756, "grad_norm": 24.072887420654297, "learning_rate": 1.2706122448979594e-05, "loss": 2.0893, "step": 18286 }, { "epoch": 74.64081632653061, "grad_norm": 19.552846908569336, "learning_rate": 1.2704081632653061e-05, "loss": 2.0346, "step": 18287 }, { "epoch": 74.64489795918368, "grad_norm": 25.12181282043457, "learning_rate": 1.2702040816326533e-05, "loss": 1.6511, "step": 18288 }, { "epoch": 74.64897959183673, "grad_norm": 32.015987396240234, "learning_rate": 1.27e-05, "loss": 0.9291, "step": 18289 }, { "epoch": 74.65306122448979, "grad_norm": 32.84764099121094, "learning_rate": 1.269795918367347e-05, "loss": 1.6325, "step": 18290 }, { "epoch": 74.65714285714286, "grad_norm": 47.987457275390625, "learning_rate": 1.269591836734694e-05, "loss": 0.4966, "step": 18291 }, { "epoch": 74.66122448979591, "grad_norm": 40.42520523071289, "learning_rate": 1.2693877551020409e-05, "loss": 0.5075, "step": 18292 }, { "epoch": 74.66530612244898, "grad_norm": 26.582918167114258, "learning_rate": 1.2691836734693879e-05, "loss": 1.9552, "step": 18293 }, { "epoch": 74.66938775510204, "grad_norm": 29.004945755004883, "learning_rate": 1.2689795918367348e-05, "loss": 1.6174, "step": 18294 }, { "epoch": 74.6734693877551, "grad_norm": 37.51321792602539, "learning_rate": 1.2687755102040817e-05, "loss": 1.1938, "step": 18295 }, { "epoch": 74.67755102040816, "grad_norm": 2.4392361640930176, "learning_rate": 1.2685714285714287e-05, "loss": 0.0338, "step": 18296 }, { "epoch": 74.68163265306123, "grad_norm": 21.02518081665039, "learning_rate": 1.2683673469387754e-05, "loss": 2.1296, "step": 18297 }, { "epoch": 74.68571428571428, "grad_norm": 41.73831558227539, "learning_rate": 1.2681632653061227e-05, "loss": 0.5414, "step": 18298 }, { "epoch": 74.68979591836735, "grad_norm": 33.96097946166992, "learning_rate": 1.2679591836734694e-05, "loss": 0.5465, "step": 18299 }, { "epoch": 74.6938775510204, "grad_norm": 26.008493423461914, "learning_rate": 1.2677551020408162e-05, "loss": 1.2101, "step": 18300 }, { "epoch": 74.69795918367348, "grad_norm": 23.717920303344727, "learning_rate": 1.2675510204081633e-05, "loss": 2.2247, "step": 18301 }, { "epoch": 74.70204081632653, "grad_norm": 27.602205276489258, "learning_rate": 1.2673469387755102e-05, "loss": 1.6795, "step": 18302 }, { "epoch": 74.70612244897958, "grad_norm": 39.806461334228516, "learning_rate": 1.2671428571428572e-05, "loss": 1.3288, "step": 18303 }, { "epoch": 74.71020408163265, "grad_norm": 35.79029083251953, "learning_rate": 1.2669387755102041e-05, "loss": 0.3579, "step": 18304 }, { "epoch": 74.71428571428571, "grad_norm": 25.2593994140625, "learning_rate": 1.266734693877551e-05, "loss": 2.2678, "step": 18305 }, { "epoch": 74.71836734693878, "grad_norm": 46.89613723754883, "learning_rate": 1.266530612244898e-05, "loss": 0.7677, "step": 18306 }, { "epoch": 74.72244897959183, "grad_norm": 32.47652053833008, "learning_rate": 1.266326530612245e-05, "loss": 1.771, "step": 18307 }, { "epoch": 74.7265306122449, "grad_norm": 26.978206634521484, "learning_rate": 1.266122448979592e-05, "loss": 1.7688, "step": 18308 }, { "epoch": 74.73061224489796, "grad_norm": 27.874832153320312, "learning_rate": 1.2659183673469389e-05, "loss": 1.6198, "step": 18309 }, { "epoch": 74.73469387755102, "grad_norm": 23.705215454101562, "learning_rate": 1.2657142857142859e-05, "loss": 1.7097, "step": 18310 }, { "epoch": 74.73877551020408, "grad_norm": 42.66550827026367, "learning_rate": 1.2655102040816328e-05, "loss": 0.5039, "step": 18311 }, { "epoch": 74.74285714285715, "grad_norm": 35.60118103027344, "learning_rate": 1.2653061224489795e-05, "loss": 0.2761, "step": 18312 }, { "epoch": 74.7469387755102, "grad_norm": 32.40504455566406, "learning_rate": 1.2651020408163267e-05, "loss": 0.9812, "step": 18313 }, { "epoch": 74.75102040816327, "grad_norm": 30.766555786132812, "learning_rate": 1.2648979591836734e-05, "loss": 1.5572, "step": 18314 }, { "epoch": 74.75510204081633, "grad_norm": 26.879981994628906, "learning_rate": 1.2646938775510207e-05, "loss": 1.3959, "step": 18315 }, { "epoch": 74.7591836734694, "grad_norm": 43.06412887573242, "learning_rate": 1.2644897959183674e-05, "loss": 0.9905, "step": 18316 }, { "epoch": 74.76326530612245, "grad_norm": 44.02383804321289, "learning_rate": 1.2642857142857143e-05, "loss": 0.4546, "step": 18317 }, { "epoch": 74.7673469387755, "grad_norm": 34.164710998535156, "learning_rate": 1.2640816326530613e-05, "loss": 1.2485, "step": 18318 }, { "epoch": 74.77142857142857, "grad_norm": 52.45691680908203, "learning_rate": 1.2638775510204082e-05, "loss": 1.0837, "step": 18319 }, { "epoch": 74.77551020408163, "grad_norm": 24.980693817138672, "learning_rate": 1.2636734693877552e-05, "loss": 2.0725, "step": 18320 }, { "epoch": 74.7795918367347, "grad_norm": 43.492431640625, "learning_rate": 1.2634693877551021e-05, "loss": 0.937, "step": 18321 }, { "epoch": 74.78367346938775, "grad_norm": 27.941843032836914, "learning_rate": 1.263265306122449e-05, "loss": 1.5443, "step": 18322 }, { "epoch": 74.78775510204082, "grad_norm": 14.71590518951416, "learning_rate": 1.263061224489796e-05, "loss": 0.1324, "step": 18323 }, { "epoch": 74.79183673469387, "grad_norm": 27.44138526916504, "learning_rate": 1.2628571428571428e-05, "loss": 1.847, "step": 18324 }, { "epoch": 74.79591836734694, "grad_norm": 41.88212585449219, "learning_rate": 1.26265306122449e-05, "loss": 0.8614, "step": 18325 }, { "epoch": 74.8, "grad_norm": 33.77387619018555, "learning_rate": 1.2624489795918367e-05, "loss": 1.1534, "step": 18326 }, { "epoch": 74.80408163265307, "grad_norm": 31.488676071166992, "learning_rate": 1.2622448979591836e-05, "loss": 0.3271, "step": 18327 }, { "epoch": 74.80816326530612, "grad_norm": 39.249603271484375, "learning_rate": 1.2620408163265306e-05, "loss": 1.0097, "step": 18328 }, { "epoch": 74.81224489795919, "grad_norm": 20.08913803100586, "learning_rate": 1.2618367346938775e-05, "loss": 1.7644, "step": 18329 }, { "epoch": 74.81632653061224, "grad_norm": 32.14157485961914, "learning_rate": 1.2616326530612246e-05, "loss": 1.3873, "step": 18330 }, { "epoch": 74.82040816326531, "grad_norm": 35.62550735473633, "learning_rate": 1.2614285714285715e-05, "loss": 0.7974, "step": 18331 }, { "epoch": 74.82448979591837, "grad_norm": 29.685596466064453, "learning_rate": 1.2612244897959185e-05, "loss": 1.2447, "step": 18332 }, { "epoch": 74.82857142857142, "grad_norm": 25.494220733642578, "learning_rate": 1.2610204081632654e-05, "loss": 1.6506, "step": 18333 }, { "epoch": 74.83265306122449, "grad_norm": 20.466590881347656, "learning_rate": 1.2608163265306123e-05, "loss": 2.3376, "step": 18334 }, { "epoch": 74.83673469387755, "grad_norm": 28.743650436401367, "learning_rate": 1.2606122448979593e-05, "loss": 1.1479, "step": 18335 }, { "epoch": 74.84081632653061, "grad_norm": 34.7000732421875, "learning_rate": 1.2604081632653062e-05, "loss": 1.4121, "step": 18336 }, { "epoch": 74.84489795918367, "grad_norm": 35.89444351196289, "learning_rate": 1.2602040816326533e-05, "loss": 0.8615, "step": 18337 }, { "epoch": 74.84897959183674, "grad_norm": 29.58962631225586, "learning_rate": 1.2600000000000001e-05, "loss": 0.4758, "step": 18338 }, { "epoch": 74.85306122448979, "grad_norm": 26.04862403869629, "learning_rate": 1.2597959183673468e-05, "loss": 1.724, "step": 18339 }, { "epoch": 74.85714285714286, "grad_norm": 26.446393966674805, "learning_rate": 1.259591836734694e-05, "loss": 0.8998, "step": 18340 }, { "epoch": 74.86122448979592, "grad_norm": 38.94043731689453, "learning_rate": 1.2593877551020408e-05, "loss": 0.8899, "step": 18341 }, { "epoch": 74.86530612244898, "grad_norm": 28.58487319946289, "learning_rate": 1.259183673469388e-05, "loss": 1.5111, "step": 18342 }, { "epoch": 74.86938775510204, "grad_norm": 36.911155700683594, "learning_rate": 1.2589795918367347e-05, "loss": 0.4702, "step": 18343 }, { "epoch": 74.87346938775511, "grad_norm": 38.4452018737793, "learning_rate": 1.2587755102040816e-05, "loss": 1.0235, "step": 18344 }, { "epoch": 74.87755102040816, "grad_norm": 31.203914642333984, "learning_rate": 1.2585714285714286e-05, "loss": 1.5354, "step": 18345 }, { "epoch": 74.88163265306123, "grad_norm": 38.865081787109375, "learning_rate": 1.2583673469387755e-05, "loss": 0.9108, "step": 18346 }, { "epoch": 74.88571428571429, "grad_norm": 25.19491958618164, "learning_rate": 1.2581632653061226e-05, "loss": 2.0551, "step": 18347 }, { "epoch": 74.88979591836734, "grad_norm": 16.19626808166504, "learning_rate": 1.2579591836734695e-05, "loss": 0.12, "step": 18348 }, { "epoch": 74.89387755102041, "grad_norm": 27.898366928100586, "learning_rate": 1.2577551020408162e-05, "loss": 1.5902, "step": 18349 }, { "epoch": 74.89795918367346, "grad_norm": 42.574920654296875, "learning_rate": 1.2575510204081634e-05, "loss": 1.2328, "step": 18350 }, { "epoch": 74.90204081632653, "grad_norm": 30.041385650634766, "learning_rate": 1.2573469387755101e-05, "loss": 1.4486, "step": 18351 }, { "epoch": 74.90612244897959, "grad_norm": 24.27577781677246, "learning_rate": 1.2571428571428573e-05, "loss": 1.6156, "step": 18352 }, { "epoch": 74.91020408163266, "grad_norm": 34.90257263183594, "learning_rate": 1.256938775510204e-05, "loss": 0.8574, "step": 18353 }, { "epoch": 74.91428571428571, "grad_norm": 31.443706512451172, "learning_rate": 1.2567346938775513e-05, "loss": 0.8343, "step": 18354 }, { "epoch": 74.91836734693878, "grad_norm": 33.94536590576172, "learning_rate": 1.256530612244898e-05, "loss": 1.1678, "step": 18355 }, { "epoch": 74.92244897959183, "grad_norm": 36.98469543457031, "learning_rate": 1.2563265306122449e-05, "loss": 0.5872, "step": 18356 }, { "epoch": 74.9265306122449, "grad_norm": 23.22099494934082, "learning_rate": 1.2561224489795919e-05, "loss": 2.2563, "step": 18357 }, { "epoch": 74.93061224489796, "grad_norm": 36.98017120361328, "learning_rate": 1.2559183673469388e-05, "loss": 0.7607, "step": 18358 }, { "epoch": 74.93469387755103, "grad_norm": 31.42034149169922, "learning_rate": 1.2557142857142858e-05, "loss": 0.7889, "step": 18359 }, { "epoch": 74.93877551020408, "grad_norm": 31.209619522094727, "learning_rate": 1.2555102040816327e-05, "loss": 0.3593, "step": 18360 }, { "epoch": 74.94285714285714, "grad_norm": 32.464942932128906, "learning_rate": 1.2553061224489796e-05, "loss": 0.7722, "step": 18361 }, { "epoch": 74.9469387755102, "grad_norm": 45.29400634765625, "learning_rate": 1.2551020408163267e-05, "loss": 0.5344, "step": 18362 }, { "epoch": 74.95102040816326, "grad_norm": 17.866640090942383, "learning_rate": 1.2548979591836735e-05, "loss": 2.7424, "step": 18363 }, { "epoch": 74.95510204081633, "grad_norm": 38.97431945800781, "learning_rate": 1.2546938775510206e-05, "loss": 0.6283, "step": 18364 }, { "epoch": 74.95918367346938, "grad_norm": 37.729286193847656, "learning_rate": 1.2544897959183675e-05, "loss": 1.339, "step": 18365 }, { "epoch": 74.96326530612245, "grad_norm": 43.301029205322266, "learning_rate": 1.2542857142857142e-05, "loss": 0.734, "step": 18366 }, { "epoch": 74.9673469387755, "grad_norm": 27.458816528320312, "learning_rate": 1.2540816326530614e-05, "loss": 0.1861, "step": 18367 }, { "epoch": 74.97142857142858, "grad_norm": 20.86372947692871, "learning_rate": 1.2538775510204081e-05, "loss": 2.1833, "step": 18368 }, { "epoch": 74.97551020408163, "grad_norm": 24.211307525634766, "learning_rate": 1.2536734693877553e-05, "loss": 1.6822, "step": 18369 }, { "epoch": 74.9795918367347, "grad_norm": 35.51274108886719, "learning_rate": 1.253469387755102e-05, "loss": 1.2182, "step": 18370 }, { "epoch": 74.98367346938775, "grad_norm": 43.00797653198242, "learning_rate": 1.253265306122449e-05, "loss": 1.1673, "step": 18371 }, { "epoch": 74.98775510204082, "grad_norm": 44.82099151611328, "learning_rate": 1.253061224489796e-05, "loss": 1.0486, "step": 18372 }, { "epoch": 74.99183673469388, "grad_norm": 25.626850128173828, "learning_rate": 1.2528571428571429e-05, "loss": 1.7045, "step": 18373 }, { "epoch": 74.99591836734695, "grad_norm": 39.4273681640625, "learning_rate": 1.25265306122449e-05, "loss": 0.4785, "step": 18374 }, { "epoch": 75.0, "grad_norm": 25.42376708984375, "learning_rate": 1.2524489795918368e-05, "loss": 0.2356, "step": 18375 }, { "epoch": 75.00408163265305, "grad_norm": 39.6469841003418, "learning_rate": 1.2522448979591839e-05, "loss": 1.1225, "step": 18376 }, { "epoch": 75.00816326530612, "grad_norm": 31.82373809814453, "learning_rate": 1.2520408163265307e-05, "loss": 0.9496, "step": 18377 }, { "epoch": 75.01224489795918, "grad_norm": 30.68246841430664, "learning_rate": 1.2518367346938774e-05, "loss": 0.3892, "step": 18378 }, { "epoch": 75.01632653061225, "grad_norm": 43.286563873291016, "learning_rate": 1.2516326530612247e-05, "loss": 0.3247, "step": 18379 }, { "epoch": 75.0204081632653, "grad_norm": 29.50999641418457, "learning_rate": 1.2514285714285714e-05, "loss": 1.0482, "step": 18380 }, { "epoch": 75.02448979591837, "grad_norm": 26.712478637695312, "learning_rate": 1.2512244897959186e-05, "loss": 1.5995, "step": 18381 }, { "epoch": 75.02857142857142, "grad_norm": 40.18388748168945, "learning_rate": 1.2510204081632653e-05, "loss": 0.4489, "step": 18382 }, { "epoch": 75.0326530612245, "grad_norm": 35.36701583862305, "learning_rate": 1.2508163265306122e-05, "loss": 0.5766, "step": 18383 }, { "epoch": 75.03673469387755, "grad_norm": 31.593399047851562, "learning_rate": 1.2506122448979592e-05, "loss": 0.3616, "step": 18384 }, { "epoch": 75.04081632653062, "grad_norm": 33.24531173706055, "learning_rate": 1.2504081632653061e-05, "loss": 0.2936, "step": 18385 }, { "epoch": 75.04489795918367, "grad_norm": 34.87917709350586, "learning_rate": 1.2502040816326532e-05, "loss": 1.4555, "step": 18386 }, { "epoch": 75.04897959183674, "grad_norm": 28.69843864440918, "learning_rate": 1.25e-05, "loss": 1.1578, "step": 18387 }, { "epoch": 75.0530612244898, "grad_norm": 43.0860595703125, "learning_rate": 1.249795918367347e-05, "loss": 0.5445, "step": 18388 }, { "epoch": 75.05714285714286, "grad_norm": 35.233158111572266, "learning_rate": 1.249591836734694e-05, "loss": 0.9475, "step": 18389 }, { "epoch": 75.06122448979592, "grad_norm": 38.25835037231445, "learning_rate": 1.2493877551020409e-05, "loss": 0.7653, "step": 18390 }, { "epoch": 75.06530612244897, "grad_norm": 35.58351516723633, "learning_rate": 1.2491836734693878e-05, "loss": 1.2475, "step": 18391 }, { "epoch": 75.06938775510204, "grad_norm": 24.192827224731445, "learning_rate": 1.2489795918367348e-05, "loss": 1.6089, "step": 18392 }, { "epoch": 75.0734693877551, "grad_norm": 21.48030662536621, "learning_rate": 1.2487755102040817e-05, "loss": 1.4277, "step": 18393 }, { "epoch": 75.07755102040817, "grad_norm": 25.993518829345703, "learning_rate": 1.2485714285714287e-05, "loss": 1.6858, "step": 18394 }, { "epoch": 75.08163265306122, "grad_norm": 33.47483825683594, "learning_rate": 1.2483673469387756e-05, "loss": 1.0014, "step": 18395 }, { "epoch": 75.08571428571429, "grad_norm": 30.12257957458496, "learning_rate": 1.2481632653061225e-05, "loss": 1.5504, "step": 18396 }, { "epoch": 75.08979591836734, "grad_norm": 24.579225540161133, "learning_rate": 1.2479591836734694e-05, "loss": 0.1662, "step": 18397 }, { "epoch": 75.09387755102041, "grad_norm": 26.488048553466797, "learning_rate": 1.2477551020408164e-05, "loss": 1.5473, "step": 18398 }, { "epoch": 75.09795918367347, "grad_norm": 30.54543113708496, "learning_rate": 1.2475510204081633e-05, "loss": 0.7363, "step": 18399 }, { "epoch": 75.10204081632654, "grad_norm": 43.91975784301758, "learning_rate": 1.2473469387755104e-05, "loss": 0.8895, "step": 18400 }, { "epoch": 75.10612244897959, "grad_norm": 55.191097259521484, "learning_rate": 1.2471428571428571e-05, "loss": 0.482, "step": 18401 }, { "epoch": 75.11020408163266, "grad_norm": 29.902650833129883, "learning_rate": 1.2469387755102041e-05, "loss": 0.6985, "step": 18402 }, { "epoch": 75.11428571428571, "grad_norm": 28.33390235900879, "learning_rate": 1.246734693877551e-05, "loss": 1.6847, "step": 18403 }, { "epoch": 75.11836734693877, "grad_norm": 20.251745223999023, "learning_rate": 1.246530612244898e-05, "loss": 1.9926, "step": 18404 }, { "epoch": 75.12244897959184, "grad_norm": 32.11642837524414, "learning_rate": 1.246326530612245e-05, "loss": 0.4432, "step": 18405 }, { "epoch": 75.12653061224489, "grad_norm": 24.784788131713867, "learning_rate": 1.246122448979592e-05, "loss": 1.776, "step": 18406 }, { "epoch": 75.13061224489796, "grad_norm": 42.96361541748047, "learning_rate": 1.2459183673469387e-05, "loss": 0.7706, "step": 18407 }, { "epoch": 75.13469387755102, "grad_norm": 36.80760192871094, "learning_rate": 1.2457142857142858e-05, "loss": 1.0023, "step": 18408 }, { "epoch": 75.13877551020408, "grad_norm": 18.10572624206543, "learning_rate": 1.2455102040816326e-05, "loss": 2.1598, "step": 18409 }, { "epoch": 75.14285714285714, "grad_norm": 30.91506004333496, "learning_rate": 1.2453061224489797e-05, "loss": 1.2861, "step": 18410 }, { "epoch": 75.14693877551021, "grad_norm": 31.02870750427246, "learning_rate": 1.2451020408163266e-05, "loss": 1.4547, "step": 18411 }, { "epoch": 75.15102040816326, "grad_norm": 27.85061264038086, "learning_rate": 1.2448979591836735e-05, "loss": 1.5078, "step": 18412 }, { "epoch": 75.15510204081633, "grad_norm": 30.52571678161621, "learning_rate": 1.2446938775510205e-05, "loss": 0.5043, "step": 18413 }, { "epoch": 75.15918367346939, "grad_norm": 29.02681541442871, "learning_rate": 1.2444897959183674e-05, "loss": 1.3011, "step": 18414 }, { "epoch": 75.16326530612245, "grad_norm": 16.59699058532715, "learning_rate": 1.2442857142857143e-05, "loss": 2.6783, "step": 18415 }, { "epoch": 75.16734693877551, "grad_norm": 23.835803985595703, "learning_rate": 1.2440816326530613e-05, "loss": 2.0866, "step": 18416 }, { "epoch": 75.17142857142858, "grad_norm": 38.582088470458984, "learning_rate": 1.2438775510204082e-05, "loss": 1.5775, "step": 18417 }, { "epoch": 75.17551020408163, "grad_norm": 38.195926666259766, "learning_rate": 1.2436734693877551e-05, "loss": 0.7857, "step": 18418 }, { "epoch": 75.17959183673469, "grad_norm": 34.79793167114258, "learning_rate": 1.2434693877551021e-05, "loss": 0.6285, "step": 18419 }, { "epoch": 75.18367346938776, "grad_norm": 39.226348876953125, "learning_rate": 1.243265306122449e-05, "loss": 0.4817, "step": 18420 }, { "epoch": 75.18775510204081, "grad_norm": 31.99001693725586, "learning_rate": 1.243061224489796e-05, "loss": 0.9438, "step": 18421 }, { "epoch": 75.19183673469388, "grad_norm": 24.836097717285156, "learning_rate": 1.242857142857143e-05, "loss": 1.6261, "step": 18422 }, { "epoch": 75.19591836734693, "grad_norm": 38.36240005493164, "learning_rate": 1.2426530612244898e-05, "loss": 0.8818, "step": 18423 }, { "epoch": 75.2, "grad_norm": 46.178096771240234, "learning_rate": 1.2424489795918367e-05, "loss": 0.897, "step": 18424 }, { "epoch": 75.20408163265306, "grad_norm": 27.932260513305664, "learning_rate": 1.2422448979591838e-05, "loss": 1.4371, "step": 18425 }, { "epoch": 75.20816326530613, "grad_norm": 27.2803955078125, "learning_rate": 1.2420408163265307e-05, "loss": 1.0919, "step": 18426 }, { "epoch": 75.21224489795918, "grad_norm": 11.140819549560547, "learning_rate": 1.2418367346938777e-05, "loss": 3.0646, "step": 18427 }, { "epoch": 75.21632653061225, "grad_norm": 34.31413269042969, "learning_rate": 1.2416326530612246e-05, "loss": 1.5205, "step": 18428 }, { "epoch": 75.2204081632653, "grad_norm": 23.146377563476562, "learning_rate": 1.2414285714285715e-05, "loss": 2.3781, "step": 18429 }, { "epoch": 75.22448979591837, "grad_norm": 38.158424377441406, "learning_rate": 1.2412244897959184e-05, "loss": 1.0868, "step": 18430 }, { "epoch": 75.22857142857143, "grad_norm": 28.176916122436523, "learning_rate": 1.2410204081632654e-05, "loss": 1.3842, "step": 18431 }, { "epoch": 75.2326530612245, "grad_norm": 33.33687210083008, "learning_rate": 1.2408163265306123e-05, "loss": 0.4578, "step": 18432 }, { "epoch": 75.23673469387755, "grad_norm": 32.894962310791016, "learning_rate": 1.2406122448979593e-05, "loss": 1.1026, "step": 18433 }, { "epoch": 75.2408163265306, "grad_norm": 26.291645050048828, "learning_rate": 1.240408163265306e-05, "loss": 0.3425, "step": 18434 }, { "epoch": 75.24489795918367, "grad_norm": 42.10120391845703, "learning_rate": 1.2402040816326531e-05, "loss": 1.2638, "step": 18435 }, { "epoch": 75.24897959183673, "grad_norm": 21.123313903808594, "learning_rate": 1.24e-05, "loss": 2.182, "step": 18436 }, { "epoch": 75.2530612244898, "grad_norm": 25.71392250061035, "learning_rate": 1.239795918367347e-05, "loss": 0.2301, "step": 18437 }, { "epoch": 75.25714285714285, "grad_norm": 35.961692810058594, "learning_rate": 1.239591836734694e-05, "loss": 0.8431, "step": 18438 }, { "epoch": 75.26122448979592, "grad_norm": 19.030593872070312, "learning_rate": 1.239387755102041e-05, "loss": 2.1509, "step": 18439 }, { "epoch": 75.26530612244898, "grad_norm": 50.14082717895508, "learning_rate": 1.2391836734693877e-05, "loss": 0.4622, "step": 18440 }, { "epoch": 75.26938775510204, "grad_norm": 39.38096237182617, "learning_rate": 1.2389795918367347e-05, "loss": 0.7679, "step": 18441 }, { "epoch": 75.2734693877551, "grad_norm": 32.896766662597656, "learning_rate": 1.2387755102040816e-05, "loss": 0.2959, "step": 18442 }, { "epoch": 75.27755102040817, "grad_norm": 33.09515380859375, "learning_rate": 1.2385714285714287e-05, "loss": 0.6707, "step": 18443 }, { "epoch": 75.28163265306122, "grad_norm": 44.487342834472656, "learning_rate": 1.2383673469387756e-05, "loss": 0.7321, "step": 18444 }, { "epoch": 75.28571428571429, "grad_norm": 11.046504020690918, "learning_rate": 1.2381632653061224e-05, "loss": 0.1049, "step": 18445 }, { "epoch": 75.28979591836735, "grad_norm": 38.64208984375, "learning_rate": 1.2379591836734695e-05, "loss": 0.5036, "step": 18446 }, { "epoch": 75.29387755102042, "grad_norm": 32.86423873901367, "learning_rate": 1.2377551020408164e-05, "loss": 1.3706, "step": 18447 }, { "epoch": 75.29795918367347, "grad_norm": 27.08598518371582, "learning_rate": 1.2375510204081634e-05, "loss": 1.7568, "step": 18448 }, { "epoch": 75.30204081632652, "grad_norm": 34.01131057739258, "learning_rate": 1.2373469387755103e-05, "loss": 1.6448, "step": 18449 }, { "epoch": 75.3061224489796, "grad_norm": 36.48422622680664, "learning_rate": 1.2371428571428574e-05, "loss": 1.0097, "step": 18450 }, { "epoch": 75.31020408163265, "grad_norm": 32.48285675048828, "learning_rate": 1.236938775510204e-05, "loss": 0.8789, "step": 18451 }, { "epoch": 75.31428571428572, "grad_norm": 43.96036148071289, "learning_rate": 1.2367346938775511e-05, "loss": 0.6616, "step": 18452 }, { "epoch": 75.31836734693877, "grad_norm": 31.407869338989258, "learning_rate": 1.236530612244898e-05, "loss": 1.3062, "step": 18453 }, { "epoch": 75.32244897959184, "grad_norm": 27.162405014038086, "learning_rate": 1.236326530612245e-05, "loss": 1.7665, "step": 18454 }, { "epoch": 75.3265306122449, "grad_norm": 30.58188247680664, "learning_rate": 1.236122448979592e-05, "loss": 0.5648, "step": 18455 }, { "epoch": 75.33061224489796, "grad_norm": 35.619873046875, "learning_rate": 1.2359183673469388e-05, "loss": 0.4609, "step": 18456 }, { "epoch": 75.33469387755102, "grad_norm": 27.304967880249023, "learning_rate": 1.2357142857142857e-05, "loss": 1.5769, "step": 18457 }, { "epoch": 75.33877551020409, "grad_norm": 20.812166213989258, "learning_rate": 1.2355102040816327e-05, "loss": 1.7342, "step": 18458 }, { "epoch": 75.34285714285714, "grad_norm": 30.790956497192383, "learning_rate": 1.2353061224489796e-05, "loss": 1.765, "step": 18459 }, { "epoch": 75.34693877551021, "grad_norm": 30.865751266479492, "learning_rate": 1.2351020408163267e-05, "loss": 1.4844, "step": 18460 }, { "epoch": 75.35102040816327, "grad_norm": 35.74713897705078, "learning_rate": 1.2348979591836736e-05, "loss": 1.1801, "step": 18461 }, { "epoch": 75.35510204081632, "grad_norm": 35.36039352416992, "learning_rate": 1.2346938775510204e-05, "loss": 1.2686, "step": 18462 }, { "epoch": 75.35918367346939, "grad_norm": 35.034385681152344, "learning_rate": 1.2344897959183673e-05, "loss": 0.2986, "step": 18463 }, { "epoch": 75.36326530612244, "grad_norm": 36.90501022338867, "learning_rate": 1.2342857142857144e-05, "loss": 0.6761, "step": 18464 }, { "epoch": 75.36734693877551, "grad_norm": 21.87041664123535, "learning_rate": 1.2340816326530613e-05, "loss": 2.1151, "step": 18465 }, { "epoch": 75.37142857142857, "grad_norm": 34.45770263671875, "learning_rate": 1.2338775510204083e-05, "loss": 1.1905, "step": 18466 }, { "epoch": 75.37551020408164, "grad_norm": 34.54051971435547, "learning_rate": 1.233673469387755e-05, "loss": 0.7404, "step": 18467 }, { "epoch": 75.37959183673469, "grad_norm": 31.40477180480957, "learning_rate": 1.233469387755102e-05, "loss": 1.3367, "step": 18468 }, { "epoch": 75.38367346938776, "grad_norm": 34.3898811340332, "learning_rate": 1.233265306122449e-05, "loss": 0.1401, "step": 18469 }, { "epoch": 75.38775510204081, "grad_norm": 18.866397857666016, "learning_rate": 1.233061224489796e-05, "loss": 0.1243, "step": 18470 }, { "epoch": 75.39183673469388, "grad_norm": 38.16663360595703, "learning_rate": 1.2328571428571429e-05, "loss": 0.8435, "step": 18471 }, { "epoch": 75.39591836734694, "grad_norm": 29.988569259643555, "learning_rate": 1.2326530612244898e-05, "loss": 1.3308, "step": 18472 }, { "epoch": 75.4, "grad_norm": 27.7139892578125, "learning_rate": 1.2324489795918368e-05, "loss": 1.5046, "step": 18473 }, { "epoch": 75.40408163265306, "grad_norm": 36.7675666809082, "learning_rate": 1.2322448979591837e-05, "loss": 1.29, "step": 18474 }, { "epoch": 75.40816326530613, "grad_norm": 34.64764404296875, "learning_rate": 1.2320408163265308e-05, "loss": 0.8876, "step": 18475 }, { "epoch": 75.41224489795918, "grad_norm": 27.348161697387695, "learning_rate": 1.2318367346938776e-05, "loss": 1.7433, "step": 18476 }, { "epoch": 75.41632653061224, "grad_norm": 32.8865966796875, "learning_rate": 1.2316326530612247e-05, "loss": 1.2074, "step": 18477 }, { "epoch": 75.42040816326531, "grad_norm": 35.54623794555664, "learning_rate": 1.2314285714285714e-05, "loss": 0.9116, "step": 18478 }, { "epoch": 75.42448979591836, "grad_norm": 26.317575454711914, "learning_rate": 1.2312244897959185e-05, "loss": 2.125, "step": 18479 }, { "epoch": 75.42857142857143, "grad_norm": 27.526647567749023, "learning_rate": 1.2310204081632653e-05, "loss": 2.0213, "step": 18480 }, { "epoch": 75.43265306122449, "grad_norm": 18.123531341552734, "learning_rate": 1.2308163265306124e-05, "loss": 0.1269, "step": 18481 }, { "epoch": 75.43673469387755, "grad_norm": 25.01050567626953, "learning_rate": 1.2306122448979593e-05, "loss": 2.0533, "step": 18482 }, { "epoch": 75.44081632653061, "grad_norm": 28.701234817504883, "learning_rate": 1.2304081632653061e-05, "loss": 1.6468, "step": 18483 }, { "epoch": 75.44489795918368, "grad_norm": 37.196720123291016, "learning_rate": 1.230204081632653e-05, "loss": 0.3792, "step": 18484 }, { "epoch": 75.44897959183673, "grad_norm": 26.361970901489258, "learning_rate": 1.23e-05, "loss": 0.4197, "step": 18485 }, { "epoch": 75.4530612244898, "grad_norm": 27.04402732849121, "learning_rate": 1.229795918367347e-05, "loss": 0.3126, "step": 18486 }, { "epoch": 75.45714285714286, "grad_norm": 39.258262634277344, "learning_rate": 1.229591836734694e-05, "loss": 0.7238, "step": 18487 }, { "epoch": 75.46122448979592, "grad_norm": 30.2332706451416, "learning_rate": 1.2293877551020409e-05, "loss": 0.3054, "step": 18488 }, { "epoch": 75.46530612244898, "grad_norm": 19.01336097717285, "learning_rate": 1.2291836734693878e-05, "loss": 0.1266, "step": 18489 }, { "epoch": 75.46938775510205, "grad_norm": 34.12726974487305, "learning_rate": 1.2289795918367347e-05, "loss": 0.4609, "step": 18490 }, { "epoch": 75.4734693877551, "grad_norm": 29.75006675720215, "learning_rate": 1.2287755102040817e-05, "loss": 1.5501, "step": 18491 }, { "epoch": 75.47755102040816, "grad_norm": 29.015356063842773, "learning_rate": 1.2285714285714286e-05, "loss": 1.354, "step": 18492 }, { "epoch": 75.48163265306123, "grad_norm": 33.90206527709961, "learning_rate": 1.2283673469387756e-05, "loss": 1.4103, "step": 18493 }, { "epoch": 75.48571428571428, "grad_norm": 27.5728816986084, "learning_rate": 1.2281632653061224e-05, "loss": 0.932, "step": 18494 }, { "epoch": 75.48979591836735, "grad_norm": 24.52958106994629, "learning_rate": 1.2279591836734694e-05, "loss": 0.8485, "step": 18495 }, { "epoch": 75.4938775510204, "grad_norm": 28.078433990478516, "learning_rate": 1.2277551020408163e-05, "loss": 0.3653, "step": 18496 }, { "epoch": 75.49795918367347, "grad_norm": 32.51041793823242, "learning_rate": 1.2275510204081633e-05, "loss": 1.5168, "step": 18497 }, { "epoch": 75.50204081632653, "grad_norm": 40.36273956298828, "learning_rate": 1.2273469387755102e-05, "loss": 1.469, "step": 18498 }, { "epoch": 75.5061224489796, "grad_norm": 25.195838928222656, "learning_rate": 1.2271428571428573e-05, "loss": 0.1465, "step": 18499 }, { "epoch": 75.51020408163265, "grad_norm": 31.932544708251953, "learning_rate": 1.2269387755102042e-05, "loss": 1.1239, "step": 18500 }, { "epoch": 75.51428571428572, "grad_norm": 48.87790298461914, "learning_rate": 1.226734693877551e-05, "loss": 0.5704, "step": 18501 }, { "epoch": 75.51836734693877, "grad_norm": 18.60498809814453, "learning_rate": 1.2265306122448981e-05, "loss": 0.1215, "step": 18502 }, { "epoch": 75.52244897959184, "grad_norm": 37.738338470458984, "learning_rate": 1.226326530612245e-05, "loss": 0.4343, "step": 18503 }, { "epoch": 75.5265306122449, "grad_norm": 41.21022033691406, "learning_rate": 1.226122448979592e-05, "loss": 1.1579, "step": 18504 }, { "epoch": 75.53061224489795, "grad_norm": 24.23767852783203, "learning_rate": 1.2259183673469387e-05, "loss": 1.6386, "step": 18505 }, { "epoch": 75.53469387755102, "grad_norm": 27.616455078125, "learning_rate": 1.2257142857142858e-05, "loss": 1.3864, "step": 18506 }, { "epoch": 75.53877551020408, "grad_norm": 31.55421257019043, "learning_rate": 1.2255102040816327e-05, "loss": 0.6048, "step": 18507 }, { "epoch": 75.54285714285714, "grad_norm": 32.13665771484375, "learning_rate": 1.2253061224489797e-05, "loss": 1.8187, "step": 18508 }, { "epoch": 75.5469387755102, "grad_norm": 55.44786071777344, "learning_rate": 1.2251020408163266e-05, "loss": 0.675, "step": 18509 }, { "epoch": 75.55102040816327, "grad_norm": 34.48698425292969, "learning_rate": 1.2248979591836737e-05, "loss": 1.0377, "step": 18510 }, { "epoch": 75.55510204081632, "grad_norm": 39.29429244995117, "learning_rate": 1.2246938775510204e-05, "loss": 0.7157, "step": 18511 }, { "epoch": 75.55918367346939, "grad_norm": 45.83561706542969, "learning_rate": 1.2244897959183674e-05, "loss": 1.241, "step": 18512 }, { "epoch": 75.56326530612245, "grad_norm": 40.57990264892578, "learning_rate": 1.2242857142857143e-05, "loss": 1.0028, "step": 18513 }, { "epoch": 75.56734693877551, "grad_norm": 28.574626922607422, "learning_rate": 1.2240816326530614e-05, "loss": 1.6727, "step": 18514 }, { "epoch": 75.57142857142857, "grad_norm": 37.41630172729492, "learning_rate": 1.2238775510204082e-05, "loss": 1.4249, "step": 18515 }, { "epoch": 75.57551020408164, "grad_norm": 38.737728118896484, "learning_rate": 1.2236734693877551e-05, "loss": 0.8705, "step": 18516 }, { "epoch": 75.57959183673469, "grad_norm": 44.24790954589844, "learning_rate": 1.223469387755102e-05, "loss": 0.7261, "step": 18517 }, { "epoch": 75.58367346938776, "grad_norm": 33.099754333496094, "learning_rate": 1.223265306122449e-05, "loss": 0.429, "step": 18518 }, { "epoch": 75.58775510204082, "grad_norm": 42.84606170654297, "learning_rate": 1.223061224489796e-05, "loss": 1.3672, "step": 18519 }, { "epoch": 75.59183673469387, "grad_norm": 20.977357864379883, "learning_rate": 1.222857142857143e-05, "loss": 0.2007, "step": 18520 }, { "epoch": 75.59591836734694, "grad_norm": 21.566715240478516, "learning_rate": 1.2226530612244899e-05, "loss": 2.0968, "step": 18521 }, { "epoch": 75.6, "grad_norm": 48.5921516418457, "learning_rate": 1.2224489795918367e-05, "loss": 1.4608, "step": 18522 }, { "epoch": 75.60408163265306, "grad_norm": 39.95259094238281, "learning_rate": 1.2222448979591836e-05, "loss": 0.799, "step": 18523 }, { "epoch": 75.60816326530612, "grad_norm": 19.4924373626709, "learning_rate": 1.2220408163265307e-05, "loss": 2.5058, "step": 18524 }, { "epoch": 75.61224489795919, "grad_norm": 38.567386627197266, "learning_rate": 1.2218367346938776e-05, "loss": 1.1552, "step": 18525 }, { "epoch": 75.61632653061224, "grad_norm": 26.229114532470703, "learning_rate": 1.2216326530612246e-05, "loss": 1.5798, "step": 18526 }, { "epoch": 75.62040816326531, "grad_norm": 32.670555114746094, "learning_rate": 1.2214285714285715e-05, "loss": 1.277, "step": 18527 }, { "epoch": 75.62448979591836, "grad_norm": 36.746124267578125, "learning_rate": 1.2212244897959184e-05, "loss": 1.2698, "step": 18528 }, { "epoch": 75.62857142857143, "grad_norm": 26.19192123413086, "learning_rate": 1.2210204081632654e-05, "loss": 1.46, "step": 18529 }, { "epoch": 75.63265306122449, "grad_norm": 14.589000701904297, "learning_rate": 1.2208163265306123e-05, "loss": 2.3818, "step": 18530 }, { "epoch": 75.63673469387756, "grad_norm": 32.09377670288086, "learning_rate": 1.2206122448979594e-05, "loss": 0.8205, "step": 18531 }, { "epoch": 75.64081632653061, "grad_norm": 45.795528411865234, "learning_rate": 1.2204081632653062e-05, "loss": 1.107, "step": 18532 }, { "epoch": 75.64489795918368, "grad_norm": 34.913204193115234, "learning_rate": 1.2202040816326531e-05, "loss": 1.2165, "step": 18533 }, { "epoch": 75.64897959183673, "grad_norm": 8.116737365722656, "learning_rate": 1.22e-05, "loss": 0.0725, "step": 18534 }, { "epoch": 75.65306122448979, "grad_norm": 33.51398468017578, "learning_rate": 1.219795918367347e-05, "loss": 1.4941, "step": 18535 }, { "epoch": 75.65714285714286, "grad_norm": 34.87560272216797, "learning_rate": 1.219591836734694e-05, "loss": 0.9668, "step": 18536 }, { "epoch": 75.66122448979591, "grad_norm": 26.50151824951172, "learning_rate": 1.219387755102041e-05, "loss": 1.5887, "step": 18537 }, { "epoch": 75.66530612244898, "grad_norm": 26.295751571655273, "learning_rate": 1.2191836734693877e-05, "loss": 2.0894, "step": 18538 }, { "epoch": 75.66938775510204, "grad_norm": 34.21438980102539, "learning_rate": 1.2189795918367348e-05, "loss": 1.4413, "step": 18539 }, { "epoch": 75.6734693877551, "grad_norm": 31.28879165649414, "learning_rate": 1.2187755102040816e-05, "loss": 0.5043, "step": 18540 }, { "epoch": 75.67755102040816, "grad_norm": 35.55325698852539, "learning_rate": 1.2185714285714287e-05, "loss": 1.0764, "step": 18541 }, { "epoch": 75.68163265306123, "grad_norm": 28.03182029724121, "learning_rate": 1.2183673469387756e-05, "loss": 1.5687, "step": 18542 }, { "epoch": 75.68571428571428, "grad_norm": 24.08534049987793, "learning_rate": 1.2181632653061226e-05, "loss": 0.1727, "step": 18543 }, { "epoch": 75.68979591836735, "grad_norm": 32.49620819091797, "learning_rate": 1.2179591836734693e-05, "loss": 1.9652, "step": 18544 }, { "epoch": 75.6938775510204, "grad_norm": 21.67719268798828, "learning_rate": 1.2177551020408164e-05, "loss": 2.1905, "step": 18545 }, { "epoch": 75.69795918367348, "grad_norm": 33.650455474853516, "learning_rate": 1.2175510204081633e-05, "loss": 1.1516, "step": 18546 }, { "epoch": 75.70204081632653, "grad_norm": 32.898128509521484, "learning_rate": 1.2173469387755103e-05, "loss": 0.9558, "step": 18547 }, { "epoch": 75.70612244897958, "grad_norm": 37.2633056640625, "learning_rate": 1.2171428571428572e-05, "loss": 0.7042, "step": 18548 }, { "epoch": 75.71020408163265, "grad_norm": 67.78143310546875, "learning_rate": 1.216938775510204e-05, "loss": 1.0097, "step": 18549 }, { "epoch": 75.71428571428571, "grad_norm": 49.08584213256836, "learning_rate": 1.216734693877551e-05, "loss": 0.3752, "step": 18550 }, { "epoch": 75.71836734693878, "grad_norm": 46.37417984008789, "learning_rate": 1.216530612244898e-05, "loss": 0.3969, "step": 18551 }, { "epoch": 75.72244897959183, "grad_norm": 36.57964324951172, "learning_rate": 1.2163265306122449e-05, "loss": 0.5079, "step": 18552 }, { "epoch": 75.7265306122449, "grad_norm": 31.939544677734375, "learning_rate": 1.216122448979592e-05, "loss": 0.6744, "step": 18553 }, { "epoch": 75.73061224489796, "grad_norm": 38.5056037902832, "learning_rate": 1.2159183673469388e-05, "loss": 0.5645, "step": 18554 }, { "epoch": 75.73469387755102, "grad_norm": 39.885345458984375, "learning_rate": 1.2157142857142857e-05, "loss": 0.7375, "step": 18555 }, { "epoch": 75.73877551020408, "grad_norm": 43.08781051635742, "learning_rate": 1.2155102040816328e-05, "loss": 0.6524, "step": 18556 }, { "epoch": 75.74285714285715, "grad_norm": 40.524959564208984, "learning_rate": 1.2153061224489796e-05, "loss": 0.4382, "step": 18557 }, { "epoch": 75.7469387755102, "grad_norm": 30.172739028930664, "learning_rate": 1.2151020408163265e-05, "loss": 1.398, "step": 18558 }, { "epoch": 75.75102040816327, "grad_norm": 31.998281478881836, "learning_rate": 1.2148979591836736e-05, "loss": 0.6394, "step": 18559 }, { "epoch": 75.75510204081633, "grad_norm": 24.227859497070312, "learning_rate": 1.2146938775510205e-05, "loss": 1.9988, "step": 18560 }, { "epoch": 75.7591836734694, "grad_norm": 18.113285064697266, "learning_rate": 1.2144897959183673e-05, "loss": 2.2703, "step": 18561 }, { "epoch": 75.76326530612245, "grad_norm": 25.789125442504883, "learning_rate": 1.2142857142857144e-05, "loss": 0.1822, "step": 18562 }, { "epoch": 75.7673469387755, "grad_norm": 28.344545364379883, "learning_rate": 1.2140816326530613e-05, "loss": 1.2766, "step": 18563 }, { "epoch": 75.77142857142857, "grad_norm": 22.9359130859375, "learning_rate": 1.2138775510204083e-05, "loss": 1.7495, "step": 18564 }, { "epoch": 75.77551020408163, "grad_norm": 37.201229095458984, "learning_rate": 1.2136734693877552e-05, "loss": 0.4859, "step": 18565 }, { "epoch": 75.7795918367347, "grad_norm": 28.59467887878418, "learning_rate": 1.2134693877551021e-05, "loss": 1.3475, "step": 18566 }, { "epoch": 75.78367346938775, "grad_norm": 25.30497169494629, "learning_rate": 1.213265306122449e-05, "loss": 1.5653, "step": 18567 }, { "epoch": 75.78775510204082, "grad_norm": 33.35445022583008, "learning_rate": 1.213061224489796e-05, "loss": 1.6617, "step": 18568 }, { "epoch": 75.79183673469387, "grad_norm": 1.9792243242263794, "learning_rate": 1.2128571428571429e-05, "loss": 0.0333, "step": 18569 }, { "epoch": 75.79591836734694, "grad_norm": 38.432891845703125, "learning_rate": 1.21265306122449e-05, "loss": 1.2306, "step": 18570 }, { "epoch": 75.8, "grad_norm": 35.52922821044922, "learning_rate": 1.2124489795918367e-05, "loss": 0.9461, "step": 18571 }, { "epoch": 75.80408163265307, "grad_norm": 23.72456169128418, "learning_rate": 1.2122448979591837e-05, "loss": 0.1651, "step": 18572 }, { "epoch": 75.80816326530612, "grad_norm": 29.025144577026367, "learning_rate": 1.2120408163265306e-05, "loss": 1.7405, "step": 18573 }, { "epoch": 75.81224489795919, "grad_norm": 35.58956527709961, "learning_rate": 1.2118367346938777e-05, "loss": 0.4143, "step": 18574 }, { "epoch": 75.81632653061224, "grad_norm": 23.42902946472168, "learning_rate": 1.2116326530612245e-05, "loss": 1.6554, "step": 18575 }, { "epoch": 75.82040816326531, "grad_norm": 30.63212776184082, "learning_rate": 1.2114285714285716e-05, "loss": 0.1831, "step": 18576 }, { "epoch": 75.82448979591837, "grad_norm": 40.66564178466797, "learning_rate": 1.2112244897959183e-05, "loss": 0.5664, "step": 18577 }, { "epoch": 75.82857142857142, "grad_norm": 23.764848709106445, "learning_rate": 1.2110204081632654e-05, "loss": 2.077, "step": 18578 }, { "epoch": 75.83265306122449, "grad_norm": 33.428672790527344, "learning_rate": 1.2108163265306122e-05, "loss": 0.959, "step": 18579 }, { "epoch": 75.83673469387755, "grad_norm": 42.6054573059082, "learning_rate": 1.2106122448979593e-05, "loss": 0.9273, "step": 18580 }, { "epoch": 75.84081632653061, "grad_norm": 23.327028274536133, "learning_rate": 1.2104081632653062e-05, "loss": 1.7775, "step": 18581 }, { "epoch": 75.84489795918367, "grad_norm": 28.05514144897461, "learning_rate": 1.210204081632653e-05, "loss": 0.9793, "step": 18582 }, { "epoch": 75.84897959183674, "grad_norm": 31.56673240661621, "learning_rate": 1.2100000000000001e-05, "loss": 0.5432, "step": 18583 }, { "epoch": 75.85306122448979, "grad_norm": 24.19212532043457, "learning_rate": 1.209795918367347e-05, "loss": 1.7494, "step": 18584 }, { "epoch": 75.85714285714286, "grad_norm": 26.234294891357422, "learning_rate": 1.2095918367346939e-05, "loss": 0.3009, "step": 18585 }, { "epoch": 75.86122448979592, "grad_norm": 28.803037643432617, "learning_rate": 1.209387755102041e-05, "loss": 0.782, "step": 18586 }, { "epoch": 75.86530612244898, "grad_norm": 30.115829467773438, "learning_rate": 1.2091836734693878e-05, "loss": 0.9043, "step": 18587 }, { "epoch": 75.86938775510204, "grad_norm": 32.72512435913086, "learning_rate": 1.2089795918367347e-05, "loss": 0.7253, "step": 18588 }, { "epoch": 75.87346938775511, "grad_norm": 32.49948501586914, "learning_rate": 1.2087755102040817e-05, "loss": 0.4452, "step": 18589 }, { "epoch": 75.87755102040816, "grad_norm": 21.59322166442871, "learning_rate": 1.2085714285714286e-05, "loss": 0.1884, "step": 18590 }, { "epoch": 75.88163265306123, "grad_norm": 16.52165412902832, "learning_rate": 1.2083673469387757e-05, "loss": 0.144, "step": 18591 }, { "epoch": 75.88571428571429, "grad_norm": 48.641475677490234, "learning_rate": 1.2081632653061225e-05, "loss": 1.0662, "step": 18592 }, { "epoch": 75.88979591836734, "grad_norm": 25.16634750366211, "learning_rate": 1.2079591836734694e-05, "loss": 1.6694, "step": 18593 }, { "epoch": 75.89387755102041, "grad_norm": 20.118799209594727, "learning_rate": 1.2077551020408163e-05, "loss": 2.1162, "step": 18594 }, { "epoch": 75.89795918367346, "grad_norm": 39.236846923828125, "learning_rate": 1.2075510204081634e-05, "loss": 0.3661, "step": 18595 }, { "epoch": 75.90204081632653, "grad_norm": 32.77893829345703, "learning_rate": 1.2073469387755102e-05, "loss": 0.8224, "step": 18596 }, { "epoch": 75.90612244897959, "grad_norm": 22.52012062072754, "learning_rate": 1.2071428571428573e-05, "loss": 1.8463, "step": 18597 }, { "epoch": 75.91020408163266, "grad_norm": 26.50850486755371, "learning_rate": 1.2069387755102042e-05, "loss": 1.639, "step": 18598 }, { "epoch": 75.91428571428571, "grad_norm": 25.39051628112793, "learning_rate": 1.206734693877551e-05, "loss": 1.3548, "step": 18599 }, { "epoch": 75.91836734693878, "grad_norm": 40.75796890258789, "learning_rate": 1.206530612244898e-05, "loss": 0.9033, "step": 18600 }, { "epoch": 75.92244897959183, "grad_norm": 32.96440505981445, "learning_rate": 1.206326530612245e-05, "loss": 0.5571, "step": 18601 }, { "epoch": 75.9265306122449, "grad_norm": 28.237829208374023, "learning_rate": 1.2061224489795919e-05, "loss": 1.8266, "step": 18602 }, { "epoch": 75.93061224489796, "grad_norm": 17.619659423828125, "learning_rate": 1.205918367346939e-05, "loss": 2.1938, "step": 18603 }, { "epoch": 75.93469387755103, "grad_norm": 31.32935905456543, "learning_rate": 1.2057142857142856e-05, "loss": 1.216, "step": 18604 }, { "epoch": 75.93877551020408, "grad_norm": 25.97566795349121, "learning_rate": 1.2055102040816327e-05, "loss": 0.2903, "step": 18605 }, { "epoch": 75.94285714285714, "grad_norm": 34.527164459228516, "learning_rate": 1.2053061224489796e-05, "loss": 0.401, "step": 18606 }, { "epoch": 75.9469387755102, "grad_norm": 34.344482421875, "learning_rate": 1.2051020408163266e-05, "loss": 0.4692, "step": 18607 }, { "epoch": 75.95102040816326, "grad_norm": 27.612995147705078, "learning_rate": 1.2048979591836735e-05, "loss": 1.6679, "step": 18608 }, { "epoch": 75.95510204081633, "grad_norm": 26.628644943237305, "learning_rate": 1.2046938775510206e-05, "loss": 1.5813, "step": 18609 }, { "epoch": 75.95918367346938, "grad_norm": 33.981502532958984, "learning_rate": 1.2044897959183674e-05, "loss": 1.4297, "step": 18610 }, { "epoch": 75.96326530612245, "grad_norm": 32.77565002441406, "learning_rate": 1.2042857142857143e-05, "loss": 1.3495, "step": 18611 }, { "epoch": 75.9673469387755, "grad_norm": 37.21457290649414, "learning_rate": 1.2040816326530612e-05, "loss": 1.1419, "step": 18612 }, { "epoch": 75.97142857142858, "grad_norm": 29.702531814575195, "learning_rate": 1.2038775510204083e-05, "loss": 1.5219, "step": 18613 }, { "epoch": 75.97551020408163, "grad_norm": 64.09033966064453, "learning_rate": 1.2036734693877551e-05, "loss": 1.4085, "step": 18614 }, { "epoch": 75.9795918367347, "grad_norm": 20.788881301879883, "learning_rate": 1.203469387755102e-05, "loss": 1.6684, "step": 18615 }, { "epoch": 75.98367346938775, "grad_norm": 29.55547523498535, "learning_rate": 1.203265306122449e-05, "loss": 1.2277, "step": 18616 }, { "epoch": 75.98775510204082, "grad_norm": 40.5693473815918, "learning_rate": 1.203061224489796e-05, "loss": 0.4682, "step": 18617 }, { "epoch": 75.99183673469388, "grad_norm": 34.97886276245117, "learning_rate": 1.202857142857143e-05, "loss": 0.903, "step": 18618 }, { "epoch": 75.99591836734695, "grad_norm": 35.193450927734375, "learning_rate": 1.2026530612244899e-05, "loss": 0.7711, "step": 18619 }, { "epoch": 76.0, "grad_norm": 40.47613525390625, "learning_rate": 1.202448979591837e-05, "loss": 0.5526, "step": 18620 }, { "epoch": 76.00408163265305, "grad_norm": 30.396615982055664, "learning_rate": 1.2022448979591837e-05, "loss": 1.1222, "step": 18621 }, { "epoch": 76.00816326530612, "grad_norm": 31.61905860900879, "learning_rate": 1.2020408163265307e-05, "loss": 1.1748, "step": 18622 }, { "epoch": 76.01224489795918, "grad_norm": 30.32189178466797, "learning_rate": 1.2018367346938776e-05, "loss": 1.7319, "step": 18623 }, { "epoch": 76.01632653061225, "grad_norm": 25.308345794677734, "learning_rate": 1.2016326530612246e-05, "loss": 0.1889, "step": 18624 }, { "epoch": 76.0204081632653, "grad_norm": 26.59856414794922, "learning_rate": 1.2014285714285715e-05, "loss": 0.4309, "step": 18625 }, { "epoch": 76.02448979591837, "grad_norm": 24.69417381286621, "learning_rate": 1.2012244897959184e-05, "loss": 1.6699, "step": 18626 }, { "epoch": 76.02857142857142, "grad_norm": 38.10626983642578, "learning_rate": 1.2010204081632653e-05, "loss": 1.271, "step": 18627 }, { "epoch": 76.0326530612245, "grad_norm": 35.98490905761719, "learning_rate": 1.2008163265306123e-05, "loss": 0.6535, "step": 18628 }, { "epoch": 76.03673469387755, "grad_norm": 53.90053939819336, "learning_rate": 1.2006122448979592e-05, "loss": 1.3211, "step": 18629 }, { "epoch": 76.04081632653062, "grad_norm": 31.803659439086914, "learning_rate": 1.2004081632653063e-05, "loss": 0.8883, "step": 18630 }, { "epoch": 76.04489795918367, "grad_norm": 26.952856063842773, "learning_rate": 1.2002040816326531e-05, "loss": 1.6152, "step": 18631 }, { "epoch": 76.04897959183674, "grad_norm": 21.990381240844727, "learning_rate": 1.2e-05, "loss": 1.556, "step": 18632 }, { "epoch": 76.0530612244898, "grad_norm": 30.064449310302734, "learning_rate": 1.1997959183673469e-05, "loss": 1.2923, "step": 18633 }, { "epoch": 76.05714285714286, "grad_norm": 21.29148292541504, "learning_rate": 1.199591836734694e-05, "loss": 2.1879, "step": 18634 }, { "epoch": 76.06122448979592, "grad_norm": 22.5410099029541, "learning_rate": 1.1993877551020408e-05, "loss": 1.5786, "step": 18635 }, { "epoch": 76.06530612244897, "grad_norm": 33.707271575927734, "learning_rate": 1.1991836734693879e-05, "loss": 0.4676, "step": 18636 }, { "epoch": 76.06938775510204, "grad_norm": 36.7337532043457, "learning_rate": 1.1989795918367348e-05, "loss": 1.0014, "step": 18637 }, { "epoch": 76.0734693877551, "grad_norm": 26.255537033081055, "learning_rate": 1.1987755102040817e-05, "loss": 1.6707, "step": 18638 }, { "epoch": 76.07755102040817, "grad_norm": 49.15919876098633, "learning_rate": 1.1985714285714285e-05, "loss": 1.4667, "step": 18639 }, { "epoch": 76.08163265306122, "grad_norm": 26.721141815185547, "learning_rate": 1.1983673469387756e-05, "loss": 1.7764, "step": 18640 }, { "epoch": 76.08571428571429, "grad_norm": 33.966556549072266, "learning_rate": 1.1981632653061225e-05, "loss": 1.3718, "step": 18641 }, { "epoch": 76.08979591836734, "grad_norm": 35.587833404541016, "learning_rate": 1.1979591836734694e-05, "loss": 0.6706, "step": 18642 }, { "epoch": 76.09387755102041, "grad_norm": 35.932682037353516, "learning_rate": 1.1977551020408164e-05, "loss": 1.2711, "step": 18643 }, { "epoch": 76.09795918367347, "grad_norm": 27.706789016723633, "learning_rate": 1.1975510204081633e-05, "loss": 0.8526, "step": 18644 }, { "epoch": 76.10204081632654, "grad_norm": 28.841325759887695, "learning_rate": 1.1973469387755103e-05, "loss": 0.6349, "step": 18645 }, { "epoch": 76.10612244897959, "grad_norm": 38.83097457885742, "learning_rate": 1.1971428571428572e-05, "loss": 1.4924, "step": 18646 }, { "epoch": 76.11020408163266, "grad_norm": 43.6468620300293, "learning_rate": 1.1969387755102043e-05, "loss": 0.9428, "step": 18647 }, { "epoch": 76.11428571428571, "grad_norm": 33.409423828125, "learning_rate": 1.196734693877551e-05, "loss": 0.2028, "step": 18648 }, { "epoch": 76.11836734693877, "grad_norm": 27.747955322265625, "learning_rate": 1.196530612244898e-05, "loss": 0.3234, "step": 18649 }, { "epoch": 76.12244897959184, "grad_norm": 36.41257858276367, "learning_rate": 1.196326530612245e-05, "loss": 0.8717, "step": 18650 }, { "epoch": 76.12653061224489, "grad_norm": 32.832645416259766, "learning_rate": 1.196122448979592e-05, "loss": 1.4707, "step": 18651 }, { "epoch": 76.13061224489796, "grad_norm": 31.78109359741211, "learning_rate": 1.1959183673469389e-05, "loss": 1.4352, "step": 18652 }, { "epoch": 76.13469387755102, "grad_norm": 34.440738677978516, "learning_rate": 1.1957142857142857e-05, "loss": 1.0752, "step": 18653 }, { "epoch": 76.13877551020408, "grad_norm": 35.70554733276367, "learning_rate": 1.1955102040816326e-05, "loss": 1.1188, "step": 18654 }, { "epoch": 76.14285714285714, "grad_norm": 29.994895935058594, "learning_rate": 1.1953061224489797e-05, "loss": 1.5035, "step": 18655 }, { "epoch": 76.14693877551021, "grad_norm": 37.33081817626953, "learning_rate": 1.1951020408163266e-05, "loss": 1.1437, "step": 18656 }, { "epoch": 76.15102040816326, "grad_norm": 28.077457427978516, "learning_rate": 1.1948979591836736e-05, "loss": 0.4032, "step": 18657 }, { "epoch": 76.15510204081633, "grad_norm": 33.71855163574219, "learning_rate": 1.1946938775510205e-05, "loss": 0.972, "step": 18658 }, { "epoch": 76.15918367346939, "grad_norm": 39.55023193359375, "learning_rate": 1.1944897959183674e-05, "loss": 0.6391, "step": 18659 }, { "epoch": 76.16326530612245, "grad_norm": 34.17998123168945, "learning_rate": 1.1942857142857142e-05, "loss": 0.3198, "step": 18660 }, { "epoch": 76.16734693877551, "grad_norm": 16.693937301635742, "learning_rate": 1.1940816326530613e-05, "loss": 2.68, "step": 18661 }, { "epoch": 76.17142857142858, "grad_norm": 28.063922882080078, "learning_rate": 1.1938775510204082e-05, "loss": 1.2834, "step": 18662 }, { "epoch": 76.17551020408163, "grad_norm": 46.534576416015625, "learning_rate": 1.1936734693877552e-05, "loss": 1.2378, "step": 18663 }, { "epoch": 76.17959183673469, "grad_norm": 34.88228225708008, "learning_rate": 1.193469387755102e-05, "loss": 0.939, "step": 18664 }, { "epoch": 76.18367346938776, "grad_norm": 27.423112869262695, "learning_rate": 1.193265306122449e-05, "loss": 0.3398, "step": 18665 }, { "epoch": 76.18775510204081, "grad_norm": 30.532264709472656, "learning_rate": 1.1930612244897959e-05, "loss": 0.7901, "step": 18666 }, { "epoch": 76.19183673469388, "grad_norm": 26.963891983032227, "learning_rate": 1.192857142857143e-05, "loss": 2.0389, "step": 18667 }, { "epoch": 76.19591836734693, "grad_norm": 9.757955551147461, "learning_rate": 1.1926530612244898e-05, "loss": 3.0486, "step": 18668 }, { "epoch": 76.2, "grad_norm": 42.27067947387695, "learning_rate": 1.1924489795918369e-05, "loss": 0.6878, "step": 18669 }, { "epoch": 76.20408163265306, "grad_norm": 46.47350311279297, "learning_rate": 1.1922448979591837e-05, "loss": 0.7661, "step": 18670 }, { "epoch": 76.20816326530613, "grad_norm": 41.944583892822266, "learning_rate": 1.1920408163265306e-05, "loss": 0.7683, "step": 18671 }, { "epoch": 76.21224489795918, "grad_norm": 32.75992965698242, "learning_rate": 1.1918367346938777e-05, "loss": 1.1124, "step": 18672 }, { "epoch": 76.21632653061225, "grad_norm": 44.91211700439453, "learning_rate": 1.1916326530612246e-05, "loss": 0.844, "step": 18673 }, { "epoch": 76.2204081632653, "grad_norm": 31.74003791809082, "learning_rate": 1.1914285714285716e-05, "loss": 1.1092, "step": 18674 }, { "epoch": 76.22448979591837, "grad_norm": 27.548063278198242, "learning_rate": 1.1912244897959183e-05, "loss": 1.4669, "step": 18675 }, { "epoch": 76.22857142857143, "grad_norm": 44.95589065551758, "learning_rate": 1.1910204081632654e-05, "loss": 0.9614, "step": 18676 }, { "epoch": 76.2326530612245, "grad_norm": 35.170528411865234, "learning_rate": 1.1908163265306123e-05, "loss": 0.4976, "step": 18677 }, { "epoch": 76.23673469387755, "grad_norm": 34.06850051879883, "learning_rate": 1.1906122448979593e-05, "loss": 0.531, "step": 18678 }, { "epoch": 76.2408163265306, "grad_norm": 16.51113510131836, "learning_rate": 1.1904081632653062e-05, "loss": 2.4741, "step": 18679 }, { "epoch": 76.24489795918367, "grad_norm": 23.089502334594727, "learning_rate": 1.1902040816326532e-05, "loss": 1.5428, "step": 18680 }, { "epoch": 76.24897959183673, "grad_norm": 47.55449295043945, "learning_rate": 1.19e-05, "loss": 1.0333, "step": 18681 }, { "epoch": 76.2530612244898, "grad_norm": 31.468387603759766, "learning_rate": 1.189795918367347e-05, "loss": 0.6849, "step": 18682 }, { "epoch": 76.25714285714285, "grad_norm": 37.05256271362305, "learning_rate": 1.1895918367346939e-05, "loss": 1.0296, "step": 18683 }, { "epoch": 76.26122448979592, "grad_norm": 45.33098602294922, "learning_rate": 1.189387755102041e-05, "loss": 0.8129, "step": 18684 }, { "epoch": 76.26530612244898, "grad_norm": 28.499710083007812, "learning_rate": 1.1891836734693878e-05, "loss": 1.6383, "step": 18685 }, { "epoch": 76.26938775510204, "grad_norm": 35.88850402832031, "learning_rate": 1.1889795918367347e-05, "loss": 1.0899, "step": 18686 }, { "epoch": 76.2734693877551, "grad_norm": 22.595569610595703, "learning_rate": 1.1887755102040816e-05, "loss": 0.2629, "step": 18687 }, { "epoch": 76.27755102040817, "grad_norm": 34.34865188598633, "learning_rate": 1.1885714285714286e-05, "loss": 0.3912, "step": 18688 }, { "epoch": 76.28163265306122, "grad_norm": 25.441631317138672, "learning_rate": 1.1883673469387755e-05, "loss": 1.5967, "step": 18689 }, { "epoch": 76.28571428571429, "grad_norm": 26.64910316467285, "learning_rate": 1.1881632653061226e-05, "loss": 1.9015, "step": 18690 }, { "epoch": 76.28979591836735, "grad_norm": 27.152206420898438, "learning_rate": 1.1879591836734695e-05, "loss": 1.3611, "step": 18691 }, { "epoch": 76.29387755102042, "grad_norm": 31.584369659423828, "learning_rate": 1.1877551020408163e-05, "loss": 0.4154, "step": 18692 }, { "epoch": 76.29795918367347, "grad_norm": 31.221572875976562, "learning_rate": 1.1875510204081632e-05, "loss": 0.9561, "step": 18693 }, { "epoch": 76.30204081632652, "grad_norm": 41.11809539794922, "learning_rate": 1.1873469387755103e-05, "loss": 0.7835, "step": 18694 }, { "epoch": 76.3061224489796, "grad_norm": 17.450380325317383, "learning_rate": 1.1871428571428572e-05, "loss": 0.1174, "step": 18695 }, { "epoch": 76.31020408163265, "grad_norm": 23.19286346435547, "learning_rate": 1.1869387755102042e-05, "loss": 2.0202, "step": 18696 }, { "epoch": 76.31428571428572, "grad_norm": 23.7205867767334, "learning_rate": 1.186734693877551e-05, "loss": 2.3906, "step": 18697 }, { "epoch": 76.31836734693877, "grad_norm": 41.01299285888672, "learning_rate": 1.186530612244898e-05, "loss": 0.898, "step": 18698 }, { "epoch": 76.32244897959184, "grad_norm": 18.674108505249023, "learning_rate": 1.186326530612245e-05, "loss": 0.1186, "step": 18699 }, { "epoch": 76.3265306122449, "grad_norm": 1.4457865953445435, "learning_rate": 1.1861224489795919e-05, "loss": 0.024, "step": 18700 }, { "epoch": 76.33061224489796, "grad_norm": 34.02369689941406, "learning_rate": 1.185918367346939e-05, "loss": 1.5891, "step": 18701 }, { "epoch": 76.33469387755102, "grad_norm": 36.14324951171875, "learning_rate": 1.1857142857142858e-05, "loss": 0.4504, "step": 18702 }, { "epoch": 76.33877551020409, "grad_norm": 37.06048583984375, "learning_rate": 1.1855102040816327e-05, "loss": 0.4466, "step": 18703 }, { "epoch": 76.34285714285714, "grad_norm": 40.01921844482422, "learning_rate": 1.1853061224489796e-05, "loss": 0.8502, "step": 18704 }, { "epoch": 76.34693877551021, "grad_norm": 36.4544677734375, "learning_rate": 1.1851020408163266e-05, "loss": 0.8478, "step": 18705 }, { "epoch": 76.35102040816327, "grad_norm": 32.95042037963867, "learning_rate": 1.1848979591836735e-05, "loss": 0.641, "step": 18706 }, { "epoch": 76.35510204081632, "grad_norm": 25.771623611450195, "learning_rate": 1.1846938775510206e-05, "loss": 1.5853, "step": 18707 }, { "epoch": 76.35918367346939, "grad_norm": 22.152549743652344, "learning_rate": 1.1844897959183673e-05, "loss": 1.9655, "step": 18708 }, { "epoch": 76.36326530612244, "grad_norm": 26.744102478027344, "learning_rate": 1.1842857142857143e-05, "loss": 0.4522, "step": 18709 }, { "epoch": 76.36734693877551, "grad_norm": 28.037057876586914, "learning_rate": 1.1840816326530612e-05, "loss": 1.4425, "step": 18710 }, { "epoch": 76.37142857142857, "grad_norm": 24.641746520996094, "learning_rate": 1.1838775510204083e-05, "loss": 1.5788, "step": 18711 }, { "epoch": 76.37551020408164, "grad_norm": 35.34871292114258, "learning_rate": 1.1836734693877552e-05, "loss": 1.0014, "step": 18712 }, { "epoch": 76.37959183673469, "grad_norm": 30.305418014526367, "learning_rate": 1.1834693877551022e-05, "loss": 0.4199, "step": 18713 }, { "epoch": 76.38367346938776, "grad_norm": 36.19449996948242, "learning_rate": 1.183265306122449e-05, "loss": 1.0433, "step": 18714 }, { "epoch": 76.38775510204081, "grad_norm": 28.5789852142334, "learning_rate": 1.183061224489796e-05, "loss": 1.2875, "step": 18715 }, { "epoch": 76.39183673469388, "grad_norm": 39.8807258605957, "learning_rate": 1.1828571428571429e-05, "loss": 1.0917, "step": 18716 }, { "epoch": 76.39591836734694, "grad_norm": 41.381046295166016, "learning_rate": 1.1826530612244899e-05, "loss": 0.3969, "step": 18717 }, { "epoch": 76.4, "grad_norm": 27.52936363220215, "learning_rate": 1.1824489795918368e-05, "loss": 1.5042, "step": 18718 }, { "epoch": 76.40408163265306, "grad_norm": 32.05779266357422, "learning_rate": 1.1822448979591837e-05, "loss": 0.2596, "step": 18719 }, { "epoch": 76.40816326530613, "grad_norm": 53.1595458984375, "learning_rate": 1.1820408163265306e-05, "loss": 0.7297, "step": 18720 }, { "epoch": 76.41224489795918, "grad_norm": 26.760406494140625, "learning_rate": 1.1818367346938776e-05, "loss": 0.2856, "step": 18721 }, { "epoch": 76.41632653061224, "grad_norm": 15.715784072875977, "learning_rate": 1.1816326530612245e-05, "loss": 2.3717, "step": 18722 }, { "epoch": 76.42040816326531, "grad_norm": 21.121191024780273, "learning_rate": 1.1814285714285715e-05, "loss": 2.1307, "step": 18723 }, { "epoch": 76.42448979591836, "grad_norm": 40.727638244628906, "learning_rate": 1.1812244897959184e-05, "loss": 0.4505, "step": 18724 }, { "epoch": 76.42857142857143, "grad_norm": 28.658262252807617, "learning_rate": 1.1810204081632653e-05, "loss": 0.9204, "step": 18725 }, { "epoch": 76.43265306122449, "grad_norm": 32.35106658935547, "learning_rate": 1.1808163265306124e-05, "loss": 1.3599, "step": 18726 }, { "epoch": 76.43673469387755, "grad_norm": 39.14287567138672, "learning_rate": 1.1806122448979592e-05, "loss": 0.7022, "step": 18727 }, { "epoch": 76.44081632653061, "grad_norm": 40.5759162902832, "learning_rate": 1.1804081632653063e-05, "loss": 0.8403, "step": 18728 }, { "epoch": 76.44489795918368, "grad_norm": 25.6697940826416, "learning_rate": 1.1802040816326532e-05, "loss": 1.4646, "step": 18729 }, { "epoch": 76.44897959183673, "grad_norm": 37.315128326416016, "learning_rate": 1.18e-05, "loss": 1.0733, "step": 18730 }, { "epoch": 76.4530612244898, "grad_norm": 26.408571243286133, "learning_rate": 1.179795918367347e-05, "loss": 2.0515, "step": 18731 }, { "epoch": 76.45714285714286, "grad_norm": 28.02338218688965, "learning_rate": 1.179591836734694e-05, "loss": 1.3032, "step": 18732 }, { "epoch": 76.46122448979592, "grad_norm": 38.57210159301758, "learning_rate": 1.1793877551020409e-05, "loss": 1.5076, "step": 18733 }, { "epoch": 76.46530612244898, "grad_norm": 33.907981872558594, "learning_rate": 1.179183673469388e-05, "loss": 0.3899, "step": 18734 }, { "epoch": 76.46938775510205, "grad_norm": 53.461673736572266, "learning_rate": 1.1789795918367348e-05, "loss": 0.4922, "step": 18735 }, { "epoch": 76.4734693877551, "grad_norm": 33.13919448852539, "learning_rate": 1.1787755102040817e-05, "loss": 0.3144, "step": 18736 }, { "epoch": 76.47755102040816, "grad_norm": 34.19059753417969, "learning_rate": 1.1785714285714286e-05, "loss": 0.589, "step": 18737 }, { "epoch": 76.48163265306123, "grad_norm": 27.371097564697266, "learning_rate": 1.1783673469387756e-05, "loss": 1.7491, "step": 18738 }, { "epoch": 76.48571428571428, "grad_norm": 33.602657318115234, "learning_rate": 1.1781632653061225e-05, "loss": 1.2193, "step": 18739 }, { "epoch": 76.48979591836735, "grad_norm": 23.915599822998047, "learning_rate": 1.1779591836734695e-05, "loss": 1.7114, "step": 18740 }, { "epoch": 76.4938775510204, "grad_norm": 41.933204650878906, "learning_rate": 1.1777551020408163e-05, "loss": 1.2641, "step": 18741 }, { "epoch": 76.49795918367347, "grad_norm": 37.792354583740234, "learning_rate": 1.1775510204081633e-05, "loss": 0.4796, "step": 18742 }, { "epoch": 76.50204081632653, "grad_norm": 27.952672958374023, "learning_rate": 1.1773469387755102e-05, "loss": 1.7325, "step": 18743 }, { "epoch": 76.5061224489796, "grad_norm": 38.115455627441406, "learning_rate": 1.1771428571428572e-05, "loss": 0.6788, "step": 18744 }, { "epoch": 76.51020408163265, "grad_norm": 23.285268783569336, "learning_rate": 1.1769387755102041e-05, "loss": 0.2074, "step": 18745 }, { "epoch": 76.51428571428572, "grad_norm": 28.23149299621582, "learning_rate": 1.1767346938775512e-05, "loss": 1.2442, "step": 18746 }, { "epoch": 76.51836734693877, "grad_norm": 33.95120620727539, "learning_rate": 1.1765306122448979e-05, "loss": 0.7538, "step": 18747 }, { "epoch": 76.52244897959184, "grad_norm": 21.196521759033203, "learning_rate": 1.176326530612245e-05, "loss": 2.0715, "step": 18748 }, { "epoch": 76.5265306122449, "grad_norm": 49.740177154541016, "learning_rate": 1.1761224489795918e-05, "loss": 0.6004, "step": 18749 }, { "epoch": 76.53061224489795, "grad_norm": 16.077255249023438, "learning_rate": 1.1759183673469389e-05, "loss": 0.1631, "step": 18750 }, { "epoch": 76.53469387755102, "grad_norm": 41.093177795410156, "learning_rate": 1.1757142857142858e-05, "loss": 0.8571, "step": 18751 }, { "epoch": 76.53877551020408, "grad_norm": 33.9555549621582, "learning_rate": 1.1755102040816326e-05, "loss": 1.3039, "step": 18752 }, { "epoch": 76.54285714285714, "grad_norm": 30.1479549407959, "learning_rate": 1.1753061224489797e-05, "loss": 1.3164, "step": 18753 }, { "epoch": 76.5469387755102, "grad_norm": 39.33016586303711, "learning_rate": 1.1751020408163266e-05, "loss": 0.4889, "step": 18754 }, { "epoch": 76.55102040816327, "grad_norm": 38.83060073852539, "learning_rate": 1.1748979591836735e-05, "loss": 0.9479, "step": 18755 }, { "epoch": 76.55510204081632, "grad_norm": 24.08755874633789, "learning_rate": 1.1746938775510205e-05, "loss": 1.5431, "step": 18756 }, { "epoch": 76.55918367346939, "grad_norm": 28.45064353942871, "learning_rate": 1.1744897959183674e-05, "loss": 0.8508, "step": 18757 }, { "epoch": 76.56326530612245, "grad_norm": 28.197845458984375, "learning_rate": 1.1742857142857143e-05, "loss": 0.4265, "step": 18758 }, { "epoch": 76.56734693877551, "grad_norm": 20.432104110717773, "learning_rate": 1.1740816326530613e-05, "loss": 1.8209, "step": 18759 }, { "epoch": 76.57142857142857, "grad_norm": 34.418296813964844, "learning_rate": 1.1738775510204082e-05, "loss": 0.5052, "step": 18760 }, { "epoch": 76.57551020408164, "grad_norm": 37.93122482299805, "learning_rate": 1.1736734693877553e-05, "loss": 0.694, "step": 18761 }, { "epoch": 76.57959183673469, "grad_norm": 44.91201400756836, "learning_rate": 1.1734693877551021e-05, "loss": 1.1315, "step": 18762 }, { "epoch": 76.58367346938776, "grad_norm": 35.484928131103516, "learning_rate": 1.173265306122449e-05, "loss": 0.6891, "step": 18763 }, { "epoch": 76.58775510204082, "grad_norm": 31.168642044067383, "learning_rate": 1.1730612244897959e-05, "loss": 1.3161, "step": 18764 }, { "epoch": 76.59183673469387, "grad_norm": 34.520503997802734, "learning_rate": 1.172857142857143e-05, "loss": 1.1261, "step": 18765 }, { "epoch": 76.59591836734694, "grad_norm": 30.66131591796875, "learning_rate": 1.1726530612244898e-05, "loss": 0.4362, "step": 18766 }, { "epoch": 76.6, "grad_norm": 39.44643783569336, "learning_rate": 1.1724489795918369e-05, "loss": 0.9238, "step": 18767 }, { "epoch": 76.60408163265306, "grad_norm": 19.843242645263672, "learning_rate": 1.1722448979591838e-05, "loss": 2.2436, "step": 18768 }, { "epoch": 76.60816326530612, "grad_norm": 26.05154800415039, "learning_rate": 1.1720408163265307e-05, "loss": 0.3314, "step": 18769 }, { "epoch": 76.61224489795919, "grad_norm": 32.916744232177734, "learning_rate": 1.1718367346938775e-05, "loss": 0.4185, "step": 18770 }, { "epoch": 76.61632653061224, "grad_norm": 31.72496223449707, "learning_rate": 1.1716326530612246e-05, "loss": 0.8472, "step": 18771 }, { "epoch": 76.62040816326531, "grad_norm": 31.811323165893555, "learning_rate": 1.1714285714285715e-05, "loss": 1.4167, "step": 18772 }, { "epoch": 76.62448979591836, "grad_norm": 44.328857421875, "learning_rate": 1.1712244897959185e-05, "loss": 0.3539, "step": 18773 }, { "epoch": 76.62857142857143, "grad_norm": 43.62733459472656, "learning_rate": 1.1710204081632652e-05, "loss": 0.6174, "step": 18774 }, { "epoch": 76.63265306122449, "grad_norm": 27.75599479675293, "learning_rate": 1.1708163265306123e-05, "loss": 1.5927, "step": 18775 }, { "epoch": 76.63673469387756, "grad_norm": 26.78977394104004, "learning_rate": 1.1706122448979592e-05, "loss": 0.1445, "step": 18776 }, { "epoch": 76.64081632653061, "grad_norm": 30.71221160888672, "learning_rate": 1.1704081632653062e-05, "loss": 1.3384, "step": 18777 }, { "epoch": 76.64489795918368, "grad_norm": 26.80391502380371, "learning_rate": 1.1702040816326531e-05, "loss": 1.5488, "step": 18778 }, { "epoch": 76.64897959183673, "grad_norm": 39.34798812866211, "learning_rate": 1.1700000000000001e-05, "loss": 1.517, "step": 18779 }, { "epoch": 76.65306122448979, "grad_norm": 9.799142837524414, "learning_rate": 1.169795918367347e-05, "loss": 0.073, "step": 18780 }, { "epoch": 76.65714285714286, "grad_norm": 35.233604431152344, "learning_rate": 1.1695918367346939e-05, "loss": 0.9574, "step": 18781 }, { "epoch": 76.66122448979591, "grad_norm": 37.90220260620117, "learning_rate": 1.1693877551020408e-05, "loss": 0.7886, "step": 18782 }, { "epoch": 76.66530612244898, "grad_norm": 32.26578903198242, "learning_rate": 1.1691836734693878e-05, "loss": 1.4292, "step": 18783 }, { "epoch": 76.66938775510204, "grad_norm": 36.50462341308594, "learning_rate": 1.1689795918367347e-05, "loss": 0.3143, "step": 18784 }, { "epoch": 76.6734693877551, "grad_norm": 33.707427978515625, "learning_rate": 1.1687755102040816e-05, "loss": 0.5181, "step": 18785 }, { "epoch": 76.67755102040816, "grad_norm": 46.41569137573242, "learning_rate": 1.1685714285714287e-05, "loss": 1.0299, "step": 18786 }, { "epoch": 76.68163265306123, "grad_norm": 26.318225860595703, "learning_rate": 1.1683673469387755e-05, "loss": 1.3519, "step": 18787 }, { "epoch": 76.68571428571428, "grad_norm": 29.0411434173584, "learning_rate": 1.1681632653061226e-05, "loss": 0.184, "step": 18788 }, { "epoch": 76.68979591836735, "grad_norm": 28.01218032836914, "learning_rate": 1.1679591836734695e-05, "loss": 1.7441, "step": 18789 }, { "epoch": 76.6938775510204, "grad_norm": 36.58674621582031, "learning_rate": 1.1677551020408165e-05, "loss": 0.4352, "step": 18790 }, { "epoch": 76.69795918367348, "grad_norm": 30.006200790405273, "learning_rate": 1.1675510204081632e-05, "loss": 1.689, "step": 18791 }, { "epoch": 76.70204081632653, "grad_norm": 42.08114242553711, "learning_rate": 1.1673469387755103e-05, "loss": 1.1576, "step": 18792 }, { "epoch": 76.70612244897958, "grad_norm": 36.25368118286133, "learning_rate": 1.1671428571428572e-05, "loss": 0.794, "step": 18793 }, { "epoch": 76.71020408163265, "grad_norm": 37.58098220825195, "learning_rate": 1.1669387755102042e-05, "loss": 0.3508, "step": 18794 }, { "epoch": 76.71428571428571, "grad_norm": 12.762853622436523, "learning_rate": 1.1667346938775511e-05, "loss": 0.098, "step": 18795 }, { "epoch": 76.71836734693878, "grad_norm": 32.73732376098633, "learning_rate": 1.166530612244898e-05, "loss": 0.846, "step": 18796 }, { "epoch": 76.72244897959183, "grad_norm": 44.16073989868164, "learning_rate": 1.1663265306122449e-05, "loss": 1.6818, "step": 18797 }, { "epoch": 76.7265306122449, "grad_norm": 29.42919921875, "learning_rate": 1.166122448979592e-05, "loss": 0.8157, "step": 18798 }, { "epoch": 76.73061224489796, "grad_norm": 23.970962524414062, "learning_rate": 1.1659183673469388e-05, "loss": 2.1114, "step": 18799 }, { "epoch": 76.73469387755102, "grad_norm": 38.6970329284668, "learning_rate": 1.1657142857142859e-05, "loss": 0.6133, "step": 18800 }, { "epoch": 76.73877551020408, "grad_norm": 34.594303131103516, "learning_rate": 1.1655102040816326e-05, "loss": 0.6347, "step": 18801 }, { "epoch": 76.74285714285715, "grad_norm": 32.534141540527344, "learning_rate": 1.1653061224489796e-05, "loss": 0.3537, "step": 18802 }, { "epoch": 76.7469387755102, "grad_norm": 27.26011848449707, "learning_rate": 1.1651020408163265e-05, "loss": 2.033, "step": 18803 }, { "epoch": 76.75102040816327, "grad_norm": 31.598033905029297, "learning_rate": 1.1648979591836736e-05, "loss": 0.1428, "step": 18804 }, { "epoch": 76.75510204081633, "grad_norm": 20.419546127319336, "learning_rate": 1.1646938775510204e-05, "loss": 2.2113, "step": 18805 }, { "epoch": 76.7591836734694, "grad_norm": 35.783294677734375, "learning_rate": 1.1644897959183675e-05, "loss": 0.3988, "step": 18806 }, { "epoch": 76.76326530612245, "grad_norm": 33.78793716430664, "learning_rate": 1.1642857142857144e-05, "loss": 1.2395, "step": 18807 }, { "epoch": 76.7673469387755, "grad_norm": 29.82895851135254, "learning_rate": 1.1640816326530612e-05, "loss": 1.6404, "step": 18808 }, { "epoch": 76.77142857142857, "grad_norm": 32.75886535644531, "learning_rate": 1.1638775510204081e-05, "loss": 1.2318, "step": 18809 }, { "epoch": 76.77551020408163, "grad_norm": 27.161569595336914, "learning_rate": 1.1636734693877552e-05, "loss": 0.8132, "step": 18810 }, { "epoch": 76.7795918367347, "grad_norm": 27.549352645874023, "learning_rate": 1.163469387755102e-05, "loss": 1.2881, "step": 18811 }, { "epoch": 76.78367346938775, "grad_norm": 26.777984619140625, "learning_rate": 1.163265306122449e-05, "loss": 1.6714, "step": 18812 }, { "epoch": 76.78775510204082, "grad_norm": 46.93145751953125, "learning_rate": 1.163061224489796e-05, "loss": 0.5992, "step": 18813 }, { "epoch": 76.79183673469387, "grad_norm": 33.7241325378418, "learning_rate": 1.1628571428571429e-05, "loss": 1.3654, "step": 18814 }, { "epoch": 76.79591836734694, "grad_norm": 48.74189758300781, "learning_rate": 1.16265306122449e-05, "loss": 1.175, "step": 18815 }, { "epoch": 76.8, "grad_norm": 28.816621780395508, "learning_rate": 1.1624489795918368e-05, "loss": 0.5749, "step": 18816 }, { "epoch": 76.80408163265307, "grad_norm": 24.657958984375, "learning_rate": 1.1622448979591839e-05, "loss": 1.5283, "step": 18817 }, { "epoch": 76.80816326530612, "grad_norm": 40.435482025146484, "learning_rate": 1.1620408163265306e-05, "loss": 1.0013, "step": 18818 }, { "epoch": 76.81224489795919, "grad_norm": 31.908884048461914, "learning_rate": 1.1618367346938776e-05, "loss": 1.4687, "step": 18819 }, { "epoch": 76.81632653061224, "grad_norm": 26.77008819580078, "learning_rate": 1.1616326530612245e-05, "loss": 1.695, "step": 18820 }, { "epoch": 76.82040816326531, "grad_norm": 25.19638442993164, "learning_rate": 1.1614285714285716e-05, "loss": 1.6168, "step": 18821 }, { "epoch": 76.82448979591837, "grad_norm": 42.68996810913086, "learning_rate": 1.1612244897959184e-05, "loss": 0.306, "step": 18822 }, { "epoch": 76.82857142857142, "grad_norm": 28.024173736572266, "learning_rate": 1.1610204081632653e-05, "loss": 1.5644, "step": 18823 }, { "epoch": 76.83265306122449, "grad_norm": 37.65638732910156, "learning_rate": 1.1608163265306122e-05, "loss": 0.6151, "step": 18824 }, { "epoch": 76.83673469387755, "grad_norm": 21.997840881347656, "learning_rate": 1.1606122448979593e-05, "loss": 1.6247, "step": 18825 }, { "epoch": 76.84081632653061, "grad_norm": 33.05638122558594, "learning_rate": 1.1604081632653061e-05, "loss": 1.3813, "step": 18826 }, { "epoch": 76.84489795918367, "grad_norm": 16.566938400268555, "learning_rate": 1.1602040816326532e-05, "loss": 0.139, "step": 18827 }, { "epoch": 76.84897959183674, "grad_norm": 48.17668533325195, "learning_rate": 1.16e-05, "loss": 0.4315, "step": 18828 }, { "epoch": 76.85306122448979, "grad_norm": 17.724864959716797, "learning_rate": 1.159795918367347e-05, "loss": 0.1196, "step": 18829 }, { "epoch": 76.85714285714286, "grad_norm": 29.89929962158203, "learning_rate": 1.1595918367346938e-05, "loss": 1.5719, "step": 18830 }, { "epoch": 76.86122448979592, "grad_norm": 25.085596084594727, "learning_rate": 1.1593877551020409e-05, "loss": 1.7401, "step": 18831 }, { "epoch": 76.86530612244898, "grad_norm": 26.797136306762695, "learning_rate": 1.1591836734693878e-05, "loss": 0.8732, "step": 18832 }, { "epoch": 76.86938775510204, "grad_norm": 21.044559478759766, "learning_rate": 1.1589795918367348e-05, "loss": 2.2008, "step": 18833 }, { "epoch": 76.87346938775511, "grad_norm": 33.92384719848633, "learning_rate": 1.1587755102040817e-05, "loss": 0.4253, "step": 18834 }, { "epoch": 76.87755102040816, "grad_norm": 31.95950698852539, "learning_rate": 1.1585714285714286e-05, "loss": 0.7598, "step": 18835 }, { "epoch": 76.88163265306123, "grad_norm": 32.034080505371094, "learning_rate": 1.1583673469387755e-05, "loss": 1.4822, "step": 18836 }, { "epoch": 76.88571428571429, "grad_norm": 22.341054916381836, "learning_rate": 1.1581632653061225e-05, "loss": 2.1306, "step": 18837 }, { "epoch": 76.88979591836734, "grad_norm": 29.477279663085938, "learning_rate": 1.1579591836734694e-05, "loss": 1.2426, "step": 18838 }, { "epoch": 76.89387755102041, "grad_norm": 22.254194259643555, "learning_rate": 1.1577551020408165e-05, "loss": 0.1338, "step": 18839 }, { "epoch": 76.89795918367346, "grad_norm": 43.92393493652344, "learning_rate": 1.1575510204081633e-05, "loss": 0.5612, "step": 18840 }, { "epoch": 76.90204081632653, "grad_norm": 18.323102951049805, "learning_rate": 1.1573469387755102e-05, "loss": 2.1683, "step": 18841 }, { "epoch": 76.90612244897959, "grad_norm": 36.302669525146484, "learning_rate": 1.1571428571428573e-05, "loss": 1.1554, "step": 18842 }, { "epoch": 76.91020408163266, "grad_norm": 33.68654251098633, "learning_rate": 1.1569387755102042e-05, "loss": 1.6718, "step": 18843 }, { "epoch": 76.91428571428571, "grad_norm": 33.87030792236328, "learning_rate": 1.1567346938775512e-05, "loss": 1.2607, "step": 18844 }, { "epoch": 76.91836734693878, "grad_norm": 16.3138484954834, "learning_rate": 1.1565306122448979e-05, "loss": 0.1447, "step": 18845 }, { "epoch": 76.92244897959183, "grad_norm": 21.77088737487793, "learning_rate": 1.156326530612245e-05, "loss": 1.4441, "step": 18846 }, { "epoch": 76.9265306122449, "grad_norm": 36.82709503173828, "learning_rate": 1.1561224489795918e-05, "loss": 1.2731, "step": 18847 }, { "epoch": 76.93061224489796, "grad_norm": 20.167200088500977, "learning_rate": 1.1559183673469389e-05, "loss": 0.1215, "step": 18848 }, { "epoch": 76.93469387755103, "grad_norm": 34.461456298828125, "learning_rate": 1.1557142857142858e-05, "loss": 1.1746, "step": 18849 }, { "epoch": 76.93877551020408, "grad_norm": 19.531612396240234, "learning_rate": 1.1555102040816328e-05, "loss": 2.0102, "step": 18850 }, { "epoch": 76.94285714285714, "grad_norm": 27.38936996459961, "learning_rate": 1.1553061224489795e-05, "loss": 1.6832, "step": 18851 }, { "epoch": 76.9469387755102, "grad_norm": 27.96805763244629, "learning_rate": 1.1551020408163266e-05, "loss": 1.6522, "step": 18852 }, { "epoch": 76.95102040816326, "grad_norm": 29.77520179748535, "learning_rate": 1.1548979591836735e-05, "loss": 1.3353, "step": 18853 }, { "epoch": 76.95510204081633, "grad_norm": 38.880985260009766, "learning_rate": 1.1546938775510205e-05, "loss": 1.3828, "step": 18854 }, { "epoch": 76.95918367346938, "grad_norm": 37.780574798583984, "learning_rate": 1.1544897959183674e-05, "loss": 0.6564, "step": 18855 }, { "epoch": 76.96326530612245, "grad_norm": 34.306854248046875, "learning_rate": 1.1542857142857143e-05, "loss": 0.4233, "step": 18856 }, { "epoch": 76.9673469387755, "grad_norm": 34.72721481323242, "learning_rate": 1.1540816326530612e-05, "loss": 0.3504, "step": 18857 }, { "epoch": 76.97142857142858, "grad_norm": 32.3233528137207, "learning_rate": 1.1538775510204082e-05, "loss": 0.4267, "step": 18858 }, { "epoch": 76.97551020408163, "grad_norm": 21.46146011352539, "learning_rate": 1.1536734693877551e-05, "loss": 1.7457, "step": 18859 }, { "epoch": 76.9795918367347, "grad_norm": 22.079452514648438, "learning_rate": 1.1534693877551022e-05, "loss": 2.0705, "step": 18860 }, { "epoch": 76.98367346938775, "grad_norm": 26.09972381591797, "learning_rate": 1.153265306122449e-05, "loss": 2.157, "step": 18861 }, { "epoch": 76.98775510204082, "grad_norm": 39.09357833862305, "learning_rate": 1.153061224489796e-05, "loss": 0.4959, "step": 18862 }, { "epoch": 76.99183673469388, "grad_norm": 30.88486671447754, "learning_rate": 1.1528571428571428e-05, "loss": 0.751, "step": 18863 }, { "epoch": 76.99591836734695, "grad_norm": 44.31724548339844, "learning_rate": 1.1526530612244899e-05, "loss": 0.3395, "step": 18864 }, { "epoch": 77.0, "grad_norm": 29.67048454284668, "learning_rate": 1.1524489795918367e-05, "loss": 1.8202, "step": 18865 }, { "epoch": 77.00408163265305, "grad_norm": 50.717010498046875, "learning_rate": 1.1522448979591838e-05, "loss": 1.0478, "step": 18866 }, { "epoch": 77.00816326530612, "grad_norm": 33.04330062866211, "learning_rate": 1.1520408163265307e-05, "loss": 1.3142, "step": 18867 }, { "epoch": 77.01224489795918, "grad_norm": 23.844356536865234, "learning_rate": 1.1518367346938776e-05, "loss": 2.006, "step": 18868 }, { "epoch": 77.01632653061225, "grad_norm": 39.124935150146484, "learning_rate": 1.1516326530612246e-05, "loss": 0.7276, "step": 18869 }, { "epoch": 77.0204081632653, "grad_norm": 21.463960647583008, "learning_rate": 1.1514285714285715e-05, "loss": 2.1677, "step": 18870 }, { "epoch": 77.02448979591837, "grad_norm": 24.954450607299805, "learning_rate": 1.1512244897959185e-05, "loss": 1.9515, "step": 18871 }, { "epoch": 77.02857142857142, "grad_norm": 21.951152801513672, "learning_rate": 1.1510204081632654e-05, "loss": 1.5568, "step": 18872 }, { "epoch": 77.0326530612245, "grad_norm": 24.46220588684082, "learning_rate": 1.1508163265306123e-05, "loss": 1.5886, "step": 18873 }, { "epoch": 77.03673469387755, "grad_norm": 28.037200927734375, "learning_rate": 1.1506122448979592e-05, "loss": 1.4564, "step": 18874 }, { "epoch": 77.04081632653062, "grad_norm": 38.11434555053711, "learning_rate": 1.1504081632653062e-05, "loss": 0.5863, "step": 18875 }, { "epoch": 77.04489795918367, "grad_norm": 30.175466537475586, "learning_rate": 1.1502040816326531e-05, "loss": 0.7082, "step": 18876 }, { "epoch": 77.04897959183674, "grad_norm": 36.66768264770508, "learning_rate": 1.1500000000000002e-05, "loss": 0.4659, "step": 18877 }, { "epoch": 77.0530612244898, "grad_norm": 34.888545989990234, "learning_rate": 1.1497959183673469e-05, "loss": 0.3312, "step": 18878 }, { "epoch": 77.05714285714286, "grad_norm": 32.558349609375, "learning_rate": 1.149591836734694e-05, "loss": 1.1375, "step": 18879 }, { "epoch": 77.06122448979592, "grad_norm": 22.364158630371094, "learning_rate": 1.1493877551020408e-05, "loss": 2.0442, "step": 18880 }, { "epoch": 77.06530612244897, "grad_norm": 46.46649169921875, "learning_rate": 1.1491836734693879e-05, "loss": 0.5444, "step": 18881 }, { "epoch": 77.06938775510204, "grad_norm": 33.747650146484375, "learning_rate": 1.1489795918367347e-05, "loss": 0.9316, "step": 18882 }, { "epoch": 77.0734693877551, "grad_norm": 19.90146255493164, "learning_rate": 1.1487755102040818e-05, "loss": 0.1795, "step": 18883 }, { "epoch": 77.07755102040817, "grad_norm": 19.336742401123047, "learning_rate": 1.1485714285714285e-05, "loss": 2.2073, "step": 18884 }, { "epoch": 77.08163265306122, "grad_norm": 38.781700134277344, "learning_rate": 1.1483673469387756e-05, "loss": 0.6895, "step": 18885 }, { "epoch": 77.08571428571429, "grad_norm": 21.800416946411133, "learning_rate": 1.1481632653061224e-05, "loss": 1.4686, "step": 18886 }, { "epoch": 77.08979591836734, "grad_norm": 28.915573120117188, "learning_rate": 1.1479591836734695e-05, "loss": 1.3554, "step": 18887 }, { "epoch": 77.09387755102041, "grad_norm": 33.42341995239258, "learning_rate": 1.1477551020408164e-05, "loss": 0.6764, "step": 18888 }, { "epoch": 77.09795918367347, "grad_norm": 14.458025932312012, "learning_rate": 1.1475510204081633e-05, "loss": 0.1062, "step": 18889 }, { "epoch": 77.10204081632654, "grad_norm": 33.98839569091797, "learning_rate": 1.1473469387755101e-05, "loss": 0.5708, "step": 18890 }, { "epoch": 77.10612244897959, "grad_norm": 34.61210250854492, "learning_rate": 1.1471428571428572e-05, "loss": 0.6947, "step": 18891 }, { "epoch": 77.11020408163266, "grad_norm": 30.288286209106445, "learning_rate": 1.146938775510204e-05, "loss": 0.4595, "step": 18892 }, { "epoch": 77.11428571428571, "grad_norm": 47.68002700805664, "learning_rate": 1.1467346938775511e-05, "loss": 0.8619, "step": 18893 }, { "epoch": 77.11836734693877, "grad_norm": 25.612152099609375, "learning_rate": 1.146530612244898e-05, "loss": 1.5079, "step": 18894 }, { "epoch": 77.12244897959184, "grad_norm": 26.76778221130371, "learning_rate": 1.1463265306122449e-05, "loss": 1.6791, "step": 18895 }, { "epoch": 77.12653061224489, "grad_norm": 44.19997787475586, "learning_rate": 1.146122448979592e-05, "loss": 0.9739, "step": 18896 }, { "epoch": 77.13061224489796, "grad_norm": 38.519981384277344, "learning_rate": 1.1459183673469388e-05, "loss": 0.7853, "step": 18897 }, { "epoch": 77.13469387755102, "grad_norm": 19.93332862854004, "learning_rate": 1.1457142857142859e-05, "loss": 0.1571, "step": 18898 }, { "epoch": 77.13877551020408, "grad_norm": 23.05974006652832, "learning_rate": 1.1455102040816328e-05, "loss": 2.3594, "step": 18899 }, { "epoch": 77.14285714285714, "grad_norm": 27.820947647094727, "learning_rate": 1.1453061224489796e-05, "loss": 0.7912, "step": 18900 }, { "epoch": 77.14693877551021, "grad_norm": 45.00312805175781, "learning_rate": 1.1451020408163265e-05, "loss": 0.7573, "step": 18901 }, { "epoch": 77.15102040816326, "grad_norm": 40.17863082885742, "learning_rate": 1.1448979591836736e-05, "loss": 0.4954, "step": 18902 }, { "epoch": 77.15510204081633, "grad_norm": 32.62615966796875, "learning_rate": 1.1446938775510205e-05, "loss": 1.7939, "step": 18903 }, { "epoch": 77.15918367346939, "grad_norm": 31.534046173095703, "learning_rate": 1.1444897959183675e-05, "loss": 0.4058, "step": 18904 }, { "epoch": 77.16326530612245, "grad_norm": 31.236705780029297, "learning_rate": 1.1442857142857144e-05, "loss": 1.3201, "step": 18905 }, { "epoch": 77.16734693877551, "grad_norm": 27.067630767822266, "learning_rate": 1.1440816326530613e-05, "loss": 1.2976, "step": 18906 }, { "epoch": 77.17142857142858, "grad_norm": 36.406089782714844, "learning_rate": 1.1438775510204082e-05, "loss": 1.0831, "step": 18907 }, { "epoch": 77.17551020408163, "grad_norm": 23.004003524780273, "learning_rate": 1.1436734693877552e-05, "loss": 0.1574, "step": 18908 }, { "epoch": 77.17959183673469, "grad_norm": 34.9970588684082, "learning_rate": 1.1434693877551021e-05, "loss": 0.3231, "step": 18909 }, { "epoch": 77.18367346938776, "grad_norm": 33.01380157470703, "learning_rate": 1.1432653061224491e-05, "loss": 0.7759, "step": 18910 }, { "epoch": 77.18775510204081, "grad_norm": 30.545076370239258, "learning_rate": 1.1430612244897959e-05, "loss": 1.9217, "step": 18911 }, { "epoch": 77.19183673469388, "grad_norm": 27.76570701599121, "learning_rate": 1.1428571428571429e-05, "loss": 0.419, "step": 18912 }, { "epoch": 77.19591836734693, "grad_norm": 29.005088806152344, "learning_rate": 1.1426530612244898e-05, "loss": 1.141, "step": 18913 }, { "epoch": 77.2, "grad_norm": 26.783422470092773, "learning_rate": 1.1424489795918368e-05, "loss": 0.2764, "step": 18914 }, { "epoch": 77.20408163265306, "grad_norm": 39.409461975097656, "learning_rate": 1.1422448979591837e-05, "loss": 0.8717, "step": 18915 }, { "epoch": 77.20816326530613, "grad_norm": 35.18766403198242, "learning_rate": 1.1420408163265308e-05, "loss": 1.0861, "step": 18916 }, { "epoch": 77.21224489795918, "grad_norm": 42.094451904296875, "learning_rate": 1.1418367346938775e-05, "loss": 0.823, "step": 18917 }, { "epoch": 77.21632653061225, "grad_norm": 45.8389778137207, "learning_rate": 1.1416326530612245e-05, "loss": 0.7796, "step": 18918 }, { "epoch": 77.2204081632653, "grad_norm": 27.283538818359375, "learning_rate": 1.1414285714285714e-05, "loss": 1.7579, "step": 18919 }, { "epoch": 77.22448979591837, "grad_norm": 34.383182525634766, "learning_rate": 1.1412244897959185e-05, "loss": 0.5446, "step": 18920 }, { "epoch": 77.22857142857143, "grad_norm": 23.954181671142578, "learning_rate": 1.1410204081632653e-05, "loss": 0.1206, "step": 18921 }, { "epoch": 77.2326530612245, "grad_norm": 37.422019958496094, "learning_rate": 1.1408163265306122e-05, "loss": 0.9027, "step": 18922 }, { "epoch": 77.23673469387755, "grad_norm": 27.065017700195312, "learning_rate": 1.1406122448979593e-05, "loss": 0.2733, "step": 18923 }, { "epoch": 77.2408163265306, "grad_norm": 26.911808013916016, "learning_rate": 1.1404081632653062e-05, "loss": 1.5927, "step": 18924 }, { "epoch": 77.24489795918367, "grad_norm": 27.04705810546875, "learning_rate": 1.1402040816326532e-05, "loss": 0.118, "step": 18925 }, { "epoch": 77.24897959183673, "grad_norm": 33.62211608886719, "learning_rate": 1.1400000000000001e-05, "loss": 0.7804, "step": 18926 }, { "epoch": 77.2530612244898, "grad_norm": 38.54231643676758, "learning_rate": 1.139795918367347e-05, "loss": 0.4399, "step": 18927 }, { "epoch": 77.25714285714285, "grad_norm": 33.40895462036133, "learning_rate": 1.1395918367346939e-05, "loss": 1.3303, "step": 18928 }, { "epoch": 77.26122448979592, "grad_norm": 40.821170806884766, "learning_rate": 1.1393877551020409e-05, "loss": 1.1412, "step": 18929 }, { "epoch": 77.26530612244898, "grad_norm": 25.510366439819336, "learning_rate": 1.1391836734693878e-05, "loss": 1.5718, "step": 18930 }, { "epoch": 77.26938775510204, "grad_norm": 29.442888259887695, "learning_rate": 1.1389795918367348e-05, "loss": 1.6149, "step": 18931 }, { "epoch": 77.2734693877551, "grad_norm": 32.608375549316406, "learning_rate": 1.1387755102040817e-05, "loss": 1.0273, "step": 18932 }, { "epoch": 77.27755102040817, "grad_norm": 37.58829116821289, "learning_rate": 1.1385714285714286e-05, "loss": 0.5097, "step": 18933 }, { "epoch": 77.28163265306122, "grad_norm": 28.59895133972168, "learning_rate": 1.1383673469387755e-05, "loss": 1.3966, "step": 18934 }, { "epoch": 77.28571428571429, "grad_norm": 30.4081974029541, "learning_rate": 1.1381632653061225e-05, "loss": 0.5462, "step": 18935 }, { "epoch": 77.28979591836735, "grad_norm": 25.627351760864258, "learning_rate": 1.1379591836734694e-05, "loss": 1.6028, "step": 18936 }, { "epoch": 77.29387755102042, "grad_norm": 30.53961753845215, "learning_rate": 1.1377551020408165e-05, "loss": 0.6438, "step": 18937 }, { "epoch": 77.29795918367347, "grad_norm": 37.520484924316406, "learning_rate": 1.1375510204081634e-05, "loss": 0.6746, "step": 18938 }, { "epoch": 77.30204081632652, "grad_norm": 18.383756637573242, "learning_rate": 1.1373469387755102e-05, "loss": 2.4503, "step": 18939 }, { "epoch": 77.3061224489796, "grad_norm": 27.489164352416992, "learning_rate": 1.1371428571428571e-05, "loss": 1.5153, "step": 18940 }, { "epoch": 77.31020408163265, "grad_norm": 25.690208435058594, "learning_rate": 1.1369387755102042e-05, "loss": 0.3784, "step": 18941 }, { "epoch": 77.31428571428572, "grad_norm": 34.20573806762695, "learning_rate": 1.136734693877551e-05, "loss": 1.4682, "step": 18942 }, { "epoch": 77.31836734693877, "grad_norm": 40.73683547973633, "learning_rate": 1.1365306122448981e-05, "loss": 0.2608, "step": 18943 }, { "epoch": 77.32244897959184, "grad_norm": 37.025142669677734, "learning_rate": 1.1363265306122448e-05, "loss": 0.8445, "step": 18944 }, { "epoch": 77.3265306122449, "grad_norm": 38.34452819824219, "learning_rate": 1.1361224489795919e-05, "loss": 1.0528, "step": 18945 }, { "epoch": 77.33061224489796, "grad_norm": 26.272899627685547, "learning_rate": 1.1359183673469388e-05, "loss": 1.532, "step": 18946 }, { "epoch": 77.33469387755102, "grad_norm": 32.6060905456543, "learning_rate": 1.1357142857142858e-05, "loss": 0.3156, "step": 18947 }, { "epoch": 77.33877551020409, "grad_norm": 35.9422721862793, "learning_rate": 1.1355102040816327e-05, "loss": 1.0272, "step": 18948 }, { "epoch": 77.34285714285714, "grad_norm": 26.071243286132812, "learning_rate": 1.1353061224489797e-05, "loss": 1.4765, "step": 18949 }, { "epoch": 77.34693877551021, "grad_norm": 31.305429458618164, "learning_rate": 1.1351020408163266e-05, "loss": 0.7783, "step": 18950 }, { "epoch": 77.35102040816327, "grad_norm": 28.52178192138672, "learning_rate": 1.1348979591836735e-05, "loss": 1.4531, "step": 18951 }, { "epoch": 77.35510204081632, "grad_norm": 40.123775482177734, "learning_rate": 1.1346938775510206e-05, "loss": 0.6088, "step": 18952 }, { "epoch": 77.35918367346939, "grad_norm": 39.869659423828125, "learning_rate": 1.1344897959183674e-05, "loss": 0.6276, "step": 18953 }, { "epoch": 77.36326530612244, "grad_norm": 39.59489822387695, "learning_rate": 1.1342857142857143e-05, "loss": 1.0543, "step": 18954 }, { "epoch": 77.36734693877551, "grad_norm": 46.00367736816406, "learning_rate": 1.1340816326530612e-05, "loss": 0.3504, "step": 18955 }, { "epoch": 77.37142857142857, "grad_norm": 40.49078369140625, "learning_rate": 1.1338775510204082e-05, "loss": 0.8418, "step": 18956 }, { "epoch": 77.37551020408164, "grad_norm": 22.69793701171875, "learning_rate": 1.1336734693877551e-05, "loss": 0.2953, "step": 18957 }, { "epoch": 77.37959183673469, "grad_norm": 36.2279167175293, "learning_rate": 1.1334693877551022e-05, "loss": 0.6392, "step": 18958 }, { "epoch": 77.38367346938776, "grad_norm": 48.45732879638672, "learning_rate": 1.133265306122449e-05, "loss": 1.3817, "step": 18959 }, { "epoch": 77.38775510204081, "grad_norm": 41.59475326538086, "learning_rate": 1.1330612244897961e-05, "loss": 1.2697, "step": 18960 }, { "epoch": 77.39183673469388, "grad_norm": 43.395748138427734, "learning_rate": 1.1328571428571428e-05, "loss": 0.4597, "step": 18961 }, { "epoch": 77.39591836734694, "grad_norm": 37.68294906616211, "learning_rate": 1.1326530612244899e-05, "loss": 1.4738, "step": 18962 }, { "epoch": 77.4, "grad_norm": 38.853206634521484, "learning_rate": 1.1324489795918368e-05, "loss": 0.4191, "step": 18963 }, { "epoch": 77.40408163265306, "grad_norm": 31.429197311401367, "learning_rate": 1.1322448979591838e-05, "loss": 0.9886, "step": 18964 }, { "epoch": 77.40816326530613, "grad_norm": 55.10205841064453, "learning_rate": 1.1320408163265307e-05, "loss": 0.3701, "step": 18965 }, { "epoch": 77.41224489795918, "grad_norm": 40.140689849853516, "learning_rate": 1.1318367346938776e-05, "loss": 0.4456, "step": 18966 }, { "epoch": 77.41632653061224, "grad_norm": 20.862850189208984, "learning_rate": 1.1316326530612245e-05, "loss": 2.1243, "step": 18967 }, { "epoch": 77.42040816326531, "grad_norm": 39.2320671081543, "learning_rate": 1.1314285714285715e-05, "loss": 0.5214, "step": 18968 }, { "epoch": 77.42448979591836, "grad_norm": 33.34282302856445, "learning_rate": 1.1312244897959184e-05, "loss": 0.831, "step": 18969 }, { "epoch": 77.42857142857143, "grad_norm": 30.459793090820312, "learning_rate": 1.1310204081632654e-05, "loss": 1.532, "step": 18970 }, { "epoch": 77.43265306122449, "grad_norm": 38.12798309326172, "learning_rate": 1.1308163265306122e-05, "loss": 1.3751, "step": 18971 }, { "epoch": 77.43673469387755, "grad_norm": 17.66819953918457, "learning_rate": 1.1306122448979592e-05, "loss": 0.1206, "step": 18972 }, { "epoch": 77.44081632653061, "grad_norm": 17.193422317504883, "learning_rate": 1.1304081632653061e-05, "loss": 2.6674, "step": 18973 }, { "epoch": 77.44489795918368, "grad_norm": 42.91717529296875, "learning_rate": 1.1302040816326531e-05, "loss": 0.8736, "step": 18974 }, { "epoch": 77.44897959183673, "grad_norm": 38.793209075927734, "learning_rate": 1.13e-05, "loss": 0.6017, "step": 18975 }, { "epoch": 77.4530612244898, "grad_norm": 34.87523651123047, "learning_rate": 1.129795918367347e-05, "loss": 1.5665, "step": 18976 }, { "epoch": 77.45714285714286, "grad_norm": 22.483915328979492, "learning_rate": 1.129591836734694e-05, "loss": 1.5786, "step": 18977 }, { "epoch": 77.46122448979592, "grad_norm": 24.940940856933594, "learning_rate": 1.1293877551020408e-05, "loss": 1.2835, "step": 18978 }, { "epoch": 77.46530612244898, "grad_norm": 35.79182815551758, "learning_rate": 1.1291836734693877e-05, "loss": 1.188, "step": 18979 }, { "epoch": 77.46938775510205, "grad_norm": 21.450639724731445, "learning_rate": 1.1289795918367348e-05, "loss": 0.1317, "step": 18980 }, { "epoch": 77.4734693877551, "grad_norm": 30.959354400634766, "learning_rate": 1.1287755102040817e-05, "loss": 1.2809, "step": 18981 }, { "epoch": 77.47755102040816, "grad_norm": 23.912939071655273, "learning_rate": 1.1285714285714285e-05, "loss": 0.1465, "step": 18982 }, { "epoch": 77.48163265306123, "grad_norm": 27.850860595703125, "learning_rate": 1.1283673469387756e-05, "loss": 1.4838, "step": 18983 }, { "epoch": 77.48571428571428, "grad_norm": 16.31401252746582, "learning_rate": 1.1281632653061225e-05, "loss": 0.1113, "step": 18984 }, { "epoch": 77.48979591836735, "grad_norm": 28.810081481933594, "learning_rate": 1.1279591836734695e-05, "loss": 2.0805, "step": 18985 }, { "epoch": 77.4938775510204, "grad_norm": 41.483116149902344, "learning_rate": 1.1277551020408164e-05, "loss": 1.4631, "step": 18986 }, { "epoch": 77.49795918367347, "grad_norm": 37.460941314697266, "learning_rate": 1.1275510204081635e-05, "loss": 0.394, "step": 18987 }, { "epoch": 77.50204081632653, "grad_norm": 35.36676788330078, "learning_rate": 1.1273469387755102e-05, "loss": 1.202, "step": 18988 }, { "epoch": 77.5061224489796, "grad_norm": 35.465511322021484, "learning_rate": 1.1271428571428572e-05, "loss": 0.4518, "step": 18989 }, { "epoch": 77.51020408163265, "grad_norm": 12.812056541442871, "learning_rate": 1.1269387755102041e-05, "loss": 0.0734, "step": 18990 }, { "epoch": 77.51428571428572, "grad_norm": 34.00222396850586, "learning_rate": 1.1267346938775512e-05, "loss": 1.0993, "step": 18991 }, { "epoch": 77.51836734693877, "grad_norm": 29.14666748046875, "learning_rate": 1.126530612244898e-05, "loss": 1.3132, "step": 18992 }, { "epoch": 77.52244897959184, "grad_norm": 30.88939666748047, "learning_rate": 1.1263265306122449e-05, "loss": 0.3905, "step": 18993 }, { "epoch": 77.5265306122449, "grad_norm": 24.156389236450195, "learning_rate": 1.1261224489795918e-05, "loss": 0.2579, "step": 18994 }, { "epoch": 77.53061224489795, "grad_norm": 28.44242286682129, "learning_rate": 1.1259183673469388e-05, "loss": 1.7066, "step": 18995 }, { "epoch": 77.53469387755102, "grad_norm": 51.76512908935547, "learning_rate": 1.1257142857142857e-05, "loss": 0.4297, "step": 18996 }, { "epoch": 77.53877551020408, "grad_norm": 44.775753021240234, "learning_rate": 1.1255102040816328e-05, "loss": 0.4255, "step": 18997 }, { "epoch": 77.54285714285714, "grad_norm": 26.50697135925293, "learning_rate": 1.1253061224489797e-05, "loss": 1.994, "step": 18998 }, { "epoch": 77.5469387755102, "grad_norm": 40.04758071899414, "learning_rate": 1.1251020408163265e-05, "loss": 0.31, "step": 18999 }, { "epoch": 77.55102040816327, "grad_norm": 30.601337432861328, "learning_rate": 1.1248979591836734e-05, "loss": 1.7185, "step": 19000 }, { "epoch": 77.55510204081632, "grad_norm": 39.727230072021484, "learning_rate": 1.1246938775510205e-05, "loss": 0.6795, "step": 19001 }, { "epoch": 77.55918367346939, "grad_norm": 34.47893142700195, "learning_rate": 1.1244897959183674e-05, "loss": 0.672, "step": 19002 }, { "epoch": 77.56326530612245, "grad_norm": 34.26758575439453, "learning_rate": 1.1242857142857144e-05, "loss": 1.2242, "step": 19003 }, { "epoch": 77.56734693877551, "grad_norm": 10.82506275177002, "learning_rate": 1.1240816326530613e-05, "loss": 3.0543, "step": 19004 }, { "epoch": 77.57142857142857, "grad_norm": 24.8297176361084, "learning_rate": 1.1238775510204082e-05, "loss": 1.7399, "step": 19005 }, { "epoch": 77.57551020408164, "grad_norm": 27.577392578125, "learning_rate": 1.123673469387755e-05, "loss": 1.7238, "step": 19006 }, { "epoch": 77.57959183673469, "grad_norm": 26.76764678955078, "learning_rate": 1.1234693877551021e-05, "loss": 1.2681, "step": 19007 }, { "epoch": 77.58367346938776, "grad_norm": 27.835622787475586, "learning_rate": 1.123265306122449e-05, "loss": 1.5187, "step": 19008 }, { "epoch": 77.58775510204082, "grad_norm": 39.015872955322266, "learning_rate": 1.123061224489796e-05, "loss": 0.9985, "step": 19009 }, { "epoch": 77.59183673469387, "grad_norm": 24.775760650634766, "learning_rate": 1.122857142857143e-05, "loss": 1.546, "step": 19010 }, { "epoch": 77.59591836734694, "grad_norm": 33.40849685668945, "learning_rate": 1.1226530612244898e-05, "loss": 1.6127, "step": 19011 }, { "epoch": 77.6, "grad_norm": 31.453357696533203, "learning_rate": 1.1224489795918369e-05, "loss": 1.4389, "step": 19012 }, { "epoch": 77.60408163265306, "grad_norm": 27.824338912963867, "learning_rate": 1.1222448979591837e-05, "loss": 1.7147, "step": 19013 }, { "epoch": 77.60816326530612, "grad_norm": 22.427536010742188, "learning_rate": 1.1220408163265308e-05, "loss": 1.4224, "step": 19014 }, { "epoch": 77.61224489795919, "grad_norm": 36.57170486450195, "learning_rate": 1.1218367346938775e-05, "loss": 0.449, "step": 19015 }, { "epoch": 77.61632653061224, "grad_norm": 22.324899673461914, "learning_rate": 1.1216326530612246e-05, "loss": 1.6202, "step": 19016 }, { "epoch": 77.62040816326531, "grad_norm": 28.938167572021484, "learning_rate": 1.1214285714285714e-05, "loss": 0.2833, "step": 19017 }, { "epoch": 77.62448979591836, "grad_norm": 24.318693161010742, "learning_rate": 1.1212244897959185e-05, "loss": 2.0262, "step": 19018 }, { "epoch": 77.62857142857143, "grad_norm": 25.937196731567383, "learning_rate": 1.1210204081632654e-05, "loss": 1.5605, "step": 19019 }, { "epoch": 77.63265306122449, "grad_norm": 30.614818572998047, "learning_rate": 1.1208163265306124e-05, "loss": 0.9279, "step": 19020 }, { "epoch": 77.63673469387756, "grad_norm": 31.22758674621582, "learning_rate": 1.1206122448979591e-05, "loss": 1.1059, "step": 19021 }, { "epoch": 77.64081632653061, "grad_norm": 35.28002166748047, "learning_rate": 1.1204081632653062e-05, "loss": 0.6978, "step": 19022 }, { "epoch": 77.64489795918368, "grad_norm": 28.464956283569336, "learning_rate": 1.120204081632653e-05, "loss": 0.8734, "step": 19023 }, { "epoch": 77.64897959183673, "grad_norm": 39.232757568359375, "learning_rate": 1.1200000000000001e-05, "loss": 0.6334, "step": 19024 }, { "epoch": 77.65306122448979, "grad_norm": 34.329856872558594, "learning_rate": 1.119795918367347e-05, "loss": 1.2065, "step": 19025 }, { "epoch": 77.65714285714286, "grad_norm": 32.309268951416016, "learning_rate": 1.1195918367346939e-05, "loss": 0.8366, "step": 19026 }, { "epoch": 77.66122448979591, "grad_norm": 34.66683578491211, "learning_rate": 1.1193877551020408e-05, "loss": 1.276, "step": 19027 }, { "epoch": 77.66530612244898, "grad_norm": 35.7974853515625, "learning_rate": 1.1191836734693878e-05, "loss": 0.9368, "step": 19028 }, { "epoch": 77.66938775510204, "grad_norm": 42.4003791809082, "learning_rate": 1.1189795918367347e-05, "loss": 1.2288, "step": 19029 }, { "epoch": 77.6734693877551, "grad_norm": 22.919897079467773, "learning_rate": 1.1187755102040817e-05, "loss": 1.6137, "step": 19030 }, { "epoch": 77.67755102040816, "grad_norm": 21.725278854370117, "learning_rate": 1.1185714285714286e-05, "loss": 2.0631, "step": 19031 }, { "epoch": 77.68163265306123, "grad_norm": 22.230371475219727, "learning_rate": 1.1183673469387755e-05, "loss": 1.5704, "step": 19032 }, { "epoch": 77.68571428571428, "grad_norm": 35.04036331176758, "learning_rate": 1.1181632653061224e-05, "loss": 0.7353, "step": 19033 }, { "epoch": 77.68979591836735, "grad_norm": 28.54747200012207, "learning_rate": 1.1179591836734694e-05, "loss": 1.7912, "step": 19034 }, { "epoch": 77.6938775510204, "grad_norm": 39.555110931396484, "learning_rate": 1.1177551020408163e-05, "loss": 1.0019, "step": 19035 }, { "epoch": 77.69795918367348, "grad_norm": 32.31488037109375, "learning_rate": 1.1175510204081634e-05, "loss": 1.1867, "step": 19036 }, { "epoch": 77.70204081632653, "grad_norm": 1.928812026977539, "learning_rate": 1.1173469387755103e-05, "loss": 0.0324, "step": 19037 }, { "epoch": 77.70612244897958, "grad_norm": 32.493896484375, "learning_rate": 1.1171428571428571e-05, "loss": 1.1244, "step": 19038 }, { "epoch": 77.71020408163265, "grad_norm": 39.22985076904297, "learning_rate": 1.1169387755102042e-05, "loss": 0.6992, "step": 19039 }, { "epoch": 77.71428571428571, "grad_norm": 46.704463958740234, "learning_rate": 1.116734693877551e-05, "loss": 0.7376, "step": 19040 }, { "epoch": 77.71836734693878, "grad_norm": 30.75138282775879, "learning_rate": 1.1165306122448981e-05, "loss": 1.084, "step": 19041 }, { "epoch": 77.72244897959183, "grad_norm": 16.23151397705078, "learning_rate": 1.116326530612245e-05, "loss": 0.1195, "step": 19042 }, { "epoch": 77.7265306122449, "grad_norm": 34.97035598754883, "learning_rate": 1.1161224489795919e-05, "loss": 0.4684, "step": 19043 }, { "epoch": 77.73061224489796, "grad_norm": 37.61372756958008, "learning_rate": 1.1159183673469388e-05, "loss": 0.6073, "step": 19044 }, { "epoch": 77.73469387755102, "grad_norm": 59.45269775390625, "learning_rate": 1.1157142857142858e-05, "loss": 1.3793, "step": 19045 }, { "epoch": 77.73877551020408, "grad_norm": 15.041055679321289, "learning_rate": 1.1155102040816327e-05, "loss": 0.1097, "step": 19046 }, { "epoch": 77.74285714285715, "grad_norm": 35.80141067504883, "learning_rate": 1.1153061224489798e-05, "loss": 0.9547, "step": 19047 }, { "epoch": 77.7469387755102, "grad_norm": 19.335975646972656, "learning_rate": 1.1151020408163265e-05, "loss": 0.1553, "step": 19048 }, { "epoch": 77.75102040816327, "grad_norm": 33.12737274169922, "learning_rate": 1.1148979591836735e-05, "loss": 1.5104, "step": 19049 }, { "epoch": 77.75510204081633, "grad_norm": 52.83125686645508, "learning_rate": 1.1146938775510204e-05, "loss": 0.4908, "step": 19050 }, { "epoch": 77.7591836734694, "grad_norm": 22.869483947753906, "learning_rate": 1.1144897959183675e-05, "loss": 2.1772, "step": 19051 }, { "epoch": 77.76326530612245, "grad_norm": 32.787479400634766, "learning_rate": 1.1142857142857143e-05, "loss": 1.3614, "step": 19052 }, { "epoch": 77.7673469387755, "grad_norm": 27.868743896484375, "learning_rate": 1.1140816326530614e-05, "loss": 1.6816, "step": 19053 }, { "epoch": 77.77142857142857, "grad_norm": 22.159711837768555, "learning_rate": 1.1138775510204081e-05, "loss": 1.7147, "step": 19054 }, { "epoch": 77.77551020408163, "grad_norm": 26.929492950439453, "learning_rate": 1.1136734693877552e-05, "loss": 0.3358, "step": 19055 }, { "epoch": 77.7795918367347, "grad_norm": 24.667011260986328, "learning_rate": 1.113469387755102e-05, "loss": 2.199, "step": 19056 }, { "epoch": 77.78367346938775, "grad_norm": 15.883163452148438, "learning_rate": 1.1132653061224491e-05, "loss": 2.3757, "step": 19057 }, { "epoch": 77.78775510204082, "grad_norm": 27.5494441986084, "learning_rate": 1.113061224489796e-05, "loss": 1.5445, "step": 19058 }, { "epoch": 77.79183673469387, "grad_norm": 19.8000431060791, "learning_rate": 1.1128571428571429e-05, "loss": 1.9706, "step": 19059 }, { "epoch": 77.79591836734694, "grad_norm": 35.08214569091797, "learning_rate": 1.1126530612244897e-05, "loss": 1.0783, "step": 19060 }, { "epoch": 77.8, "grad_norm": 32.10435104370117, "learning_rate": 1.1124489795918368e-05, "loss": 1.17, "step": 19061 }, { "epoch": 77.80408163265307, "grad_norm": 32.18672561645508, "learning_rate": 1.1122448979591837e-05, "loss": 1.4646, "step": 19062 }, { "epoch": 77.80816326530612, "grad_norm": 24.231122970581055, "learning_rate": 1.1120408163265307e-05, "loss": 1.6635, "step": 19063 }, { "epoch": 77.81224489795919, "grad_norm": 37.14182662963867, "learning_rate": 1.1118367346938776e-05, "loss": 1.3011, "step": 19064 }, { "epoch": 77.81632653061224, "grad_norm": 32.85578536987305, "learning_rate": 1.1116326530612245e-05, "loss": 1.3787, "step": 19065 }, { "epoch": 77.82040816326531, "grad_norm": 25.284372329711914, "learning_rate": 1.1114285714285715e-05, "loss": 0.3994, "step": 19066 }, { "epoch": 77.82448979591837, "grad_norm": 35.65812301635742, "learning_rate": 1.1112244897959184e-05, "loss": 0.9558, "step": 19067 }, { "epoch": 77.82857142857142, "grad_norm": 21.068378448486328, "learning_rate": 1.1110204081632655e-05, "loss": 2.1332, "step": 19068 }, { "epoch": 77.83265306122449, "grad_norm": 30.457298278808594, "learning_rate": 1.1108163265306123e-05, "loss": 0.5105, "step": 19069 }, { "epoch": 77.83673469387755, "grad_norm": 29.851581573486328, "learning_rate": 1.1106122448979592e-05, "loss": 0.3563, "step": 19070 }, { "epoch": 77.84081632653061, "grad_norm": 29.53965950012207, "learning_rate": 1.1104081632653061e-05, "loss": 1.6292, "step": 19071 }, { "epoch": 77.84489795918367, "grad_norm": 33.10186767578125, "learning_rate": 1.1102040816326532e-05, "loss": 1.4422, "step": 19072 }, { "epoch": 77.84897959183674, "grad_norm": 40.26807403564453, "learning_rate": 1.11e-05, "loss": 0.4473, "step": 19073 }, { "epoch": 77.85306122448979, "grad_norm": 22.369760513305664, "learning_rate": 1.1097959183673471e-05, "loss": 1.7955, "step": 19074 }, { "epoch": 77.85714285714286, "grad_norm": 42.33269119262695, "learning_rate": 1.109591836734694e-05, "loss": 0.9834, "step": 19075 }, { "epoch": 77.86122448979592, "grad_norm": 34.68679428100586, "learning_rate": 1.1093877551020409e-05, "loss": 1.1923, "step": 19076 }, { "epoch": 77.86530612244898, "grad_norm": 18.10934066772461, "learning_rate": 1.1091836734693877e-05, "loss": 2.1331, "step": 19077 }, { "epoch": 77.86938775510204, "grad_norm": 27.935129165649414, "learning_rate": 1.1089795918367348e-05, "loss": 1.2369, "step": 19078 }, { "epoch": 77.87346938775511, "grad_norm": 37.235252380371094, "learning_rate": 1.1087755102040817e-05, "loss": 0.8329, "step": 19079 }, { "epoch": 77.87755102040816, "grad_norm": 40.642452239990234, "learning_rate": 1.1085714285714287e-05, "loss": 0.4973, "step": 19080 }, { "epoch": 77.88163265306123, "grad_norm": 44.30293273925781, "learning_rate": 1.1083673469387754e-05, "loss": 0.715, "step": 19081 }, { "epoch": 77.88571428571429, "grad_norm": 30.006155014038086, "learning_rate": 1.1081632653061225e-05, "loss": 0.8704, "step": 19082 }, { "epoch": 77.88979591836734, "grad_norm": 26.16733741760254, "learning_rate": 1.1079591836734694e-05, "loss": 1.3674, "step": 19083 }, { "epoch": 77.89387755102041, "grad_norm": 20.803701400756836, "learning_rate": 1.1077551020408164e-05, "loss": 2.0451, "step": 19084 }, { "epoch": 77.89795918367346, "grad_norm": 33.46469497680664, "learning_rate": 1.1075510204081633e-05, "loss": 0.4176, "step": 19085 }, { "epoch": 77.90204081632653, "grad_norm": 36.07547378540039, "learning_rate": 1.1073469387755104e-05, "loss": 0.9396, "step": 19086 }, { "epoch": 77.90612244897959, "grad_norm": 12.020342826843262, "learning_rate": 1.107142857142857e-05, "loss": 0.156, "step": 19087 }, { "epoch": 77.91020408163266, "grad_norm": 35.32994079589844, "learning_rate": 1.1069387755102041e-05, "loss": 1.0985, "step": 19088 }, { "epoch": 77.91428571428571, "grad_norm": 39.91897201538086, "learning_rate": 1.106734693877551e-05, "loss": 0.7359, "step": 19089 }, { "epoch": 77.91836734693878, "grad_norm": 21.691370010375977, "learning_rate": 1.106530612244898e-05, "loss": 0.1686, "step": 19090 }, { "epoch": 77.92244897959183, "grad_norm": 35.95732498168945, "learning_rate": 1.106326530612245e-05, "loss": 0.4342, "step": 19091 }, { "epoch": 77.9265306122449, "grad_norm": 35.87548828125, "learning_rate": 1.1061224489795918e-05, "loss": 1.4774, "step": 19092 }, { "epoch": 77.93061224489796, "grad_norm": 28.577024459838867, "learning_rate": 1.1059183673469389e-05, "loss": 0.8452, "step": 19093 }, { "epoch": 77.93469387755103, "grad_norm": 31.593721389770508, "learning_rate": 1.1057142857142858e-05, "loss": 2.1017, "step": 19094 }, { "epoch": 77.93877551020408, "grad_norm": 41.16489791870117, "learning_rate": 1.1055102040816328e-05, "loss": 0.9846, "step": 19095 }, { "epoch": 77.94285714285714, "grad_norm": 24.068862915039062, "learning_rate": 1.1053061224489797e-05, "loss": 0.2983, "step": 19096 }, { "epoch": 77.9469387755102, "grad_norm": 28.796586990356445, "learning_rate": 1.1051020408163266e-05, "loss": 1.5678, "step": 19097 }, { "epoch": 77.95102040816326, "grad_norm": 34.14573669433594, "learning_rate": 1.1048979591836734e-05, "loss": 1.1137, "step": 19098 }, { "epoch": 77.95510204081633, "grad_norm": 30.585792541503906, "learning_rate": 1.1046938775510205e-05, "loss": 1.7129, "step": 19099 }, { "epoch": 77.95918367346938, "grad_norm": 29.60816764831543, "learning_rate": 1.1044897959183674e-05, "loss": 0.2838, "step": 19100 }, { "epoch": 77.96326530612245, "grad_norm": 27.986705780029297, "learning_rate": 1.1042857142857144e-05, "loss": 0.3458, "step": 19101 }, { "epoch": 77.9673469387755, "grad_norm": 38.59121322631836, "learning_rate": 1.1040816326530613e-05, "loss": 1.3162, "step": 19102 }, { "epoch": 77.97142857142858, "grad_norm": 27.653310775756836, "learning_rate": 1.1038775510204082e-05, "loss": 0.7313, "step": 19103 }, { "epoch": 77.97551020408163, "grad_norm": 31.200908660888672, "learning_rate": 1.103673469387755e-05, "loss": 1.4873, "step": 19104 }, { "epoch": 77.9795918367347, "grad_norm": 58.809295654296875, "learning_rate": 1.1034693877551021e-05, "loss": 1.1147, "step": 19105 }, { "epoch": 77.98367346938775, "grad_norm": 28.669618606567383, "learning_rate": 1.103265306122449e-05, "loss": 1.3232, "step": 19106 }, { "epoch": 77.98775510204082, "grad_norm": 29.02134895324707, "learning_rate": 1.103061224489796e-05, "loss": 0.8947, "step": 19107 }, { "epoch": 77.99183673469388, "grad_norm": 29.48784637451172, "learning_rate": 1.102857142857143e-05, "loss": 1.3529, "step": 19108 }, { "epoch": 77.99591836734695, "grad_norm": 38.78702163696289, "learning_rate": 1.1026530612244898e-05, "loss": 0.4014, "step": 19109 }, { "epoch": 78.0, "grad_norm": 31.445030212402344, "learning_rate": 1.1024489795918367e-05, "loss": 0.4198, "step": 19110 }, { "epoch": 78.00408163265305, "grad_norm": 29.631725311279297, "learning_rate": 1.1022448979591838e-05, "loss": 0.3483, "step": 19111 }, { "epoch": 78.00816326530612, "grad_norm": 47.0531005859375, "learning_rate": 1.1020408163265306e-05, "loss": 0.7657, "step": 19112 }, { "epoch": 78.01224489795918, "grad_norm": 29.85605239868164, "learning_rate": 1.1018367346938777e-05, "loss": 0.7922, "step": 19113 }, { "epoch": 78.01632653061225, "grad_norm": 22.957414627075195, "learning_rate": 1.1016326530612244e-05, "loss": 1.728, "step": 19114 }, { "epoch": 78.0204081632653, "grad_norm": 25.070104598999023, "learning_rate": 1.1014285714285715e-05, "loss": 1.5434, "step": 19115 }, { "epoch": 78.02448979591837, "grad_norm": 20.088790893554688, "learning_rate": 1.1012244897959183e-05, "loss": 1.3348, "step": 19116 }, { "epoch": 78.02857142857142, "grad_norm": 39.73343276977539, "learning_rate": 1.1010204081632654e-05, "loss": 1.1998, "step": 19117 }, { "epoch": 78.0326530612245, "grad_norm": 20.89920425415039, "learning_rate": 1.1008163265306123e-05, "loss": 1.9964, "step": 19118 }, { "epoch": 78.03673469387755, "grad_norm": 33.57014846801758, "learning_rate": 1.1006122448979593e-05, "loss": 0.9383, "step": 19119 }, { "epoch": 78.04081632653062, "grad_norm": 34.72294998168945, "learning_rate": 1.1004081632653062e-05, "loss": 0.9913, "step": 19120 }, { "epoch": 78.04489795918367, "grad_norm": 39.75799560546875, "learning_rate": 1.1002040816326531e-05, "loss": 0.8947, "step": 19121 }, { "epoch": 78.04897959183674, "grad_norm": 34.991512298583984, "learning_rate": 1.1000000000000001e-05, "loss": 0.6671, "step": 19122 }, { "epoch": 78.0530612244898, "grad_norm": 40.335445404052734, "learning_rate": 1.099795918367347e-05, "loss": 0.7978, "step": 19123 }, { "epoch": 78.05714285714286, "grad_norm": 32.01498794555664, "learning_rate": 1.0995918367346939e-05, "loss": 1.2216, "step": 19124 }, { "epoch": 78.06122448979592, "grad_norm": 34.72183609008789, "learning_rate": 1.0993877551020408e-05, "loss": 1.5631, "step": 19125 }, { "epoch": 78.06530612244897, "grad_norm": 37.30522155761719, "learning_rate": 1.0991836734693878e-05, "loss": 0.5766, "step": 19126 }, { "epoch": 78.06938775510204, "grad_norm": 18.546220779418945, "learning_rate": 1.0989795918367347e-05, "loss": 0.1061, "step": 19127 }, { "epoch": 78.0734693877551, "grad_norm": 35.990196228027344, "learning_rate": 1.0987755102040818e-05, "loss": 1.1849, "step": 19128 }, { "epoch": 78.07755102040817, "grad_norm": 31.690410614013672, "learning_rate": 1.0985714285714287e-05, "loss": 0.5893, "step": 19129 }, { "epoch": 78.08163265306122, "grad_norm": 29.903024673461914, "learning_rate": 1.0983673469387757e-05, "loss": 1.6151, "step": 19130 }, { "epoch": 78.08571428571429, "grad_norm": 19.9449405670166, "learning_rate": 1.0981632653061224e-05, "loss": 2.0939, "step": 19131 }, { "epoch": 78.08979591836734, "grad_norm": 36.11207580566406, "learning_rate": 1.0979591836734695e-05, "loss": 0.4044, "step": 19132 }, { "epoch": 78.09387755102041, "grad_norm": 21.627243041992188, "learning_rate": 1.0977551020408164e-05, "loss": 2.0207, "step": 19133 }, { "epoch": 78.09795918367347, "grad_norm": 42.68904495239258, "learning_rate": 1.0975510204081634e-05, "loss": 0.9696, "step": 19134 }, { "epoch": 78.10204081632654, "grad_norm": 37.45410919189453, "learning_rate": 1.0973469387755103e-05, "loss": 0.9668, "step": 19135 }, { "epoch": 78.10612244897959, "grad_norm": 36.91644287109375, "learning_rate": 1.0971428571428572e-05, "loss": 0.4119, "step": 19136 }, { "epoch": 78.11020408163266, "grad_norm": 12.374415397644043, "learning_rate": 1.096938775510204e-05, "loss": 0.0749, "step": 19137 }, { "epoch": 78.11428571428571, "grad_norm": 33.97311782836914, "learning_rate": 1.0967346938775511e-05, "loss": 1.039, "step": 19138 }, { "epoch": 78.11836734693877, "grad_norm": 27.80671501159668, "learning_rate": 1.096530612244898e-05, "loss": 1.5187, "step": 19139 }, { "epoch": 78.12244897959184, "grad_norm": 33.01019287109375, "learning_rate": 1.096326530612245e-05, "loss": 0.9385, "step": 19140 }, { "epoch": 78.12653061224489, "grad_norm": 20.546489715576172, "learning_rate": 1.0961224489795917e-05, "loss": 0.3369, "step": 19141 }, { "epoch": 78.13061224489796, "grad_norm": 37.53031539916992, "learning_rate": 1.0959183673469388e-05, "loss": 0.4083, "step": 19142 }, { "epoch": 78.13469387755102, "grad_norm": 31.432682037353516, "learning_rate": 1.0957142857142857e-05, "loss": 0.4742, "step": 19143 }, { "epoch": 78.13877551020408, "grad_norm": 39.44013595581055, "learning_rate": 1.0955102040816327e-05, "loss": 0.7022, "step": 19144 }, { "epoch": 78.14285714285714, "grad_norm": 9.691585540771484, "learning_rate": 1.0953061224489796e-05, "loss": 3.0345, "step": 19145 }, { "epoch": 78.14693877551021, "grad_norm": 31.218168258666992, "learning_rate": 1.0951020408163267e-05, "loss": 0.6424, "step": 19146 }, { "epoch": 78.15102040816326, "grad_norm": 30.733734130859375, "learning_rate": 1.0948979591836735e-05, "loss": 1.4331, "step": 19147 }, { "epoch": 78.15510204081633, "grad_norm": 45.166297912597656, "learning_rate": 1.0946938775510204e-05, "loss": 1.2729, "step": 19148 }, { "epoch": 78.15918367346939, "grad_norm": 41.24655532836914, "learning_rate": 1.0944897959183675e-05, "loss": 1.1458, "step": 19149 }, { "epoch": 78.16326530612245, "grad_norm": 36.640419006347656, "learning_rate": 1.0942857142857144e-05, "loss": 0.4145, "step": 19150 }, { "epoch": 78.16734693877551, "grad_norm": 40.017093658447266, "learning_rate": 1.0940816326530612e-05, "loss": 0.4105, "step": 19151 }, { "epoch": 78.17142857142858, "grad_norm": 25.480228424072266, "learning_rate": 1.0938775510204081e-05, "loss": 1.4851, "step": 19152 }, { "epoch": 78.17551020408163, "grad_norm": 36.65553283691406, "learning_rate": 1.0936734693877552e-05, "loss": 0.3933, "step": 19153 }, { "epoch": 78.17959183673469, "grad_norm": 29.288061141967773, "learning_rate": 1.093469387755102e-05, "loss": 0.3262, "step": 19154 }, { "epoch": 78.18367346938776, "grad_norm": 30.680988311767578, "learning_rate": 1.0932653061224491e-05, "loss": 0.793, "step": 19155 }, { "epoch": 78.18775510204081, "grad_norm": 28.815326690673828, "learning_rate": 1.093061224489796e-05, "loss": 1.0475, "step": 19156 }, { "epoch": 78.19183673469388, "grad_norm": 42.12570571899414, "learning_rate": 1.092857142857143e-05, "loss": 1.1372, "step": 19157 }, { "epoch": 78.19591836734693, "grad_norm": 30.028499603271484, "learning_rate": 1.0926530612244898e-05, "loss": 1.234, "step": 19158 }, { "epoch": 78.2, "grad_norm": 27.14491081237793, "learning_rate": 1.0924489795918368e-05, "loss": 0.1879, "step": 19159 }, { "epoch": 78.20408163265306, "grad_norm": 24.665878295898438, "learning_rate": 1.0922448979591837e-05, "loss": 1.6014, "step": 19160 }, { "epoch": 78.20816326530613, "grad_norm": 31.073339462280273, "learning_rate": 1.0920408163265307e-05, "loss": 1.6185, "step": 19161 }, { "epoch": 78.21224489795918, "grad_norm": 30.025632858276367, "learning_rate": 1.0918367346938776e-05, "loss": 1.7402, "step": 19162 }, { "epoch": 78.21632653061225, "grad_norm": 34.10336685180664, "learning_rate": 1.0916326530612245e-05, "loss": 0.4075, "step": 19163 }, { "epoch": 78.2204081632653, "grad_norm": 28.44347381591797, "learning_rate": 1.0914285714285714e-05, "loss": 0.8138, "step": 19164 }, { "epoch": 78.22448979591837, "grad_norm": 28.649560928344727, "learning_rate": 1.0912244897959184e-05, "loss": 1.2535, "step": 19165 }, { "epoch": 78.22857142857143, "grad_norm": 29.882015228271484, "learning_rate": 1.0910204081632653e-05, "loss": 0.7664, "step": 19166 }, { "epoch": 78.2326530612245, "grad_norm": 37.52317428588867, "learning_rate": 1.0908163265306124e-05, "loss": 0.423, "step": 19167 }, { "epoch": 78.23673469387755, "grad_norm": 32.49472427368164, "learning_rate": 1.0906122448979593e-05, "loss": 1.4383, "step": 19168 }, { "epoch": 78.2408163265306, "grad_norm": 38.23249053955078, "learning_rate": 1.0904081632653061e-05, "loss": 0.8056, "step": 19169 }, { "epoch": 78.24489795918367, "grad_norm": 28.53199005126953, "learning_rate": 1.090204081632653e-05, "loss": 1.6627, "step": 19170 }, { "epoch": 78.24897959183673, "grad_norm": 29.745317459106445, "learning_rate": 1.09e-05, "loss": 0.8837, "step": 19171 }, { "epoch": 78.2530612244898, "grad_norm": 35.41521453857422, "learning_rate": 1.089795918367347e-05, "loss": 1.1547, "step": 19172 }, { "epoch": 78.25714285714285, "grad_norm": 26.14878273010254, "learning_rate": 1.089591836734694e-05, "loss": 1.3924, "step": 19173 }, { "epoch": 78.26122448979592, "grad_norm": 25.991012573242188, "learning_rate": 1.0893877551020409e-05, "loss": 1.5295, "step": 19174 }, { "epoch": 78.26530612244898, "grad_norm": 24.085542678833008, "learning_rate": 1.0891836734693878e-05, "loss": 0.12, "step": 19175 }, { "epoch": 78.26938775510204, "grad_norm": 28.323583602905273, "learning_rate": 1.0889795918367346e-05, "loss": 1.4344, "step": 19176 }, { "epoch": 78.2734693877551, "grad_norm": 28.380449295043945, "learning_rate": 1.0887755102040817e-05, "loss": 1.2334, "step": 19177 }, { "epoch": 78.27755102040817, "grad_norm": 32.10598373413086, "learning_rate": 1.0885714285714286e-05, "loss": 0.4501, "step": 19178 }, { "epoch": 78.28163265306122, "grad_norm": 35.79712677001953, "learning_rate": 1.0883673469387756e-05, "loss": 0.4576, "step": 19179 }, { "epoch": 78.28571428571429, "grad_norm": 31.37802505493164, "learning_rate": 1.0881632653061225e-05, "loss": 1.709, "step": 19180 }, { "epoch": 78.28979591836735, "grad_norm": 44.633358001708984, "learning_rate": 1.0879591836734694e-05, "loss": 0.8617, "step": 19181 }, { "epoch": 78.29387755102042, "grad_norm": 28.750808715820312, "learning_rate": 1.0877551020408164e-05, "loss": 0.8747, "step": 19182 }, { "epoch": 78.29795918367347, "grad_norm": 8.488386154174805, "learning_rate": 1.0875510204081633e-05, "loss": 0.1002, "step": 19183 }, { "epoch": 78.30204081632652, "grad_norm": 32.772891998291016, "learning_rate": 1.0873469387755104e-05, "loss": 1.4233, "step": 19184 }, { "epoch": 78.3061224489796, "grad_norm": 39.96989059448242, "learning_rate": 1.0871428571428571e-05, "loss": 1.2443, "step": 19185 }, { "epoch": 78.31020408163265, "grad_norm": 37.49696350097656, "learning_rate": 1.0869387755102041e-05, "loss": 1.0637, "step": 19186 }, { "epoch": 78.31428571428572, "grad_norm": 35.8993034362793, "learning_rate": 1.086734693877551e-05, "loss": 1.3706, "step": 19187 }, { "epoch": 78.31836734693877, "grad_norm": 14.463232040405273, "learning_rate": 1.086530612244898e-05, "loss": 0.1084, "step": 19188 }, { "epoch": 78.32244897959184, "grad_norm": 58.266212463378906, "learning_rate": 1.086326530612245e-05, "loss": 0.6707, "step": 19189 }, { "epoch": 78.3265306122449, "grad_norm": 38.66950225830078, "learning_rate": 1.086122448979592e-05, "loss": 0.7775, "step": 19190 }, { "epoch": 78.33061224489796, "grad_norm": 19.590904235839844, "learning_rate": 1.0859183673469387e-05, "loss": 2.1431, "step": 19191 }, { "epoch": 78.33469387755102, "grad_norm": 27.963281631469727, "learning_rate": 1.0857142857142858e-05, "loss": 1.4981, "step": 19192 }, { "epoch": 78.33877551020409, "grad_norm": 34.65337371826172, "learning_rate": 1.0855102040816327e-05, "loss": 0.679, "step": 19193 }, { "epoch": 78.34285714285714, "grad_norm": 31.626039505004883, "learning_rate": 1.0853061224489797e-05, "loss": 0.3417, "step": 19194 }, { "epoch": 78.34693877551021, "grad_norm": 29.92852783203125, "learning_rate": 1.0851020408163266e-05, "loss": 0.3929, "step": 19195 }, { "epoch": 78.35102040816327, "grad_norm": 37.26982498168945, "learning_rate": 1.0848979591836735e-05, "loss": 0.3206, "step": 19196 }, { "epoch": 78.35510204081632, "grad_norm": 29.565349578857422, "learning_rate": 1.0846938775510204e-05, "loss": 1.7876, "step": 19197 }, { "epoch": 78.35918367346939, "grad_norm": 22.152761459350586, "learning_rate": 1.0844897959183674e-05, "loss": 2.0452, "step": 19198 }, { "epoch": 78.36326530612244, "grad_norm": 34.69742965698242, "learning_rate": 1.0842857142857143e-05, "loss": 1.5118, "step": 19199 }, { "epoch": 78.36734693877551, "grad_norm": 38.002628326416016, "learning_rate": 1.0840816326530613e-05, "loss": 1.0351, "step": 19200 }, { "epoch": 78.37142857142857, "grad_norm": 34.966346740722656, "learning_rate": 1.0838775510204082e-05, "loss": 0.3052, "step": 19201 }, { "epoch": 78.37551020408164, "grad_norm": 26.4698543548584, "learning_rate": 1.0836734693877551e-05, "loss": 0.8059, "step": 19202 }, { "epoch": 78.37959183673469, "grad_norm": 15.712224960327148, "learning_rate": 1.083469387755102e-05, "loss": 0.1103, "step": 19203 }, { "epoch": 78.38367346938776, "grad_norm": 31.577241897583008, "learning_rate": 1.083265306122449e-05, "loss": 0.4087, "step": 19204 }, { "epoch": 78.38775510204081, "grad_norm": 27.660337448120117, "learning_rate": 1.083061224489796e-05, "loss": 0.5152, "step": 19205 }, { "epoch": 78.39183673469388, "grad_norm": 31.207942962646484, "learning_rate": 1.082857142857143e-05, "loss": 0.8688, "step": 19206 }, { "epoch": 78.39591836734694, "grad_norm": 33.2824821472168, "learning_rate": 1.0826530612244899e-05, "loss": 1.1743, "step": 19207 }, { "epoch": 78.4, "grad_norm": 23.789907455444336, "learning_rate": 1.0824489795918367e-05, "loss": 1.9927, "step": 19208 }, { "epoch": 78.40408163265306, "grad_norm": 31.572607040405273, "learning_rate": 1.0822448979591838e-05, "loss": 0.3972, "step": 19209 }, { "epoch": 78.40816326530613, "grad_norm": 39.618621826171875, "learning_rate": 1.0820408163265307e-05, "loss": 0.613, "step": 19210 }, { "epoch": 78.41224489795918, "grad_norm": 26.728620529174805, "learning_rate": 1.0818367346938777e-05, "loss": 1.1701, "step": 19211 }, { "epoch": 78.41632653061224, "grad_norm": 26.587421417236328, "learning_rate": 1.0816326530612246e-05, "loss": 1.3131, "step": 19212 }, { "epoch": 78.42040816326531, "grad_norm": 31.899673461914062, "learning_rate": 1.0814285714285715e-05, "loss": 0.6912, "step": 19213 }, { "epoch": 78.42448979591836, "grad_norm": 26.57581329345703, "learning_rate": 1.0812244897959184e-05, "loss": 1.6879, "step": 19214 }, { "epoch": 78.42857142857143, "grad_norm": 26.976308822631836, "learning_rate": 1.0810204081632654e-05, "loss": 1.3914, "step": 19215 }, { "epoch": 78.43265306122449, "grad_norm": 32.9504280090332, "learning_rate": 1.0808163265306123e-05, "loss": 1.0457, "step": 19216 }, { "epoch": 78.43673469387755, "grad_norm": 44.118141174316406, "learning_rate": 1.0806122448979593e-05, "loss": 0.17, "step": 19217 }, { "epoch": 78.44081632653061, "grad_norm": 40.21876525878906, "learning_rate": 1.080408163265306e-05, "loss": 0.9215, "step": 19218 }, { "epoch": 78.44489795918368, "grad_norm": 23.665855407714844, "learning_rate": 1.0802040816326531e-05, "loss": 1.479, "step": 19219 }, { "epoch": 78.44897959183673, "grad_norm": 15.788021087646484, "learning_rate": 1.08e-05, "loss": 0.1517, "step": 19220 }, { "epoch": 78.4530612244898, "grad_norm": 32.18114471435547, "learning_rate": 1.079795918367347e-05, "loss": 0.4888, "step": 19221 }, { "epoch": 78.45714285714286, "grad_norm": 21.520265579223633, "learning_rate": 1.079591836734694e-05, "loss": 1.9797, "step": 19222 }, { "epoch": 78.46122448979592, "grad_norm": 15.602285385131836, "learning_rate": 1.079387755102041e-05, "loss": 0.1382, "step": 19223 }, { "epoch": 78.46530612244898, "grad_norm": 28.256959915161133, "learning_rate": 1.0791836734693877e-05, "loss": 0.2934, "step": 19224 }, { "epoch": 78.46938775510205, "grad_norm": 36.73512268066406, "learning_rate": 1.0789795918367347e-05, "loss": 0.4295, "step": 19225 }, { "epoch": 78.4734693877551, "grad_norm": 30.50243377685547, "learning_rate": 1.0787755102040816e-05, "loss": 1.6061, "step": 19226 }, { "epoch": 78.47755102040816, "grad_norm": 19.351327896118164, "learning_rate": 1.0785714285714287e-05, "loss": 0.1396, "step": 19227 }, { "epoch": 78.48163265306123, "grad_norm": 26.914962768554688, "learning_rate": 1.0783673469387756e-05, "loss": 1.322, "step": 19228 }, { "epoch": 78.48571428571428, "grad_norm": 43.524627685546875, "learning_rate": 1.0781632653061224e-05, "loss": 0.7866, "step": 19229 }, { "epoch": 78.48979591836735, "grad_norm": 41.06584930419922, "learning_rate": 1.0779591836734693e-05, "loss": 0.6967, "step": 19230 }, { "epoch": 78.4938775510204, "grad_norm": 23.96042823791504, "learning_rate": 1.0777551020408164e-05, "loss": 1.9723, "step": 19231 }, { "epoch": 78.49795918367347, "grad_norm": 29.660400390625, "learning_rate": 1.0775510204081633e-05, "loss": 0.2653, "step": 19232 }, { "epoch": 78.50204081632653, "grad_norm": 33.71318435668945, "learning_rate": 1.0773469387755103e-05, "loss": 0.4105, "step": 19233 }, { "epoch": 78.5061224489796, "grad_norm": 36.593650817871094, "learning_rate": 1.0771428571428572e-05, "loss": 0.3452, "step": 19234 }, { "epoch": 78.51020408163265, "grad_norm": 34.04994583129883, "learning_rate": 1.076938775510204e-05, "loss": 0.7975, "step": 19235 }, { "epoch": 78.51428571428572, "grad_norm": 60.10393524169922, "learning_rate": 1.0767346938775511e-05, "loss": 1.3037, "step": 19236 }, { "epoch": 78.51836734693877, "grad_norm": 26.682193756103516, "learning_rate": 1.076530612244898e-05, "loss": 1.4683, "step": 19237 }, { "epoch": 78.52244897959184, "grad_norm": 32.25718307495117, "learning_rate": 1.076326530612245e-05, "loss": 1.0714, "step": 19238 }, { "epoch": 78.5265306122449, "grad_norm": 23.675098419189453, "learning_rate": 1.076122448979592e-05, "loss": 2.0016, "step": 19239 }, { "epoch": 78.53061224489795, "grad_norm": 22.064594268798828, "learning_rate": 1.0759183673469388e-05, "loss": 2.1723, "step": 19240 }, { "epoch": 78.53469387755102, "grad_norm": 25.628908157348633, "learning_rate": 1.0757142857142857e-05, "loss": 1.6095, "step": 19241 }, { "epoch": 78.53877551020408, "grad_norm": 24.645484924316406, "learning_rate": 1.0755102040816328e-05, "loss": 0.257, "step": 19242 }, { "epoch": 78.54285714285714, "grad_norm": 37.777156829833984, "learning_rate": 1.0753061224489796e-05, "loss": 0.7974, "step": 19243 }, { "epoch": 78.5469387755102, "grad_norm": 32.7593879699707, "learning_rate": 1.0751020408163267e-05, "loss": 0.743, "step": 19244 }, { "epoch": 78.55102040816327, "grad_norm": 23.89344596862793, "learning_rate": 1.0748979591836736e-05, "loss": 0.1304, "step": 19245 }, { "epoch": 78.55510204081632, "grad_norm": 31.645666122436523, "learning_rate": 1.0746938775510204e-05, "loss": 1.3044, "step": 19246 }, { "epoch": 78.55918367346939, "grad_norm": 29.68230628967285, "learning_rate": 1.0744897959183673e-05, "loss": 0.68, "step": 19247 }, { "epoch": 78.56326530612245, "grad_norm": 33.45105743408203, "learning_rate": 1.0742857142857144e-05, "loss": 1.2976, "step": 19248 }, { "epoch": 78.56734693877551, "grad_norm": 30.998884201049805, "learning_rate": 1.0740816326530613e-05, "loss": 1.1586, "step": 19249 }, { "epoch": 78.57142857142857, "grad_norm": 39.38600540161133, "learning_rate": 1.0738775510204083e-05, "loss": 1.1036, "step": 19250 }, { "epoch": 78.57551020408164, "grad_norm": 25.817731857299805, "learning_rate": 1.073673469387755e-05, "loss": 2.0565, "step": 19251 }, { "epoch": 78.57959183673469, "grad_norm": 25.565221786499023, "learning_rate": 1.073469387755102e-05, "loss": 0.2276, "step": 19252 }, { "epoch": 78.58367346938776, "grad_norm": 43.66667938232422, "learning_rate": 1.073265306122449e-05, "loss": 1.2001, "step": 19253 }, { "epoch": 78.58775510204082, "grad_norm": 35.00519943237305, "learning_rate": 1.073061224489796e-05, "loss": 0.887, "step": 19254 }, { "epoch": 78.59183673469387, "grad_norm": 24.413536071777344, "learning_rate": 1.0728571428571429e-05, "loss": 1.9674, "step": 19255 }, { "epoch": 78.59591836734694, "grad_norm": 25.97783660888672, "learning_rate": 1.07265306122449e-05, "loss": 1.6283, "step": 19256 }, { "epoch": 78.6, "grad_norm": 23.032302856445312, "learning_rate": 1.0724489795918367e-05, "loss": 1.5462, "step": 19257 }, { "epoch": 78.60408163265306, "grad_norm": 40.32749557495117, "learning_rate": 1.0722448979591837e-05, "loss": 0.7119, "step": 19258 }, { "epoch": 78.60816326530612, "grad_norm": 24.025569915771484, "learning_rate": 1.0720408163265306e-05, "loss": 2.3366, "step": 19259 }, { "epoch": 78.61224489795919, "grad_norm": 23.623212814331055, "learning_rate": 1.0718367346938776e-05, "loss": 1.7569, "step": 19260 }, { "epoch": 78.61632653061224, "grad_norm": 37.292850494384766, "learning_rate": 1.0716326530612245e-05, "loss": 0.3395, "step": 19261 }, { "epoch": 78.62040816326531, "grad_norm": 40.122589111328125, "learning_rate": 1.0714285714285714e-05, "loss": 0.4675, "step": 19262 }, { "epoch": 78.62448979591836, "grad_norm": 23.168712615966797, "learning_rate": 1.0712244897959185e-05, "loss": 1.6862, "step": 19263 }, { "epoch": 78.62857142857143, "grad_norm": 33.031917572021484, "learning_rate": 1.0710204081632653e-05, "loss": 1.356, "step": 19264 }, { "epoch": 78.63265306122449, "grad_norm": 22.775936126708984, "learning_rate": 1.0708163265306124e-05, "loss": 2.0076, "step": 19265 }, { "epoch": 78.63673469387756, "grad_norm": 56.72749710083008, "learning_rate": 1.0706122448979593e-05, "loss": 1.0601, "step": 19266 }, { "epoch": 78.64081632653061, "grad_norm": 35.13163375854492, "learning_rate": 1.0704081632653063e-05, "loss": 0.8719, "step": 19267 }, { "epoch": 78.64489795918368, "grad_norm": 31.469280242919922, "learning_rate": 1.070204081632653e-05, "loss": 0.3042, "step": 19268 }, { "epoch": 78.64897959183673, "grad_norm": 45.41435623168945, "learning_rate": 1.0700000000000001e-05, "loss": 0.5538, "step": 19269 }, { "epoch": 78.65306122448979, "grad_norm": 37.53538131713867, "learning_rate": 1.069795918367347e-05, "loss": 0.8346, "step": 19270 }, { "epoch": 78.65714285714286, "grad_norm": 30.786720275878906, "learning_rate": 1.069591836734694e-05, "loss": 1.264, "step": 19271 }, { "epoch": 78.66122448979591, "grad_norm": 19.80650520324707, "learning_rate": 1.0693877551020409e-05, "loss": 2.2332, "step": 19272 }, { "epoch": 78.66530612244898, "grad_norm": 22.655637741088867, "learning_rate": 1.0691836734693878e-05, "loss": 1.4926, "step": 19273 }, { "epoch": 78.66938775510204, "grad_norm": 30.022415161132812, "learning_rate": 1.0689795918367347e-05, "loss": 1.3962, "step": 19274 }, { "epoch": 78.6734693877551, "grad_norm": 37.44036102294922, "learning_rate": 1.0687755102040817e-05, "loss": 1.1935, "step": 19275 }, { "epoch": 78.67755102040816, "grad_norm": 31.936115264892578, "learning_rate": 1.0685714285714286e-05, "loss": 0.6207, "step": 19276 }, { "epoch": 78.68163265306123, "grad_norm": 26.979568481445312, "learning_rate": 1.0683673469387757e-05, "loss": 1.5791, "step": 19277 }, { "epoch": 78.68571428571428, "grad_norm": 38.241153717041016, "learning_rate": 1.0681632653061225e-05, "loss": 0.9231, "step": 19278 }, { "epoch": 78.68979591836735, "grad_norm": 33.69845199584961, "learning_rate": 1.0679591836734694e-05, "loss": 1.3342, "step": 19279 }, { "epoch": 78.6938775510204, "grad_norm": 45.693416595458984, "learning_rate": 1.0677551020408163e-05, "loss": 1.0038, "step": 19280 }, { "epoch": 78.69795918367348, "grad_norm": 17.63315773010254, "learning_rate": 1.0675510204081634e-05, "loss": 2.6662, "step": 19281 }, { "epoch": 78.70204081632653, "grad_norm": 41.72992706298828, "learning_rate": 1.0673469387755102e-05, "loss": 0.8803, "step": 19282 }, { "epoch": 78.70612244897958, "grad_norm": 29.225038528442383, "learning_rate": 1.0671428571428573e-05, "loss": 1.6245, "step": 19283 }, { "epoch": 78.71020408163265, "grad_norm": 41.5239143371582, "learning_rate": 1.066938775510204e-05, "loss": 0.4397, "step": 19284 }, { "epoch": 78.71428571428571, "grad_norm": 29.05916404724121, "learning_rate": 1.066734693877551e-05, "loss": 0.8552, "step": 19285 }, { "epoch": 78.71836734693878, "grad_norm": 34.9093017578125, "learning_rate": 1.066530612244898e-05, "loss": 0.5876, "step": 19286 }, { "epoch": 78.72244897959183, "grad_norm": 27.870399475097656, "learning_rate": 1.066326530612245e-05, "loss": 1.5033, "step": 19287 }, { "epoch": 78.7265306122449, "grad_norm": 31.910724639892578, "learning_rate": 1.0661224489795919e-05, "loss": 0.3069, "step": 19288 }, { "epoch": 78.73061224489796, "grad_norm": 29.724166870117188, "learning_rate": 1.0659183673469389e-05, "loss": 0.4586, "step": 19289 }, { "epoch": 78.73469387755102, "grad_norm": 16.12093734741211, "learning_rate": 1.0657142857142858e-05, "loss": 2.3585, "step": 19290 }, { "epoch": 78.73877551020408, "grad_norm": 34.914642333984375, "learning_rate": 1.0655102040816327e-05, "loss": 0.6418, "step": 19291 }, { "epoch": 78.74285714285715, "grad_norm": 28.50415802001953, "learning_rate": 1.0653061224489797e-05, "loss": 1.5627, "step": 19292 }, { "epoch": 78.7469387755102, "grad_norm": 32.0919303894043, "learning_rate": 1.0651020408163266e-05, "loss": 0.6419, "step": 19293 }, { "epoch": 78.75102040816327, "grad_norm": 1.750740885734558, "learning_rate": 1.0648979591836735e-05, "loss": 0.0303, "step": 19294 }, { "epoch": 78.75510204081633, "grad_norm": 35.16206359863281, "learning_rate": 1.0646938775510204e-05, "loss": 0.9212, "step": 19295 }, { "epoch": 78.7591836734694, "grad_norm": 21.71027374267578, "learning_rate": 1.0644897959183674e-05, "loss": 0.1946, "step": 19296 }, { "epoch": 78.76326530612245, "grad_norm": 41.3754768371582, "learning_rate": 1.0642857142857143e-05, "loss": 1.3538, "step": 19297 }, { "epoch": 78.7673469387755, "grad_norm": 28.99750328063965, "learning_rate": 1.0640816326530614e-05, "loss": 1.8878, "step": 19298 }, { "epoch": 78.77142857142857, "grad_norm": 35.45289993286133, "learning_rate": 1.0638775510204082e-05, "loss": 0.2553, "step": 19299 }, { "epoch": 78.77551020408163, "grad_norm": 25.964853286743164, "learning_rate": 1.0636734693877551e-05, "loss": 1.6824, "step": 19300 }, { "epoch": 78.7795918367347, "grad_norm": 25.40831756591797, "learning_rate": 1.063469387755102e-05, "loss": 0.28, "step": 19301 }, { "epoch": 78.78367346938775, "grad_norm": 33.06669235229492, "learning_rate": 1.063265306122449e-05, "loss": 1.4627, "step": 19302 }, { "epoch": 78.78775510204082, "grad_norm": 39.65574264526367, "learning_rate": 1.063061224489796e-05, "loss": 1.1924, "step": 19303 }, { "epoch": 78.79183673469387, "grad_norm": 31.229000091552734, "learning_rate": 1.062857142857143e-05, "loss": 0.4481, "step": 19304 }, { "epoch": 78.79591836734694, "grad_norm": 17.16742706298828, "learning_rate": 1.0626530612244899e-05, "loss": 2.0912, "step": 19305 }, { "epoch": 78.8, "grad_norm": 43.76953125, "learning_rate": 1.0624489795918368e-05, "loss": 1.2913, "step": 19306 }, { "epoch": 78.80408163265307, "grad_norm": 32.6975212097168, "learning_rate": 1.0622448979591836e-05, "loss": 0.3575, "step": 19307 }, { "epoch": 78.80816326530612, "grad_norm": 41.221065521240234, "learning_rate": 1.0620408163265307e-05, "loss": 1.0633, "step": 19308 }, { "epoch": 78.81224489795919, "grad_norm": 35.89735412597656, "learning_rate": 1.0618367346938776e-05, "loss": 1.3689, "step": 19309 }, { "epoch": 78.81632653061224, "grad_norm": 30.84404754638672, "learning_rate": 1.0616326530612246e-05, "loss": 0.5062, "step": 19310 }, { "epoch": 78.82040816326531, "grad_norm": 32.03021240234375, "learning_rate": 1.0614285714285713e-05, "loss": 1.456, "step": 19311 }, { "epoch": 78.82448979591837, "grad_norm": 30.41609001159668, "learning_rate": 1.0612244897959184e-05, "loss": 1.3438, "step": 19312 }, { "epoch": 78.82857142857142, "grad_norm": 21.476261138916016, "learning_rate": 1.0610204081632653e-05, "loss": 1.5754, "step": 19313 }, { "epoch": 78.83265306122449, "grad_norm": 29.67987823486328, "learning_rate": 1.0608163265306123e-05, "loss": 0.1388, "step": 19314 }, { "epoch": 78.83673469387755, "grad_norm": 40.16289138793945, "learning_rate": 1.0606122448979592e-05, "loss": 0.5604, "step": 19315 }, { "epoch": 78.84081632653061, "grad_norm": 43.45804214477539, "learning_rate": 1.0604081632653063e-05, "loss": 0.6998, "step": 19316 }, { "epoch": 78.84489795918367, "grad_norm": 29.22592544555664, "learning_rate": 1.0602040816326531e-05, "loss": 1.2784, "step": 19317 }, { "epoch": 78.84897959183674, "grad_norm": 32.45631408691406, "learning_rate": 1.06e-05, "loss": 1.4635, "step": 19318 }, { "epoch": 78.85306122448979, "grad_norm": 36.6866340637207, "learning_rate": 1.059795918367347e-05, "loss": 0.7753, "step": 19319 }, { "epoch": 78.85714285714286, "grad_norm": 24.16695213317871, "learning_rate": 1.059591836734694e-05, "loss": 1.5942, "step": 19320 }, { "epoch": 78.86122448979592, "grad_norm": 29.916610717773438, "learning_rate": 1.0593877551020408e-05, "loss": 1.5223, "step": 19321 }, { "epoch": 78.86530612244898, "grad_norm": 45.32301712036133, "learning_rate": 1.0591836734693877e-05, "loss": 1.4882, "step": 19322 }, { "epoch": 78.86938775510204, "grad_norm": 18.703262329101562, "learning_rate": 1.0589795918367348e-05, "loss": 2.4566, "step": 19323 }, { "epoch": 78.87346938775511, "grad_norm": 33.314857482910156, "learning_rate": 1.0587755102040816e-05, "loss": 0.4007, "step": 19324 }, { "epoch": 78.87755102040816, "grad_norm": 40.18740463256836, "learning_rate": 1.0585714285714287e-05, "loss": 0.7185, "step": 19325 }, { "epoch": 78.88163265306123, "grad_norm": 32.82929229736328, "learning_rate": 1.0583673469387756e-05, "loss": 0.8406, "step": 19326 }, { "epoch": 78.88571428571429, "grad_norm": 29.34836769104004, "learning_rate": 1.0581632653061226e-05, "loss": 0.1663, "step": 19327 }, { "epoch": 78.88979591836734, "grad_norm": 44.820125579833984, "learning_rate": 1.0579591836734693e-05, "loss": 1.2504, "step": 19328 }, { "epoch": 78.89387755102041, "grad_norm": 25.426063537597656, "learning_rate": 1.0577551020408164e-05, "loss": 0.3315, "step": 19329 }, { "epoch": 78.89795918367346, "grad_norm": 31.921222686767578, "learning_rate": 1.0575510204081633e-05, "loss": 0.6784, "step": 19330 }, { "epoch": 78.90204081632653, "grad_norm": 23.398670196533203, "learning_rate": 1.0573469387755103e-05, "loss": 2.1959, "step": 19331 }, { "epoch": 78.90612244897959, "grad_norm": 34.286354064941406, "learning_rate": 1.0571428571428572e-05, "loss": 1.0761, "step": 19332 }, { "epoch": 78.91020408163266, "grad_norm": 19.2250919342041, "learning_rate": 1.0569387755102041e-05, "loss": 0.1097, "step": 19333 }, { "epoch": 78.91428571428571, "grad_norm": 42.80794906616211, "learning_rate": 1.056734693877551e-05, "loss": 0.3589, "step": 19334 }, { "epoch": 78.91836734693878, "grad_norm": 40.10722351074219, "learning_rate": 1.056530612244898e-05, "loss": 1.03, "step": 19335 }, { "epoch": 78.92244897959183, "grad_norm": 38.81398391723633, "learning_rate": 1.0563265306122449e-05, "loss": 1.1855, "step": 19336 }, { "epoch": 78.9265306122449, "grad_norm": 34.34965515136719, "learning_rate": 1.056122448979592e-05, "loss": 1.0777, "step": 19337 }, { "epoch": 78.93061224489796, "grad_norm": 31.641664505004883, "learning_rate": 1.0559183673469388e-05, "loss": 1.3941, "step": 19338 }, { "epoch": 78.93469387755103, "grad_norm": 37.36878204345703, "learning_rate": 1.0557142857142857e-05, "loss": 1.4169, "step": 19339 }, { "epoch": 78.93877551020408, "grad_norm": 37.02555465698242, "learning_rate": 1.0555102040816326e-05, "loss": 0.8382, "step": 19340 }, { "epoch": 78.94285714285714, "grad_norm": 39.02952194213867, "learning_rate": 1.0553061224489797e-05, "loss": 0.5146, "step": 19341 }, { "epoch": 78.9469387755102, "grad_norm": 46.02096176147461, "learning_rate": 1.0551020408163265e-05, "loss": 0.5012, "step": 19342 }, { "epoch": 78.95102040816326, "grad_norm": 24.728382110595703, "learning_rate": 1.0548979591836736e-05, "loss": 1.6869, "step": 19343 }, { "epoch": 78.95510204081633, "grad_norm": 35.60683822631836, "learning_rate": 1.0546938775510205e-05, "loss": 0.7199, "step": 19344 }, { "epoch": 78.95918367346938, "grad_norm": 26.232452392578125, "learning_rate": 1.0544897959183674e-05, "loss": 2.0899, "step": 19345 }, { "epoch": 78.96326530612245, "grad_norm": 37.47343444824219, "learning_rate": 1.0542857142857144e-05, "loss": 0.5089, "step": 19346 }, { "epoch": 78.9673469387755, "grad_norm": 33.94902038574219, "learning_rate": 1.0540816326530613e-05, "loss": 1.0965, "step": 19347 }, { "epoch": 78.97142857142858, "grad_norm": 31.816946029663086, "learning_rate": 1.0538775510204082e-05, "loss": 1.5434, "step": 19348 }, { "epoch": 78.97551020408163, "grad_norm": 30.91640281677246, "learning_rate": 1.0536734693877552e-05, "loss": 1.7508, "step": 19349 }, { "epoch": 78.9795918367347, "grad_norm": 27.24041748046875, "learning_rate": 1.0534693877551021e-05, "loss": 1.5181, "step": 19350 }, { "epoch": 78.98367346938775, "grad_norm": 34.65135192871094, "learning_rate": 1.053265306122449e-05, "loss": 0.3418, "step": 19351 }, { "epoch": 78.98775510204082, "grad_norm": 27.47554588317871, "learning_rate": 1.053061224489796e-05, "loss": 1.5754, "step": 19352 }, { "epoch": 78.99183673469388, "grad_norm": 31.479604721069336, "learning_rate": 1.052857142857143e-05, "loss": 1.7049, "step": 19353 }, { "epoch": 78.99591836734695, "grad_norm": 35.21372985839844, "learning_rate": 1.05265306122449e-05, "loss": 0.5712, "step": 19354 }, { "epoch": 79.0, "grad_norm": 32.4686279296875, "learning_rate": 1.0524489795918367e-05, "loss": 0.567, "step": 19355 }, { "epoch": 79.00408163265305, "grad_norm": 30.0510311126709, "learning_rate": 1.0522448979591837e-05, "loss": 0.4167, "step": 19356 }, { "epoch": 79.00816326530612, "grad_norm": 24.01874351501465, "learning_rate": 1.0520408163265306e-05, "loss": 1.6228, "step": 19357 }, { "epoch": 79.01224489795918, "grad_norm": 32.143089294433594, "learning_rate": 1.0518367346938777e-05, "loss": 1.2345, "step": 19358 }, { "epoch": 79.01632653061225, "grad_norm": 36.07410430908203, "learning_rate": 1.0516326530612245e-05, "loss": 0.4634, "step": 19359 }, { "epoch": 79.0204081632653, "grad_norm": 30.250764846801758, "learning_rate": 1.0514285714285716e-05, "loss": 0.9779, "step": 19360 }, { "epoch": 79.02448979591837, "grad_norm": 31.525686264038086, "learning_rate": 1.0512244897959183e-05, "loss": 1.3806, "step": 19361 }, { "epoch": 79.02857142857142, "grad_norm": 32.44137954711914, "learning_rate": 1.0510204081632654e-05, "loss": 1.2725, "step": 19362 }, { "epoch": 79.0326530612245, "grad_norm": 21.865856170654297, "learning_rate": 1.0508163265306122e-05, "loss": 1.5359, "step": 19363 }, { "epoch": 79.03673469387755, "grad_norm": 25.32736587524414, "learning_rate": 1.0506122448979593e-05, "loss": 1.599, "step": 19364 }, { "epoch": 79.04081632653062, "grad_norm": 31.774065017700195, "learning_rate": 1.0504081632653062e-05, "loss": 0.3583, "step": 19365 }, { "epoch": 79.04489795918367, "grad_norm": 28.50979995727539, "learning_rate": 1.050204081632653e-05, "loss": 0.45, "step": 19366 }, { "epoch": 79.04897959183674, "grad_norm": 36.70088577270508, "learning_rate": 1.05e-05, "loss": 0.7765, "step": 19367 }, { "epoch": 79.0530612244898, "grad_norm": 27.0852108001709, "learning_rate": 1.049795918367347e-05, "loss": 1.4744, "step": 19368 }, { "epoch": 79.05714285714286, "grad_norm": 33.41968536376953, "learning_rate": 1.0495918367346939e-05, "loss": 0.291, "step": 19369 }, { "epoch": 79.06122448979592, "grad_norm": 37.04287338256836, "learning_rate": 1.049387755102041e-05, "loss": 0.3692, "step": 19370 }, { "epoch": 79.06530612244897, "grad_norm": 35.791465759277344, "learning_rate": 1.0491836734693878e-05, "loss": 1.042, "step": 19371 }, { "epoch": 79.06938775510204, "grad_norm": 32.125022888183594, "learning_rate": 1.0489795918367347e-05, "loss": 0.5696, "step": 19372 }, { "epoch": 79.0734693877551, "grad_norm": 29.805248260498047, "learning_rate": 1.0487755102040816e-05, "loss": 0.3799, "step": 19373 }, { "epoch": 79.07755102040817, "grad_norm": 20.303787231445312, "learning_rate": 1.0485714285714286e-05, "loss": 2.1117, "step": 19374 }, { "epoch": 79.08163265306122, "grad_norm": 32.22248458862305, "learning_rate": 1.0483673469387755e-05, "loss": 1.5743, "step": 19375 }, { "epoch": 79.08571428571429, "grad_norm": 29.8284912109375, "learning_rate": 1.0481632653061226e-05, "loss": 0.7512, "step": 19376 }, { "epoch": 79.08979591836734, "grad_norm": 28.133359909057617, "learning_rate": 1.0479591836734694e-05, "loss": 1.2011, "step": 19377 }, { "epoch": 79.09387755102041, "grad_norm": 33.68282699584961, "learning_rate": 1.0477551020408163e-05, "loss": 0.7404, "step": 19378 }, { "epoch": 79.09795918367347, "grad_norm": 11.452998161315918, "learning_rate": 1.0475510204081634e-05, "loss": 3.0408, "step": 19379 }, { "epoch": 79.10204081632654, "grad_norm": 29.9180908203125, "learning_rate": 1.0473469387755103e-05, "loss": 1.4077, "step": 19380 }, { "epoch": 79.10612244897959, "grad_norm": 28.67740249633789, "learning_rate": 1.0471428571428573e-05, "loss": 1.5126, "step": 19381 }, { "epoch": 79.11020408163266, "grad_norm": 45.69548416137695, "learning_rate": 1.0469387755102042e-05, "loss": 0.9383, "step": 19382 }, { "epoch": 79.11428571428571, "grad_norm": 33.95664596557617, "learning_rate": 1.046734693877551e-05, "loss": 0.7627, "step": 19383 }, { "epoch": 79.11836734693877, "grad_norm": 19.761491775512695, "learning_rate": 1.046530612244898e-05, "loss": 2.1055, "step": 19384 }, { "epoch": 79.12244897959184, "grad_norm": 25.584640502929688, "learning_rate": 1.046326530612245e-05, "loss": 1.4505, "step": 19385 }, { "epoch": 79.12653061224489, "grad_norm": 38.29606628417969, "learning_rate": 1.0461224489795919e-05, "loss": 0.6618, "step": 19386 }, { "epoch": 79.13061224489796, "grad_norm": 38.482059478759766, "learning_rate": 1.045918367346939e-05, "loss": 0.3275, "step": 19387 }, { "epoch": 79.13469387755102, "grad_norm": 33.58898162841797, "learning_rate": 1.0457142857142856e-05, "loss": 0.4724, "step": 19388 }, { "epoch": 79.13877551020408, "grad_norm": 28.778820037841797, "learning_rate": 1.0455102040816327e-05, "loss": 0.5765, "step": 19389 }, { "epoch": 79.14285714285714, "grad_norm": 31.244537353515625, "learning_rate": 1.0453061224489796e-05, "loss": 1.3337, "step": 19390 }, { "epoch": 79.14693877551021, "grad_norm": 8.814250946044922, "learning_rate": 1.0451020408163266e-05, "loss": 0.0618, "step": 19391 }, { "epoch": 79.15102040816326, "grad_norm": 30.708839416503906, "learning_rate": 1.0448979591836735e-05, "loss": 0.6829, "step": 19392 }, { "epoch": 79.15510204081633, "grad_norm": 28.194839477539062, "learning_rate": 1.0446938775510206e-05, "loss": 1.8326, "step": 19393 }, { "epoch": 79.15918367346939, "grad_norm": 13.121613502502441, "learning_rate": 1.0444897959183673e-05, "loss": 0.1577, "step": 19394 }, { "epoch": 79.16326530612245, "grad_norm": 36.81468200683594, "learning_rate": 1.0442857142857143e-05, "loss": 0.3158, "step": 19395 }, { "epoch": 79.16734693877551, "grad_norm": 28.200571060180664, "learning_rate": 1.0440816326530612e-05, "loss": 0.7985, "step": 19396 }, { "epoch": 79.17142857142858, "grad_norm": 36.39101791381836, "learning_rate": 1.0438775510204083e-05, "loss": 0.3994, "step": 19397 }, { "epoch": 79.17551020408163, "grad_norm": 40.88941192626953, "learning_rate": 1.0436734693877551e-05, "loss": 0.4065, "step": 19398 }, { "epoch": 79.17959183673469, "grad_norm": 26.101757049560547, "learning_rate": 1.043469387755102e-05, "loss": 0.1696, "step": 19399 }, { "epoch": 79.18367346938776, "grad_norm": 65.70890045166016, "learning_rate": 1.0432653061224489e-05, "loss": 1.3477, "step": 19400 }, { "epoch": 79.18775510204081, "grad_norm": 41.423709869384766, "learning_rate": 1.043061224489796e-05, "loss": 0.6314, "step": 19401 }, { "epoch": 79.19183673469388, "grad_norm": 48.53728485107422, "learning_rate": 1.0428571428571428e-05, "loss": 0.4895, "step": 19402 }, { "epoch": 79.19591836734693, "grad_norm": 30.51872444152832, "learning_rate": 1.0426530612244899e-05, "loss": 1.2161, "step": 19403 }, { "epoch": 79.2, "grad_norm": 36.006080627441406, "learning_rate": 1.0424489795918368e-05, "loss": 1.3632, "step": 19404 }, { "epoch": 79.20408163265306, "grad_norm": 26.606069564819336, "learning_rate": 1.0422448979591837e-05, "loss": 0.3188, "step": 19405 }, { "epoch": 79.20816326530613, "grad_norm": 29.700634002685547, "learning_rate": 1.0420408163265307e-05, "loss": 1.6957, "step": 19406 }, { "epoch": 79.21224489795918, "grad_norm": 32.114776611328125, "learning_rate": 1.0418367346938776e-05, "loss": 1.1436, "step": 19407 }, { "epoch": 79.21632653061225, "grad_norm": 39.48011016845703, "learning_rate": 1.0416326530612246e-05, "loss": 1.1127, "step": 19408 }, { "epoch": 79.2204081632653, "grad_norm": 25.972640991210938, "learning_rate": 1.0414285714285715e-05, "loss": 1.4599, "step": 19409 }, { "epoch": 79.22448979591837, "grad_norm": 33.78384780883789, "learning_rate": 1.0412244897959184e-05, "loss": 0.3541, "step": 19410 }, { "epoch": 79.22857142857143, "grad_norm": 30.850549697875977, "learning_rate": 1.0410204081632653e-05, "loss": 0.394, "step": 19411 }, { "epoch": 79.2326530612245, "grad_norm": 45.64128875732422, "learning_rate": 1.0408163265306123e-05, "loss": 0.9688, "step": 19412 }, { "epoch": 79.23673469387755, "grad_norm": 24.611692428588867, "learning_rate": 1.0406122448979592e-05, "loss": 2.3057, "step": 19413 }, { "epoch": 79.2408163265306, "grad_norm": 38.64375686645508, "learning_rate": 1.0404081632653063e-05, "loss": 0.791, "step": 19414 }, { "epoch": 79.24489795918367, "grad_norm": 25.982759475708008, "learning_rate": 1.0402040816326532e-05, "loss": 1.6678, "step": 19415 }, { "epoch": 79.24897959183673, "grad_norm": 33.898590087890625, "learning_rate": 1.04e-05, "loss": 0.4865, "step": 19416 }, { "epoch": 79.2530612244898, "grad_norm": 28.044218063354492, "learning_rate": 1.039795918367347e-05, "loss": 1.5397, "step": 19417 }, { "epoch": 79.25714285714285, "grad_norm": 45.64463806152344, "learning_rate": 1.039591836734694e-05, "loss": 0.7724, "step": 19418 }, { "epoch": 79.26122448979592, "grad_norm": 35.2947883605957, "learning_rate": 1.0393877551020409e-05, "loss": 0.5891, "step": 19419 }, { "epoch": 79.26530612244898, "grad_norm": 29.118825912475586, "learning_rate": 1.0391836734693879e-05, "loss": 1.4213, "step": 19420 }, { "epoch": 79.26938775510204, "grad_norm": 23.251462936401367, "learning_rate": 1.0389795918367346e-05, "loss": 1.5146, "step": 19421 }, { "epoch": 79.2734693877551, "grad_norm": 34.525611877441406, "learning_rate": 1.0387755102040817e-05, "loss": 0.6739, "step": 19422 }, { "epoch": 79.27755102040817, "grad_norm": 36.77614974975586, "learning_rate": 1.0385714285714286e-05, "loss": 0.5631, "step": 19423 }, { "epoch": 79.28163265306122, "grad_norm": 39.890628814697266, "learning_rate": 1.0383673469387756e-05, "loss": 0.9689, "step": 19424 }, { "epoch": 79.28571428571429, "grad_norm": 32.91384506225586, "learning_rate": 1.0381632653061225e-05, "loss": 0.3998, "step": 19425 }, { "epoch": 79.28979591836735, "grad_norm": 34.24740982055664, "learning_rate": 1.0379591836734695e-05, "loss": 1.2849, "step": 19426 }, { "epoch": 79.29387755102042, "grad_norm": 31.36310577392578, "learning_rate": 1.0377551020408162e-05, "loss": 0.7883, "step": 19427 }, { "epoch": 79.29795918367347, "grad_norm": 36.57856369018555, "learning_rate": 1.0375510204081633e-05, "loss": 0.6131, "step": 19428 }, { "epoch": 79.30204081632652, "grad_norm": 43.34934616088867, "learning_rate": 1.0373469387755102e-05, "loss": 0.7316, "step": 19429 }, { "epoch": 79.3061224489796, "grad_norm": 27.60753059387207, "learning_rate": 1.0371428571428572e-05, "loss": 1.6303, "step": 19430 }, { "epoch": 79.31020408163265, "grad_norm": 36.93478012084961, "learning_rate": 1.0369387755102041e-05, "loss": 1.0202, "step": 19431 }, { "epoch": 79.31428571428572, "grad_norm": 24.78536033630371, "learning_rate": 1.036734693877551e-05, "loss": 1.6916, "step": 19432 }, { "epoch": 79.31836734693877, "grad_norm": 30.74622344970703, "learning_rate": 1.036530612244898e-05, "loss": 1.2718, "step": 19433 }, { "epoch": 79.32244897959184, "grad_norm": 32.753177642822266, "learning_rate": 1.036326530612245e-05, "loss": 1.1279, "step": 19434 }, { "epoch": 79.3265306122449, "grad_norm": 37.17631149291992, "learning_rate": 1.036122448979592e-05, "loss": 1.5195, "step": 19435 }, { "epoch": 79.33061224489796, "grad_norm": 40.46146011352539, "learning_rate": 1.0359183673469389e-05, "loss": 0.9642, "step": 19436 }, { "epoch": 79.33469387755102, "grad_norm": 32.843109130859375, "learning_rate": 1.0357142857142859e-05, "loss": 0.3765, "step": 19437 }, { "epoch": 79.33877551020409, "grad_norm": 30.920604705810547, "learning_rate": 1.0355102040816326e-05, "loss": 1.3596, "step": 19438 }, { "epoch": 79.34285714285714, "grad_norm": 13.4440279006958, "learning_rate": 1.0353061224489797e-05, "loss": 0.0982, "step": 19439 }, { "epoch": 79.34693877551021, "grad_norm": 28.696378707885742, "learning_rate": 1.0351020408163266e-05, "loss": 0.2606, "step": 19440 }, { "epoch": 79.35102040816327, "grad_norm": 39.725128173828125, "learning_rate": 1.0348979591836736e-05, "loss": 1.0582, "step": 19441 }, { "epoch": 79.35510204081632, "grad_norm": 26.37812042236328, "learning_rate": 1.0346938775510205e-05, "loss": 1.1431, "step": 19442 }, { "epoch": 79.35918367346939, "grad_norm": 23.964096069335938, "learning_rate": 1.0344897959183674e-05, "loss": 1.766, "step": 19443 }, { "epoch": 79.36326530612244, "grad_norm": 51.40104675292969, "learning_rate": 1.0342857142857143e-05, "loss": 0.8634, "step": 19444 }, { "epoch": 79.36734693877551, "grad_norm": 29.599681854248047, "learning_rate": 1.0340816326530613e-05, "loss": 1.675, "step": 19445 }, { "epoch": 79.37142857142857, "grad_norm": 30.096141815185547, "learning_rate": 1.0338775510204082e-05, "loss": 0.8274, "step": 19446 }, { "epoch": 79.37551020408164, "grad_norm": 34.94478225708008, "learning_rate": 1.0336734693877552e-05, "loss": 0.9649, "step": 19447 }, { "epoch": 79.37959183673469, "grad_norm": 27.981403350830078, "learning_rate": 1.0334693877551021e-05, "loss": 1.5297, "step": 19448 }, { "epoch": 79.38367346938776, "grad_norm": 31.145566940307617, "learning_rate": 1.033265306122449e-05, "loss": 0.8368, "step": 19449 }, { "epoch": 79.38775510204081, "grad_norm": 41.61944580078125, "learning_rate": 1.0330612244897959e-05, "loss": 0.7889, "step": 19450 }, { "epoch": 79.39183673469388, "grad_norm": 31.891862869262695, "learning_rate": 1.032857142857143e-05, "loss": 0.8537, "step": 19451 }, { "epoch": 79.39591836734694, "grad_norm": 29.166812896728516, "learning_rate": 1.0326530612244898e-05, "loss": 1.4683, "step": 19452 }, { "epoch": 79.4, "grad_norm": 33.216880798339844, "learning_rate": 1.0324489795918369e-05, "loss": 0.3608, "step": 19453 }, { "epoch": 79.40408163265306, "grad_norm": 32.653263092041016, "learning_rate": 1.0322448979591836e-05, "loss": 1.5033, "step": 19454 }, { "epoch": 79.40816326530613, "grad_norm": 1.9630568027496338, "learning_rate": 1.0320408163265306e-05, "loss": 0.0259, "step": 19455 }, { "epoch": 79.41224489795918, "grad_norm": 23.5762939453125, "learning_rate": 1.0318367346938775e-05, "loss": 1.3826, "step": 19456 }, { "epoch": 79.41632653061224, "grad_norm": 30.887248992919922, "learning_rate": 1.0316326530612246e-05, "loss": 0.3601, "step": 19457 }, { "epoch": 79.42040816326531, "grad_norm": 24.0502986907959, "learning_rate": 1.0314285714285715e-05, "loss": 1.646, "step": 19458 }, { "epoch": 79.42448979591836, "grad_norm": 38.792110443115234, "learning_rate": 1.0312244897959185e-05, "loss": 1.2104, "step": 19459 }, { "epoch": 79.42857142857143, "grad_norm": 24.89921760559082, "learning_rate": 1.0310204081632654e-05, "loss": 2.0, "step": 19460 }, { "epoch": 79.43265306122449, "grad_norm": 43.991397857666016, "learning_rate": 1.0308163265306123e-05, "loss": 0.8718, "step": 19461 }, { "epoch": 79.43673469387755, "grad_norm": 34.7763671875, "learning_rate": 1.0306122448979593e-05, "loss": 1.104, "step": 19462 }, { "epoch": 79.44081632653061, "grad_norm": 34.03960418701172, "learning_rate": 1.0304081632653062e-05, "loss": 0.4418, "step": 19463 }, { "epoch": 79.44489795918368, "grad_norm": 25.28235626220703, "learning_rate": 1.0302040816326533e-05, "loss": 1.5735, "step": 19464 }, { "epoch": 79.44897959183673, "grad_norm": 32.786293029785156, "learning_rate": 1.03e-05, "loss": 0.9037, "step": 19465 }, { "epoch": 79.4530612244898, "grad_norm": 34.81493377685547, "learning_rate": 1.029795918367347e-05, "loss": 0.6676, "step": 19466 }, { "epoch": 79.45714285714286, "grad_norm": 31.424833297729492, "learning_rate": 1.0295918367346939e-05, "loss": 0.4455, "step": 19467 }, { "epoch": 79.46122448979592, "grad_norm": 38.59941101074219, "learning_rate": 1.029387755102041e-05, "loss": 0.4701, "step": 19468 }, { "epoch": 79.46530612244898, "grad_norm": 33.54088592529297, "learning_rate": 1.0291836734693878e-05, "loss": 0.2916, "step": 19469 }, { "epoch": 79.46938775510205, "grad_norm": 32.39643478393555, "learning_rate": 1.0289795918367347e-05, "loss": 1.1283, "step": 19470 }, { "epoch": 79.4734693877551, "grad_norm": 30.27750587463379, "learning_rate": 1.0287755102040816e-05, "loss": 0.374, "step": 19471 }, { "epoch": 79.47755102040816, "grad_norm": 40.860965728759766, "learning_rate": 1.0285714285714286e-05, "loss": 0.7443, "step": 19472 }, { "epoch": 79.48163265306123, "grad_norm": 21.1229248046875, "learning_rate": 1.0283673469387755e-05, "loss": 2.0121, "step": 19473 }, { "epoch": 79.48571428571428, "grad_norm": 16.371137619018555, "learning_rate": 1.0281632653061226e-05, "loss": 2.3287, "step": 19474 }, { "epoch": 79.48979591836735, "grad_norm": 18.124494552612305, "learning_rate": 1.0279591836734695e-05, "loss": 2.1997, "step": 19475 }, { "epoch": 79.4938775510204, "grad_norm": 42.20673370361328, "learning_rate": 1.0277551020408163e-05, "loss": 1.1989, "step": 19476 }, { "epoch": 79.49795918367347, "grad_norm": 33.76472091674805, "learning_rate": 1.0275510204081632e-05, "loss": 0.3949, "step": 19477 }, { "epoch": 79.50204081632653, "grad_norm": 20.722864151000977, "learning_rate": 1.0273469387755103e-05, "loss": 2.1108, "step": 19478 }, { "epoch": 79.5061224489796, "grad_norm": 36.2722282409668, "learning_rate": 1.0271428571428572e-05, "loss": 0.316, "step": 19479 }, { "epoch": 79.51020408163265, "grad_norm": 40.458927154541016, "learning_rate": 1.0269387755102042e-05, "loss": 0.7082, "step": 19480 }, { "epoch": 79.51428571428572, "grad_norm": 34.736698150634766, "learning_rate": 1.026734693877551e-05, "loss": 0.9073, "step": 19481 }, { "epoch": 79.51836734693877, "grad_norm": 24.22553062438965, "learning_rate": 1.026530612244898e-05, "loss": 0.1319, "step": 19482 }, { "epoch": 79.52244897959184, "grad_norm": 38.63711166381836, "learning_rate": 1.0263265306122449e-05, "loss": 1.043, "step": 19483 }, { "epoch": 79.5265306122449, "grad_norm": 23.783384323120117, "learning_rate": 1.0261224489795919e-05, "loss": 1.5948, "step": 19484 }, { "epoch": 79.53061224489795, "grad_norm": 28.352489471435547, "learning_rate": 1.0259183673469388e-05, "loss": 1.5955, "step": 19485 }, { "epoch": 79.53469387755102, "grad_norm": 21.42134666442871, "learning_rate": 1.0257142857142858e-05, "loss": 2.0558, "step": 19486 }, { "epoch": 79.53877551020408, "grad_norm": 19.19025230407715, "learning_rate": 1.0255102040816327e-05, "loss": 2.4101, "step": 19487 }, { "epoch": 79.54285714285714, "grad_norm": 46.33578872680664, "learning_rate": 1.0253061224489796e-05, "loss": 0.2998, "step": 19488 }, { "epoch": 79.5469387755102, "grad_norm": 33.921142578125, "learning_rate": 1.0251020408163267e-05, "loss": 0.7641, "step": 19489 }, { "epoch": 79.55102040816327, "grad_norm": 25.468490600585938, "learning_rate": 1.0248979591836735e-05, "loss": 1.6569, "step": 19490 }, { "epoch": 79.55510204081632, "grad_norm": 23.87845230102539, "learning_rate": 1.0246938775510204e-05, "loss": 2.0222, "step": 19491 }, { "epoch": 79.55918367346939, "grad_norm": 33.017356872558594, "learning_rate": 1.0244897959183673e-05, "loss": 1.0673, "step": 19492 }, { "epoch": 79.56326530612245, "grad_norm": 23.58308219909668, "learning_rate": 1.0242857142857144e-05, "loss": 1.5534, "step": 19493 }, { "epoch": 79.56734693877551, "grad_norm": 44.70350646972656, "learning_rate": 1.0240816326530612e-05, "loss": 0.541, "step": 19494 }, { "epoch": 79.57142857142857, "grad_norm": 36.88394546508789, "learning_rate": 1.0238775510204083e-05, "loss": 1.0403, "step": 19495 }, { "epoch": 79.57551020408164, "grad_norm": 32.4993782043457, "learning_rate": 1.0236734693877552e-05, "loss": 1.3057, "step": 19496 }, { "epoch": 79.57959183673469, "grad_norm": 34.69089889526367, "learning_rate": 1.0234693877551022e-05, "loss": 0.3357, "step": 19497 }, { "epoch": 79.58367346938776, "grad_norm": 22.142684936523438, "learning_rate": 1.023265306122449e-05, "loss": 1.9684, "step": 19498 }, { "epoch": 79.58775510204082, "grad_norm": 38.527931213378906, "learning_rate": 1.023061224489796e-05, "loss": 1.2832, "step": 19499 }, { "epoch": 79.59183673469387, "grad_norm": 34.09144592285156, "learning_rate": 1.0228571428571429e-05, "loss": 1.1581, "step": 19500 }, { "epoch": 79.59591836734694, "grad_norm": 46.52211380004883, "learning_rate": 1.02265306122449e-05, "loss": 0.8527, "step": 19501 }, { "epoch": 79.6, "grad_norm": 24.069442749023438, "learning_rate": 1.0224489795918368e-05, "loss": 0.151, "step": 19502 }, { "epoch": 79.60408163265306, "grad_norm": 20.77997589111328, "learning_rate": 1.0222448979591837e-05, "loss": 0.1927, "step": 19503 }, { "epoch": 79.60816326530612, "grad_norm": 36.914459228515625, "learning_rate": 1.0220408163265306e-05, "loss": 1.0435, "step": 19504 }, { "epoch": 79.61224489795919, "grad_norm": 32.88753128051758, "learning_rate": 1.0218367346938776e-05, "loss": 1.187, "step": 19505 }, { "epoch": 79.61632653061224, "grad_norm": 35.67604446411133, "learning_rate": 1.0216326530612245e-05, "loss": 1.3437, "step": 19506 }, { "epoch": 79.62040816326531, "grad_norm": 47.64834976196289, "learning_rate": 1.0214285714285715e-05, "loss": 0.506, "step": 19507 }, { "epoch": 79.62448979591836, "grad_norm": 20.244657516479492, "learning_rate": 1.0212244897959184e-05, "loss": 0.1789, "step": 19508 }, { "epoch": 79.62857142857143, "grad_norm": 13.180292129516602, "learning_rate": 1.0210204081632653e-05, "loss": 0.1056, "step": 19509 }, { "epoch": 79.63265306122449, "grad_norm": 32.01202392578125, "learning_rate": 1.0208163265306122e-05, "loss": 0.2896, "step": 19510 }, { "epoch": 79.63673469387756, "grad_norm": 32.62770462036133, "learning_rate": 1.0206122448979592e-05, "loss": 0.92, "step": 19511 }, { "epoch": 79.64081632653061, "grad_norm": 29.70740509033203, "learning_rate": 1.0204081632653061e-05, "loss": 0.7848, "step": 19512 }, { "epoch": 79.64489795918368, "grad_norm": 29.17377471923828, "learning_rate": 1.0202040816326532e-05, "loss": 1.4839, "step": 19513 }, { "epoch": 79.64897959183673, "grad_norm": 29.59598731994629, "learning_rate": 1.02e-05, "loss": 0.1624, "step": 19514 }, { "epoch": 79.65306122448979, "grad_norm": 35.306488037109375, "learning_rate": 1.019795918367347e-05, "loss": 1.2815, "step": 19515 }, { "epoch": 79.65714285714286, "grad_norm": 26.990156173706055, "learning_rate": 1.019591836734694e-05, "loss": 1.1334, "step": 19516 }, { "epoch": 79.66122448979591, "grad_norm": 47.2692985534668, "learning_rate": 1.0193877551020409e-05, "loss": 0.6097, "step": 19517 }, { "epoch": 79.66530612244898, "grad_norm": 25.78313446044922, "learning_rate": 1.0191836734693878e-05, "loss": 0.3011, "step": 19518 }, { "epoch": 79.66938775510204, "grad_norm": 36.81938934326172, "learning_rate": 1.0189795918367348e-05, "loss": 0.6395, "step": 19519 }, { "epoch": 79.6734693877551, "grad_norm": 26.071077346801758, "learning_rate": 1.0187755102040817e-05, "loss": 1.5422, "step": 19520 }, { "epoch": 79.67755102040816, "grad_norm": 36.5590934753418, "learning_rate": 1.0185714285714286e-05, "loss": 1.3345, "step": 19521 }, { "epoch": 79.68163265306123, "grad_norm": 28.430065155029297, "learning_rate": 1.0183673469387756e-05, "loss": 1.2994, "step": 19522 }, { "epoch": 79.68571428571428, "grad_norm": 31.46672248840332, "learning_rate": 1.0181632653061225e-05, "loss": 1.1284, "step": 19523 }, { "epoch": 79.68979591836735, "grad_norm": 38.3406867980957, "learning_rate": 1.0179591836734696e-05, "loss": 1.0709, "step": 19524 }, { "epoch": 79.6938775510204, "grad_norm": 21.858055114746094, "learning_rate": 1.0177551020408163e-05, "loss": 0.1567, "step": 19525 }, { "epoch": 79.69795918367348, "grad_norm": 32.563758850097656, "learning_rate": 1.0175510204081633e-05, "loss": 0.9256, "step": 19526 }, { "epoch": 79.70204081632653, "grad_norm": 39.639835357666016, "learning_rate": 1.0173469387755102e-05, "loss": 0.5964, "step": 19527 }, { "epoch": 79.70612244897958, "grad_norm": 26.72265625, "learning_rate": 1.0171428571428573e-05, "loss": 1.9737, "step": 19528 }, { "epoch": 79.71020408163265, "grad_norm": 18.613447189331055, "learning_rate": 1.0169387755102041e-05, "loss": 2.1134, "step": 19529 }, { "epoch": 79.71428571428571, "grad_norm": 12.936646461486816, "learning_rate": 1.0167346938775512e-05, "loss": 0.0926, "step": 19530 }, { "epoch": 79.71836734693878, "grad_norm": 60.81723403930664, "learning_rate": 1.0165306122448979e-05, "loss": 0.435, "step": 19531 }, { "epoch": 79.72244897959183, "grad_norm": 35.09157943725586, "learning_rate": 1.016326530612245e-05, "loss": 1.3241, "step": 19532 }, { "epoch": 79.7265306122449, "grad_norm": 18.19577980041504, "learning_rate": 1.0161224489795918e-05, "loss": 0.1202, "step": 19533 }, { "epoch": 79.73061224489796, "grad_norm": 28.173181533813477, "learning_rate": 1.0159183673469389e-05, "loss": 1.637, "step": 19534 }, { "epoch": 79.73469387755102, "grad_norm": 33.41078567504883, "learning_rate": 1.0157142857142858e-05, "loss": 1.4493, "step": 19535 }, { "epoch": 79.73877551020408, "grad_norm": 24.761173248291016, "learning_rate": 1.0155102040816326e-05, "loss": 1.6641, "step": 19536 }, { "epoch": 79.74285714285715, "grad_norm": 36.06342697143555, "learning_rate": 1.0153061224489795e-05, "loss": 1.0567, "step": 19537 }, { "epoch": 79.7469387755102, "grad_norm": 29.412668228149414, "learning_rate": 1.0151020408163266e-05, "loss": 1.4171, "step": 19538 }, { "epoch": 79.75102040816327, "grad_norm": 28.848766326904297, "learning_rate": 1.0148979591836735e-05, "loss": 1.5987, "step": 19539 }, { "epoch": 79.75510204081633, "grad_norm": 23.837909698486328, "learning_rate": 1.0146938775510205e-05, "loss": 1.5112, "step": 19540 }, { "epoch": 79.7591836734694, "grad_norm": 37.45924377441406, "learning_rate": 1.0144897959183674e-05, "loss": 0.9398, "step": 19541 }, { "epoch": 79.76326530612245, "grad_norm": 16.62302017211914, "learning_rate": 1.0142857142857143e-05, "loss": 0.1179, "step": 19542 }, { "epoch": 79.7673469387755, "grad_norm": 33.007530212402344, "learning_rate": 1.0140816326530613e-05, "loss": 0.3041, "step": 19543 }, { "epoch": 79.77142857142857, "grad_norm": 47.844818115234375, "learning_rate": 1.0138775510204082e-05, "loss": 0.776, "step": 19544 }, { "epoch": 79.77551020408163, "grad_norm": 46.67617416381836, "learning_rate": 1.0136734693877551e-05, "loss": 0.4477, "step": 19545 }, { "epoch": 79.7795918367347, "grad_norm": 42.99774932861328, "learning_rate": 1.0134693877551021e-05, "loss": 0.3062, "step": 19546 }, { "epoch": 79.78367346938775, "grad_norm": 19.61556053161621, "learning_rate": 1.013265306122449e-05, "loss": 0.1507, "step": 19547 }, { "epoch": 79.78775510204082, "grad_norm": 20.983386993408203, "learning_rate": 1.0130612244897959e-05, "loss": 1.4484, "step": 19548 }, { "epoch": 79.79183673469387, "grad_norm": 26.973339080810547, "learning_rate": 1.012857142857143e-05, "loss": 1.4621, "step": 19549 }, { "epoch": 79.79591836734694, "grad_norm": 25.734683990478516, "learning_rate": 1.0126530612244898e-05, "loss": 1.5937, "step": 19550 }, { "epoch": 79.8, "grad_norm": 23.19556427001953, "learning_rate": 1.0124489795918369e-05, "loss": 1.98, "step": 19551 }, { "epoch": 79.80408163265307, "grad_norm": 33.87422180175781, "learning_rate": 1.0122448979591838e-05, "loss": 0.7005, "step": 19552 }, { "epoch": 79.80816326530612, "grad_norm": 26.251558303833008, "learning_rate": 1.0120408163265307e-05, "loss": 2.074, "step": 19553 }, { "epoch": 79.81224489795919, "grad_norm": 35.03199768066406, "learning_rate": 1.0118367346938775e-05, "loss": 0.6367, "step": 19554 }, { "epoch": 79.81632653061224, "grad_norm": 25.75867462158203, "learning_rate": 1.0116326530612246e-05, "loss": 0.2726, "step": 19555 }, { "epoch": 79.82040816326531, "grad_norm": 35.651676177978516, "learning_rate": 1.0114285714285715e-05, "loss": 0.2974, "step": 19556 }, { "epoch": 79.82448979591837, "grad_norm": 30.02542495727539, "learning_rate": 1.0112244897959185e-05, "loss": 1.4603, "step": 19557 }, { "epoch": 79.82857142857142, "grad_norm": 26.056739807128906, "learning_rate": 1.0110204081632652e-05, "loss": 0.8222, "step": 19558 }, { "epoch": 79.83265306122449, "grad_norm": 40.98857879638672, "learning_rate": 1.0108163265306123e-05, "loss": 0.6797, "step": 19559 }, { "epoch": 79.83673469387755, "grad_norm": 49.5626220703125, "learning_rate": 1.0106122448979592e-05, "loss": 0.6884, "step": 19560 }, { "epoch": 79.84081632653061, "grad_norm": 39.33157730102539, "learning_rate": 1.0104081632653062e-05, "loss": 0.7722, "step": 19561 }, { "epoch": 79.84489795918367, "grad_norm": 31.953685760498047, "learning_rate": 1.0102040816326531e-05, "loss": 1.3615, "step": 19562 }, { "epoch": 79.84897959183674, "grad_norm": 31.51580810546875, "learning_rate": 1.0100000000000002e-05, "loss": 0.76, "step": 19563 }, { "epoch": 79.85306122448979, "grad_norm": 37.992130279541016, "learning_rate": 1.0097959183673469e-05, "loss": 0.4467, "step": 19564 }, { "epoch": 79.85714285714286, "grad_norm": 36.30095291137695, "learning_rate": 1.009591836734694e-05, "loss": 0.4635, "step": 19565 }, { "epoch": 79.86122448979592, "grad_norm": 39.01514434814453, "learning_rate": 1.0093877551020408e-05, "loss": 0.8975, "step": 19566 }, { "epoch": 79.86530612244898, "grad_norm": 28.587480545043945, "learning_rate": 1.0091836734693879e-05, "loss": 1.6355, "step": 19567 }, { "epoch": 79.86938775510204, "grad_norm": 38.811344146728516, "learning_rate": 1.0089795918367347e-05, "loss": 1.3074, "step": 19568 }, { "epoch": 79.87346938775511, "grad_norm": 24.105083465576172, "learning_rate": 1.0087755102040816e-05, "loss": 0.1212, "step": 19569 }, { "epoch": 79.87755102040816, "grad_norm": 38.7568244934082, "learning_rate": 1.0085714285714287e-05, "loss": 1.4816, "step": 19570 }, { "epoch": 79.88163265306123, "grad_norm": 28.40924644470215, "learning_rate": 1.0083673469387755e-05, "loss": 1.7796, "step": 19571 }, { "epoch": 79.88571428571429, "grad_norm": 38.215126037597656, "learning_rate": 1.0081632653061224e-05, "loss": 1.4529, "step": 19572 }, { "epoch": 79.88979591836734, "grad_norm": 23.40530776977539, "learning_rate": 1.0079591836734695e-05, "loss": 2.1567, "step": 19573 }, { "epoch": 79.89387755102041, "grad_norm": 29.478273391723633, "learning_rate": 1.0077551020408164e-05, "loss": 1.3025, "step": 19574 }, { "epoch": 79.89795918367346, "grad_norm": 30.376859664916992, "learning_rate": 1.0075510204081632e-05, "loss": 0.8463, "step": 19575 }, { "epoch": 79.90204081632653, "grad_norm": 29.088369369506836, "learning_rate": 1.0073469387755103e-05, "loss": 1.7348, "step": 19576 }, { "epoch": 79.90612244897959, "grad_norm": 25.30569076538086, "learning_rate": 1.0071428571428572e-05, "loss": 1.9669, "step": 19577 }, { "epoch": 79.91020408163266, "grad_norm": 38.78496551513672, "learning_rate": 1.0069387755102042e-05, "loss": 1.3125, "step": 19578 }, { "epoch": 79.91428571428571, "grad_norm": 30.482036590576172, "learning_rate": 1.0067346938775511e-05, "loss": 1.0767, "step": 19579 }, { "epoch": 79.91836734693878, "grad_norm": 34.143863677978516, "learning_rate": 1.006530612244898e-05, "loss": 0.5601, "step": 19580 }, { "epoch": 79.92244897959183, "grad_norm": 31.88512420654297, "learning_rate": 1.0063265306122449e-05, "loss": 0.4607, "step": 19581 }, { "epoch": 79.9265306122449, "grad_norm": 39.3018684387207, "learning_rate": 1.006122448979592e-05, "loss": 0.926, "step": 19582 }, { "epoch": 79.93061224489796, "grad_norm": 59.5703010559082, "learning_rate": 1.0059183673469388e-05, "loss": 1.0818, "step": 19583 }, { "epoch": 79.93469387755103, "grad_norm": 22.44340705871582, "learning_rate": 1.0057142857142859e-05, "loss": 2.0509, "step": 19584 }, { "epoch": 79.93877551020408, "grad_norm": 35.324188232421875, "learning_rate": 1.0055102040816327e-05, "loss": 1.4226, "step": 19585 }, { "epoch": 79.94285714285714, "grad_norm": 33.44642639160156, "learning_rate": 1.0053061224489796e-05, "loss": 0.5232, "step": 19586 }, { "epoch": 79.9469387755102, "grad_norm": 30.85719108581543, "learning_rate": 1.0051020408163265e-05, "loss": 1.2754, "step": 19587 }, { "epoch": 79.95102040816326, "grad_norm": 25.44217872619629, "learning_rate": 1.0048979591836736e-05, "loss": 1.4943, "step": 19588 }, { "epoch": 79.95510204081633, "grad_norm": 36.55059051513672, "learning_rate": 1.0046938775510204e-05, "loss": 0.5657, "step": 19589 }, { "epoch": 79.95918367346938, "grad_norm": 33.173519134521484, "learning_rate": 1.0044897959183675e-05, "loss": 1.4268, "step": 19590 }, { "epoch": 79.96326530612245, "grad_norm": 36.05018997192383, "learning_rate": 1.0042857142857142e-05, "loss": 1.2524, "step": 19591 }, { "epoch": 79.9673469387755, "grad_norm": 29.092636108398438, "learning_rate": 1.0040816326530613e-05, "loss": 0.345, "step": 19592 }, { "epoch": 79.97142857142858, "grad_norm": 32.36345291137695, "learning_rate": 1.0038775510204081e-05, "loss": 0.4163, "step": 19593 }, { "epoch": 79.97551020408163, "grad_norm": 17.225324630737305, "learning_rate": 1.0036734693877552e-05, "loss": 2.6573, "step": 19594 }, { "epoch": 79.9795918367347, "grad_norm": 34.84346008300781, "learning_rate": 1.003469387755102e-05, "loss": 1.1269, "step": 19595 }, { "epoch": 79.98367346938775, "grad_norm": 30.846593856811523, "learning_rate": 1.0032653061224491e-05, "loss": 0.7124, "step": 19596 }, { "epoch": 79.98775510204082, "grad_norm": 39.94313430786133, "learning_rate": 1.0030612244897958e-05, "loss": 0.6891, "step": 19597 }, { "epoch": 79.99183673469388, "grad_norm": 16.534976959228516, "learning_rate": 1.0028571428571429e-05, "loss": 0.1026, "step": 19598 }, { "epoch": 79.99591836734695, "grad_norm": 20.542842864990234, "learning_rate": 1.0026530612244898e-05, "loss": 2.0024, "step": 19599 }, { "epoch": 80.0, "grad_norm": 40.07596206665039, "learning_rate": 1.0024489795918368e-05, "loss": 1.2041, "step": 19600 }, { "epoch": 80.00408163265305, "grad_norm": 23.532438278198242, "learning_rate": 1.0022448979591837e-05, "loss": 2.1237, "step": 19601 }, { "epoch": 80.00816326530612, "grad_norm": 26.73343276977539, "learning_rate": 1.0020408163265306e-05, "loss": 0.3718, "step": 19602 }, { "epoch": 80.01224489795918, "grad_norm": 28.842788696289062, "learning_rate": 1.0018367346938776e-05, "loss": 1.26, "step": 19603 }, { "epoch": 80.01632653061225, "grad_norm": 48.42998504638672, "learning_rate": 1.0016326530612245e-05, "loss": 0.8299, "step": 19604 }, { "epoch": 80.0204081632653, "grad_norm": 10.552799224853516, "learning_rate": 1.0014285714285716e-05, "loss": 3.0278, "step": 19605 }, { "epoch": 80.02448979591837, "grad_norm": 38.46501922607422, "learning_rate": 1.0012244897959185e-05, "loss": 1.2696, "step": 19606 }, { "epoch": 80.02857142857142, "grad_norm": 33.499324798583984, "learning_rate": 1.0010204081632655e-05, "loss": 0.8448, "step": 19607 }, { "epoch": 80.0326530612245, "grad_norm": 17.878299713134766, "learning_rate": 1.0008163265306122e-05, "loss": 2.3865, "step": 19608 }, { "epoch": 80.03673469387755, "grad_norm": 26.27163314819336, "learning_rate": 1.0006122448979593e-05, "loss": 0.1299, "step": 19609 }, { "epoch": 80.04081632653062, "grad_norm": 30.20711326599121, "learning_rate": 1.0004081632653061e-05, "loss": 0.3759, "step": 19610 }, { "epoch": 80.04489795918367, "grad_norm": 23.733856201171875, "learning_rate": 1.0002040816326532e-05, "loss": 0.1459, "step": 19611 }, { "epoch": 80.04897959183674, "grad_norm": 30.907913208007812, "learning_rate": 1e-05, "loss": 1.3967, "step": 19612 }, { "epoch": 80.0530612244898, "grad_norm": 22.06108856201172, "learning_rate": 9.99795918367347e-06, "loss": 0.1597, "step": 19613 }, { "epoch": 80.05714285714286, "grad_norm": 28.77832794189453, "learning_rate": 9.995918367346938e-06, "loss": 0.7739, "step": 19614 }, { "epoch": 80.06122448979592, "grad_norm": 31.340822219848633, "learning_rate": 9.993877551020409e-06, "loss": 0.7549, "step": 19615 }, { "epoch": 80.06530612244897, "grad_norm": 38.29518127441406, "learning_rate": 9.991836734693878e-06, "loss": 1.5167, "step": 19616 }, { "epoch": 80.06938775510204, "grad_norm": 28.351423263549805, "learning_rate": 9.989795918367348e-06, "loss": 0.3254, "step": 19617 }, { "epoch": 80.0734693877551, "grad_norm": 22.74935531616211, "learning_rate": 9.987755102040817e-06, "loss": 1.9898, "step": 19618 }, { "epoch": 80.07755102040817, "grad_norm": 38.76990509033203, "learning_rate": 9.985714285714286e-06, "loss": 0.638, "step": 19619 }, { "epoch": 80.08163265306122, "grad_norm": 43.30360412597656, "learning_rate": 9.983673469387755e-06, "loss": 0.781, "step": 19620 }, { "epoch": 80.08571428571429, "grad_norm": 26.499570846557617, "learning_rate": 9.981632653061225e-06, "loss": 1.4946, "step": 19621 }, { "epoch": 80.08979591836734, "grad_norm": 34.39295959472656, "learning_rate": 9.979591836734694e-06, "loss": 1.0045, "step": 19622 }, { "epoch": 80.09387755102041, "grad_norm": 34.593936920166016, "learning_rate": 9.977551020408165e-06, "loss": 1.0069, "step": 19623 }, { "epoch": 80.09795918367347, "grad_norm": 24.127460479736328, "learning_rate": 9.975510204081632e-06, "loss": 1.9373, "step": 19624 }, { "epoch": 80.10204081632654, "grad_norm": 33.028106689453125, "learning_rate": 9.973469387755102e-06, "loss": 0.6859, "step": 19625 }, { "epoch": 80.10612244897959, "grad_norm": 37.35319137573242, "learning_rate": 9.971428571428571e-06, "loss": 1.022, "step": 19626 }, { "epoch": 80.11020408163266, "grad_norm": 31.35422706604004, "learning_rate": 9.969387755102042e-06, "loss": 0.2681, "step": 19627 }, { "epoch": 80.11428571428571, "grad_norm": 36.24504470825195, "learning_rate": 9.96734693877551e-06, "loss": 0.8933, "step": 19628 }, { "epoch": 80.11836734693877, "grad_norm": 15.420186996459961, "learning_rate": 9.96530612244898e-06, "loss": 0.1224, "step": 19629 }, { "epoch": 80.12244897959184, "grad_norm": 34.41824722290039, "learning_rate": 9.96326530612245e-06, "loss": 0.5969, "step": 19630 }, { "epoch": 80.12653061224489, "grad_norm": 13.264429092407227, "learning_rate": 9.961224489795919e-06, "loss": 0.1007, "step": 19631 }, { "epoch": 80.13061224489796, "grad_norm": 42.72783660888672, "learning_rate": 9.959183673469389e-06, "loss": 0.6456, "step": 19632 }, { "epoch": 80.13469387755102, "grad_norm": 42.8866081237793, "learning_rate": 9.957142857142858e-06, "loss": 0.6971, "step": 19633 }, { "epoch": 80.13877551020408, "grad_norm": 35.24876022338867, "learning_rate": 9.955102040816328e-06, "loss": 0.4958, "step": 19634 }, { "epoch": 80.14285714285714, "grad_norm": 24.864822387695312, "learning_rate": 9.953061224489796e-06, "loss": 0.2607, "step": 19635 }, { "epoch": 80.14693877551021, "grad_norm": 24.243175506591797, "learning_rate": 9.951020408163266e-06, "loss": 2.053, "step": 19636 }, { "epoch": 80.15102040816326, "grad_norm": 33.700531005859375, "learning_rate": 9.948979591836735e-06, "loss": 1.1332, "step": 19637 }, { "epoch": 80.15510204081633, "grad_norm": 30.765840530395508, "learning_rate": 9.946938775510205e-06, "loss": 1.299, "step": 19638 }, { "epoch": 80.15918367346939, "grad_norm": 12.281279563903809, "learning_rate": 9.944897959183674e-06, "loss": 0.0672, "step": 19639 }, { "epoch": 80.16326530612245, "grad_norm": 34.76637649536133, "learning_rate": 9.942857142857143e-06, "loss": 1.3967, "step": 19640 }, { "epoch": 80.16734693877551, "grad_norm": 39.64682388305664, "learning_rate": 9.940816326530612e-06, "loss": 0.9667, "step": 19641 }, { "epoch": 80.17142857142858, "grad_norm": 39.16716766357422, "learning_rate": 9.938775510204082e-06, "loss": 0.872, "step": 19642 }, { "epoch": 80.17551020408163, "grad_norm": 33.0206298828125, "learning_rate": 9.936734693877551e-06, "loss": 0.4474, "step": 19643 }, { "epoch": 80.17959183673469, "grad_norm": 25.021516799926758, "learning_rate": 9.934693877551022e-06, "loss": 1.5291, "step": 19644 }, { "epoch": 80.18367346938776, "grad_norm": 24.3241024017334, "learning_rate": 9.93265306122449e-06, "loss": 1.6451, "step": 19645 }, { "epoch": 80.18775510204081, "grad_norm": 39.074562072753906, "learning_rate": 9.93061224489796e-06, "loss": 0.6759, "step": 19646 }, { "epoch": 80.19183673469388, "grad_norm": 41.39957809448242, "learning_rate": 9.928571428571428e-06, "loss": 1.1593, "step": 19647 }, { "epoch": 80.19591836734693, "grad_norm": 31.48745346069336, "learning_rate": 9.926530612244899e-06, "loss": 1.0563, "step": 19648 }, { "epoch": 80.2, "grad_norm": 35.93496322631836, "learning_rate": 9.924489795918367e-06, "loss": 0.5509, "step": 19649 }, { "epoch": 80.20408163265306, "grad_norm": 28.187135696411133, "learning_rate": 9.922448979591838e-06, "loss": 1.569, "step": 19650 }, { "epoch": 80.20816326530613, "grad_norm": 34.77048110961914, "learning_rate": 9.920408163265305e-06, "loss": 0.9029, "step": 19651 }, { "epoch": 80.21224489795918, "grad_norm": 37.655181884765625, "learning_rate": 9.918367346938776e-06, "loss": 0.43, "step": 19652 }, { "epoch": 80.21632653061225, "grad_norm": 30.248882293701172, "learning_rate": 9.916326530612244e-06, "loss": 1.3054, "step": 19653 }, { "epoch": 80.2204081632653, "grad_norm": 22.786781311035156, "learning_rate": 9.914285714285715e-06, "loss": 2.3021, "step": 19654 }, { "epoch": 80.22448979591837, "grad_norm": 31.86908721923828, "learning_rate": 9.912244897959184e-06, "loss": 1.3712, "step": 19655 }, { "epoch": 80.22857142857143, "grad_norm": 25.923559188842773, "learning_rate": 9.910204081632654e-06, "loss": 0.3244, "step": 19656 }, { "epoch": 80.2326530612245, "grad_norm": 45.103004455566406, "learning_rate": 9.908163265306123e-06, "loss": 0.4186, "step": 19657 }, { "epoch": 80.23673469387755, "grad_norm": 28.37452507019043, "learning_rate": 9.906122448979592e-06, "loss": 1.698, "step": 19658 }, { "epoch": 80.2408163265306, "grad_norm": 32.12741470336914, "learning_rate": 9.904081632653062e-06, "loss": 0.7189, "step": 19659 }, { "epoch": 80.24489795918367, "grad_norm": 32.476898193359375, "learning_rate": 9.902040816326531e-06, "loss": 0.6717, "step": 19660 }, { "epoch": 80.24897959183673, "grad_norm": 38.1829719543457, "learning_rate": 9.900000000000002e-06, "loss": 1.0569, "step": 19661 }, { "epoch": 80.2530612244898, "grad_norm": 46.449947357177734, "learning_rate": 9.897959183673469e-06, "loss": 1.2279, "step": 19662 }, { "epoch": 80.25714285714285, "grad_norm": 37.223602294921875, "learning_rate": 9.89591836734694e-06, "loss": 0.6399, "step": 19663 }, { "epoch": 80.26122448979592, "grad_norm": 29.655778884887695, "learning_rate": 9.893877551020408e-06, "loss": 1.5824, "step": 19664 }, { "epoch": 80.26530612244898, "grad_norm": 31.807897567749023, "learning_rate": 9.891836734693879e-06, "loss": 0.2602, "step": 19665 }, { "epoch": 80.26938775510204, "grad_norm": 42.94203567504883, "learning_rate": 9.889795918367348e-06, "loss": 0.9993, "step": 19666 }, { "epoch": 80.2734693877551, "grad_norm": 31.897174835205078, "learning_rate": 9.887755102040818e-06, "loss": 1.2154, "step": 19667 }, { "epoch": 80.27755102040817, "grad_norm": 19.6190242767334, "learning_rate": 9.885714285714285e-06, "loss": 0.2729, "step": 19668 }, { "epoch": 80.28163265306122, "grad_norm": 22.081392288208008, "learning_rate": 9.883673469387756e-06, "loss": 1.632, "step": 19669 }, { "epoch": 80.28571428571429, "grad_norm": 20.857345581054688, "learning_rate": 9.881632653061225e-06, "loss": 0.1318, "step": 19670 }, { "epoch": 80.28979591836735, "grad_norm": 27.25200843811035, "learning_rate": 9.879591836734695e-06, "loss": 1.5483, "step": 19671 }, { "epoch": 80.29387755102042, "grad_norm": 28.151880264282227, "learning_rate": 9.877551020408164e-06, "loss": 1.5638, "step": 19672 }, { "epoch": 80.29795918367347, "grad_norm": 30.045379638671875, "learning_rate": 9.875510204081633e-06, "loss": 0.4974, "step": 19673 }, { "epoch": 80.30204081632652, "grad_norm": 40.811256408691406, "learning_rate": 9.873469387755102e-06, "loss": 0.865, "step": 19674 }, { "epoch": 80.3061224489796, "grad_norm": 38.40274429321289, "learning_rate": 9.871428571428572e-06, "loss": 0.9915, "step": 19675 }, { "epoch": 80.31020408163265, "grad_norm": 41.666080474853516, "learning_rate": 9.86938775510204e-06, "loss": 0.6857, "step": 19676 }, { "epoch": 80.31428571428572, "grad_norm": 38.25127410888672, "learning_rate": 9.867346938775511e-06, "loss": 0.439, "step": 19677 }, { "epoch": 80.31836734693877, "grad_norm": 45.5934944152832, "learning_rate": 9.86530612244898e-06, "loss": 0.3427, "step": 19678 }, { "epoch": 80.32244897959184, "grad_norm": 27.747920989990234, "learning_rate": 9.863265306122449e-06, "loss": 1.5965, "step": 19679 }, { "epoch": 80.3265306122449, "grad_norm": 34.753456115722656, "learning_rate": 9.861224489795918e-06, "loss": 0.4106, "step": 19680 }, { "epoch": 80.33061224489796, "grad_norm": 26.19932746887207, "learning_rate": 9.859183673469388e-06, "loss": 1.9823, "step": 19681 }, { "epoch": 80.33469387755102, "grad_norm": 33.01057815551758, "learning_rate": 9.857142857142857e-06, "loss": 0.7576, "step": 19682 }, { "epoch": 80.33877551020409, "grad_norm": 36.37684631347656, "learning_rate": 9.855102040816328e-06, "loss": 1.132, "step": 19683 }, { "epoch": 80.34285714285714, "grad_norm": 24.717432022094727, "learning_rate": 9.853061224489796e-06, "loss": 2.1089, "step": 19684 }, { "epoch": 80.34693877551021, "grad_norm": 33.22855758666992, "learning_rate": 9.851020408163265e-06, "loss": 1.4702, "step": 19685 }, { "epoch": 80.35102040816327, "grad_norm": 30.557374954223633, "learning_rate": 9.848979591836736e-06, "loss": 1.1008, "step": 19686 }, { "epoch": 80.35510204081632, "grad_norm": 27.23337173461914, "learning_rate": 9.846938775510205e-06, "loss": 1.4014, "step": 19687 }, { "epoch": 80.35918367346939, "grad_norm": 30.02897071838379, "learning_rate": 9.844897959183673e-06, "loss": 0.3041, "step": 19688 }, { "epoch": 80.36326530612244, "grad_norm": 25.798337936401367, "learning_rate": 9.842857142857144e-06, "loss": 0.3939, "step": 19689 }, { "epoch": 80.36734693877551, "grad_norm": 25.363773345947266, "learning_rate": 9.840816326530613e-06, "loss": 1.6433, "step": 19690 }, { "epoch": 80.37142857142857, "grad_norm": 33.618980407714844, "learning_rate": 9.838775510204082e-06, "loss": 1.2544, "step": 19691 }, { "epoch": 80.37551020408164, "grad_norm": 39.47010803222656, "learning_rate": 9.836734693877552e-06, "loss": 0.3168, "step": 19692 }, { "epoch": 80.37959183673469, "grad_norm": 37.122283935546875, "learning_rate": 9.834693877551021e-06, "loss": 0.6173, "step": 19693 }, { "epoch": 80.38367346938776, "grad_norm": 45.00118637084961, "learning_rate": 9.832653061224491e-06, "loss": 0.4376, "step": 19694 }, { "epoch": 80.38775510204081, "grad_norm": 25.59832000732422, "learning_rate": 9.830612244897959e-06, "loss": 1.648, "step": 19695 }, { "epoch": 80.39183673469388, "grad_norm": 9.505706787109375, "learning_rate": 9.828571428571429e-06, "loss": 0.0821, "step": 19696 }, { "epoch": 80.39591836734694, "grad_norm": 28.626070022583008, "learning_rate": 9.826530612244898e-06, "loss": 1.8378, "step": 19697 }, { "epoch": 80.4, "grad_norm": 31.76034164428711, "learning_rate": 9.824489795918368e-06, "loss": 1.148, "step": 19698 }, { "epoch": 80.40408163265306, "grad_norm": 36.3384895324707, "learning_rate": 9.822448979591837e-06, "loss": 0.4287, "step": 19699 }, { "epoch": 80.40816326530613, "grad_norm": 26.46834373474121, "learning_rate": 9.820408163265308e-06, "loss": 1.2324, "step": 19700 }, { "epoch": 80.41224489795918, "grad_norm": 40.83197021484375, "learning_rate": 9.818367346938775e-06, "loss": 1.2482, "step": 19701 }, { "epoch": 80.41632653061224, "grad_norm": 13.384275436401367, "learning_rate": 9.816326530612245e-06, "loss": 0.1037, "step": 19702 }, { "epoch": 80.42040816326531, "grad_norm": 37.63385772705078, "learning_rate": 9.814285714285714e-06, "loss": 0.5637, "step": 19703 }, { "epoch": 80.42448979591836, "grad_norm": 17.925888061523438, "learning_rate": 9.812244897959185e-06, "loss": 0.1104, "step": 19704 }, { "epoch": 80.42857142857143, "grad_norm": 21.667783737182617, "learning_rate": 9.810204081632654e-06, "loss": 2.0918, "step": 19705 }, { "epoch": 80.43265306122449, "grad_norm": 25.440675735473633, "learning_rate": 9.808163265306122e-06, "loss": 1.7545, "step": 19706 }, { "epoch": 80.43673469387755, "grad_norm": 35.6854133605957, "learning_rate": 9.806122448979591e-06, "loss": 1.14, "step": 19707 }, { "epoch": 80.44081632653061, "grad_norm": 35.14622497558594, "learning_rate": 9.804081632653062e-06, "loss": 0.8763, "step": 19708 }, { "epoch": 80.44489795918368, "grad_norm": 21.198518753051758, "learning_rate": 9.80204081632653e-06, "loss": 1.349, "step": 19709 }, { "epoch": 80.44897959183673, "grad_norm": 38.52900314331055, "learning_rate": 9.800000000000001e-06, "loss": 0.5965, "step": 19710 }, { "epoch": 80.4530612244898, "grad_norm": 28.651554107666016, "learning_rate": 9.79795918367347e-06, "loss": 0.2926, "step": 19711 }, { "epoch": 80.45714285714286, "grad_norm": 15.154583930969238, "learning_rate": 9.795918367346939e-06, "loss": 2.2986, "step": 19712 }, { "epoch": 80.46122448979592, "grad_norm": 33.64406967163086, "learning_rate": 9.79387755102041e-06, "loss": 1.2766, "step": 19713 }, { "epoch": 80.46530612244898, "grad_norm": 29.20659637451172, "learning_rate": 9.791836734693878e-06, "loss": 1.5394, "step": 19714 }, { "epoch": 80.46938775510205, "grad_norm": 34.00453186035156, "learning_rate": 9.789795918367347e-06, "loss": 1.0088, "step": 19715 }, { "epoch": 80.4734693877551, "grad_norm": 28.432035446166992, "learning_rate": 9.787755102040817e-06, "loss": 0.2724, "step": 19716 }, { "epoch": 80.47755102040816, "grad_norm": 39.89359664916992, "learning_rate": 9.785714285714286e-06, "loss": 0.3818, "step": 19717 }, { "epoch": 80.48163265306123, "grad_norm": 22.851806640625, "learning_rate": 9.783673469387755e-06, "loss": 1.9101, "step": 19718 }, { "epoch": 80.48571428571428, "grad_norm": 36.802799224853516, "learning_rate": 9.781632653061225e-06, "loss": 1.4461, "step": 19719 }, { "epoch": 80.48979591836735, "grad_norm": 32.33706283569336, "learning_rate": 9.779591836734694e-06, "loss": 0.9824, "step": 19720 }, { "epoch": 80.4938775510204, "grad_norm": 42.93731689453125, "learning_rate": 9.777551020408165e-06, "loss": 0.8453, "step": 19721 }, { "epoch": 80.49795918367347, "grad_norm": 31.078397750854492, "learning_rate": 9.775510204081634e-06, "loss": 0.2711, "step": 19722 }, { "epoch": 80.50204081632653, "grad_norm": 18.72811508178711, "learning_rate": 9.773469387755102e-06, "loss": 2.0536, "step": 19723 }, { "epoch": 80.5061224489796, "grad_norm": 20.80425262451172, "learning_rate": 9.771428571428571e-06, "loss": 1.9878, "step": 19724 }, { "epoch": 80.51020408163265, "grad_norm": 26.43229103088379, "learning_rate": 9.769387755102042e-06, "loss": 1.639, "step": 19725 }, { "epoch": 80.51428571428572, "grad_norm": 15.926997184753418, "learning_rate": 9.76734693877551e-06, "loss": 0.0934, "step": 19726 }, { "epoch": 80.51836734693877, "grad_norm": 34.3652458190918, "learning_rate": 9.765306122448981e-06, "loss": 0.3922, "step": 19727 }, { "epoch": 80.52244897959184, "grad_norm": 29.443187713623047, "learning_rate": 9.763265306122448e-06, "loss": 1.1693, "step": 19728 }, { "epoch": 80.5265306122449, "grad_norm": 35.08144760131836, "learning_rate": 9.761224489795919e-06, "loss": 1.1857, "step": 19729 }, { "epoch": 80.53061224489795, "grad_norm": 26.72532844543457, "learning_rate": 9.759183673469388e-06, "loss": 1.448, "step": 19730 }, { "epoch": 80.53469387755102, "grad_norm": 40.0522346496582, "learning_rate": 9.757142857142858e-06, "loss": 1.5277, "step": 19731 }, { "epoch": 80.53877551020408, "grad_norm": 39.88787841796875, "learning_rate": 9.755102040816327e-06, "loss": 0.642, "step": 19732 }, { "epoch": 80.54285714285714, "grad_norm": 32.17852783203125, "learning_rate": 9.753061224489797e-06, "loss": 0.3799, "step": 19733 }, { "epoch": 80.5469387755102, "grad_norm": 34.86309814453125, "learning_rate": 9.751020408163265e-06, "loss": 1.2003, "step": 19734 }, { "epoch": 80.55102040816327, "grad_norm": 37.09267807006836, "learning_rate": 9.748979591836735e-06, "loss": 0.6051, "step": 19735 }, { "epoch": 80.55510204081632, "grad_norm": 32.673160552978516, "learning_rate": 9.746938775510204e-06, "loss": 0.739, "step": 19736 }, { "epoch": 80.55918367346939, "grad_norm": 34.045013427734375, "learning_rate": 9.744897959183674e-06, "loss": 1.5661, "step": 19737 }, { "epoch": 80.56326530612245, "grad_norm": 37.56199645996094, "learning_rate": 9.742857142857143e-06, "loss": 1.2807, "step": 19738 }, { "epoch": 80.56734693877551, "grad_norm": 26.585390090942383, "learning_rate": 9.740816326530612e-06, "loss": 1.911, "step": 19739 }, { "epoch": 80.57142857142857, "grad_norm": 30.04134750366211, "learning_rate": 9.738775510204083e-06, "loss": 0.6769, "step": 19740 }, { "epoch": 80.57551020408164, "grad_norm": 28.791540145874023, "learning_rate": 9.736734693877551e-06, "loss": 0.3386, "step": 19741 }, { "epoch": 80.57959183673469, "grad_norm": 38.70149612426758, "learning_rate": 9.73469387755102e-06, "loss": 1.1269, "step": 19742 }, { "epoch": 80.58367346938776, "grad_norm": 24.151016235351562, "learning_rate": 9.73265306122449e-06, "loss": 1.7071, "step": 19743 }, { "epoch": 80.58775510204082, "grad_norm": 30.869741439819336, "learning_rate": 9.73061224489796e-06, "loss": 1.7208, "step": 19744 }, { "epoch": 80.59183673469387, "grad_norm": 29.249969482421875, "learning_rate": 9.728571428571428e-06, "loss": 1.24, "step": 19745 }, { "epoch": 80.59591836734694, "grad_norm": 25.31510353088379, "learning_rate": 9.726530612244899e-06, "loss": 1.4765, "step": 19746 }, { "epoch": 80.6, "grad_norm": 33.10415267944336, "learning_rate": 9.724489795918368e-06, "loss": 1.0344, "step": 19747 }, { "epoch": 80.60408163265306, "grad_norm": 40.381954193115234, "learning_rate": 9.722448979591838e-06, "loss": 1.0506, "step": 19748 }, { "epoch": 80.60816326530612, "grad_norm": 38.797122955322266, "learning_rate": 9.720408163265307e-06, "loss": 0.9937, "step": 19749 }, { "epoch": 80.61224489795919, "grad_norm": 18.58909034729004, "learning_rate": 9.718367346938776e-06, "loss": 0.1761, "step": 19750 }, { "epoch": 80.61632653061224, "grad_norm": 25.547372817993164, "learning_rate": 9.716326530612245e-06, "loss": 1.475, "step": 19751 }, { "epoch": 80.62040816326531, "grad_norm": 36.40546798706055, "learning_rate": 9.714285714285715e-06, "loss": 0.5586, "step": 19752 }, { "epoch": 80.62448979591836, "grad_norm": 43.62030029296875, "learning_rate": 9.712244897959184e-06, "loss": 0.5205, "step": 19753 }, { "epoch": 80.62857142857143, "grad_norm": 32.192134857177734, "learning_rate": 9.710204081632655e-06, "loss": 0.8714, "step": 19754 }, { "epoch": 80.63265306122449, "grad_norm": 28.850027084350586, "learning_rate": 9.708163265306123e-06, "loss": 1.5223, "step": 19755 }, { "epoch": 80.63673469387756, "grad_norm": 22.16461181640625, "learning_rate": 9.706122448979592e-06, "loss": 2.1249, "step": 19756 }, { "epoch": 80.64081632653061, "grad_norm": 32.005882263183594, "learning_rate": 9.704081632653061e-06, "loss": 0.5162, "step": 19757 }, { "epoch": 80.64489795918368, "grad_norm": 44.888160705566406, "learning_rate": 9.702040816326531e-06, "loss": 0.509, "step": 19758 }, { "epoch": 80.64897959183673, "grad_norm": 28.48579216003418, "learning_rate": 9.7e-06, "loss": 1.4948, "step": 19759 }, { "epoch": 80.65306122448979, "grad_norm": 34.30355453491211, "learning_rate": 9.69795918367347e-06, "loss": 0.3063, "step": 19760 }, { "epoch": 80.65714285714286, "grad_norm": 34.636234283447266, "learning_rate": 9.695918367346938e-06, "loss": 1.3015, "step": 19761 }, { "epoch": 80.66122448979591, "grad_norm": 41.06352233886719, "learning_rate": 9.693877551020408e-06, "loss": 0.4919, "step": 19762 }, { "epoch": 80.66530612244898, "grad_norm": 27.5555419921875, "learning_rate": 9.691836734693877e-06, "loss": 1.3893, "step": 19763 }, { "epoch": 80.66938775510204, "grad_norm": 33.7169303894043, "learning_rate": 9.689795918367348e-06, "loss": 0.867, "step": 19764 }, { "epoch": 80.6734693877551, "grad_norm": 34.7475471496582, "learning_rate": 9.687755102040817e-06, "loss": 0.573, "step": 19765 }, { "epoch": 80.67755102040816, "grad_norm": 16.233312606811523, "learning_rate": 9.685714285714287e-06, "loss": 0.1632, "step": 19766 }, { "epoch": 80.68163265306123, "grad_norm": 28.7130069732666, "learning_rate": 9.683673469387756e-06, "loss": 0.4067, "step": 19767 }, { "epoch": 80.68571428571428, "grad_norm": 32.17775344848633, "learning_rate": 9.681632653061225e-06, "loss": 1.3984, "step": 19768 }, { "epoch": 80.68979591836735, "grad_norm": 31.427650451660156, "learning_rate": 9.679591836734694e-06, "loss": 1.1268, "step": 19769 }, { "epoch": 80.6938775510204, "grad_norm": 24.580087661743164, "learning_rate": 9.677551020408164e-06, "loss": 0.7692, "step": 19770 }, { "epoch": 80.69795918367348, "grad_norm": 42.06871032714844, "learning_rate": 9.675510204081633e-06, "loss": 0.2704, "step": 19771 }, { "epoch": 80.70204081632653, "grad_norm": 37.399147033691406, "learning_rate": 9.673469387755102e-06, "loss": 0.3674, "step": 19772 }, { "epoch": 80.70612244897958, "grad_norm": 32.888607025146484, "learning_rate": 9.671428571428572e-06, "loss": 1.4451, "step": 19773 }, { "epoch": 80.71020408163265, "grad_norm": 54.3766975402832, "learning_rate": 9.669387755102041e-06, "loss": 0.9859, "step": 19774 }, { "epoch": 80.71428571428571, "grad_norm": 32.77488327026367, "learning_rate": 9.667346938775512e-06, "loss": 1.2597, "step": 19775 }, { "epoch": 80.71836734693878, "grad_norm": 38.206241607666016, "learning_rate": 9.66530612244898e-06, "loss": 0.3694, "step": 19776 }, { "epoch": 80.72244897959183, "grad_norm": 33.953224182128906, "learning_rate": 9.663265306122451e-06, "loss": 1.4218, "step": 19777 }, { "epoch": 80.7265306122449, "grad_norm": 37.19913864135742, "learning_rate": 9.661224489795918e-06, "loss": 1.2934, "step": 19778 }, { "epoch": 80.73061224489796, "grad_norm": 31.14329719543457, "learning_rate": 9.659183673469389e-06, "loss": 0.64, "step": 19779 }, { "epoch": 80.73469387755102, "grad_norm": 31.977706909179688, "learning_rate": 9.657142857142857e-06, "loss": 0.682, "step": 19780 }, { "epoch": 80.73877551020408, "grad_norm": 35.90385437011719, "learning_rate": 9.655102040816328e-06, "loss": 1.3581, "step": 19781 }, { "epoch": 80.74285714285715, "grad_norm": 17.30140495300293, "learning_rate": 9.653061224489797e-06, "loss": 2.6177, "step": 19782 }, { "epoch": 80.7469387755102, "grad_norm": 37.495018005371094, "learning_rate": 9.651020408163266e-06, "loss": 0.737, "step": 19783 }, { "epoch": 80.75102040816327, "grad_norm": 30.42961883544922, "learning_rate": 9.648979591836734e-06, "loss": 1.2875, "step": 19784 }, { "epoch": 80.75510204081633, "grad_norm": 12.283529281616211, "learning_rate": 9.646938775510205e-06, "loss": 0.1361, "step": 19785 }, { "epoch": 80.7591836734694, "grad_norm": 32.65414810180664, "learning_rate": 9.644897959183674e-06, "loss": 0.8433, "step": 19786 }, { "epoch": 80.76326530612245, "grad_norm": 26.219341278076172, "learning_rate": 9.642857142857144e-06, "loss": 1.4781, "step": 19787 }, { "epoch": 80.7673469387755, "grad_norm": 27.127023696899414, "learning_rate": 9.640816326530613e-06, "loss": 1.6433, "step": 19788 }, { "epoch": 80.77142857142857, "grad_norm": 24.64622688293457, "learning_rate": 9.638775510204082e-06, "loss": 1.5476, "step": 19789 }, { "epoch": 80.77551020408163, "grad_norm": 37.2613639831543, "learning_rate": 9.63673469387755e-06, "loss": 0.7216, "step": 19790 }, { "epoch": 80.7795918367347, "grad_norm": 34.235862731933594, "learning_rate": 9.634693877551021e-06, "loss": 0.4194, "step": 19791 }, { "epoch": 80.78367346938775, "grad_norm": 26.74532699584961, "learning_rate": 9.63265306122449e-06, "loss": 1.0141, "step": 19792 }, { "epoch": 80.78775510204082, "grad_norm": 33.13402557373047, "learning_rate": 9.63061224489796e-06, "loss": 0.3243, "step": 19793 }, { "epoch": 80.79183673469387, "grad_norm": 34.36772918701172, "learning_rate": 9.628571428571428e-06, "loss": 0.3899, "step": 19794 }, { "epoch": 80.79591836734694, "grad_norm": 33.36051559448242, "learning_rate": 9.626530612244898e-06, "loss": 1.1277, "step": 19795 }, { "epoch": 80.8, "grad_norm": 26.892534255981445, "learning_rate": 9.624489795918367e-06, "loss": 1.6197, "step": 19796 }, { "epoch": 80.80408163265307, "grad_norm": 29.896574020385742, "learning_rate": 9.622448979591837e-06, "loss": 1.3657, "step": 19797 }, { "epoch": 80.80816326530612, "grad_norm": 41.452247619628906, "learning_rate": 9.620408163265306e-06, "loss": 1.0979, "step": 19798 }, { "epoch": 80.81224489795919, "grad_norm": 27.02347755432129, "learning_rate": 9.618367346938775e-06, "loss": 2.0498, "step": 19799 }, { "epoch": 80.81632653061224, "grad_norm": 22.240705490112305, "learning_rate": 9.616326530612246e-06, "loss": 1.4409, "step": 19800 }, { "epoch": 80.82040816326531, "grad_norm": 25.69281768798828, "learning_rate": 9.614285714285714e-06, "loss": 0.2667, "step": 19801 }, { "epoch": 80.82448979591837, "grad_norm": 38.17341995239258, "learning_rate": 9.612244897959185e-06, "loss": 0.7466, "step": 19802 }, { "epoch": 80.82857142857142, "grad_norm": 28.396806716918945, "learning_rate": 9.610204081632654e-06, "loss": 0.7672, "step": 19803 }, { "epoch": 80.83265306122449, "grad_norm": 30.68138885498047, "learning_rate": 9.608163265306124e-06, "loss": 1.4338, "step": 19804 }, { "epoch": 80.83673469387755, "grad_norm": 35.497432708740234, "learning_rate": 9.606122448979591e-06, "loss": 0.782, "step": 19805 }, { "epoch": 80.84081632653061, "grad_norm": 31.956008911132812, "learning_rate": 9.604081632653062e-06, "loss": 0.3746, "step": 19806 }, { "epoch": 80.84489795918367, "grad_norm": 27.26508903503418, "learning_rate": 9.60204081632653e-06, "loss": 1.5296, "step": 19807 }, { "epoch": 80.84897959183674, "grad_norm": 23.0290470123291, "learning_rate": 9.600000000000001e-06, "loss": 1.6819, "step": 19808 }, { "epoch": 80.85306122448979, "grad_norm": 41.17890930175781, "learning_rate": 9.59795918367347e-06, "loss": 0.7822, "step": 19809 }, { "epoch": 80.85714285714286, "grad_norm": 28.639673233032227, "learning_rate": 9.595918367346939e-06, "loss": 0.4499, "step": 19810 }, { "epoch": 80.86122448979592, "grad_norm": 18.943893432617188, "learning_rate": 9.593877551020408e-06, "loss": 2.1885, "step": 19811 }, { "epoch": 80.86530612244898, "grad_norm": 30.48643684387207, "learning_rate": 9.591836734693878e-06, "loss": 1.2611, "step": 19812 }, { "epoch": 80.86938775510204, "grad_norm": 37.53820037841797, "learning_rate": 9.589795918367347e-06, "loss": 0.8716, "step": 19813 }, { "epoch": 80.87346938775511, "grad_norm": 34.79216003417969, "learning_rate": 9.587755102040818e-06, "loss": 1.2666, "step": 19814 }, { "epoch": 80.87755102040816, "grad_norm": 24.017812728881836, "learning_rate": 9.585714285714286e-06, "loss": 0.1542, "step": 19815 }, { "epoch": 80.88163265306123, "grad_norm": 35.431129455566406, "learning_rate": 9.583673469387755e-06, "loss": 0.9481, "step": 19816 }, { "epoch": 80.88571428571429, "grad_norm": 28.153799057006836, "learning_rate": 9.581632653061224e-06, "loss": 1.5228, "step": 19817 }, { "epoch": 80.88979591836734, "grad_norm": 29.90407943725586, "learning_rate": 9.579591836734695e-06, "loss": 0.6508, "step": 19818 }, { "epoch": 80.89387755102041, "grad_norm": 20.6030216217041, "learning_rate": 9.577551020408163e-06, "loss": 1.9247, "step": 19819 }, { "epoch": 80.89795918367346, "grad_norm": 38.70077896118164, "learning_rate": 9.575510204081634e-06, "loss": 0.8224, "step": 19820 }, { "epoch": 80.90204081632653, "grad_norm": 32.55107116699219, "learning_rate": 9.573469387755101e-06, "loss": 0.8625, "step": 19821 }, { "epoch": 80.90612244897959, "grad_norm": 30.710500717163086, "learning_rate": 9.571428571428572e-06, "loss": 1.1166, "step": 19822 }, { "epoch": 80.91020408163266, "grad_norm": 1.651618480682373, "learning_rate": 9.56938775510204e-06, "loss": 0.0329, "step": 19823 }, { "epoch": 80.91428571428571, "grad_norm": 29.470020294189453, "learning_rate": 9.56734693877551e-06, "loss": 0.4785, "step": 19824 }, { "epoch": 80.91836734693878, "grad_norm": 16.176937103271484, "learning_rate": 9.56530612244898e-06, "loss": 0.1153, "step": 19825 }, { "epoch": 80.92244897959183, "grad_norm": 38.397186279296875, "learning_rate": 9.56326530612245e-06, "loss": 0.4399, "step": 19826 }, { "epoch": 80.9265306122449, "grad_norm": 29.885746002197266, "learning_rate": 9.561224489795919e-06, "loss": 1.5897, "step": 19827 }, { "epoch": 80.93061224489796, "grad_norm": 25.60024642944336, "learning_rate": 9.559183673469388e-06, "loss": 1.6613, "step": 19828 }, { "epoch": 80.93469387755103, "grad_norm": 22.900257110595703, "learning_rate": 9.557142857142858e-06, "loss": 2.0324, "step": 19829 }, { "epoch": 80.93877551020408, "grad_norm": 27.579641342163086, "learning_rate": 9.555102040816327e-06, "loss": 0.2477, "step": 19830 }, { "epoch": 80.94285714285714, "grad_norm": 28.0472469329834, "learning_rate": 9.553061224489798e-06, "loss": 0.4863, "step": 19831 }, { "epoch": 80.9469387755102, "grad_norm": 24.585514068603516, "learning_rate": 9.551020408163265e-06, "loss": 2.0008, "step": 19832 }, { "epoch": 80.95102040816326, "grad_norm": 13.84062385559082, "learning_rate": 9.548979591836735e-06, "loss": 0.1033, "step": 19833 }, { "epoch": 80.95510204081633, "grad_norm": 32.05696487426758, "learning_rate": 9.546938775510204e-06, "loss": 1.0721, "step": 19834 }, { "epoch": 80.95918367346938, "grad_norm": 32.14529800415039, "learning_rate": 9.544897959183675e-06, "loss": 0.7638, "step": 19835 }, { "epoch": 80.96326530612245, "grad_norm": 34.510986328125, "learning_rate": 9.542857142857143e-06, "loss": 0.4308, "step": 19836 }, { "epoch": 80.9673469387755, "grad_norm": 49.238304138183594, "learning_rate": 9.540816326530614e-06, "loss": 0.9943, "step": 19837 }, { "epoch": 80.97142857142858, "grad_norm": 26.664106369018555, "learning_rate": 9.538775510204081e-06, "loss": 1.2779, "step": 19838 }, { "epoch": 80.97551020408163, "grad_norm": 27.23948860168457, "learning_rate": 9.536734693877552e-06, "loss": 0.2679, "step": 19839 }, { "epoch": 80.9795918367347, "grad_norm": 29.21319580078125, "learning_rate": 9.53469387755102e-06, "loss": 0.5713, "step": 19840 }, { "epoch": 80.98367346938775, "grad_norm": 23.543869018554688, "learning_rate": 9.532653061224491e-06, "loss": 1.5064, "step": 19841 }, { "epoch": 80.98775510204082, "grad_norm": 23.790353775024414, "learning_rate": 9.53061224489796e-06, "loss": 1.6127, "step": 19842 }, { "epoch": 80.99183673469388, "grad_norm": 35.21694564819336, "learning_rate": 9.528571428571429e-06, "loss": 0.6047, "step": 19843 }, { "epoch": 80.99591836734695, "grad_norm": 36.245670318603516, "learning_rate": 9.526530612244897e-06, "loss": 1.3273, "step": 19844 }, { "epoch": 81.0, "grad_norm": 32.30809783935547, "learning_rate": 9.524489795918368e-06, "loss": 0.8504, "step": 19845 }, { "epoch": 81.00408163265305, "grad_norm": 31.879365921020508, "learning_rate": 9.522448979591837e-06, "loss": 1.0229, "step": 19846 }, { "epoch": 81.00816326530612, "grad_norm": 34.90495300292969, "learning_rate": 9.520408163265307e-06, "loss": 1.0275, "step": 19847 }, { "epoch": 81.01224489795918, "grad_norm": 30.30025291442871, "learning_rate": 9.518367346938776e-06, "loss": 0.7464, "step": 19848 }, { "epoch": 81.01632653061225, "grad_norm": 23.40790367126465, "learning_rate": 9.516326530612245e-06, "loss": 0.2524, "step": 19849 }, { "epoch": 81.0204081632653, "grad_norm": 22.183176040649414, "learning_rate": 9.514285714285714e-06, "loss": 1.4935, "step": 19850 }, { "epoch": 81.02448979591837, "grad_norm": 33.056026458740234, "learning_rate": 9.512244897959184e-06, "loss": 1.2007, "step": 19851 }, { "epoch": 81.02857142857142, "grad_norm": 23.887630462646484, "learning_rate": 9.510204081632653e-06, "loss": 1.392, "step": 19852 }, { "epoch": 81.0326530612245, "grad_norm": 30.003454208374023, "learning_rate": 9.508163265306124e-06, "loss": 1.1999, "step": 19853 }, { "epoch": 81.03673469387755, "grad_norm": 24.30411148071289, "learning_rate": 9.506122448979592e-06, "loss": 1.4269, "step": 19854 }, { "epoch": 81.04081632653062, "grad_norm": 32.93296813964844, "learning_rate": 9.504081632653061e-06, "loss": 0.6244, "step": 19855 }, { "epoch": 81.04489795918367, "grad_norm": 28.592281341552734, "learning_rate": 9.502040816326532e-06, "loss": 0.352, "step": 19856 }, { "epoch": 81.04897959183674, "grad_norm": 43.23611831665039, "learning_rate": 9.5e-06, "loss": 0.2062, "step": 19857 }, { "epoch": 81.0530612244898, "grad_norm": 19.201000213623047, "learning_rate": 9.497959183673471e-06, "loss": 0.1311, "step": 19858 }, { "epoch": 81.05714285714286, "grad_norm": 33.47599792480469, "learning_rate": 9.49591836734694e-06, "loss": 1.0585, "step": 19859 }, { "epoch": 81.06122448979592, "grad_norm": 48.857505798339844, "learning_rate": 9.493877551020409e-06, "loss": 0.4919, "step": 19860 }, { "epoch": 81.06530612244897, "grad_norm": 34.18575668334961, "learning_rate": 9.491836734693877e-06, "loss": 1.3306, "step": 19861 }, { "epoch": 81.06938775510204, "grad_norm": 30.72066879272461, "learning_rate": 9.489795918367348e-06, "loss": 0.7845, "step": 19862 }, { "epoch": 81.0734693877551, "grad_norm": 33.65139389038086, "learning_rate": 9.487755102040817e-06, "loss": 0.8647, "step": 19863 }, { "epoch": 81.07755102040817, "grad_norm": 28.639293670654297, "learning_rate": 9.485714285714287e-06, "loss": 1.6865, "step": 19864 }, { "epoch": 81.08163265306122, "grad_norm": 36.445621490478516, "learning_rate": 9.483673469387754e-06, "loss": 0.7149, "step": 19865 }, { "epoch": 81.08571428571429, "grad_norm": 42.66292953491211, "learning_rate": 9.481632653061225e-06, "loss": 0.6764, "step": 19866 }, { "epoch": 81.08979591836734, "grad_norm": 34.716957092285156, "learning_rate": 9.479591836734694e-06, "loss": 0.8783, "step": 19867 }, { "epoch": 81.09387755102041, "grad_norm": 32.26762008666992, "learning_rate": 9.477551020408164e-06, "loss": 0.6937, "step": 19868 }, { "epoch": 81.09795918367347, "grad_norm": 35.611698150634766, "learning_rate": 9.475510204081633e-06, "loss": 1.1074, "step": 19869 }, { "epoch": 81.10204081632654, "grad_norm": 39.35004425048828, "learning_rate": 9.473469387755104e-06, "loss": 0.6257, "step": 19870 }, { "epoch": 81.10612244897959, "grad_norm": 18.412094116210938, "learning_rate": 9.47142857142857e-06, "loss": 2.0355, "step": 19871 }, { "epoch": 81.11020408163266, "grad_norm": 8.734535217285156, "learning_rate": 9.469387755102041e-06, "loss": 3.0137, "step": 19872 }, { "epoch": 81.11428571428571, "grad_norm": 25.599552154541016, "learning_rate": 9.46734693877551e-06, "loss": 1.4868, "step": 19873 }, { "epoch": 81.11836734693877, "grad_norm": 26.082015991210938, "learning_rate": 9.46530612244898e-06, "loss": 0.2931, "step": 19874 }, { "epoch": 81.12244897959184, "grad_norm": 32.02815628051758, "learning_rate": 9.46326530612245e-06, "loss": 0.9974, "step": 19875 }, { "epoch": 81.12653061224489, "grad_norm": 40.6213264465332, "learning_rate": 9.461224489795918e-06, "loss": 0.4243, "step": 19876 }, { "epoch": 81.13061224489796, "grad_norm": 42.25102233886719, "learning_rate": 9.459183673469387e-06, "loss": 0.2992, "step": 19877 }, { "epoch": 81.13469387755102, "grad_norm": 22.215599060058594, "learning_rate": 9.457142857142858e-06, "loss": 0.1852, "step": 19878 }, { "epoch": 81.13877551020408, "grad_norm": 33.47407531738281, "learning_rate": 9.455102040816326e-06, "loss": 0.7441, "step": 19879 }, { "epoch": 81.14285714285714, "grad_norm": 30.18383026123047, "learning_rate": 9.453061224489797e-06, "loss": 1.5878, "step": 19880 }, { "epoch": 81.14693877551021, "grad_norm": 40.6312141418457, "learning_rate": 9.451020408163266e-06, "loss": 0.9349, "step": 19881 }, { "epoch": 81.15102040816326, "grad_norm": 24.961719512939453, "learning_rate": 9.448979591836735e-06, "loss": 1.9845, "step": 19882 }, { "epoch": 81.15510204081633, "grad_norm": 29.144948959350586, "learning_rate": 9.446938775510205e-06, "loss": 0.4033, "step": 19883 }, { "epoch": 81.15918367346939, "grad_norm": 17.01013946533203, "learning_rate": 9.444897959183674e-06, "loss": 2.274, "step": 19884 }, { "epoch": 81.16326530612245, "grad_norm": 29.15430450439453, "learning_rate": 9.442857142857144e-06, "loss": 1.3856, "step": 19885 }, { "epoch": 81.16734693877551, "grad_norm": 27.029821395874023, "learning_rate": 9.440816326530613e-06, "loss": 1.4208, "step": 19886 }, { "epoch": 81.17142857142858, "grad_norm": 37.591712951660156, "learning_rate": 9.438775510204082e-06, "loss": 0.4452, "step": 19887 }, { "epoch": 81.17551020408163, "grad_norm": 47.090721130371094, "learning_rate": 9.436734693877551e-06, "loss": 0.926, "step": 19888 }, { "epoch": 81.17959183673469, "grad_norm": 26.10456085205078, "learning_rate": 9.434693877551021e-06, "loss": 1.4441, "step": 19889 }, { "epoch": 81.18367346938776, "grad_norm": 45.96257781982422, "learning_rate": 9.43265306122449e-06, "loss": 0.5073, "step": 19890 }, { "epoch": 81.18775510204081, "grad_norm": 53.525054931640625, "learning_rate": 9.43061224489796e-06, "loss": 0.9422, "step": 19891 }, { "epoch": 81.19183673469388, "grad_norm": 19.830900192260742, "learning_rate": 9.42857142857143e-06, "loss": 2.1448, "step": 19892 }, { "epoch": 81.19591836734693, "grad_norm": 35.83763122558594, "learning_rate": 9.426530612244898e-06, "loss": 0.5899, "step": 19893 }, { "epoch": 81.2, "grad_norm": 27.2696475982666, "learning_rate": 9.424489795918367e-06, "loss": 0.7629, "step": 19894 }, { "epoch": 81.20408163265306, "grad_norm": 39.08687973022461, "learning_rate": 9.422448979591838e-06, "loss": 0.4181, "step": 19895 }, { "epoch": 81.20816326530613, "grad_norm": 22.028602600097656, "learning_rate": 9.420408163265307e-06, "loss": 1.9061, "step": 19896 }, { "epoch": 81.21224489795918, "grad_norm": 37.22242736816406, "learning_rate": 9.418367346938777e-06, "loss": 0.5616, "step": 19897 }, { "epoch": 81.21632653061225, "grad_norm": 37.552791595458984, "learning_rate": 9.416326530612244e-06, "loss": 0.8155, "step": 19898 }, { "epoch": 81.2204081632653, "grad_norm": 20.885215759277344, "learning_rate": 9.414285714285715e-06, "loss": 0.1738, "step": 19899 }, { "epoch": 81.22448979591837, "grad_norm": 39.86214065551758, "learning_rate": 9.412244897959183e-06, "loss": 1.1163, "step": 19900 }, { "epoch": 81.22857142857143, "grad_norm": 34.73880386352539, "learning_rate": 9.410204081632654e-06, "loss": 0.9976, "step": 19901 }, { "epoch": 81.2326530612245, "grad_norm": 43.37651824951172, "learning_rate": 9.408163265306123e-06, "loss": 0.6618, "step": 19902 }, { "epoch": 81.23673469387755, "grad_norm": 30.422178268432617, "learning_rate": 9.406122448979593e-06, "loss": 0.9591, "step": 19903 }, { "epoch": 81.2408163265306, "grad_norm": 23.079792022705078, "learning_rate": 9.40408163265306e-06, "loss": 1.6684, "step": 19904 }, { "epoch": 81.24489795918367, "grad_norm": 27.130414962768555, "learning_rate": 9.402040816326531e-06, "loss": 1.4908, "step": 19905 }, { "epoch": 81.24897959183673, "grad_norm": 45.4510383605957, "learning_rate": 9.4e-06, "loss": 0.8276, "step": 19906 }, { "epoch": 81.2530612244898, "grad_norm": 43.01475143432617, "learning_rate": 9.39795918367347e-06, "loss": 0.3981, "step": 19907 }, { "epoch": 81.25714285714285, "grad_norm": 37.441341400146484, "learning_rate": 9.395918367346939e-06, "loss": 0.3775, "step": 19908 }, { "epoch": 81.26122448979592, "grad_norm": 24.971168518066406, "learning_rate": 9.393877551020408e-06, "loss": 1.8937, "step": 19909 }, { "epoch": 81.26530612244898, "grad_norm": 32.68193054199219, "learning_rate": 9.391836734693878e-06, "loss": 0.8501, "step": 19910 }, { "epoch": 81.26938775510204, "grad_norm": 41.36827087402344, "learning_rate": 9.389795918367347e-06, "loss": 1.2119, "step": 19911 }, { "epoch": 81.2734693877551, "grad_norm": 36.450382232666016, "learning_rate": 9.387755102040816e-06, "loss": 0.8703, "step": 19912 }, { "epoch": 81.27755102040817, "grad_norm": 27.65570068359375, "learning_rate": 9.385714285714287e-06, "loss": 0.7644, "step": 19913 }, { "epoch": 81.28163265306122, "grad_norm": 27.51578712463379, "learning_rate": 9.383673469387755e-06, "loss": 1.4872, "step": 19914 }, { "epoch": 81.28571428571429, "grad_norm": 36.3286247253418, "learning_rate": 9.381632653061224e-06, "loss": 0.8455, "step": 19915 }, { "epoch": 81.28979591836735, "grad_norm": 34.91893005371094, "learning_rate": 9.379591836734695e-06, "loss": 0.3473, "step": 19916 }, { "epoch": 81.29387755102042, "grad_norm": 27.936904907226562, "learning_rate": 9.377551020408164e-06, "loss": 1.2453, "step": 19917 }, { "epoch": 81.29795918367347, "grad_norm": 25.117263793945312, "learning_rate": 9.375510204081634e-06, "loss": 1.4881, "step": 19918 }, { "epoch": 81.30204081632652, "grad_norm": 25.062397003173828, "learning_rate": 9.373469387755103e-06, "loss": 1.3781, "step": 19919 }, { "epoch": 81.3061224489796, "grad_norm": 41.25769805908203, "learning_rate": 9.371428571428572e-06, "loss": 0.4385, "step": 19920 }, { "epoch": 81.31020408163265, "grad_norm": 37.64305877685547, "learning_rate": 9.36938775510204e-06, "loss": 0.3103, "step": 19921 }, { "epoch": 81.31428571428572, "grad_norm": 33.238059997558594, "learning_rate": 9.367346938775511e-06, "loss": 0.6869, "step": 19922 }, { "epoch": 81.31836734693877, "grad_norm": 29.754335403442383, "learning_rate": 9.36530612244898e-06, "loss": 0.6636, "step": 19923 }, { "epoch": 81.32244897959184, "grad_norm": 32.86503982543945, "learning_rate": 9.36326530612245e-06, "loss": 0.9588, "step": 19924 }, { "epoch": 81.3265306122449, "grad_norm": 6.340383052825928, "learning_rate": 9.36122448979592e-06, "loss": 0.0619, "step": 19925 }, { "epoch": 81.33061224489796, "grad_norm": 24.82904624938965, "learning_rate": 9.359183673469388e-06, "loss": 1.9241, "step": 19926 }, { "epoch": 81.33469387755102, "grad_norm": 29.89761734008789, "learning_rate": 9.357142857142857e-06, "loss": 1.5446, "step": 19927 }, { "epoch": 81.33877551020409, "grad_norm": 33.47135925292969, "learning_rate": 9.355102040816327e-06, "loss": 0.6968, "step": 19928 }, { "epoch": 81.34285714285714, "grad_norm": 31.89466094970703, "learning_rate": 9.353061224489796e-06, "loss": 1.3145, "step": 19929 }, { "epoch": 81.34693877551021, "grad_norm": 23.98441505432129, "learning_rate": 9.351020408163267e-06, "loss": 1.4585, "step": 19930 }, { "epoch": 81.35102040816327, "grad_norm": 24.939878463745117, "learning_rate": 9.348979591836734e-06, "loss": 1.6022, "step": 19931 }, { "epoch": 81.35510204081632, "grad_norm": 27.304269790649414, "learning_rate": 9.346938775510204e-06, "loss": 1.5016, "step": 19932 }, { "epoch": 81.35918367346939, "grad_norm": 37.07040786743164, "learning_rate": 9.344897959183673e-06, "loss": 1.3083, "step": 19933 }, { "epoch": 81.36326530612244, "grad_norm": 32.158653259277344, "learning_rate": 9.342857142857144e-06, "loss": 0.4915, "step": 19934 }, { "epoch": 81.36734693877551, "grad_norm": 28.64301300048828, "learning_rate": 9.340816326530612e-06, "loss": 1.6391, "step": 19935 }, { "epoch": 81.37142857142857, "grad_norm": 26.522302627563477, "learning_rate": 9.338775510204083e-06, "loss": 1.4493, "step": 19936 }, { "epoch": 81.37551020408164, "grad_norm": 36.03902816772461, "learning_rate": 9.336734693877552e-06, "loss": 0.5315, "step": 19937 }, { "epoch": 81.37959183673469, "grad_norm": 17.8309268951416, "learning_rate": 9.33469387755102e-06, "loss": 0.1396, "step": 19938 }, { "epoch": 81.38367346938776, "grad_norm": 15.824902534484863, "learning_rate": 9.33265306122449e-06, "loss": 2.6023, "step": 19939 }, { "epoch": 81.38775510204081, "grad_norm": 30.855587005615234, "learning_rate": 9.33061224489796e-06, "loss": 0.5138, "step": 19940 }, { "epoch": 81.39183673469388, "grad_norm": 23.15493392944336, "learning_rate": 9.328571428571429e-06, "loss": 1.6426, "step": 19941 }, { "epoch": 81.39591836734694, "grad_norm": 24.47955894470215, "learning_rate": 9.326530612244898e-06, "loss": 0.1333, "step": 19942 }, { "epoch": 81.4, "grad_norm": 41.79602813720703, "learning_rate": 9.324489795918368e-06, "loss": 0.9223, "step": 19943 }, { "epoch": 81.40408163265306, "grad_norm": 1.3781936168670654, "learning_rate": 9.322448979591837e-06, "loss": 0.0224, "step": 19944 }, { "epoch": 81.40816326530613, "grad_norm": 28.685848236083984, "learning_rate": 9.320408163265307e-06, "loss": 1.0634, "step": 19945 }, { "epoch": 81.41224489795918, "grad_norm": 28.19173812866211, "learning_rate": 9.318367346938776e-06, "loss": 0.596, "step": 19946 }, { "epoch": 81.41632653061224, "grad_norm": 30.487348556518555, "learning_rate": 9.316326530612247e-06, "loss": 1.2469, "step": 19947 }, { "epoch": 81.42040816326531, "grad_norm": 23.570226669311523, "learning_rate": 9.314285714285714e-06, "loss": 1.3162, "step": 19948 }, { "epoch": 81.42448979591836, "grad_norm": 22.823610305786133, "learning_rate": 9.312244897959184e-06, "loss": 2.1083, "step": 19949 }, { "epoch": 81.42857142857143, "grad_norm": 25.086706161499023, "learning_rate": 9.310204081632653e-06, "loss": 1.5229, "step": 19950 }, { "epoch": 81.43265306122449, "grad_norm": 33.177001953125, "learning_rate": 9.308163265306124e-06, "loss": 0.5621, "step": 19951 }, { "epoch": 81.43673469387755, "grad_norm": 12.917848587036133, "learning_rate": 9.306122448979593e-06, "loss": 0.0998, "step": 19952 }, { "epoch": 81.44081632653061, "grad_norm": 34.22320556640625, "learning_rate": 9.304081632653061e-06, "loss": 0.5449, "step": 19953 }, { "epoch": 81.44489795918368, "grad_norm": 28.764968872070312, "learning_rate": 9.30204081632653e-06, "loss": 1.1473, "step": 19954 }, { "epoch": 81.44897959183673, "grad_norm": 35.859169006347656, "learning_rate": 9.3e-06, "loss": 1.3015, "step": 19955 }, { "epoch": 81.4530612244898, "grad_norm": 37.40180587768555, "learning_rate": 9.29795918367347e-06, "loss": 0.6327, "step": 19956 }, { "epoch": 81.45714285714286, "grad_norm": 42.55752944946289, "learning_rate": 9.29591836734694e-06, "loss": 0.8981, "step": 19957 }, { "epoch": 81.46122448979592, "grad_norm": 40.1766471862793, "learning_rate": 9.293877551020409e-06, "loss": 0.5619, "step": 19958 }, { "epoch": 81.46530612244898, "grad_norm": 37.54365921020508, "learning_rate": 9.291836734693878e-06, "loss": 0.8344, "step": 19959 }, { "epoch": 81.46938775510205, "grad_norm": 35.62504959106445, "learning_rate": 9.289795918367347e-06, "loss": 0.4028, "step": 19960 }, { "epoch": 81.4734693877551, "grad_norm": 29.033424377441406, "learning_rate": 9.287755102040817e-06, "loss": 1.5677, "step": 19961 }, { "epoch": 81.47755102040816, "grad_norm": 22.686054229736328, "learning_rate": 9.285714285714286e-06, "loss": 1.9747, "step": 19962 }, { "epoch": 81.48163265306123, "grad_norm": 29.022727966308594, "learning_rate": 9.283673469387756e-06, "loss": 1.2319, "step": 19963 }, { "epoch": 81.48571428571428, "grad_norm": 35.17997360229492, "learning_rate": 9.281632653061225e-06, "loss": 0.3842, "step": 19964 }, { "epoch": 81.48979591836735, "grad_norm": 33.41398239135742, "learning_rate": 9.279591836734694e-06, "loss": 1.3886, "step": 19965 }, { "epoch": 81.4938775510204, "grad_norm": 31.663572311401367, "learning_rate": 9.277551020408163e-06, "loss": 1.3217, "step": 19966 }, { "epoch": 81.49795918367347, "grad_norm": 10.402281761169434, "learning_rate": 9.275510204081633e-06, "loss": 0.0928, "step": 19967 }, { "epoch": 81.50204081632653, "grad_norm": 36.87015151977539, "learning_rate": 9.273469387755102e-06, "loss": 0.6635, "step": 19968 }, { "epoch": 81.5061224489796, "grad_norm": 34.0013313293457, "learning_rate": 9.271428571428571e-06, "loss": 0.3937, "step": 19969 }, { "epoch": 81.51020408163265, "grad_norm": 35.66109848022461, "learning_rate": 9.269387755102042e-06, "loss": 1.4339, "step": 19970 }, { "epoch": 81.51428571428572, "grad_norm": 28.760154724121094, "learning_rate": 9.26734693877551e-06, "loss": 1.1464, "step": 19971 }, { "epoch": 81.51836734693877, "grad_norm": 32.414039611816406, "learning_rate": 9.26530612244898e-06, "loss": 1.0022, "step": 19972 }, { "epoch": 81.52244897959184, "grad_norm": 28.127525329589844, "learning_rate": 9.26326530612245e-06, "loss": 1.5529, "step": 19973 }, { "epoch": 81.5265306122449, "grad_norm": 26.191734313964844, "learning_rate": 9.26122448979592e-06, "loss": 0.248, "step": 19974 }, { "epoch": 81.53061224489795, "grad_norm": 35.6956672668457, "learning_rate": 9.259183673469387e-06, "loss": 0.3961, "step": 19975 }, { "epoch": 81.53469387755102, "grad_norm": 40.15515899658203, "learning_rate": 9.257142857142858e-06, "loss": 0.7773, "step": 19976 }, { "epoch": 81.53877551020408, "grad_norm": 34.98249053955078, "learning_rate": 9.255102040816327e-06, "loss": 0.2771, "step": 19977 }, { "epoch": 81.54285714285714, "grad_norm": 31.38654899597168, "learning_rate": 9.253061224489797e-06, "loss": 0.434, "step": 19978 }, { "epoch": 81.5469387755102, "grad_norm": 26.668031692504883, "learning_rate": 9.251020408163266e-06, "loss": 0.2436, "step": 19979 }, { "epoch": 81.55102040816327, "grad_norm": 29.089487075805664, "learning_rate": 9.248979591836735e-06, "loss": 0.3337, "step": 19980 }, { "epoch": 81.55510204081632, "grad_norm": 30.77600860595703, "learning_rate": 9.246938775510204e-06, "loss": 0.3599, "step": 19981 }, { "epoch": 81.55918367346939, "grad_norm": 38.921714782714844, "learning_rate": 9.244897959183674e-06, "loss": 0.9715, "step": 19982 }, { "epoch": 81.56326530612245, "grad_norm": 32.50103759765625, "learning_rate": 9.242857142857143e-06, "loss": 1.6372, "step": 19983 }, { "epoch": 81.56734693877551, "grad_norm": 32.546382904052734, "learning_rate": 9.240816326530613e-06, "loss": 1.0121, "step": 19984 }, { "epoch": 81.57142857142857, "grad_norm": 39.72074508666992, "learning_rate": 9.238775510204082e-06, "loss": 1.165, "step": 19985 }, { "epoch": 81.57551020408164, "grad_norm": 53.70100021362305, "learning_rate": 9.236734693877551e-06, "loss": 1.3044, "step": 19986 }, { "epoch": 81.57959183673469, "grad_norm": 36.532493591308594, "learning_rate": 9.23469387755102e-06, "loss": 1.2783, "step": 19987 }, { "epoch": 81.58367346938776, "grad_norm": 37.343868255615234, "learning_rate": 9.23265306122449e-06, "loss": 0.4509, "step": 19988 }, { "epoch": 81.58775510204082, "grad_norm": 23.434900283813477, "learning_rate": 9.23061224489796e-06, "loss": 2.034, "step": 19989 }, { "epoch": 81.59183673469387, "grad_norm": 32.96965408325195, "learning_rate": 9.22857142857143e-06, "loss": 1.2642, "step": 19990 }, { "epoch": 81.59591836734694, "grad_norm": 30.83369255065918, "learning_rate": 9.226530612244899e-06, "loss": 1.2534, "step": 19991 }, { "epoch": 81.6, "grad_norm": 24.485660552978516, "learning_rate": 9.224489795918367e-06, "loss": 1.5652, "step": 19992 }, { "epoch": 81.60408163265306, "grad_norm": 43.983951568603516, "learning_rate": 9.222448979591836e-06, "loss": 1.1094, "step": 19993 }, { "epoch": 81.60816326530612, "grad_norm": 31.941423416137695, "learning_rate": 9.220408163265307e-06, "loss": 1.3945, "step": 19994 }, { "epoch": 81.61224489795919, "grad_norm": 34.622047424316406, "learning_rate": 9.218367346938776e-06, "loss": 0.4737, "step": 19995 }, { "epoch": 81.61632653061224, "grad_norm": 23.575807571411133, "learning_rate": 9.216326530612246e-06, "loss": 1.9583, "step": 19996 }, { "epoch": 81.62040816326531, "grad_norm": 39.808231353759766, "learning_rate": 9.214285714285715e-06, "loss": 0.6454, "step": 19997 }, { "epoch": 81.62448979591836, "grad_norm": 37.25894546508789, "learning_rate": 9.212244897959184e-06, "loss": 1.1236, "step": 19998 }, { "epoch": 81.62857142857143, "grad_norm": 30.597728729248047, "learning_rate": 9.210204081632654e-06, "loss": 0.8774, "step": 19999 }, { "epoch": 81.63265306122449, "grad_norm": 33.5612907409668, "learning_rate": 9.208163265306123e-06, "loss": 0.735, "step": 20000 }, { "epoch": 81.63673469387756, "grad_norm": 34.97580337524414, "learning_rate": 9.206122448979594e-06, "loss": 0.7588, "step": 20001 }, { "epoch": 81.64081632653061, "grad_norm": 23.260408401489258, "learning_rate": 9.20408163265306e-06, "loss": 2.3093, "step": 20002 }, { "epoch": 81.64489795918368, "grad_norm": 34.34178924560547, "learning_rate": 9.202040816326531e-06, "loss": 0.3418, "step": 20003 }, { "epoch": 81.64897959183673, "grad_norm": 24.335803985595703, "learning_rate": 9.2e-06, "loss": 1.4945, "step": 20004 }, { "epoch": 81.65306122448979, "grad_norm": 40.439266204833984, "learning_rate": 9.19795918367347e-06, "loss": 1.027, "step": 20005 }, { "epoch": 81.65714285714286, "grad_norm": 42.27694320678711, "learning_rate": 9.19591836734694e-06, "loss": 0.9285, "step": 20006 }, { "epoch": 81.66122448979591, "grad_norm": 21.773771286010742, "learning_rate": 9.19387755102041e-06, "loss": 2.0218, "step": 20007 }, { "epoch": 81.66530612244898, "grad_norm": 33.22900390625, "learning_rate": 9.191836734693877e-06, "loss": 0.3445, "step": 20008 }, { "epoch": 81.66938775510204, "grad_norm": 34.4176025390625, "learning_rate": 9.189795918367347e-06, "loss": 0.8312, "step": 20009 }, { "epoch": 81.6734693877551, "grad_norm": 43.744956970214844, "learning_rate": 9.187755102040816e-06, "loss": 0.6423, "step": 20010 }, { "epoch": 81.67755102040816, "grad_norm": 34.084434509277344, "learning_rate": 9.185714285714287e-06, "loss": 0.7592, "step": 20011 }, { "epoch": 81.68163265306123, "grad_norm": 36.45392990112305, "learning_rate": 9.183673469387756e-06, "loss": 0.5523, "step": 20012 }, { "epoch": 81.68571428571428, "grad_norm": 32.747833251953125, "learning_rate": 9.181632653061224e-06, "loss": 0.3616, "step": 20013 }, { "epoch": 81.68979591836735, "grad_norm": 36.11008834838867, "learning_rate": 9.179591836734693e-06, "loss": 1.4033, "step": 20014 }, { "epoch": 81.6938775510204, "grad_norm": 35.374752044677734, "learning_rate": 9.177551020408164e-06, "loss": 1.4217, "step": 20015 }, { "epoch": 81.69795918367348, "grad_norm": 26.86469078063965, "learning_rate": 9.175510204081633e-06, "loss": 2.0131, "step": 20016 }, { "epoch": 81.70204081632653, "grad_norm": 30.374563217163086, "learning_rate": 9.173469387755103e-06, "loss": 0.2691, "step": 20017 }, { "epoch": 81.70612244897958, "grad_norm": 23.432538986206055, "learning_rate": 9.171428571428572e-06, "loss": 1.6288, "step": 20018 }, { "epoch": 81.71020408163265, "grad_norm": 32.59701156616211, "learning_rate": 9.16938775510204e-06, "loss": 0.5201, "step": 20019 }, { "epoch": 81.71428571428571, "grad_norm": 26.285703659057617, "learning_rate": 9.16734693877551e-06, "loss": 0.2489, "step": 20020 }, { "epoch": 81.71836734693878, "grad_norm": 29.734054565429688, "learning_rate": 9.16530612244898e-06, "loss": 0.3485, "step": 20021 }, { "epoch": 81.72244897959183, "grad_norm": 28.62775421142578, "learning_rate": 9.163265306122449e-06, "loss": 1.6291, "step": 20022 }, { "epoch": 81.7265306122449, "grad_norm": 32.58745193481445, "learning_rate": 9.16122448979592e-06, "loss": 1.2606, "step": 20023 }, { "epoch": 81.73061224489796, "grad_norm": 26.115251541137695, "learning_rate": 9.159183673469388e-06, "loss": 1.6557, "step": 20024 }, { "epoch": 81.73469387755102, "grad_norm": 12.670997619628906, "learning_rate": 9.157142857142857e-06, "loss": 0.1515, "step": 20025 }, { "epoch": 81.73877551020408, "grad_norm": 36.33301544189453, "learning_rate": 9.155102040816328e-06, "loss": 0.8941, "step": 20026 }, { "epoch": 81.74285714285715, "grad_norm": 30.387727737426758, "learning_rate": 9.153061224489796e-06, "loss": 1.0792, "step": 20027 }, { "epoch": 81.7469387755102, "grad_norm": 33.11698532104492, "learning_rate": 9.151020408163267e-06, "loss": 1.1411, "step": 20028 }, { "epoch": 81.75102040816327, "grad_norm": 38.20751190185547, "learning_rate": 9.148979591836736e-06, "loss": 0.707, "step": 20029 }, { "epoch": 81.75510204081633, "grad_norm": 35.47825622558594, "learning_rate": 9.146938775510205e-06, "loss": 1.5812, "step": 20030 }, { "epoch": 81.7591836734694, "grad_norm": 23.985013961791992, "learning_rate": 9.144897959183673e-06, "loss": 1.7563, "step": 20031 }, { "epoch": 81.76326530612245, "grad_norm": 32.67649841308594, "learning_rate": 9.142857142857144e-06, "loss": 0.3452, "step": 20032 }, { "epoch": 81.7673469387755, "grad_norm": 21.115562438964844, "learning_rate": 9.140816326530613e-06, "loss": 2.0897, "step": 20033 }, { "epoch": 81.77142857142857, "grad_norm": 14.330314636230469, "learning_rate": 9.138775510204083e-06, "loss": 0.1077, "step": 20034 }, { "epoch": 81.77551020408163, "grad_norm": 29.61709976196289, "learning_rate": 9.13673469387755e-06, "loss": 1.4484, "step": 20035 }, { "epoch": 81.7795918367347, "grad_norm": 37.23838424682617, "learning_rate": 9.134693877551021e-06, "loss": 0.6432, "step": 20036 }, { "epoch": 81.78367346938775, "grad_norm": 24.418210983276367, "learning_rate": 9.13265306122449e-06, "loss": 0.4531, "step": 20037 }, { "epoch": 81.78775510204082, "grad_norm": 39.337135314941406, "learning_rate": 9.13061224489796e-06, "loss": 0.4396, "step": 20038 }, { "epoch": 81.79183673469387, "grad_norm": 41.68935012817383, "learning_rate": 9.128571428571429e-06, "loss": 1.2271, "step": 20039 }, { "epoch": 81.79591836734694, "grad_norm": 27.117115020751953, "learning_rate": 9.1265306122449e-06, "loss": 1.5817, "step": 20040 }, { "epoch": 81.8, "grad_norm": 36.54806900024414, "learning_rate": 9.124489795918367e-06, "loss": 1.1479, "step": 20041 }, { "epoch": 81.80408163265307, "grad_norm": 22.805606842041016, "learning_rate": 9.122448979591837e-06, "loss": 1.4404, "step": 20042 }, { "epoch": 81.80816326530612, "grad_norm": 29.3080997467041, "learning_rate": 9.120408163265306e-06, "loss": 1.8483, "step": 20043 }, { "epoch": 81.81224489795919, "grad_norm": 15.408182144165039, "learning_rate": 9.118367346938777e-06, "loss": 0.1073, "step": 20044 }, { "epoch": 81.81632653061224, "grad_norm": 34.73029327392578, "learning_rate": 9.116326530612245e-06, "loss": 0.7882, "step": 20045 }, { "epoch": 81.82040816326531, "grad_norm": 36.582515716552734, "learning_rate": 9.114285714285714e-06, "loss": 0.3586, "step": 20046 }, { "epoch": 81.82448979591837, "grad_norm": 36.54222106933594, "learning_rate": 9.112244897959183e-06, "loss": 0.3354, "step": 20047 }, { "epoch": 81.82857142857142, "grad_norm": 25.252220153808594, "learning_rate": 9.110204081632653e-06, "loss": 0.3559, "step": 20048 }, { "epoch": 81.83265306122449, "grad_norm": 33.75702667236328, "learning_rate": 9.108163265306122e-06, "loss": 1.1266, "step": 20049 }, { "epoch": 81.83673469387755, "grad_norm": 46.499874114990234, "learning_rate": 9.106122448979593e-06, "loss": 0.8276, "step": 20050 }, { "epoch": 81.84081632653061, "grad_norm": 28.12683868408203, "learning_rate": 9.104081632653062e-06, "loss": 0.4218, "step": 20051 }, { "epoch": 81.84489795918367, "grad_norm": 37.55342102050781, "learning_rate": 9.10204081632653e-06, "loss": 0.683, "step": 20052 }, { "epoch": 81.84897959183674, "grad_norm": 29.237520217895508, "learning_rate": 9.100000000000001e-06, "loss": 0.8058, "step": 20053 }, { "epoch": 81.85306122448979, "grad_norm": 32.42459487915039, "learning_rate": 9.09795918367347e-06, "loss": 1.0373, "step": 20054 }, { "epoch": 81.85714285714286, "grad_norm": 44.454124450683594, "learning_rate": 9.09591836734694e-06, "loss": 0.2973, "step": 20055 }, { "epoch": 81.86122448979592, "grad_norm": 38.92527770996094, "learning_rate": 9.093877551020409e-06, "loss": 0.4031, "step": 20056 }, { "epoch": 81.86530612244898, "grad_norm": 20.035287857055664, "learning_rate": 9.091836734693878e-06, "loss": 0.2667, "step": 20057 }, { "epoch": 81.86938775510204, "grad_norm": 32.72227096557617, "learning_rate": 9.089795918367347e-06, "loss": 1.4641, "step": 20058 }, { "epoch": 81.87346938775511, "grad_norm": 40.11271667480469, "learning_rate": 9.087755102040817e-06, "loss": 0.7309, "step": 20059 }, { "epoch": 81.87755102040816, "grad_norm": 20.527938842773438, "learning_rate": 9.085714285714286e-06, "loss": 2.0695, "step": 20060 }, { "epoch": 81.88163265306123, "grad_norm": 16.742570877075195, "learning_rate": 9.083673469387757e-06, "loss": 0.0954, "step": 20061 }, { "epoch": 81.88571428571429, "grad_norm": 39.16618347167969, "learning_rate": 9.081632653061225e-06, "loss": 1.0387, "step": 20062 }, { "epoch": 81.88979591836734, "grad_norm": 56.85094451904297, "learning_rate": 9.079591836734694e-06, "loss": 1.2549, "step": 20063 }, { "epoch": 81.89387755102041, "grad_norm": 25.694400787353516, "learning_rate": 9.077551020408163e-06, "loss": 2.135, "step": 20064 }, { "epoch": 81.89795918367346, "grad_norm": 33.067630767822266, "learning_rate": 9.075510204081634e-06, "loss": 1.2778, "step": 20065 }, { "epoch": 81.90204081632653, "grad_norm": 11.56521987915039, "learning_rate": 9.073469387755102e-06, "loss": 0.1042, "step": 20066 }, { "epoch": 81.90612244897959, "grad_norm": 29.272472381591797, "learning_rate": 9.071428571428573e-06, "loss": 1.4555, "step": 20067 }, { "epoch": 81.91020408163266, "grad_norm": 40.617462158203125, "learning_rate": 9.06938775510204e-06, "loss": 1.0484, "step": 20068 }, { "epoch": 81.91428571428571, "grad_norm": 17.787797927856445, "learning_rate": 9.06734693877551e-06, "loss": 2.3896, "step": 20069 }, { "epoch": 81.91836734693878, "grad_norm": 38.932926177978516, "learning_rate": 9.06530612244898e-06, "loss": 0.3317, "step": 20070 }, { "epoch": 81.92244897959183, "grad_norm": 34.736698150634766, "learning_rate": 9.06326530612245e-06, "loss": 1.4693, "step": 20071 }, { "epoch": 81.9265306122449, "grad_norm": 28.280729293823242, "learning_rate": 9.061224489795919e-06, "loss": 1.2816, "step": 20072 }, { "epoch": 81.93061224489796, "grad_norm": 21.78112030029297, "learning_rate": 9.05918367346939e-06, "loss": 2.0002, "step": 20073 }, { "epoch": 81.93469387755103, "grad_norm": 22.80763053894043, "learning_rate": 9.057142857142856e-06, "loss": 0.0985, "step": 20074 }, { "epoch": 81.93877551020408, "grad_norm": 34.316036224365234, "learning_rate": 9.055102040816327e-06, "loss": 0.2317, "step": 20075 }, { "epoch": 81.94285714285714, "grad_norm": 30.547861099243164, "learning_rate": 9.053061224489796e-06, "loss": 1.084, "step": 20076 }, { "epoch": 81.9469387755102, "grad_norm": 29.01673126220703, "learning_rate": 9.051020408163266e-06, "loss": 1.7227, "step": 20077 }, { "epoch": 81.95102040816326, "grad_norm": 36.3455924987793, "learning_rate": 9.048979591836735e-06, "loss": 0.6596, "step": 20078 }, { "epoch": 81.95510204081633, "grad_norm": 33.447967529296875, "learning_rate": 9.046938775510204e-06, "loss": 1.4009, "step": 20079 }, { "epoch": 81.95918367346938, "grad_norm": 36.7832145690918, "learning_rate": 9.044897959183674e-06, "loss": 0.7967, "step": 20080 }, { "epoch": 81.96326530612245, "grad_norm": 20.3338680267334, "learning_rate": 9.042857142857143e-06, "loss": 0.1379, "step": 20081 }, { "epoch": 81.9673469387755, "grad_norm": 28.787546157836914, "learning_rate": 9.040816326530614e-06, "loss": 0.2585, "step": 20082 }, { "epoch": 81.97142857142858, "grad_norm": 26.356351852416992, "learning_rate": 9.038775510204082e-06, "loss": 1.6621, "step": 20083 }, { "epoch": 81.97551020408163, "grad_norm": 34.05484390258789, "learning_rate": 9.036734693877551e-06, "loss": 0.5875, "step": 20084 }, { "epoch": 81.9795918367347, "grad_norm": 27.482547760009766, "learning_rate": 9.03469387755102e-06, "loss": 1.2984, "step": 20085 }, { "epoch": 81.98367346938775, "grad_norm": 29.06898307800293, "learning_rate": 9.03265306122449e-06, "loss": 1.3874, "step": 20086 }, { "epoch": 81.98775510204082, "grad_norm": 31.380258560180664, "learning_rate": 9.03061224489796e-06, "loss": 1.3913, "step": 20087 }, { "epoch": 81.99183673469388, "grad_norm": 16.89823341369629, "learning_rate": 9.02857142857143e-06, "loss": 0.115, "step": 20088 }, { "epoch": 81.99591836734695, "grad_norm": 25.185834884643555, "learning_rate": 9.026530612244899e-06, "loss": 1.9127, "step": 20089 }, { "epoch": 82.0, "grad_norm": 36.71070861816406, "learning_rate": 9.024489795918368e-06, "loss": 1.2632, "step": 20090 }, { "epoch": 82.00408163265305, "grad_norm": 30.823440551757812, "learning_rate": 9.022448979591836e-06, "loss": 0.458, "step": 20091 }, { "epoch": 82.00816326530612, "grad_norm": 35.14657974243164, "learning_rate": 9.020408163265307e-06, "loss": 0.8218, "step": 20092 }, { "epoch": 82.01224489795918, "grad_norm": 39.175750732421875, "learning_rate": 9.018367346938776e-06, "loss": 0.4165, "step": 20093 }, { "epoch": 82.01632653061225, "grad_norm": 22.764673233032227, "learning_rate": 9.016326530612246e-06, "loss": 2.0919, "step": 20094 }, { "epoch": 82.0204081632653, "grad_norm": 30.418373107910156, "learning_rate": 9.014285714285715e-06, "loss": 1.0524, "step": 20095 }, { "epoch": 82.02448979591837, "grad_norm": 25.47011375427246, "learning_rate": 9.012244897959184e-06, "loss": 1.8665, "step": 20096 }, { "epoch": 82.02857142857142, "grad_norm": 25.55516242980957, "learning_rate": 9.010204081632653e-06, "loss": 1.8783, "step": 20097 }, { "epoch": 82.0326530612245, "grad_norm": 24.053218841552734, "learning_rate": 9.008163265306123e-06, "loss": 1.9158, "step": 20098 }, { "epoch": 82.03673469387755, "grad_norm": 26.15377426147461, "learning_rate": 9.006122448979592e-06, "loss": 0.7031, "step": 20099 }, { "epoch": 82.04081632653062, "grad_norm": 42.206539154052734, "learning_rate": 9.004081632653063e-06, "loss": 0.5933, "step": 20100 }, { "epoch": 82.04489795918367, "grad_norm": 29.629613876342773, "learning_rate": 9.00204081632653e-06, "loss": 1.4094, "step": 20101 }, { "epoch": 82.04897959183674, "grad_norm": 47.96194076538086, "learning_rate": 9e-06, "loss": 1.2302, "step": 20102 }, { "epoch": 82.0530612244898, "grad_norm": 32.422149658203125, "learning_rate": 8.997959183673469e-06, "loss": 1.3256, "step": 20103 }, { "epoch": 82.05714285714286, "grad_norm": 31.234600067138672, "learning_rate": 8.99591836734694e-06, "loss": 0.4359, "step": 20104 }, { "epoch": 82.06122448979592, "grad_norm": 38.21390151977539, "learning_rate": 8.993877551020408e-06, "loss": 0.6747, "step": 20105 }, { "epoch": 82.06530612244897, "grad_norm": 37.47274398803711, "learning_rate": 8.991836734693879e-06, "loss": 0.8332, "step": 20106 }, { "epoch": 82.06938775510204, "grad_norm": 39.064693450927734, "learning_rate": 8.989795918367348e-06, "loss": 0.6037, "step": 20107 }, { "epoch": 82.0734693877551, "grad_norm": 29.067180633544922, "learning_rate": 8.987755102040817e-06, "loss": 0.3218, "step": 20108 }, { "epoch": 82.07755102040817, "grad_norm": 18.287582397460938, "learning_rate": 8.985714285714285e-06, "loss": 2.6065, "step": 20109 }, { "epoch": 82.08163265306122, "grad_norm": 31.228071212768555, "learning_rate": 8.983673469387756e-06, "loss": 0.2627, "step": 20110 }, { "epoch": 82.08571428571429, "grad_norm": 38.0301628112793, "learning_rate": 8.981632653061225e-06, "loss": 1.4839, "step": 20111 }, { "epoch": 82.08979591836734, "grad_norm": 46.73012924194336, "learning_rate": 8.979591836734694e-06, "loss": 0.9408, "step": 20112 }, { "epoch": 82.09387755102041, "grad_norm": 31.139863967895508, "learning_rate": 8.977551020408164e-06, "loss": 0.4044, "step": 20113 }, { "epoch": 82.09795918367347, "grad_norm": 28.14363670349121, "learning_rate": 8.975510204081633e-06, "loss": 0.8323, "step": 20114 }, { "epoch": 82.10204081632654, "grad_norm": 33.419551849365234, "learning_rate": 8.973469387755103e-06, "loss": 1.2097, "step": 20115 }, { "epoch": 82.10612244897959, "grad_norm": 25.83658790588379, "learning_rate": 8.971428571428572e-06, "loss": 1.6703, "step": 20116 }, { "epoch": 82.11020408163266, "grad_norm": 11.592602729797363, "learning_rate": 8.969387755102043e-06, "loss": 3.0137, "step": 20117 }, { "epoch": 82.11428571428571, "grad_norm": 41.640926361083984, "learning_rate": 8.96734693877551e-06, "loss": 1.0595, "step": 20118 }, { "epoch": 82.11836734693877, "grad_norm": 32.926910400390625, "learning_rate": 8.96530612244898e-06, "loss": 1.1189, "step": 20119 }, { "epoch": 82.12244897959184, "grad_norm": 42.93903350830078, "learning_rate": 8.963265306122449e-06, "loss": 0.9943, "step": 20120 }, { "epoch": 82.12653061224489, "grad_norm": 21.741182327270508, "learning_rate": 8.96122448979592e-06, "loss": 0.2829, "step": 20121 }, { "epoch": 82.13061224489796, "grad_norm": 29.978878021240234, "learning_rate": 8.959183673469388e-06, "loss": 1.3132, "step": 20122 }, { "epoch": 82.13469387755102, "grad_norm": 34.307533264160156, "learning_rate": 8.957142857142857e-06, "loss": 0.5247, "step": 20123 }, { "epoch": 82.13877551020408, "grad_norm": 27.114656448364258, "learning_rate": 8.955102040816326e-06, "loss": 1.4507, "step": 20124 }, { "epoch": 82.14285714285714, "grad_norm": 38.116912841796875, "learning_rate": 8.953061224489797e-06, "loss": 1.1034, "step": 20125 }, { "epoch": 82.14693877551021, "grad_norm": 31.627174377441406, "learning_rate": 8.951020408163265e-06, "loss": 0.4191, "step": 20126 }, { "epoch": 82.15102040816326, "grad_norm": 34.43570327758789, "learning_rate": 8.948979591836736e-06, "loss": 0.3295, "step": 20127 }, { "epoch": 82.15510204081633, "grad_norm": 33.404075622558594, "learning_rate": 8.946938775510203e-06, "loss": 0.8084, "step": 20128 }, { "epoch": 82.15918367346939, "grad_norm": 41.0598030090332, "learning_rate": 8.944897959183674e-06, "loss": 1.0729, "step": 20129 }, { "epoch": 82.16326530612245, "grad_norm": 43.60262680053711, "learning_rate": 8.942857142857142e-06, "loss": 0.8701, "step": 20130 }, { "epoch": 82.16734693877551, "grad_norm": 48.03746795654297, "learning_rate": 8.940816326530613e-06, "loss": 0.7741, "step": 20131 }, { "epoch": 82.17142857142858, "grad_norm": 26.19932746887207, "learning_rate": 8.938775510204082e-06, "loss": 0.3428, "step": 20132 }, { "epoch": 82.17551020408163, "grad_norm": 37.521141052246094, "learning_rate": 8.936734693877552e-06, "loss": 1.0559, "step": 20133 }, { "epoch": 82.17959183673469, "grad_norm": 40.292049407958984, "learning_rate": 8.934693877551021e-06, "loss": 0.7046, "step": 20134 }, { "epoch": 82.18367346938776, "grad_norm": 46.669036865234375, "learning_rate": 8.93265306122449e-06, "loss": 0.9721, "step": 20135 }, { "epoch": 82.18775510204081, "grad_norm": 25.455623626708984, "learning_rate": 8.930612244897959e-06, "loss": 1.3595, "step": 20136 }, { "epoch": 82.19183673469388, "grad_norm": 24.589502334594727, "learning_rate": 8.92857142857143e-06, "loss": 1.7239, "step": 20137 }, { "epoch": 82.19591836734693, "grad_norm": 37.35763168334961, "learning_rate": 8.926530612244898e-06, "loss": 0.5839, "step": 20138 }, { "epoch": 82.2, "grad_norm": 34.403377532958984, "learning_rate": 8.924489795918367e-06, "loss": 0.536, "step": 20139 }, { "epoch": 82.20408163265306, "grad_norm": 29.83034324645996, "learning_rate": 8.922448979591837e-06, "loss": 0.3513, "step": 20140 }, { "epoch": 82.20816326530613, "grad_norm": 34.90221405029297, "learning_rate": 8.920408163265306e-06, "loss": 1.0825, "step": 20141 }, { "epoch": 82.21224489795918, "grad_norm": 35.992679595947266, "learning_rate": 8.918367346938777e-06, "loss": 0.2876, "step": 20142 }, { "epoch": 82.21632653061225, "grad_norm": 33.24678039550781, "learning_rate": 8.916326530612246e-06, "loss": 1.3416, "step": 20143 }, { "epoch": 82.2204081632653, "grad_norm": 35.57624816894531, "learning_rate": 8.914285714285716e-06, "loss": 0.8477, "step": 20144 }, { "epoch": 82.22448979591837, "grad_norm": 26.7604923248291, "learning_rate": 8.912244897959183e-06, "loss": 1.4415, "step": 20145 }, { "epoch": 82.22857142857143, "grad_norm": 25.974529266357422, "learning_rate": 8.910204081632654e-06, "loss": 1.3994, "step": 20146 }, { "epoch": 82.2326530612245, "grad_norm": 28.696765899658203, "learning_rate": 8.908163265306123e-06, "loss": 1.5099, "step": 20147 }, { "epoch": 82.23673469387755, "grad_norm": 36.25614929199219, "learning_rate": 8.906122448979593e-06, "loss": 0.6059, "step": 20148 }, { "epoch": 82.2408163265306, "grad_norm": 10.796255111694336, "learning_rate": 8.904081632653062e-06, "loss": 0.0912, "step": 20149 }, { "epoch": 82.24489795918367, "grad_norm": 35.54703140258789, "learning_rate": 8.90204081632653e-06, "loss": 0.5031, "step": 20150 }, { "epoch": 82.24897959183673, "grad_norm": 26.553503036499023, "learning_rate": 8.9e-06, "loss": 1.4789, "step": 20151 }, { "epoch": 82.2530612244898, "grad_norm": 37.921627044677734, "learning_rate": 8.89795918367347e-06, "loss": 0.9024, "step": 20152 }, { "epoch": 82.25714285714285, "grad_norm": 40.88241195678711, "learning_rate": 8.895918367346939e-06, "loss": 1.1224, "step": 20153 }, { "epoch": 82.26122448979592, "grad_norm": 34.740234375, "learning_rate": 8.89387755102041e-06, "loss": 1.0594, "step": 20154 }, { "epoch": 82.26530612244898, "grad_norm": 41.23213195800781, "learning_rate": 8.891836734693878e-06, "loss": 1.269, "step": 20155 }, { "epoch": 82.26938775510204, "grad_norm": 36.713069915771484, "learning_rate": 8.889795918367347e-06, "loss": 0.7257, "step": 20156 }, { "epoch": 82.2734693877551, "grad_norm": 29.101545333862305, "learning_rate": 8.887755102040816e-06, "loss": 1.802, "step": 20157 }, { "epoch": 82.27755102040817, "grad_norm": 31.582195281982422, "learning_rate": 8.885714285714286e-06, "loss": 0.9585, "step": 20158 }, { "epoch": 82.28163265306122, "grad_norm": 31.35145378112793, "learning_rate": 8.883673469387755e-06, "loss": 1.3055, "step": 20159 }, { "epoch": 82.28571428571429, "grad_norm": 36.50284194946289, "learning_rate": 8.881632653061226e-06, "loss": 0.6558, "step": 20160 }, { "epoch": 82.28979591836735, "grad_norm": 20.219945907592773, "learning_rate": 8.879591836734694e-06, "loss": 0.3013, "step": 20161 }, { "epoch": 82.29387755102042, "grad_norm": 29.805526733398438, "learning_rate": 8.877551020408163e-06, "loss": 0.4229, "step": 20162 }, { "epoch": 82.29795918367347, "grad_norm": 39.71722412109375, "learning_rate": 8.875510204081632e-06, "loss": 0.3108, "step": 20163 }, { "epoch": 82.30204081632652, "grad_norm": 15.811539649963379, "learning_rate": 8.873469387755103e-06, "loss": 2.279, "step": 20164 }, { "epoch": 82.3061224489796, "grad_norm": 32.203365325927734, "learning_rate": 8.871428571428571e-06, "loss": 0.9952, "step": 20165 }, { "epoch": 82.31020408163265, "grad_norm": 23.009550094604492, "learning_rate": 8.869387755102042e-06, "loss": 1.6347, "step": 20166 }, { "epoch": 82.31428571428572, "grad_norm": 42.413570404052734, "learning_rate": 8.86734693877551e-06, "loss": 0.8048, "step": 20167 }, { "epoch": 82.31836734693877, "grad_norm": 27.742206573486328, "learning_rate": 8.86530612244898e-06, "loss": 0.3366, "step": 20168 }, { "epoch": 82.32244897959184, "grad_norm": 33.549381256103516, "learning_rate": 8.86326530612245e-06, "loss": 1.2824, "step": 20169 }, { "epoch": 82.3265306122449, "grad_norm": 43.29327392578125, "learning_rate": 8.861224489795919e-06, "loss": 1.2814, "step": 20170 }, { "epoch": 82.33061224489796, "grad_norm": 25.524288177490234, "learning_rate": 8.85918367346939e-06, "loss": 1.1752, "step": 20171 }, { "epoch": 82.33469387755102, "grad_norm": 42.814231872558594, "learning_rate": 8.857142857142857e-06, "loss": 0.6449, "step": 20172 }, { "epoch": 82.33877551020409, "grad_norm": 20.908733367919922, "learning_rate": 8.855102040816327e-06, "loss": 2.0627, "step": 20173 }, { "epoch": 82.34285714285714, "grad_norm": 37.19879913330078, "learning_rate": 8.853061224489796e-06, "loss": 1.3221, "step": 20174 }, { "epoch": 82.34693877551021, "grad_norm": 44.25697326660156, "learning_rate": 8.851020408163266e-06, "loss": 0.6599, "step": 20175 }, { "epoch": 82.35102040816327, "grad_norm": 25.594457626342773, "learning_rate": 8.848979591836735e-06, "loss": 0.3846, "step": 20176 }, { "epoch": 82.35510204081632, "grad_norm": 24.38568878173828, "learning_rate": 8.846938775510206e-06, "loss": 1.4825, "step": 20177 }, { "epoch": 82.35918367346939, "grad_norm": 44.1966438293457, "learning_rate": 8.844897959183673e-06, "loss": 0.5583, "step": 20178 }, { "epoch": 82.36326530612244, "grad_norm": 26.94542121887207, "learning_rate": 8.842857142857143e-06, "loss": 1.561, "step": 20179 }, { "epoch": 82.36734693877551, "grad_norm": 32.57705307006836, "learning_rate": 8.840816326530612e-06, "loss": 0.5681, "step": 20180 }, { "epoch": 82.37142857142857, "grad_norm": 26.990324020385742, "learning_rate": 8.838775510204083e-06, "loss": 1.5868, "step": 20181 }, { "epoch": 82.37551020408164, "grad_norm": 27.389854431152344, "learning_rate": 8.836734693877552e-06, "loss": 1.5132, "step": 20182 }, { "epoch": 82.37959183673469, "grad_norm": 31.85059356689453, "learning_rate": 8.83469387755102e-06, "loss": 0.7232, "step": 20183 }, { "epoch": 82.38367346938776, "grad_norm": 20.684823989868164, "learning_rate": 8.83265306122449e-06, "loss": 1.9362, "step": 20184 }, { "epoch": 82.38775510204081, "grad_norm": 26.616012573242188, "learning_rate": 8.83061224489796e-06, "loss": 0.2617, "step": 20185 }, { "epoch": 82.39183673469388, "grad_norm": 32.102115631103516, "learning_rate": 8.828571428571429e-06, "loss": 0.6185, "step": 20186 }, { "epoch": 82.39591836734694, "grad_norm": 10.319245338439941, "learning_rate": 8.826530612244899e-06, "loss": 0.0885, "step": 20187 }, { "epoch": 82.4, "grad_norm": 31.056718826293945, "learning_rate": 8.824489795918368e-06, "loss": 0.3561, "step": 20188 }, { "epoch": 82.40408163265306, "grad_norm": 37.23225784301758, "learning_rate": 8.822448979591837e-06, "loss": 1.0785, "step": 20189 }, { "epoch": 82.40816326530613, "grad_norm": 24.43077850341797, "learning_rate": 8.820408163265305e-06, "loss": 1.947, "step": 20190 }, { "epoch": 82.41224489795918, "grad_norm": NaN, "learning_rate": 8.818367346938776e-06, "loss": 1.2652, "step": 20191 }, { "epoch": 82.41632653061224, "grad_norm": 14.075174331665039, "learning_rate": 8.818367346938776e-06, "loss": 0.1396, "step": 20192 }, { "epoch": 82.42040816326531, "grad_norm": 36.62604522705078, "learning_rate": 8.816326530612245e-06, "loss": 0.855, "step": 20193 }, { "epoch": 82.42448979591836, "grad_norm": 27.4390869140625, "learning_rate": 8.814285714285715e-06, "loss": 1.5006, "step": 20194 }, { "epoch": 82.42857142857143, "grad_norm": 36.630794525146484, "learning_rate": 8.812244897959184e-06, "loss": 0.3566, "step": 20195 }, { "epoch": 82.43265306122449, "grad_norm": 33.73366165161133, "learning_rate": 8.810204081632653e-06, "loss": 0.7282, "step": 20196 }, { "epoch": 82.43673469387755, "grad_norm": 12.06669807434082, "learning_rate": 8.808163265306123e-06, "loss": 0.1423, "step": 20197 }, { "epoch": 82.44081632653061, "grad_norm": 9.276495933532715, "learning_rate": 8.806122448979592e-06, "loss": 0.0643, "step": 20198 }, { "epoch": 82.44489795918368, "grad_norm": 34.50410079956055, "learning_rate": 8.804081632653063e-06, "loss": 0.68, "step": 20199 }, { "epoch": 82.44897959183673, "grad_norm": 28.084951400756836, "learning_rate": 8.802040816326532e-06, "loss": 1.1535, "step": 20200 }, { "epoch": 82.4530612244898, "grad_norm": 36.51931381225586, "learning_rate": 8.8e-06, "loss": 0.3952, "step": 20201 }, { "epoch": 82.45714285714286, "grad_norm": 40.87002944946289, "learning_rate": 8.79795918367347e-06, "loss": 0.6544, "step": 20202 }, { "epoch": 82.46122448979592, "grad_norm": 29.762516021728516, "learning_rate": 8.79591836734694e-06, "loss": 1.3676, "step": 20203 }, { "epoch": 82.46530612244898, "grad_norm": 29.20145606994629, "learning_rate": 8.793877551020409e-06, "loss": 1.6014, "step": 20204 }, { "epoch": 82.46938775510205, "grad_norm": 38.424964904785156, "learning_rate": 8.791836734693879e-06, "loss": 1.1924, "step": 20205 }, { "epoch": 82.4734693877551, "grad_norm": 38.15648651123047, "learning_rate": 8.789795918367346e-06, "loss": 1.1231, "step": 20206 }, { "epoch": 82.47755102040816, "grad_norm": 25.37257957458496, "learning_rate": 8.787755102040817e-06, "loss": 0.1551, "step": 20207 }, { "epoch": 82.48163265306123, "grad_norm": 26.02988052368164, "learning_rate": 8.785714285714286e-06, "loss": 1.4578, "step": 20208 }, { "epoch": 82.48571428571428, "grad_norm": 36.32155990600586, "learning_rate": 8.783673469387756e-06, "loss": 1.5278, "step": 20209 }, { "epoch": 82.48979591836735, "grad_norm": 35.2178955078125, "learning_rate": 8.781632653061225e-06, "loss": 0.4502, "step": 20210 }, { "epoch": 82.4938775510204, "grad_norm": 34.33460235595703, "learning_rate": 8.779591836734695e-06, "loss": 1.2294, "step": 20211 }, { "epoch": 82.49795918367347, "grad_norm": 36.325992584228516, "learning_rate": 8.777551020408163e-06, "loss": 0.8729, "step": 20212 }, { "epoch": 82.50204081632653, "grad_norm": 28.316160202026367, "learning_rate": 8.775510204081633e-06, "loss": 0.3654, "step": 20213 }, { "epoch": 82.5061224489796, "grad_norm": 25.093544006347656, "learning_rate": 8.773469387755102e-06, "loss": 1.5875, "step": 20214 }, { "epoch": 82.51020408163265, "grad_norm": 49.183509826660156, "learning_rate": 8.771428571428572e-06, "loss": 1.2314, "step": 20215 }, { "epoch": 82.51428571428572, "grad_norm": 26.219608306884766, "learning_rate": 8.769387755102041e-06, "loss": 1.441, "step": 20216 }, { "epoch": 82.51836734693877, "grad_norm": 32.99892807006836, "learning_rate": 8.76734693877551e-06, "loss": 0.3591, "step": 20217 }, { "epoch": 82.52244897959184, "grad_norm": 27.434749603271484, "learning_rate": 8.765306122448979e-06, "loss": 1.6569, "step": 20218 }, { "epoch": 82.5265306122449, "grad_norm": 23.041210174560547, "learning_rate": 8.76326530612245e-06, "loss": 1.5296, "step": 20219 }, { "epoch": 82.53061224489795, "grad_norm": 35.202552795410156, "learning_rate": 8.761224489795918e-06, "loss": 0.3415, "step": 20220 }, { "epoch": 82.53469387755102, "grad_norm": 19.450223922729492, "learning_rate": 8.759183673469389e-06, "loss": 0.0975, "step": 20221 }, { "epoch": 82.53877551020408, "grad_norm": 32.7938117980957, "learning_rate": 8.757142857142858e-06, "loss": 1.0492, "step": 20222 }, { "epoch": 82.54285714285714, "grad_norm": 32.854461669921875, "learning_rate": 8.755102040816326e-06, "loss": 0.8437, "step": 20223 }, { "epoch": 82.5469387755102, "grad_norm": 29.185810089111328, "learning_rate": 8.753061224489797e-06, "loss": 1.2234, "step": 20224 }, { "epoch": 82.55102040816327, "grad_norm": 24.680442810058594, "learning_rate": 8.751020408163266e-06, "loss": 1.9434, "step": 20225 }, { "epoch": 82.55510204081632, "grad_norm": 18.19827651977539, "learning_rate": 8.748979591836736e-06, "loss": 2.3569, "step": 20226 }, { "epoch": 82.55918367346939, "grad_norm": 27.695350646972656, "learning_rate": 8.746938775510205e-06, "loss": 1.6205, "step": 20227 }, { "epoch": 82.56326530612245, "grad_norm": 31.896713256835938, "learning_rate": 8.744897959183674e-06, "loss": 0.6945, "step": 20228 }, { "epoch": 82.56734693877551, "grad_norm": 36.36140823364258, "learning_rate": 8.742857142857143e-06, "loss": 0.967, "step": 20229 }, { "epoch": 82.57142857142857, "grad_norm": 29.516855239868164, "learning_rate": 8.740816326530613e-06, "loss": 0.3753, "step": 20230 }, { "epoch": 82.57551020408164, "grad_norm": 23.982378005981445, "learning_rate": 8.738775510204082e-06, "loss": 2.2804, "step": 20231 }, { "epoch": 82.57959183673469, "grad_norm": 33.21690368652344, "learning_rate": 8.736734693877552e-06, "loss": 0.2679, "step": 20232 }, { "epoch": 82.58367346938776, "grad_norm": 21.470458984375, "learning_rate": 8.734693877551021e-06, "loss": 1.971, "step": 20233 }, { "epoch": 82.58775510204082, "grad_norm": 34.527652740478516, "learning_rate": 8.73265306122449e-06, "loss": 1.4252, "step": 20234 }, { "epoch": 82.59183673469387, "grad_norm": 30.505407333374023, "learning_rate": 8.730612244897959e-06, "loss": 0.228, "step": 20235 }, { "epoch": 82.59591836734694, "grad_norm": 35.85377883911133, "learning_rate": 8.72857142857143e-06, "loss": 1.2664, "step": 20236 }, { "epoch": 82.6, "grad_norm": 16.91668128967285, "learning_rate": 8.726530612244898e-06, "loss": 0.1017, "step": 20237 }, { "epoch": 82.60408163265306, "grad_norm": 41.43220901489258, "learning_rate": 8.724489795918369e-06, "loss": 0.3445, "step": 20238 }, { "epoch": 82.60816326530612, "grad_norm": 25.772808074951172, "learning_rate": 8.722448979591836e-06, "loss": 1.5951, "step": 20239 }, { "epoch": 82.61224489795919, "grad_norm": 26.166860580444336, "learning_rate": 8.720408163265306e-06, "loss": 0.7565, "step": 20240 }, { "epoch": 82.61632653061224, "grad_norm": 28.165597915649414, "learning_rate": 8.718367346938775e-06, "loss": 1.6888, "step": 20241 }, { "epoch": 82.62040816326531, "grad_norm": 20.913311004638672, "learning_rate": 8.716326530612246e-06, "loss": 2.1226, "step": 20242 }, { "epoch": 82.62448979591836, "grad_norm": 28.949146270751953, "learning_rate": 8.714285714285715e-06, "loss": 0.4269, "step": 20243 }, { "epoch": 82.62857142857143, "grad_norm": 30.34911346435547, "learning_rate": 8.712244897959185e-06, "loss": 0.3479, "step": 20244 }, { "epoch": 82.63265306122449, "grad_norm": 32.779930114746094, "learning_rate": 8.710204081632652e-06, "loss": 0.7109, "step": 20245 }, { "epoch": 82.63673469387756, "grad_norm": 33.30695343017578, "learning_rate": 8.708163265306123e-06, "loss": 0.6885, "step": 20246 }, { "epoch": 82.64081632653061, "grad_norm": 17.38140869140625, "learning_rate": 8.706122448979592e-06, "loss": 0.1598, "step": 20247 }, { "epoch": 82.64489795918368, "grad_norm": 30.125844955444336, "learning_rate": 8.704081632653062e-06, "loss": 1.1489, "step": 20248 }, { "epoch": 82.64897959183673, "grad_norm": 35.51666259765625, "learning_rate": 8.702040816326531e-06, "loss": 1.3892, "step": 20249 }, { "epoch": 82.65306122448979, "grad_norm": 27.70293617248535, "learning_rate": 8.7e-06, "loss": 0.2521, "step": 20250 }, { "epoch": 82.65714285714286, "grad_norm": 26.51787567138672, "learning_rate": 8.69795918367347e-06, "loss": 1.2495, "step": 20251 }, { "epoch": 82.66122448979591, "grad_norm": 23.12489128112793, "learning_rate": 8.695918367346939e-06, "loss": 0.1302, "step": 20252 }, { "epoch": 82.66530612244898, "grad_norm": 27.490596771240234, "learning_rate": 8.69387755102041e-06, "loss": 1.5682, "step": 20253 }, { "epoch": 82.66938775510204, "grad_norm": 31.7664794921875, "learning_rate": 8.691836734693878e-06, "loss": 1.2859, "step": 20254 }, { "epoch": 82.6734693877551, "grad_norm": 38.026309967041016, "learning_rate": 8.689795918367347e-06, "loss": 0.371, "step": 20255 }, { "epoch": 82.67755102040816, "grad_norm": 34.67399597167969, "learning_rate": 8.687755102040816e-06, "loss": 0.9812, "step": 20256 }, { "epoch": 82.68163265306123, "grad_norm": 30.96271514892578, "learning_rate": 8.685714285714287e-06, "loss": 1.2215, "step": 20257 }, { "epoch": 82.68571428571428, "grad_norm": 41.21156692504883, "learning_rate": 8.683673469387755e-06, "loss": 0.3186, "step": 20258 }, { "epoch": 82.68979591836735, "grad_norm": 29.566755294799805, "learning_rate": 8.681632653061226e-06, "loss": 1.3741, "step": 20259 }, { "epoch": 82.6938775510204, "grad_norm": 27.570812225341797, "learning_rate": 8.679591836734695e-06, "loss": 0.6152, "step": 20260 }, { "epoch": 82.69795918367348, "grad_norm": 41.42303466796875, "learning_rate": 8.677551020408164e-06, "loss": 0.7423, "step": 20261 }, { "epoch": 82.70204081632653, "grad_norm": 23.2310791015625, "learning_rate": 8.675510204081632e-06, "loss": 1.4211, "step": 20262 }, { "epoch": 82.70612244897958, "grad_norm": 22.46316909790039, "learning_rate": 8.673469387755103e-06, "loss": 1.313, "step": 20263 }, { "epoch": 82.71020408163265, "grad_norm": 33.63069534301758, "learning_rate": 8.671428571428572e-06, "loss": 0.8918, "step": 20264 }, { "epoch": 82.71428571428571, "grad_norm": 28.639482498168945, "learning_rate": 8.669387755102042e-06, "loss": 1.2945, "step": 20265 }, { "epoch": 82.71836734693878, "grad_norm": 26.5430965423584, "learning_rate": 8.667346938775511e-06, "loss": 0.252, "step": 20266 }, { "epoch": 82.72244897959183, "grad_norm": 35.68701171875, "learning_rate": 8.66530612244898e-06, "loss": 0.339, "step": 20267 }, { "epoch": 82.7265306122449, "grad_norm": 29.32169532775879, "learning_rate": 8.663265306122449e-06, "loss": 1.2365, "step": 20268 }, { "epoch": 82.73061224489796, "grad_norm": 33.344818115234375, "learning_rate": 8.661224489795919e-06, "loss": 1.1005, "step": 20269 }, { "epoch": 82.73469387755102, "grad_norm": 31.92816925048828, "learning_rate": 8.659183673469388e-06, "loss": 0.4999, "step": 20270 }, { "epoch": 82.73877551020408, "grad_norm": 31.313230514526367, "learning_rate": 8.657142857142858e-06, "loss": 0.7904, "step": 20271 }, { "epoch": 82.74285714285715, "grad_norm": 33.18925476074219, "learning_rate": 8.655102040816326e-06, "loss": 1.366, "step": 20272 }, { "epoch": 82.7469387755102, "grad_norm": 37.222389221191406, "learning_rate": 8.653061224489796e-06, "loss": 0.3988, "step": 20273 }, { "epoch": 82.75102040816327, "grad_norm": 21.100374221801758, "learning_rate": 8.651020408163265e-06, "loss": 2.1778, "step": 20274 }, { "epoch": 82.75510204081633, "grad_norm": 37.070762634277344, "learning_rate": 8.648979591836735e-06, "loss": 1.0422, "step": 20275 }, { "epoch": 82.7591836734694, "grad_norm": 37.258384704589844, "learning_rate": 8.646938775510204e-06, "loss": 0.5685, "step": 20276 }, { "epoch": 82.76326530612245, "grad_norm": 27.707584381103516, "learning_rate": 8.644897959183675e-06, "loss": 0.223, "step": 20277 }, { "epoch": 82.7673469387755, "grad_norm": 18.051633834838867, "learning_rate": 8.642857142857144e-06, "loss": 2.0509, "step": 20278 }, { "epoch": 82.77142857142857, "grad_norm": 34.40810012817383, "learning_rate": 8.640816326530612e-06, "loss": 0.642, "step": 20279 }, { "epoch": 82.77551020408163, "grad_norm": 32.97000503540039, "learning_rate": 8.638775510204083e-06, "loss": 1.268, "step": 20280 }, { "epoch": 82.7795918367347, "grad_norm": 32.68119812011719, "learning_rate": 8.636734693877552e-06, "loss": 1.3924, "step": 20281 }, { "epoch": 82.78367346938775, "grad_norm": 35.13712692260742, "learning_rate": 8.63469387755102e-06, "loss": 0.6339, "step": 20282 }, { "epoch": 82.78775510204082, "grad_norm": 38.66516876220703, "learning_rate": 8.63265306122449e-06, "loss": 0.7713, "step": 20283 }, { "epoch": 82.79183673469387, "grad_norm": 33.36396789550781, "learning_rate": 8.63061224489796e-06, "loss": 0.796, "step": 20284 }, { "epoch": 82.79591836734694, "grad_norm": 23.971975326538086, "learning_rate": 8.628571428571429e-06, "loss": 1.5359, "step": 20285 }, { "epoch": 82.8, "grad_norm": 25.92452049255371, "learning_rate": 8.6265306122449e-06, "loss": 1.6303, "step": 20286 }, { "epoch": 82.80408163265307, "grad_norm": 29.49590492248535, "learning_rate": 8.624489795918368e-06, "loss": 0.7996, "step": 20287 }, { "epoch": 82.80816326530612, "grad_norm": 47.280025482177734, "learning_rate": 8.622448979591839e-06, "loss": 0.7819, "step": 20288 }, { "epoch": 82.81224489795919, "grad_norm": 44.42241287231445, "learning_rate": 8.620408163265306e-06, "loss": 0.3645, "step": 20289 }, { "epoch": 82.81632653061224, "grad_norm": 31.724916458129883, "learning_rate": 8.618367346938776e-06, "loss": 1.0757, "step": 20290 }, { "epoch": 82.82040816326531, "grad_norm": 25.089948654174805, "learning_rate": 8.616326530612245e-06, "loss": 1.5241, "step": 20291 }, { "epoch": 82.82448979591837, "grad_norm": 28.578399658203125, "learning_rate": 8.614285714285716e-06, "loss": 0.2135, "step": 20292 }, { "epoch": 82.82857142857142, "grad_norm": 21.362491607666016, "learning_rate": 8.612244897959184e-06, "loss": 0.1395, "step": 20293 }, { "epoch": 82.83265306122449, "grad_norm": 32.70291519165039, "learning_rate": 8.610204081632653e-06, "loss": 0.9907, "step": 20294 }, { "epoch": 82.83673469387755, "grad_norm": 32.5744743347168, "learning_rate": 8.608163265306122e-06, "loss": 0.4209, "step": 20295 }, { "epoch": 82.84081632653061, "grad_norm": 23.564950942993164, "learning_rate": 8.606122448979593e-06, "loss": 1.4196, "step": 20296 }, { "epoch": 82.84489795918367, "grad_norm": 40.346309661865234, "learning_rate": 8.604081632653061e-06, "loss": 1.498, "step": 20297 }, { "epoch": 82.84897959183674, "grad_norm": 47.06804275512695, "learning_rate": 8.602040816326532e-06, "loss": 0.4116, "step": 20298 }, { "epoch": 82.85306122448979, "grad_norm": 14.610894203186035, "learning_rate": 8.599999999999999e-06, "loss": 0.1055, "step": 20299 }, { "epoch": 82.85714285714286, "grad_norm": 40.692222595214844, "learning_rate": 8.59795918367347e-06, "loss": 0.9354, "step": 20300 }, { "epoch": 82.86122448979592, "grad_norm": 36.15504455566406, "learning_rate": 8.595918367346938e-06, "loss": 0.9863, "step": 20301 }, { "epoch": 82.86530612244898, "grad_norm": 42.24633026123047, "learning_rate": 8.593877551020409e-06, "loss": 0.9826, "step": 20302 }, { "epoch": 82.86938775510204, "grad_norm": 10.721040725708008, "learning_rate": 8.591836734693878e-06, "loss": 0.0914, "step": 20303 }, { "epoch": 82.87346938775511, "grad_norm": 37.16558074951172, "learning_rate": 8.589795918367348e-06, "loss": 0.8052, "step": 20304 }, { "epoch": 82.87755102040816, "grad_norm": 26.4659481048584, "learning_rate": 8.587755102040817e-06, "loss": 1.524, "step": 20305 }, { "epoch": 82.88163265306123, "grad_norm": 22.93273162841797, "learning_rate": 8.585714285714286e-06, "loss": 2.0211, "step": 20306 }, { "epoch": 82.88571428571429, "grad_norm": 31.240310668945312, "learning_rate": 8.583673469387756e-06, "loss": 1.6849, "step": 20307 }, { "epoch": 82.88979591836734, "grad_norm": 35.41798782348633, "learning_rate": 8.581632653061225e-06, "loss": 0.4341, "step": 20308 }, { "epoch": 82.89387755102041, "grad_norm": 26.798803329467773, "learning_rate": 8.579591836734694e-06, "loss": 1.5293, "step": 20309 }, { "epoch": 82.89795918367346, "grad_norm": 30.550081253051758, "learning_rate": 8.577551020408163e-06, "loss": 1.298, "step": 20310 }, { "epoch": 82.90204081632653, "grad_norm": 30.033226013183594, "learning_rate": 8.575510204081633e-06, "loss": 1.445, "step": 20311 }, { "epoch": 82.90612244897959, "grad_norm": 37.91943359375, "learning_rate": 8.573469387755102e-06, "loss": 1.0347, "step": 20312 }, { "epoch": 82.91020408163266, "grad_norm": 20.32379150390625, "learning_rate": 8.571428571428573e-06, "loss": 2.0661, "step": 20313 }, { "epoch": 82.91428571428571, "grad_norm": 23.221101760864258, "learning_rate": 8.569387755102041e-06, "loss": 1.6064, "step": 20314 }, { "epoch": 82.91836734693878, "grad_norm": 23.628002166748047, "learning_rate": 8.567346938775512e-06, "loss": 0.251, "step": 20315 }, { "epoch": 82.92244897959183, "grad_norm": 26.900440216064453, "learning_rate": 8.565306122448979e-06, "loss": 1.3389, "step": 20316 }, { "epoch": 82.9265306122449, "grad_norm": 24.82538414001465, "learning_rate": 8.56326530612245e-06, "loss": 0.3073, "step": 20317 }, { "epoch": 82.93061224489796, "grad_norm": 20.183195114135742, "learning_rate": 8.561224489795918e-06, "loss": 1.9325, "step": 20318 }, { "epoch": 82.93469387755103, "grad_norm": 28.129304885864258, "learning_rate": 8.559183673469389e-06, "loss": 0.5071, "step": 20319 }, { "epoch": 82.93877551020408, "grad_norm": 1.3938802480697632, "learning_rate": 8.557142857142858e-06, "loss": 0.027, "step": 20320 }, { "epoch": 82.94285714285714, "grad_norm": 34.46779251098633, "learning_rate": 8.555102040816327e-06, "loss": 0.9194, "step": 20321 }, { "epoch": 82.9469387755102, "grad_norm": 32.77820587158203, "learning_rate": 8.553061224489795e-06, "loss": 0.4107, "step": 20322 }, { "epoch": 82.95102040816326, "grad_norm": 22.85291290283203, "learning_rate": 8.551020408163266e-06, "loss": 0.1384, "step": 20323 }, { "epoch": 82.95510204081633, "grad_norm": 40.436607360839844, "learning_rate": 8.548979591836735e-06, "loss": 0.5457, "step": 20324 }, { "epoch": 82.95918367346938, "grad_norm": 10.972574234008789, "learning_rate": 8.546938775510205e-06, "loss": 0.0981, "step": 20325 }, { "epoch": 82.96326530612245, "grad_norm": 33.750675201416016, "learning_rate": 8.544897959183674e-06, "loss": 0.7672, "step": 20326 }, { "epoch": 82.9673469387755, "grad_norm": 30.901233673095703, "learning_rate": 8.542857142857143e-06, "loss": 0.4842, "step": 20327 }, { "epoch": 82.97142857142858, "grad_norm": 23.9129695892334, "learning_rate": 8.540816326530612e-06, "loss": 2.0235, "step": 20328 }, { "epoch": 82.97551020408163, "grad_norm": 28.243871688842773, "learning_rate": 8.538775510204082e-06, "loss": 1.4747, "step": 20329 }, { "epoch": 82.9795918367347, "grad_norm": 26.61664581298828, "learning_rate": 8.536734693877551e-06, "loss": 2.0098, "step": 20330 }, { "epoch": 82.98367346938775, "grad_norm": 35.69361877441406, "learning_rate": 8.534693877551022e-06, "loss": 0.6052, "step": 20331 }, { "epoch": 82.98775510204082, "grad_norm": 34.52029800415039, "learning_rate": 8.53265306122449e-06, "loss": 1.021, "step": 20332 }, { "epoch": 82.99183673469388, "grad_norm": 11.016190528869629, "learning_rate": 8.53061224489796e-06, "loss": 0.0878, "step": 20333 }, { "epoch": 82.99591836734695, "grad_norm": 52.60063552856445, "learning_rate": 8.528571428571428e-06, "loss": 0.5555, "step": 20334 }, { "epoch": 83.0, "grad_norm": 45.18074035644531, "learning_rate": 8.526530612244899e-06, "loss": 0.3578, "step": 20335 }, { "epoch": 83.00408163265305, "grad_norm": 31.482467651367188, "learning_rate": 8.524489795918367e-06, "loss": 1.3064, "step": 20336 }, { "epoch": 83.00816326530612, "grad_norm": 24.6136474609375, "learning_rate": 8.522448979591838e-06, "loss": 0.3035, "step": 20337 }, { "epoch": 83.01224489795918, "grad_norm": 26.2101993560791, "learning_rate": 8.520408163265307e-06, "loss": 0.3213, "step": 20338 }, { "epoch": 83.01632653061225, "grad_norm": 32.109901428222656, "learning_rate": 8.518367346938775e-06, "loss": 0.562, "step": 20339 }, { "epoch": 83.0204081632653, "grad_norm": 28.397775650024414, "learning_rate": 8.516326530612246e-06, "loss": 0.2573, "step": 20340 }, { "epoch": 83.02448979591837, "grad_norm": 33.739158630371094, "learning_rate": 8.514285714285715e-06, "loss": 1.0865, "step": 20341 }, { "epoch": 83.02857142857142, "grad_norm": 25.09795570373535, "learning_rate": 8.512244897959185e-06, "loss": 1.5455, "step": 20342 }, { "epoch": 83.0326530612245, "grad_norm": 40.28825759887695, "learning_rate": 8.510204081632652e-06, "loss": 1.226, "step": 20343 }, { "epoch": 83.03673469387755, "grad_norm": 14.50707721710205, "learning_rate": 8.508163265306123e-06, "loss": 0.1143, "step": 20344 }, { "epoch": 83.04081632653062, "grad_norm": 51.36491012573242, "learning_rate": 8.506122448979592e-06, "loss": 0.9145, "step": 20345 }, { "epoch": 83.04489795918367, "grad_norm": 26.24167823791504, "learning_rate": 8.504081632653062e-06, "loss": 0.29, "step": 20346 }, { "epoch": 83.04897959183674, "grad_norm": 31.702312469482422, "learning_rate": 8.502040816326531e-06, "loss": 0.354, "step": 20347 }, { "epoch": 83.0530612244898, "grad_norm": 40.04167938232422, "learning_rate": 8.500000000000002e-06, "loss": 0.6012, "step": 20348 }, { "epoch": 83.05714285714286, "grad_norm": 36.58199691772461, "learning_rate": 8.497959183673469e-06, "loss": 0.3734, "step": 20349 }, { "epoch": 83.06122448979592, "grad_norm": 25.711389541625977, "learning_rate": 8.49591836734694e-06, "loss": 0.7047, "step": 20350 }, { "epoch": 83.06530612244897, "grad_norm": 28.466510772705078, "learning_rate": 8.493877551020408e-06, "loss": 0.2548, "step": 20351 }, { "epoch": 83.06938775510204, "grad_norm": 29.964740753173828, "learning_rate": 8.491836734693879e-06, "loss": 1.1544, "step": 20352 }, { "epoch": 83.0734693877551, "grad_norm": 35.565704345703125, "learning_rate": 8.489795918367347e-06, "loss": 0.6584, "step": 20353 }, { "epoch": 83.07755102040817, "grad_norm": 27.759685516357422, "learning_rate": 8.487755102040816e-06, "loss": 1.4437, "step": 20354 }, { "epoch": 83.08163265306122, "grad_norm": 21.598182678222656, "learning_rate": 8.485714285714285e-06, "loss": 2.0499, "step": 20355 }, { "epoch": 83.08571428571429, "grad_norm": 32.970367431640625, "learning_rate": 8.483673469387756e-06, "loss": 0.4748, "step": 20356 }, { "epoch": 83.08979591836734, "grad_norm": 34.85887908935547, "learning_rate": 8.481632653061224e-06, "loss": 1.2752, "step": 20357 }, { "epoch": 83.09387755102041, "grad_norm": 12.485551834106445, "learning_rate": 8.479591836734695e-06, "loss": 0.1242, "step": 20358 }, { "epoch": 83.09795918367347, "grad_norm": 50.36604690551758, "learning_rate": 8.477551020408164e-06, "loss": 0.8113, "step": 20359 }, { "epoch": 83.10204081632654, "grad_norm": 33.07549285888672, "learning_rate": 8.475510204081633e-06, "loss": 0.4633, "step": 20360 }, { "epoch": 83.10612244897959, "grad_norm": 26.030000686645508, "learning_rate": 8.473469387755101e-06, "loss": 1.9488, "step": 20361 }, { "epoch": 83.11020408163266, "grad_norm": 5.973254680633545, "learning_rate": 8.471428571428572e-06, "loss": 0.0589, "step": 20362 }, { "epoch": 83.11428571428571, "grad_norm": 41.356407165527344, "learning_rate": 8.46938775510204e-06, "loss": 1.1189, "step": 20363 }, { "epoch": 83.11836734693877, "grad_norm": 35.60695266723633, "learning_rate": 8.467346938775511e-06, "loss": 1.3428, "step": 20364 }, { "epoch": 83.12244897959184, "grad_norm": 28.519466400146484, "learning_rate": 8.46530612244898e-06, "loss": 1.5902, "step": 20365 }, { "epoch": 83.12653061224489, "grad_norm": 25.7138729095459, "learning_rate": 8.463265306122449e-06, "loss": 1.8381, "step": 20366 }, { "epoch": 83.13061224489796, "grad_norm": 21.108232498168945, "learning_rate": 8.46122448979592e-06, "loss": 0.1686, "step": 20367 }, { "epoch": 83.13469387755102, "grad_norm": 13.537555694580078, "learning_rate": 8.459183673469388e-06, "loss": 0.0777, "step": 20368 }, { "epoch": 83.13877551020408, "grad_norm": 33.78063201904297, "learning_rate": 8.457142857142859e-06, "loss": 0.26, "step": 20369 }, { "epoch": 83.14285714285714, "grad_norm": 27.54477882385254, "learning_rate": 8.455102040816328e-06, "loss": 0.2277, "step": 20370 }, { "epoch": 83.14693877551021, "grad_norm": 33.31056594848633, "learning_rate": 8.453061224489796e-06, "loss": 0.8023, "step": 20371 }, { "epoch": 83.15102040816326, "grad_norm": 32.091880798339844, "learning_rate": 8.451020408163265e-06, "loss": 1.1837, "step": 20372 }, { "epoch": 83.15510204081633, "grad_norm": 34.1474723815918, "learning_rate": 8.448979591836736e-06, "loss": 1.2183, "step": 20373 }, { "epoch": 83.15918367346939, "grad_norm": 22.37325096130371, "learning_rate": 8.446938775510204e-06, "loss": 0.2784, "step": 20374 }, { "epoch": 83.16326530612245, "grad_norm": 27.85037612915039, "learning_rate": 8.444897959183675e-06, "loss": 1.6453, "step": 20375 }, { "epoch": 83.16734693877551, "grad_norm": 20.10723876953125, "learning_rate": 8.442857142857142e-06, "loss": 2.0621, "step": 20376 }, { "epoch": 83.17142857142858, "grad_norm": 34.82625198364258, "learning_rate": 8.440816326530613e-06, "loss": 0.678, "step": 20377 }, { "epoch": 83.17551020408163, "grad_norm": 25.23789405822754, "learning_rate": 8.438775510204081e-06, "loss": 2.257, "step": 20378 }, { "epoch": 83.17959183673469, "grad_norm": 28.10319709777832, "learning_rate": 8.436734693877552e-06, "loss": 0.2431, "step": 20379 }, { "epoch": 83.18367346938776, "grad_norm": 27.285051345825195, "learning_rate": 8.43469387755102e-06, "loss": 1.5628, "step": 20380 }, { "epoch": 83.18775510204081, "grad_norm": 24.439023971557617, "learning_rate": 8.432653061224491e-06, "loss": 1.8575, "step": 20381 }, { "epoch": 83.19183673469388, "grad_norm": 22.469528198242188, "learning_rate": 8.430612244897958e-06, "loss": 0.2863, "step": 20382 }, { "epoch": 83.19591836734693, "grad_norm": 29.700098037719727, "learning_rate": 8.428571428571429e-06, "loss": 1.213, "step": 20383 }, { "epoch": 83.2, "grad_norm": 40.47331619262695, "learning_rate": 8.426530612244898e-06, "loss": 0.3558, "step": 20384 }, { "epoch": 83.20408163265306, "grad_norm": 26.905134201049805, "learning_rate": 8.424489795918368e-06, "loss": 0.7366, "step": 20385 }, { "epoch": 83.20816326530613, "grad_norm": 18.767627716064453, "learning_rate": 8.422448979591837e-06, "loss": 2.012, "step": 20386 }, { "epoch": 83.21224489795918, "grad_norm": 34.637725830078125, "learning_rate": 8.420408163265306e-06, "loss": 1.2416, "step": 20387 }, { "epoch": 83.21632653061225, "grad_norm": 28.867904663085938, "learning_rate": 8.418367346938775e-06, "loss": 1.2861, "step": 20388 }, { "epoch": 83.2204081632653, "grad_norm": 28.55331039428711, "learning_rate": 8.416326530612245e-06, "loss": 1.0406, "step": 20389 }, { "epoch": 83.22448979591837, "grad_norm": 32.059078216552734, "learning_rate": 8.414285714285714e-06, "loss": 1.2114, "step": 20390 }, { "epoch": 83.22857142857143, "grad_norm": 41.924171447753906, "learning_rate": 8.412244897959185e-06, "loss": 0.3774, "step": 20391 }, { "epoch": 83.2326530612245, "grad_norm": 19.79633331298828, "learning_rate": 8.410204081632653e-06, "loss": 0.2488, "step": 20392 }, { "epoch": 83.23673469387755, "grad_norm": 29.263568878173828, "learning_rate": 8.408163265306122e-06, "loss": 0.5915, "step": 20393 }, { "epoch": 83.2408163265306, "grad_norm": 32.78343200683594, "learning_rate": 8.406122448979593e-06, "loss": 1.3154, "step": 20394 }, { "epoch": 83.24489795918367, "grad_norm": 36.06252670288086, "learning_rate": 8.404081632653062e-06, "loss": 1.2368, "step": 20395 }, { "epoch": 83.24897959183673, "grad_norm": 33.79446029663086, "learning_rate": 8.402040816326532e-06, "loss": 0.9715, "step": 20396 }, { "epoch": 83.2530612244898, "grad_norm": 13.96279525756836, "learning_rate": 8.400000000000001e-06, "loss": 0.0872, "step": 20397 }, { "epoch": 83.25714285714285, "grad_norm": 28.537466049194336, "learning_rate": 8.39795918367347e-06, "loss": 1.4285, "step": 20398 }, { "epoch": 83.26122448979592, "grad_norm": 29.51888084411621, "learning_rate": 8.395918367346939e-06, "loss": 1.4078, "step": 20399 }, { "epoch": 83.26530612244898, "grad_norm": 21.14480972290039, "learning_rate": 8.393877551020409e-06, "loss": 1.3665, "step": 20400 }, { "epoch": 83.26938775510204, "grad_norm": 18.327880859375, "learning_rate": 8.391836734693878e-06, "loss": 2.3291, "step": 20401 }, { "epoch": 83.2734693877551, "grad_norm": 28.55801010131836, "learning_rate": 8.389795918367348e-06, "loss": 0.6005, "step": 20402 }, { "epoch": 83.27755102040817, "grad_norm": 45.69436264038086, "learning_rate": 8.387755102040817e-06, "loss": 0.7677, "step": 20403 }, { "epoch": 83.28163265306122, "grad_norm": 38.308990478515625, "learning_rate": 8.385714285714286e-06, "loss": 0.416, "step": 20404 }, { "epoch": 83.28571428571429, "grad_norm": 45.75138854980469, "learning_rate": 8.383673469387755e-06, "loss": 0.2828, "step": 20405 }, { "epoch": 83.28979591836735, "grad_norm": 32.75979232788086, "learning_rate": 8.381632653061225e-06, "loss": 0.9741, "step": 20406 }, { "epoch": 83.29387755102042, "grad_norm": 38.14812469482422, "learning_rate": 8.379591836734694e-06, "loss": 1.5336, "step": 20407 }, { "epoch": 83.29795918367347, "grad_norm": 36.30852508544922, "learning_rate": 8.377551020408165e-06, "loss": 0.9658, "step": 20408 }, { "epoch": 83.30204081632652, "grad_norm": 21.93402671813965, "learning_rate": 8.375510204081632e-06, "loss": 1.6261, "step": 20409 }, { "epoch": 83.3061224489796, "grad_norm": 28.437776565551758, "learning_rate": 8.373469387755102e-06, "loss": 1.585, "step": 20410 }, { "epoch": 83.31020408163265, "grad_norm": 20.944091796875, "learning_rate": 8.371428571428571e-06, "loss": 2.1396, "step": 20411 }, { "epoch": 83.31428571428572, "grad_norm": 45.35838317871094, "learning_rate": 8.369387755102042e-06, "loss": 0.5561, "step": 20412 }, { "epoch": 83.31836734693877, "grad_norm": 32.103179931640625, "learning_rate": 8.36734693877551e-06, "loss": 1.5264, "step": 20413 }, { "epoch": 83.32244897959184, "grad_norm": 36.15626907348633, "learning_rate": 8.365306122448981e-06, "loss": 0.2792, "step": 20414 }, { "epoch": 83.3265306122449, "grad_norm": 33.83199691772461, "learning_rate": 8.363265306122448e-06, "loss": 1.2473, "step": 20415 }, { "epoch": 83.33061224489796, "grad_norm": 28.716440200805664, "learning_rate": 8.361224489795919e-06, "loss": 1.2316, "step": 20416 }, { "epoch": 83.33469387755102, "grad_norm": 42.169437408447266, "learning_rate": 8.359183673469387e-06, "loss": 1.0452, "step": 20417 }, { "epoch": 83.33877551020409, "grad_norm": 28.409927368164062, "learning_rate": 8.357142857142858e-06, "loss": 1.6533, "step": 20418 }, { "epoch": 83.34285714285714, "grad_norm": 38.7401237487793, "learning_rate": 8.355102040816327e-06, "loss": 0.5296, "step": 20419 }, { "epoch": 83.34693877551021, "grad_norm": 39.67719650268555, "learning_rate": 8.353061224489796e-06, "loss": 0.6413, "step": 20420 }, { "epoch": 83.35102040816327, "grad_norm": 33.48000717163086, "learning_rate": 8.351020408163266e-06, "loss": 0.8419, "step": 20421 }, { "epoch": 83.35510204081632, "grad_norm": 35.095916748046875, "learning_rate": 8.348979591836735e-06, "loss": 1.0876, "step": 20422 }, { "epoch": 83.35918367346939, "grad_norm": 34.59750747680664, "learning_rate": 8.346938775510205e-06, "loss": 0.4648, "step": 20423 }, { "epoch": 83.36326530612244, "grad_norm": 32.67869186401367, "learning_rate": 8.344897959183674e-06, "loss": 1.0649, "step": 20424 }, { "epoch": 83.36734693877551, "grad_norm": 20.22791290283203, "learning_rate": 8.342857142857143e-06, "loss": 2.0598, "step": 20425 }, { "epoch": 83.37142857142857, "grad_norm": 16.59785270690918, "learning_rate": 8.340816326530612e-06, "loss": 0.1052, "step": 20426 }, { "epoch": 83.37551020408164, "grad_norm": 28.905988693237305, "learning_rate": 8.338775510204082e-06, "loss": 1.5035, "step": 20427 }, { "epoch": 83.37959183673469, "grad_norm": 9.803079605102539, "learning_rate": 8.336734693877551e-06, "loss": 0.093, "step": 20428 }, { "epoch": 83.38367346938776, "grad_norm": 29.620010375976562, "learning_rate": 8.334693877551022e-06, "loss": 1.5923, "step": 20429 }, { "epoch": 83.38775510204081, "grad_norm": 27.367494583129883, "learning_rate": 8.33265306122449e-06, "loss": 1.3839, "step": 20430 }, { "epoch": 83.39183673469388, "grad_norm": 23.047330856323242, "learning_rate": 8.33061224489796e-06, "loss": 1.9793, "step": 20431 }, { "epoch": 83.39591836734694, "grad_norm": 30.311063766479492, "learning_rate": 8.328571428571428e-06, "loss": 1.1925, "step": 20432 }, { "epoch": 83.4, "grad_norm": 26.84729766845703, "learning_rate": 8.326530612244899e-06, "loss": 1.6408, "step": 20433 }, { "epoch": 83.40408163265306, "grad_norm": 26.71144676208496, "learning_rate": 8.324489795918368e-06, "loss": 0.2313, "step": 20434 }, { "epoch": 83.40816326530613, "grad_norm": 22.30716896057129, "learning_rate": 8.322448979591838e-06, "loss": 1.5546, "step": 20435 }, { "epoch": 83.41224489795918, "grad_norm": 38.38950729370117, "learning_rate": 8.320408163265307e-06, "loss": 1.4436, "step": 20436 }, { "epoch": 83.41632653061224, "grad_norm": 36.626060485839844, "learning_rate": 8.318367346938776e-06, "loss": 1.1221, "step": 20437 }, { "epoch": 83.42040816326531, "grad_norm": 29.011573791503906, "learning_rate": 8.316326530612245e-06, "loss": 0.4914, "step": 20438 }, { "epoch": 83.42448979591836, "grad_norm": 29.8101806640625, "learning_rate": 8.314285714285715e-06, "loss": 0.9565, "step": 20439 }, { "epoch": 83.42857142857143, "grad_norm": 27.377687454223633, "learning_rate": 8.312244897959184e-06, "loss": 0.7258, "step": 20440 }, { "epoch": 83.43265306122449, "grad_norm": 21.61038589477539, "learning_rate": 8.310204081632654e-06, "loss": 0.2248, "step": 20441 }, { "epoch": 83.43673469387755, "grad_norm": 32.177146911621094, "learning_rate": 8.308163265306121e-06, "loss": 1.3665, "step": 20442 }, { "epoch": 83.44081632653061, "grad_norm": 34.332298278808594, "learning_rate": 8.306122448979592e-06, "loss": 0.3693, "step": 20443 }, { "epoch": 83.44489795918368, "grad_norm": 43.317718505859375, "learning_rate": 8.30408163265306e-06, "loss": 0.8571, "step": 20444 }, { "epoch": 83.44897959183673, "grad_norm": 35.56943130493164, "learning_rate": 8.302040816326531e-06, "loss": 1.1117, "step": 20445 }, { "epoch": 83.4530612244898, "grad_norm": 11.100288391113281, "learning_rate": 8.3e-06, "loss": 3.0149, "step": 20446 }, { "epoch": 83.45714285714286, "grad_norm": 27.07347869873047, "learning_rate": 8.29795918367347e-06, "loss": 1.4835, "step": 20447 }, { "epoch": 83.46122448979592, "grad_norm": 46.90687561035156, "learning_rate": 8.29591836734694e-06, "loss": 0.6522, "step": 20448 }, { "epoch": 83.46530612244898, "grad_norm": 30.697975158691406, "learning_rate": 8.293877551020408e-06, "loss": 0.3669, "step": 20449 }, { "epoch": 83.46938775510205, "grad_norm": 37.2718620300293, "learning_rate": 8.291836734693879e-06, "loss": 0.7182, "step": 20450 }, { "epoch": 83.4734693877551, "grad_norm": 36.990692138671875, "learning_rate": 8.289795918367348e-06, "loss": 1.0518, "step": 20451 }, { "epoch": 83.47755102040816, "grad_norm": 35.63401794433594, "learning_rate": 8.287755102040816e-06, "loss": 0.2357, "step": 20452 }, { "epoch": 83.48163265306123, "grad_norm": 25.494089126586914, "learning_rate": 8.285714285714285e-06, "loss": 1.9261, "step": 20453 }, { "epoch": 83.48571428571428, "grad_norm": 35.84355926513672, "learning_rate": 8.283673469387756e-06, "loss": 1.0373, "step": 20454 }, { "epoch": 83.48979591836735, "grad_norm": 37.38978576660156, "learning_rate": 8.281632653061225e-06, "loss": 0.3619, "step": 20455 }, { "epoch": 83.4938775510204, "grad_norm": 50.54601287841797, "learning_rate": 8.279591836734695e-06, "loss": 0.5184, "step": 20456 }, { "epoch": 83.49795918367347, "grad_norm": 37.83692932128906, "learning_rate": 8.277551020408164e-06, "loss": 0.3841, "step": 20457 }, { "epoch": 83.50204081632653, "grad_norm": 34.81289291381836, "learning_rate": 8.275510204081633e-06, "loss": 0.5659, "step": 20458 }, { "epoch": 83.5061224489796, "grad_norm": 33.24367141723633, "learning_rate": 8.273469387755102e-06, "loss": 0.1184, "step": 20459 }, { "epoch": 83.51020408163265, "grad_norm": 36.44314956665039, "learning_rate": 8.271428571428572e-06, "loss": 0.5709, "step": 20460 }, { "epoch": 83.51428571428572, "grad_norm": 38.66659927368164, "learning_rate": 8.269387755102041e-06, "loss": 0.7882, "step": 20461 }, { "epoch": 83.51836734693877, "grad_norm": 25.583765029907227, "learning_rate": 8.267346938775511e-06, "loss": 1.1986, "step": 20462 }, { "epoch": 83.52244897959184, "grad_norm": 26.918975830078125, "learning_rate": 8.26530612244898e-06, "loss": 1.4454, "step": 20463 }, { "epoch": 83.5265306122449, "grad_norm": 18.98143196105957, "learning_rate": 8.263265306122449e-06, "loss": 0.1721, "step": 20464 }, { "epoch": 83.53061224489795, "grad_norm": 13.228117942810059, "learning_rate": 8.261224489795918e-06, "loss": 0.1006, "step": 20465 }, { "epoch": 83.53469387755102, "grad_norm": 49.42627716064453, "learning_rate": 8.259183673469388e-06, "loss": 0.8881, "step": 20466 }, { "epoch": 83.53877551020408, "grad_norm": 28.88188362121582, "learning_rate": 8.257142857142857e-06, "loss": 0.4371, "step": 20467 }, { "epoch": 83.54285714285714, "grad_norm": 28.761844635009766, "learning_rate": 8.255102040816328e-06, "loss": 0.5715, "step": 20468 }, { "epoch": 83.5469387755102, "grad_norm": 35.088558197021484, "learning_rate": 8.253061224489795e-06, "loss": 0.3382, "step": 20469 }, { "epoch": 83.55102040816327, "grad_norm": 43.38107681274414, "learning_rate": 8.251020408163265e-06, "loss": 1.1072, "step": 20470 }, { "epoch": 83.55510204081632, "grad_norm": 34.376136779785156, "learning_rate": 8.248979591836734e-06, "loss": 0.2771, "step": 20471 }, { "epoch": 83.55918367346939, "grad_norm": 33.96754455566406, "learning_rate": 8.246938775510205e-06, "loss": 1.3803, "step": 20472 }, { "epoch": 83.56326530612245, "grad_norm": 18.01105308532715, "learning_rate": 8.244897959183674e-06, "loss": 2.6045, "step": 20473 }, { "epoch": 83.56734693877551, "grad_norm": 36.41996383666992, "learning_rate": 8.242857142857144e-06, "loss": 1.0412, "step": 20474 }, { "epoch": 83.57142857142857, "grad_norm": 36.432395935058594, "learning_rate": 8.240816326530613e-06, "loss": 0.9894, "step": 20475 }, { "epoch": 83.57551020408164, "grad_norm": 33.58173370361328, "learning_rate": 8.238775510204082e-06, "loss": 0.5318, "step": 20476 }, { "epoch": 83.57959183673469, "grad_norm": 1.9743930101394653, "learning_rate": 8.236734693877552e-06, "loss": 0.0243, "step": 20477 }, { "epoch": 83.58367346938776, "grad_norm": 41.84709167480469, "learning_rate": 8.234693877551021e-06, "loss": 0.7688, "step": 20478 }, { "epoch": 83.58775510204082, "grad_norm": 26.96563148498535, "learning_rate": 8.23265306122449e-06, "loss": 1.52, "step": 20479 }, { "epoch": 83.59183673469387, "grad_norm": 27.9826602935791, "learning_rate": 8.230612244897959e-06, "loss": 0.3003, "step": 20480 }, { "epoch": 83.59591836734694, "grad_norm": 23.046493530273438, "learning_rate": 8.22857142857143e-06, "loss": 1.4727, "step": 20481 }, { "epoch": 83.6, "grad_norm": 23.29384422302246, "learning_rate": 8.226530612244898e-06, "loss": 1.4146, "step": 20482 }, { "epoch": 83.60408163265306, "grad_norm": 24.180770874023438, "learning_rate": 8.224489795918369e-06, "loss": 1.9345, "step": 20483 }, { "epoch": 83.60816326530612, "grad_norm": 28.341415405273438, "learning_rate": 8.222448979591837e-06, "loss": 0.397, "step": 20484 }, { "epoch": 83.61224489795919, "grad_norm": 23.049009323120117, "learning_rate": 8.220408163265308e-06, "loss": 1.732, "step": 20485 }, { "epoch": 83.61632653061224, "grad_norm": 29.5323543548584, "learning_rate": 8.218367346938775e-06, "loss": 0.4875, "step": 20486 }, { "epoch": 83.62040816326531, "grad_norm": 32.7977294921875, "learning_rate": 8.216326530612245e-06, "loss": 1.2435, "step": 20487 }, { "epoch": 83.62448979591836, "grad_norm": 35.973602294921875, "learning_rate": 8.214285714285714e-06, "loss": 0.6531, "step": 20488 }, { "epoch": 83.62857142857143, "grad_norm": 28.415061950683594, "learning_rate": 8.212244897959185e-06, "loss": 1.3967, "step": 20489 }, { "epoch": 83.63265306122449, "grad_norm": 30.510658264160156, "learning_rate": 8.210204081632654e-06, "loss": 1.3801, "step": 20490 }, { "epoch": 83.63673469387756, "grad_norm": 34.02547836303711, "learning_rate": 8.208163265306122e-06, "loss": 0.827, "step": 20491 }, { "epoch": 83.64081632653061, "grad_norm": 33.122528076171875, "learning_rate": 8.206122448979591e-06, "loss": 0.8646, "step": 20492 }, { "epoch": 83.64489795918368, "grad_norm": 38.458370208740234, "learning_rate": 8.204081632653062e-06, "loss": 0.7656, "step": 20493 }, { "epoch": 83.64897959183673, "grad_norm": 37.725093841552734, "learning_rate": 8.20204081632653e-06, "loss": 0.9141, "step": 20494 }, { "epoch": 83.65306122448979, "grad_norm": 40.88666915893555, "learning_rate": 8.200000000000001e-06, "loss": 0.7641, "step": 20495 }, { "epoch": 83.65714285714286, "grad_norm": 25.676334381103516, "learning_rate": 8.19795918367347e-06, "loss": 1.4963, "step": 20496 }, { "epoch": 83.66122448979591, "grad_norm": 32.205718994140625, "learning_rate": 8.195918367346939e-06, "loss": 0.9808, "step": 20497 }, { "epoch": 83.66530612244898, "grad_norm": 20.830490112304688, "learning_rate": 8.193877551020408e-06, "loss": 1.5068, "step": 20498 }, { "epoch": 83.66938775510204, "grad_norm": 28.418960571289062, "learning_rate": 8.191836734693878e-06, "loss": 0.3395, "step": 20499 }, { "epoch": 83.6734693877551, "grad_norm": 25.094547271728516, "learning_rate": 8.189795918367347e-06, "loss": 1.5344, "step": 20500 }, { "epoch": 83.67755102040816, "grad_norm": 32.09017562866211, "learning_rate": 8.187755102040817e-06, "loss": 0.6697, "step": 20501 }, { "epoch": 83.68163265306123, "grad_norm": 28.811735153198242, "learning_rate": 8.185714285714286e-06, "loss": 1.4578, "step": 20502 }, { "epoch": 83.68571428571428, "grad_norm": 28.811656951904297, "learning_rate": 8.183673469387755e-06, "loss": 1.5023, "step": 20503 }, { "epoch": 83.68979591836735, "grad_norm": 41.81159973144531, "learning_rate": 8.181632653061226e-06, "loss": 0.6428, "step": 20504 }, { "epoch": 83.6938775510204, "grad_norm": 11.889100074768066, "learning_rate": 8.179591836734694e-06, "loss": 0.0954, "step": 20505 }, { "epoch": 83.69795918367348, "grad_norm": 28.655105590820312, "learning_rate": 8.177551020408163e-06, "loss": 0.4114, "step": 20506 }, { "epoch": 83.70204081632653, "grad_norm": 36.700714111328125, "learning_rate": 8.175510204081634e-06, "loss": 0.8219, "step": 20507 }, { "epoch": 83.70612244897958, "grad_norm": 34.44654846191406, "learning_rate": 8.173469387755103e-06, "loss": 0.9897, "step": 20508 }, { "epoch": 83.71020408163265, "grad_norm": 30.596515655517578, "learning_rate": 8.171428571428571e-06, "loss": 1.1818, "step": 20509 }, { "epoch": 83.71428571428571, "grad_norm": 24.613813400268555, "learning_rate": 8.169387755102042e-06, "loss": 1.6668, "step": 20510 }, { "epoch": 83.71836734693878, "grad_norm": 33.56198501586914, "learning_rate": 8.16734693877551e-06, "loss": 0.9314, "step": 20511 }, { "epoch": 83.72244897959183, "grad_norm": 13.499329566955566, "learning_rate": 8.165306122448981e-06, "loss": 0.0965, "step": 20512 }, { "epoch": 83.7265306122449, "grad_norm": 35.75074005126953, "learning_rate": 8.163265306122448e-06, "loss": 0.8946, "step": 20513 }, { "epoch": 83.73061224489796, "grad_norm": 22.99991798400879, "learning_rate": 8.161224489795919e-06, "loss": 1.4754, "step": 20514 }, { "epoch": 83.73469387755102, "grad_norm": 45.009273529052734, "learning_rate": 8.159183673469388e-06, "loss": 0.8981, "step": 20515 }, { "epoch": 83.73877551020408, "grad_norm": 30.35419464111328, "learning_rate": 8.157142857142858e-06, "loss": 0.404, "step": 20516 }, { "epoch": 83.74285714285715, "grad_norm": 21.186386108398438, "learning_rate": 8.155102040816327e-06, "loss": 0.1482, "step": 20517 }, { "epoch": 83.7469387755102, "grad_norm": 25.108238220214844, "learning_rate": 8.153061224489798e-06, "loss": 0.409, "step": 20518 }, { "epoch": 83.75102040816327, "grad_norm": 41.47612762451172, "learning_rate": 8.151020408163265e-06, "loss": 0.6725, "step": 20519 }, { "epoch": 83.75510204081633, "grad_norm": 35.00951385498047, "learning_rate": 8.148979591836735e-06, "loss": 0.6775, "step": 20520 }, { "epoch": 83.7591836734694, "grad_norm": 26.188230514526367, "learning_rate": 8.146938775510204e-06, "loss": 1.9398, "step": 20521 }, { "epoch": 83.76326530612245, "grad_norm": 28.56143569946289, "learning_rate": 8.144897959183674e-06, "loss": 1.475, "step": 20522 }, { "epoch": 83.7673469387755, "grad_norm": 35.23558044433594, "learning_rate": 8.142857142857143e-06, "loss": 0.931, "step": 20523 }, { "epoch": 83.77142857142857, "grad_norm": 42.610774993896484, "learning_rate": 8.140816326530612e-06, "loss": 0.3518, "step": 20524 }, { "epoch": 83.77551020408163, "grad_norm": 21.74139404296875, "learning_rate": 8.138775510204081e-06, "loss": 1.9621, "step": 20525 }, { "epoch": 83.7795918367347, "grad_norm": 28.986827850341797, "learning_rate": 8.136734693877551e-06, "loss": 1.0754, "step": 20526 }, { "epoch": 83.78367346938775, "grad_norm": 33.45344161987305, "learning_rate": 8.13469387755102e-06, "loss": 1.383, "step": 20527 }, { "epoch": 83.78775510204082, "grad_norm": 44.030765533447266, "learning_rate": 8.13265306122449e-06, "loss": 0.4603, "step": 20528 }, { "epoch": 83.79183673469387, "grad_norm": 21.649099349975586, "learning_rate": 8.13061224489796e-06, "loss": 1.9425, "step": 20529 }, { "epoch": 83.79591836734694, "grad_norm": 27.77205467224121, "learning_rate": 8.128571428571428e-06, "loss": 1.1612, "step": 20530 }, { "epoch": 83.8, "grad_norm": 26.55658531188965, "learning_rate": 8.126530612244897e-06, "loss": 0.6952, "step": 20531 }, { "epoch": 83.80408163265307, "grad_norm": 29.175186157226562, "learning_rate": 8.124489795918368e-06, "loss": 1.4237, "step": 20532 }, { "epoch": 83.80816326530612, "grad_norm": 21.410888671875, "learning_rate": 8.122448979591837e-06, "loss": 1.2828, "step": 20533 }, { "epoch": 83.81224489795919, "grad_norm": 44.04100799560547, "learning_rate": 8.120408163265307e-06, "loss": 0.685, "step": 20534 }, { "epoch": 83.81632653061224, "grad_norm": 23.006845474243164, "learning_rate": 8.118367346938776e-06, "loss": 0.1329, "step": 20535 }, { "epoch": 83.82040816326531, "grad_norm": 32.84161376953125, "learning_rate": 8.116326530612245e-06, "loss": 1.3733, "step": 20536 }, { "epoch": 83.82448979591837, "grad_norm": 24.39917755126953, "learning_rate": 8.114285714285715e-06, "loss": 1.4587, "step": 20537 }, { "epoch": 83.82857142857142, "grad_norm": 32.732826232910156, "learning_rate": 8.112244897959184e-06, "loss": 0.7369, "step": 20538 }, { "epoch": 83.83265306122449, "grad_norm": 21.15201759338379, "learning_rate": 8.110204081632655e-06, "loss": 1.8953, "step": 20539 }, { "epoch": 83.83673469387755, "grad_norm": 30.133407592773438, "learning_rate": 8.108163265306123e-06, "loss": 0.3935, "step": 20540 }, { "epoch": 83.84081632653061, "grad_norm": 33.345394134521484, "learning_rate": 8.106122448979592e-06, "loss": 1.4024, "step": 20541 }, { "epoch": 83.84489795918367, "grad_norm": 20.899948120117188, "learning_rate": 8.104081632653061e-06, "loss": 1.9557, "step": 20542 }, { "epoch": 83.84897959183674, "grad_norm": 31.475059509277344, "learning_rate": 8.102040816326532e-06, "loss": 1.057, "step": 20543 }, { "epoch": 83.85306122448979, "grad_norm": 35.28173065185547, "learning_rate": 8.1e-06, "loss": 0.3759, "step": 20544 }, { "epoch": 83.85714285714286, "grad_norm": 34.16017150878906, "learning_rate": 8.097959183673471e-06, "loss": 0.8338, "step": 20545 }, { "epoch": 83.86122448979592, "grad_norm": 31.549394607543945, "learning_rate": 8.095918367346938e-06, "loss": 1.0153, "step": 20546 }, { "epoch": 83.86530612244898, "grad_norm": 16.64790153503418, "learning_rate": 8.093877551020409e-06, "loss": 2.2834, "step": 20547 }, { "epoch": 83.86938775510204, "grad_norm": 33.57331466674805, "learning_rate": 8.091836734693877e-06, "loss": 0.2947, "step": 20548 }, { "epoch": 83.87346938775511, "grad_norm": 32.73997116088867, "learning_rate": 8.089795918367348e-06, "loss": 0.7515, "step": 20549 }, { "epoch": 83.87755102040816, "grad_norm": 31.580778121948242, "learning_rate": 8.087755102040817e-06, "loss": 0.7314, "step": 20550 }, { "epoch": 83.88163265306123, "grad_norm": 27.441009521484375, "learning_rate": 8.085714285714287e-06, "loss": 1.5708, "step": 20551 }, { "epoch": 83.88571428571429, "grad_norm": 37.18574523925781, "learning_rate": 8.083673469387754e-06, "loss": 1.2653, "step": 20552 }, { "epoch": 83.88979591836734, "grad_norm": 37.79179763793945, "learning_rate": 8.081632653061225e-06, "loss": 1.221, "step": 20553 }, { "epoch": 83.89387755102041, "grad_norm": 39.65773391723633, "learning_rate": 8.079591836734694e-06, "loss": 1.2235, "step": 20554 }, { "epoch": 83.89795918367346, "grad_norm": 26.07466697692871, "learning_rate": 8.077551020408164e-06, "loss": 1.4919, "step": 20555 }, { "epoch": 83.90204081632653, "grad_norm": 56.21567153930664, "learning_rate": 8.075510204081633e-06, "loss": 1.2388, "step": 20556 }, { "epoch": 83.90612244897959, "grad_norm": 22.91323471069336, "learning_rate": 8.073469387755102e-06, "loss": 0.1448, "step": 20557 }, { "epoch": 83.91020408163266, "grad_norm": 43.61977005004883, "learning_rate": 8.07142857142857e-06, "loss": 0.3417, "step": 20558 }, { "epoch": 83.91428571428571, "grad_norm": 24.473854064941406, "learning_rate": 8.069387755102041e-06, "loss": 0.1572, "step": 20559 }, { "epoch": 83.91836734693878, "grad_norm": 35.914615631103516, "learning_rate": 8.06734693877551e-06, "loss": 0.7157, "step": 20560 }, { "epoch": 83.92244897959183, "grad_norm": 30.697755813598633, "learning_rate": 8.06530612244898e-06, "loss": 0.7306, "step": 20561 }, { "epoch": 83.9265306122449, "grad_norm": 29.269058227539062, "learning_rate": 8.06326530612245e-06, "loss": 1.2598, "step": 20562 }, { "epoch": 83.93061224489796, "grad_norm": 21.598976135253906, "learning_rate": 8.061224489795918e-06, "loss": 2.06, "step": 20563 }, { "epoch": 83.93469387755103, "grad_norm": 44.451942443847656, "learning_rate": 8.059183673469389e-06, "loss": 0.9374, "step": 20564 }, { "epoch": 83.93877551020408, "grad_norm": 37.11379623413086, "learning_rate": 8.057142857142857e-06, "loss": 0.3812, "step": 20565 }, { "epoch": 83.94285714285714, "grad_norm": 36.53919982910156, "learning_rate": 8.055102040816328e-06, "loss": 0.7428, "step": 20566 }, { "epoch": 83.9469387755102, "grad_norm": 26.943403244018555, "learning_rate": 8.053061224489797e-06, "loss": 1.6097, "step": 20567 }, { "epoch": 83.95102040816326, "grad_norm": 33.01313400268555, "learning_rate": 8.051020408163266e-06, "loss": 1.2857, "step": 20568 }, { "epoch": 83.95510204081633, "grad_norm": 29.813018798828125, "learning_rate": 8.048979591836734e-06, "loss": 0.8357, "step": 20569 }, { "epoch": 83.95918367346938, "grad_norm": 38.07909393310547, "learning_rate": 8.046938775510205e-06, "loss": 0.5328, "step": 20570 }, { "epoch": 83.96326530612245, "grad_norm": 36.50251007080078, "learning_rate": 8.044897959183674e-06, "loss": 0.8506, "step": 20571 }, { "epoch": 83.9673469387755, "grad_norm": 31.287498474121094, "learning_rate": 8.042857142857144e-06, "loss": 0.5528, "step": 20572 }, { "epoch": 83.97142857142858, "grad_norm": 36.03775405883789, "learning_rate": 8.040816326530613e-06, "loss": 0.3652, "step": 20573 }, { "epoch": 83.97551020408163, "grad_norm": 33.17744827270508, "learning_rate": 8.038775510204082e-06, "loss": 0.9658, "step": 20574 }, { "epoch": 83.9795918367347, "grad_norm": 28.69468879699707, "learning_rate": 8.03673469387755e-06, "loss": 1.1062, "step": 20575 }, { "epoch": 83.98367346938775, "grad_norm": 30.42432975769043, "learning_rate": 8.034693877551021e-06, "loss": 0.3309, "step": 20576 }, { "epoch": 83.98775510204082, "grad_norm": 34.910560607910156, "learning_rate": 8.03265306122449e-06, "loss": 0.4359, "step": 20577 }, { "epoch": 83.99183673469388, "grad_norm": 31.009305953979492, "learning_rate": 8.03061224489796e-06, "loss": 0.5022, "step": 20578 }, { "epoch": 83.99591836734695, "grad_norm": 34.146034240722656, "learning_rate": 8.028571428571428e-06, "loss": 0.6172, "step": 20579 }, { "epoch": 84.0, "grad_norm": 29.33535385131836, "learning_rate": 8.026530612244898e-06, "loss": 1.8109, "step": 20580 }, { "epoch": 84.00408163265305, "grad_norm": 21.359098434448242, "learning_rate": 8.024489795918367e-06, "loss": 1.9896, "step": 20581 }, { "epoch": 84.00816326530612, "grad_norm": 22.752443313598633, "learning_rate": 8.022448979591838e-06, "loss": 2.2401, "step": 20582 }, { "epoch": 84.01224489795918, "grad_norm": 30.424165725708008, "learning_rate": 8.020408163265306e-06, "loss": 0.249, "step": 20583 }, { "epoch": 84.01632653061225, "grad_norm": 25.057708740234375, "learning_rate": 8.018367346938777e-06, "loss": 1.1673, "step": 20584 }, { "epoch": 84.0204081632653, "grad_norm": 23.531984329223633, "learning_rate": 8.016326530612244e-06, "loss": 1.596, "step": 20585 }, { "epoch": 84.02448979591837, "grad_norm": 45.70628356933594, "learning_rate": 8.014285714285715e-06, "loss": 0.5573, "step": 20586 }, { "epoch": 84.02857142857142, "grad_norm": 27.400680541992188, "learning_rate": 8.012244897959183e-06, "loss": 0.617, "step": 20587 }, { "epoch": 84.0326530612245, "grad_norm": 28.3142032623291, "learning_rate": 8.010204081632654e-06, "loss": 0.3222, "step": 20588 }, { "epoch": 84.03673469387755, "grad_norm": 28.374801635742188, "learning_rate": 8.008163265306123e-06, "loss": 0.3743, "step": 20589 }, { "epoch": 84.04081632653062, "grad_norm": 38.188133239746094, "learning_rate": 8.006122448979591e-06, "loss": 0.3459, "step": 20590 }, { "epoch": 84.04489795918367, "grad_norm": 35.880210876464844, "learning_rate": 8.004081632653062e-06, "loss": 0.4165, "step": 20591 }, { "epoch": 84.04897959183674, "grad_norm": 36.27037048339844, "learning_rate": 8.00204081632653e-06, "loss": 0.5587, "step": 20592 }, { "epoch": 84.0530612244898, "grad_norm": 35.165870666503906, "learning_rate": 8.000000000000001e-06, "loss": 0.7826, "step": 20593 }, { "epoch": 84.05714285714286, "grad_norm": 24.69225311279297, "learning_rate": 7.99795918367347e-06, "loss": 1.4365, "step": 20594 }, { "epoch": 84.06122448979592, "grad_norm": 57.738319396972656, "learning_rate": 7.99591836734694e-06, "loss": 0.8271, "step": 20595 }, { "epoch": 84.06530612244897, "grad_norm": 32.357994079589844, "learning_rate": 7.993877551020408e-06, "loss": 1.2393, "step": 20596 }, { "epoch": 84.06938775510204, "grad_norm": 20.585580825805664, "learning_rate": 7.991836734693878e-06, "loss": 1.9087, "step": 20597 }, { "epoch": 84.0734693877551, "grad_norm": 22.669227600097656, "learning_rate": 7.989795918367347e-06, "loss": 0.2802, "step": 20598 }, { "epoch": 84.07755102040817, "grad_norm": 30.589149475097656, "learning_rate": 7.987755102040818e-06, "loss": 0.6702, "step": 20599 }, { "epoch": 84.08163265306122, "grad_norm": 25.823318481445312, "learning_rate": 7.985714285714286e-06, "loss": 1.0544, "step": 20600 }, { "epoch": 84.08571428571429, "grad_norm": 33.11176681518555, "learning_rate": 7.983673469387755e-06, "loss": 1.4995, "step": 20601 }, { "epoch": 84.08979591836734, "grad_norm": 29.614171981811523, "learning_rate": 7.981632653061224e-06, "loss": 1.7418, "step": 20602 }, { "epoch": 84.09387755102041, "grad_norm": 27.734783172607422, "learning_rate": 7.979591836734695e-06, "loss": 1.4214, "step": 20603 }, { "epoch": 84.09795918367347, "grad_norm": 32.03379440307617, "learning_rate": 7.977551020408163e-06, "loss": 0.6678, "step": 20604 }, { "epoch": 84.10204081632654, "grad_norm": 30.50743865966797, "learning_rate": 7.975510204081634e-06, "loss": 1.5182, "step": 20605 }, { "epoch": 84.10612244897959, "grad_norm": 26.22865867614746, "learning_rate": 7.973469387755103e-06, "loss": 1.9523, "step": 20606 }, { "epoch": 84.11020408163266, "grad_norm": 32.026123046875, "learning_rate": 7.971428571428572e-06, "loss": 0.3011, "step": 20607 }, { "epoch": 84.11428571428571, "grad_norm": 23.399019241333008, "learning_rate": 7.96938775510204e-06, "loss": 0.2717, "step": 20608 }, { "epoch": 84.11836734693877, "grad_norm": 25.730758666992188, "learning_rate": 7.967346938775511e-06, "loss": 1.3581, "step": 20609 }, { "epoch": 84.12244897959184, "grad_norm": 30.86211585998535, "learning_rate": 7.96530612244898e-06, "loss": 1.4847, "step": 20610 }, { "epoch": 84.12653061224489, "grad_norm": 39.42816162109375, "learning_rate": 7.96326530612245e-06, "loss": 0.7755, "step": 20611 }, { "epoch": 84.13061224489796, "grad_norm": 19.25267219543457, "learning_rate": 7.961224489795917e-06, "loss": 0.1224, "step": 20612 }, { "epoch": 84.13469387755102, "grad_norm": 28.932355880737305, "learning_rate": 7.959183673469388e-06, "loss": 0.3246, "step": 20613 }, { "epoch": 84.13877551020408, "grad_norm": 41.351436614990234, "learning_rate": 7.957142857142857e-06, "loss": 0.3877, "step": 20614 }, { "epoch": 84.14285714285714, "grad_norm": 33.163421630859375, "learning_rate": 7.955102040816327e-06, "loss": 1.6571, "step": 20615 }, { "epoch": 84.14693877551021, "grad_norm": 30.96731185913086, "learning_rate": 7.953061224489796e-06, "loss": 0.3414, "step": 20616 }, { "epoch": 84.15102040816326, "grad_norm": 38.41083908081055, "learning_rate": 7.951020408163267e-06, "loss": 0.7229, "step": 20617 }, { "epoch": 84.15510204081633, "grad_norm": 62.18842315673828, "learning_rate": 7.948979591836735e-06, "loss": 0.9731, "step": 20618 }, { "epoch": 84.15918367346939, "grad_norm": 41.43560791015625, "learning_rate": 7.946938775510204e-06, "loss": 0.4707, "step": 20619 }, { "epoch": 84.16326530612245, "grad_norm": 34.15934371948242, "learning_rate": 7.944897959183675e-06, "loss": 0.2902, "step": 20620 }, { "epoch": 84.16734693877551, "grad_norm": 31.67490577697754, "learning_rate": 7.942857142857144e-06, "loss": 1.3925, "step": 20621 }, { "epoch": 84.17142857142858, "grad_norm": 37.198211669921875, "learning_rate": 7.940816326530612e-06, "loss": 0.5816, "step": 20622 }, { "epoch": 84.17551020408163, "grad_norm": 24.194517135620117, "learning_rate": 7.938775510204081e-06, "loss": 1.534, "step": 20623 }, { "epoch": 84.17959183673469, "grad_norm": 16.152353286743164, "learning_rate": 7.936734693877552e-06, "loss": 0.1466, "step": 20624 }, { "epoch": 84.18367346938776, "grad_norm": 26.142757415771484, "learning_rate": 7.93469387755102e-06, "loss": 0.4411, "step": 20625 }, { "epoch": 84.18775510204081, "grad_norm": 19.511463165283203, "learning_rate": 7.932653061224491e-06, "loss": 2.0948, "step": 20626 }, { "epoch": 84.19183673469388, "grad_norm": 36.58310317993164, "learning_rate": 7.93061224489796e-06, "loss": 0.8144, "step": 20627 }, { "epoch": 84.19591836734693, "grad_norm": 31.348289489746094, "learning_rate": 7.928571428571429e-06, "loss": 0.6754, "step": 20628 }, { "epoch": 84.2, "grad_norm": 34.68117141723633, "learning_rate": 7.926530612244897e-06, "loss": 1.0057, "step": 20629 }, { "epoch": 84.20408163265306, "grad_norm": 16.818693161010742, "learning_rate": 7.924489795918368e-06, "loss": 0.108, "step": 20630 }, { "epoch": 84.20816326530613, "grad_norm": 27.108728408813477, "learning_rate": 7.922448979591837e-06, "loss": 1.4812, "step": 20631 }, { "epoch": 84.21224489795918, "grad_norm": 21.896167755126953, "learning_rate": 7.920408163265307e-06, "loss": 1.2401, "step": 20632 }, { "epoch": 84.21632653061225, "grad_norm": 34.578826904296875, "learning_rate": 7.918367346938776e-06, "loss": 0.3051, "step": 20633 }, { "epoch": 84.2204081632653, "grad_norm": 34.81217575073242, "learning_rate": 7.916326530612245e-06, "loss": 1.1853, "step": 20634 }, { "epoch": 84.22448979591837, "grad_norm": 14.793940544128418, "learning_rate": 7.914285714285714e-06, "loss": 0.0962, "step": 20635 }, { "epoch": 84.22857142857143, "grad_norm": 25.707712173461914, "learning_rate": 7.912244897959184e-06, "loss": 1.4543, "step": 20636 }, { "epoch": 84.2326530612245, "grad_norm": 43.09148406982422, "learning_rate": 7.910204081632653e-06, "loss": 0.6481, "step": 20637 }, { "epoch": 84.23673469387755, "grad_norm": 27.93001937866211, "learning_rate": 7.908163265306124e-06, "loss": 0.7064, "step": 20638 }, { "epoch": 84.2408163265306, "grad_norm": 32.923885345458984, "learning_rate": 7.90612244897959e-06, "loss": 1.2841, "step": 20639 }, { "epoch": 84.24489795918367, "grad_norm": 49.23904800415039, "learning_rate": 7.904081632653061e-06, "loss": 0.897, "step": 20640 }, { "epoch": 84.24897959183673, "grad_norm": 26.244163513183594, "learning_rate": 7.90204081632653e-06, "loss": 1.4721, "step": 20641 }, { "epoch": 84.2530612244898, "grad_norm": 28.715768814086914, "learning_rate": 7.9e-06, "loss": 0.546, "step": 20642 }, { "epoch": 84.25714285714285, "grad_norm": 21.235563278198242, "learning_rate": 7.89795918367347e-06, "loss": 1.9519, "step": 20643 }, { "epoch": 84.26122448979592, "grad_norm": 34.70505142211914, "learning_rate": 7.89591836734694e-06, "loss": 1.221, "step": 20644 }, { "epoch": 84.26530612244898, "grad_norm": 29.1474609375, "learning_rate": 7.893877551020409e-06, "loss": 1.2159, "step": 20645 }, { "epoch": 84.26938775510204, "grad_norm": 31.864797592163086, "learning_rate": 7.891836734693878e-06, "loss": 1.6998, "step": 20646 }, { "epoch": 84.2734693877551, "grad_norm": 24.76119041442871, "learning_rate": 7.889795918367348e-06, "loss": 2.0809, "step": 20647 }, { "epoch": 84.27755102040817, "grad_norm": 26.128822326660156, "learning_rate": 7.887755102040817e-06, "loss": 1.4537, "step": 20648 }, { "epoch": 84.28163265306122, "grad_norm": 33.02544021606445, "learning_rate": 7.885714285714286e-06, "loss": 0.5451, "step": 20649 }, { "epoch": 84.28571428571429, "grad_norm": 38.040645599365234, "learning_rate": 7.883673469387755e-06, "loss": 0.9223, "step": 20650 }, { "epoch": 84.28979591836735, "grad_norm": 27.54778480529785, "learning_rate": 7.881632653061225e-06, "loss": 0.7448, "step": 20651 }, { "epoch": 84.29387755102042, "grad_norm": 32.76668167114258, "learning_rate": 7.879591836734694e-06, "loss": 0.3917, "step": 20652 }, { "epoch": 84.29795918367347, "grad_norm": 28.71042251586914, "learning_rate": 7.877551020408164e-06, "loss": 1.1273, "step": 20653 }, { "epoch": 84.30204081632652, "grad_norm": 23.969886779785156, "learning_rate": 7.875510204081633e-06, "loss": 1.5638, "step": 20654 }, { "epoch": 84.3061224489796, "grad_norm": 21.326749801635742, "learning_rate": 7.873469387755104e-06, "loss": 2.0586, "step": 20655 }, { "epoch": 84.31020408163265, "grad_norm": 8.202876091003418, "learning_rate": 7.87142857142857e-06, "loss": 0.0782, "step": 20656 }, { "epoch": 84.31428571428572, "grad_norm": 23.881305694580078, "learning_rate": 7.869387755102041e-06, "loss": 0.2224, "step": 20657 }, { "epoch": 84.31836734693877, "grad_norm": 31.269655227661133, "learning_rate": 7.86734693877551e-06, "loss": 0.9428, "step": 20658 }, { "epoch": 84.32244897959184, "grad_norm": 36.313018798828125, "learning_rate": 7.86530612244898e-06, "loss": 0.2776, "step": 20659 }, { "epoch": 84.3265306122449, "grad_norm": 29.460153579711914, "learning_rate": 7.86326530612245e-06, "loss": 1.2335, "step": 20660 }, { "epoch": 84.33061224489796, "grad_norm": 33.277889251708984, "learning_rate": 7.861224489795918e-06, "loss": 0.5494, "step": 20661 }, { "epoch": 84.33469387755102, "grad_norm": 36.057518005371094, "learning_rate": 7.859183673469387e-06, "loss": 1.0854, "step": 20662 }, { "epoch": 84.33877551020409, "grad_norm": 18.17528533935547, "learning_rate": 7.857142857142858e-06, "loss": 2.3411, "step": 20663 }, { "epoch": 84.34285714285714, "grad_norm": 33.363834381103516, "learning_rate": 7.855102040816326e-06, "loss": 0.5494, "step": 20664 }, { "epoch": 84.34693877551021, "grad_norm": 27.268550872802734, "learning_rate": 7.853061224489797e-06, "loss": 1.4167, "step": 20665 }, { "epoch": 84.35102040816327, "grad_norm": 39.264976501464844, "learning_rate": 7.851020408163266e-06, "loss": 1.0977, "step": 20666 }, { "epoch": 84.35510204081632, "grad_norm": 39.35285568237305, "learning_rate": 7.848979591836735e-06, "loss": 0.6807, "step": 20667 }, { "epoch": 84.35918367346939, "grad_norm": 29.985458374023438, "learning_rate": 7.846938775510203e-06, "loss": 1.1605, "step": 20668 }, { "epoch": 84.36326530612244, "grad_norm": 34.185333251953125, "learning_rate": 7.844897959183674e-06, "loss": 0.3218, "step": 20669 }, { "epoch": 84.36734693877551, "grad_norm": 19.537200927734375, "learning_rate": 7.842857142857143e-06, "loss": 0.1046, "step": 20670 }, { "epoch": 84.37142857142857, "grad_norm": 15.774665832519531, "learning_rate": 7.840816326530613e-06, "loss": 2.2407, "step": 20671 }, { "epoch": 84.37551020408164, "grad_norm": 30.8165340423584, "learning_rate": 7.838775510204082e-06, "loss": 0.4765, "step": 20672 }, { "epoch": 84.37959183673469, "grad_norm": 41.436500549316406, "learning_rate": 7.836734693877551e-06, "loss": 0.6005, "step": 20673 }, { "epoch": 84.38367346938776, "grad_norm": 46.08671569824219, "learning_rate": 7.834693877551021e-06, "loss": 1.2062, "step": 20674 }, { "epoch": 84.38775510204081, "grad_norm": 27.740793228149414, "learning_rate": 7.83265306122449e-06, "loss": 0.5656, "step": 20675 }, { "epoch": 84.39183673469388, "grad_norm": 32.64674758911133, "learning_rate": 7.830612244897959e-06, "loss": 1.0417, "step": 20676 }, { "epoch": 84.39591836734694, "grad_norm": 25.81229019165039, "learning_rate": 7.82857142857143e-06, "loss": 1.6162, "step": 20677 }, { "epoch": 84.4, "grad_norm": 38.34517288208008, "learning_rate": 7.826530612244898e-06, "loss": 1.0839, "step": 20678 }, { "epoch": 84.40408163265306, "grad_norm": 41.14186096191406, "learning_rate": 7.824489795918367e-06, "loss": 0.2769, "step": 20679 }, { "epoch": 84.40816326530613, "grad_norm": 35.209590911865234, "learning_rate": 7.822448979591838e-06, "loss": 0.5639, "step": 20680 }, { "epoch": 84.41224489795918, "grad_norm": 33.254547119140625, "learning_rate": 7.820408163265307e-06, "loss": 1.0765, "step": 20681 }, { "epoch": 84.41632653061224, "grad_norm": 33.41438674926758, "learning_rate": 7.818367346938777e-06, "loss": 1.1392, "step": 20682 }, { "epoch": 84.42040816326531, "grad_norm": 31.0274600982666, "learning_rate": 7.816326530612244e-06, "loss": 1.326, "step": 20683 }, { "epoch": 84.42448979591836, "grad_norm": 34.735572814941406, "learning_rate": 7.814285714285715e-06, "loss": 0.9372, "step": 20684 }, { "epoch": 84.42857142857143, "grad_norm": 39.511207580566406, "learning_rate": 7.812244897959184e-06, "loss": 1.23, "step": 20685 }, { "epoch": 84.43265306122449, "grad_norm": 25.83338165283203, "learning_rate": 7.810204081632654e-06, "loss": 1.8538, "step": 20686 }, { "epoch": 84.43673469387755, "grad_norm": 31.59366798400879, "learning_rate": 7.808163265306123e-06, "loss": 1.0409, "step": 20687 }, { "epoch": 84.44081632653061, "grad_norm": 36.47875213623047, "learning_rate": 7.806122448979593e-06, "loss": 0.8586, "step": 20688 }, { "epoch": 84.44489795918368, "grad_norm": 8.83336067199707, "learning_rate": 7.80408163265306e-06, "loss": 0.0762, "step": 20689 }, { "epoch": 84.44897959183673, "grad_norm": 33.754215240478516, "learning_rate": 7.802040816326531e-06, "loss": 0.4258, "step": 20690 }, { "epoch": 84.4530612244898, "grad_norm": 26.174161911010742, "learning_rate": 7.8e-06, "loss": 0.698, "step": 20691 }, { "epoch": 84.45714285714286, "grad_norm": 4.750451564788818, "learning_rate": 7.79795918367347e-06, "loss": 0.051, "step": 20692 }, { "epoch": 84.46122448979592, "grad_norm": 43.414669036865234, "learning_rate": 7.79591836734694e-06, "loss": 0.3742, "step": 20693 }, { "epoch": 84.46530612244898, "grad_norm": 34.9552001953125, "learning_rate": 7.793877551020408e-06, "loss": 0.3782, "step": 20694 }, { "epoch": 84.46938775510205, "grad_norm": 30.608448028564453, "learning_rate": 7.791836734693877e-06, "loss": 0.9817, "step": 20695 }, { "epoch": 84.4734693877551, "grad_norm": 9.823989868164062, "learning_rate": 7.789795918367347e-06, "loss": 3.0011, "step": 20696 }, { "epoch": 84.47755102040816, "grad_norm": 14.815694808959961, "learning_rate": 7.787755102040816e-06, "loss": 0.0894, "step": 20697 }, { "epoch": 84.48163265306123, "grad_norm": 31.672882080078125, "learning_rate": 7.785714285714287e-06, "loss": 1.2556, "step": 20698 }, { "epoch": 84.48571428571428, "grad_norm": 25.119335174560547, "learning_rate": 7.783673469387755e-06, "loss": 1.4039, "step": 20699 }, { "epoch": 84.48979591836735, "grad_norm": 35.700340270996094, "learning_rate": 7.781632653061224e-06, "loss": 1.1537, "step": 20700 }, { "epoch": 84.4938775510204, "grad_norm": 12.819910049438477, "learning_rate": 7.779591836734695e-06, "loss": 0.1366, "step": 20701 }, { "epoch": 84.49795918367347, "grad_norm": 43.303016662597656, "learning_rate": 7.777551020408164e-06, "loss": 0.3137, "step": 20702 }, { "epoch": 84.50204081632653, "grad_norm": 39.109771728515625, "learning_rate": 7.775510204081632e-06, "loss": 0.3868, "step": 20703 }, { "epoch": 84.5061224489796, "grad_norm": 34.98418045043945, "learning_rate": 7.773469387755103e-06, "loss": 0.9446, "step": 20704 }, { "epoch": 84.51020408163265, "grad_norm": 35.366424560546875, "learning_rate": 7.771428571428572e-06, "loss": 0.3652, "step": 20705 }, { "epoch": 84.51428571428572, "grad_norm": 34.12211990356445, "learning_rate": 7.76938775510204e-06, "loss": 0.6204, "step": 20706 }, { "epoch": 84.51836734693877, "grad_norm": 33.48725509643555, "learning_rate": 7.767346938775511e-06, "loss": 0.9152, "step": 20707 }, { "epoch": 84.52244897959184, "grad_norm": 38.11866760253906, "learning_rate": 7.76530612244898e-06, "loss": 1.0045, "step": 20708 }, { "epoch": 84.5265306122449, "grad_norm": 26.476051330566406, "learning_rate": 7.76326530612245e-06, "loss": 0.3282, "step": 20709 }, { "epoch": 84.53061224489795, "grad_norm": 29.7315673828125, "learning_rate": 7.76122448979592e-06, "loss": 1.579, "step": 20710 }, { "epoch": 84.53469387755102, "grad_norm": 28.59246253967285, "learning_rate": 7.759183673469388e-06, "loss": 0.7325, "step": 20711 }, { "epoch": 84.53877551020408, "grad_norm": 22.962751388549805, "learning_rate": 7.757142857142857e-06, "loss": 1.3899, "step": 20712 }, { "epoch": 84.54285714285714, "grad_norm": 35.138092041015625, "learning_rate": 7.755102040816327e-06, "loss": 0.676, "step": 20713 }, { "epoch": 84.5469387755102, "grad_norm": 20.298036575317383, "learning_rate": 7.753061224489796e-06, "loss": 0.1682, "step": 20714 }, { "epoch": 84.55102040816327, "grad_norm": 34.51530838012695, "learning_rate": 7.751020408163267e-06, "loss": 1.4221, "step": 20715 }, { "epoch": 84.55510204081632, "grad_norm": 37.98850631713867, "learning_rate": 7.748979591836734e-06, "loss": 0.2901, "step": 20716 }, { "epoch": 84.55918367346939, "grad_norm": 39.421016693115234, "learning_rate": 7.746938775510204e-06, "loss": 0.7899, "step": 20717 }, { "epoch": 84.56326530612245, "grad_norm": 26.025917053222656, "learning_rate": 7.744897959183673e-06, "loss": 1.5026, "step": 20718 }, { "epoch": 84.56734693877551, "grad_norm": 38.27712631225586, "learning_rate": 7.742857142857144e-06, "loss": 0.9471, "step": 20719 }, { "epoch": 84.57142857142857, "grad_norm": 44.744319915771484, "learning_rate": 7.740816326530613e-06, "loss": 0.6544, "step": 20720 }, { "epoch": 84.57551020408164, "grad_norm": 24.478588104248047, "learning_rate": 7.738775510204083e-06, "loss": 1.9768, "step": 20721 }, { "epoch": 84.57959183673469, "grad_norm": 26.403684616088867, "learning_rate": 7.73673469387755e-06, "loss": 1.5155, "step": 20722 }, { "epoch": 84.58367346938776, "grad_norm": 32.966678619384766, "learning_rate": 7.73469387755102e-06, "loss": 0.2701, "step": 20723 }, { "epoch": 84.58775510204082, "grad_norm": 32.5198974609375, "learning_rate": 7.73265306122449e-06, "loss": 1.1847, "step": 20724 }, { "epoch": 84.59183673469387, "grad_norm": 38.0422477722168, "learning_rate": 7.73061224489796e-06, "loss": 0.7364, "step": 20725 }, { "epoch": 84.59591836734694, "grad_norm": 23.31952667236328, "learning_rate": 7.728571428571429e-06, "loss": 1.8866, "step": 20726 }, { "epoch": 84.6, "grad_norm": 29.097625732421875, "learning_rate": 7.726530612244898e-06, "loss": 1.5899, "step": 20727 }, { "epoch": 84.60408163265306, "grad_norm": 37.67923355102539, "learning_rate": 7.724489795918367e-06, "loss": 0.8906, "step": 20728 }, { "epoch": 84.60816326530612, "grad_norm": 33.84555435180664, "learning_rate": 7.722448979591837e-06, "loss": 0.8006, "step": 20729 }, { "epoch": 84.61224489795919, "grad_norm": 20.065162658691406, "learning_rate": 7.720408163265306e-06, "loss": 1.8693, "step": 20730 }, { "epoch": 84.61632653061224, "grad_norm": 27.47602653503418, "learning_rate": 7.718367346938776e-06, "loss": 1.9368, "step": 20731 }, { "epoch": 84.62040816326531, "grad_norm": 25.481300354003906, "learning_rate": 7.716326530612245e-06, "loss": 0.3859, "step": 20732 }, { "epoch": 84.62448979591836, "grad_norm": 33.31747055053711, "learning_rate": 7.714285714285714e-06, "loss": 0.5734, "step": 20733 }, { "epoch": 84.62857142857143, "grad_norm": 43.85130310058594, "learning_rate": 7.712244897959185e-06, "loss": 1.0671, "step": 20734 }, { "epoch": 84.63265306122449, "grad_norm": 37.145755767822266, "learning_rate": 7.710204081632653e-06, "loss": 0.9384, "step": 20735 }, { "epoch": 84.63673469387756, "grad_norm": 27.30767822265625, "learning_rate": 7.708163265306124e-06, "loss": 1.5866, "step": 20736 }, { "epoch": 84.64081632653061, "grad_norm": 26.70640754699707, "learning_rate": 7.706122448979593e-06, "loss": 1.3959, "step": 20737 }, { "epoch": 84.64489795918368, "grad_norm": 17.03577423095703, "learning_rate": 7.704081632653061e-06, "loss": 0.1208, "step": 20738 }, { "epoch": 84.64897959183673, "grad_norm": 42.57433319091797, "learning_rate": 7.70204081632653e-06, "loss": 0.7555, "step": 20739 }, { "epoch": 84.65306122448979, "grad_norm": 13.423806190490723, "learning_rate": 7.7e-06, "loss": 0.0926, "step": 20740 }, { "epoch": 84.65714285714286, "grad_norm": 28.690946578979492, "learning_rate": 7.69795918367347e-06, "loss": 1.0449, "step": 20741 }, { "epoch": 84.66122448979591, "grad_norm": 26.65097999572754, "learning_rate": 7.69591836734694e-06, "loss": 1.3481, "step": 20742 }, { "epoch": 84.66530612244898, "grad_norm": 29.858783721923828, "learning_rate": 7.693877551020409e-06, "loss": 0.3689, "step": 20743 }, { "epoch": 84.66938775510204, "grad_norm": 21.501930236816406, "learning_rate": 7.691836734693878e-06, "loss": 1.98, "step": 20744 }, { "epoch": 84.6734693877551, "grad_norm": 28.962772369384766, "learning_rate": 7.689795918367347e-06, "loss": 0.5464, "step": 20745 }, { "epoch": 84.67755102040816, "grad_norm": 2.9894979000091553, "learning_rate": 7.687755102040817e-06, "loss": 0.0293, "step": 20746 }, { "epoch": 84.68163265306123, "grad_norm": 33.992794036865234, "learning_rate": 7.685714285714286e-06, "loss": 0.3655, "step": 20747 }, { "epoch": 84.68571428571428, "grad_norm": 36.255775451660156, "learning_rate": 7.683673469387756e-06, "loss": 0.9979, "step": 20748 }, { "epoch": 84.68979591836735, "grad_norm": 37.291664123535156, "learning_rate": 7.681632653061224e-06, "loss": 0.7597, "step": 20749 }, { "epoch": 84.6938775510204, "grad_norm": 33.185325622558594, "learning_rate": 7.679591836734694e-06, "loss": 1.2354, "step": 20750 }, { "epoch": 84.69795918367348, "grad_norm": 29.12401580810547, "learning_rate": 7.677551020408163e-06, "loss": 1.4123, "step": 20751 }, { "epoch": 84.70204081632653, "grad_norm": 32.792823791503906, "learning_rate": 7.675510204081633e-06, "loss": 0.8068, "step": 20752 }, { "epoch": 84.70612244897958, "grad_norm": 35.42620086669922, "learning_rate": 7.673469387755102e-06, "loss": 1.1361, "step": 20753 }, { "epoch": 84.71020408163265, "grad_norm": 31.64348793029785, "learning_rate": 7.671428571428573e-06, "loss": 0.6289, "step": 20754 }, { "epoch": 84.71428571428571, "grad_norm": 23.987550735473633, "learning_rate": 7.66938775510204e-06, "loss": 1.3882, "step": 20755 }, { "epoch": 84.71836734693878, "grad_norm": 23.991472244262695, "learning_rate": 7.66734693877551e-06, "loss": 1.5123, "step": 20756 }, { "epoch": 84.72244897959183, "grad_norm": 23.27069854736328, "learning_rate": 7.66530612244898e-06, "loss": 0.2159, "step": 20757 }, { "epoch": 84.7265306122449, "grad_norm": 26.284236907958984, "learning_rate": 7.66326530612245e-06, "loss": 0.7348, "step": 20758 }, { "epoch": 84.73061224489796, "grad_norm": 33.39582443237305, "learning_rate": 7.661224489795919e-06, "loss": 0.4799, "step": 20759 }, { "epoch": 84.73469387755102, "grad_norm": 34.99277114868164, "learning_rate": 7.659183673469387e-06, "loss": 1.0878, "step": 20760 }, { "epoch": 84.73877551020408, "grad_norm": 35.63922882080078, "learning_rate": 7.657142857142858e-06, "loss": 0.3023, "step": 20761 }, { "epoch": 84.74285714285715, "grad_norm": 25.484081268310547, "learning_rate": 7.655102040816327e-06, "loss": 0.2445, "step": 20762 }, { "epoch": 84.7469387755102, "grad_norm": 29.004802703857422, "learning_rate": 7.653061224489797e-06, "loss": 1.5738, "step": 20763 }, { "epoch": 84.75102040816327, "grad_norm": 36.88319396972656, "learning_rate": 7.651020408163266e-06, "loss": 0.6751, "step": 20764 }, { "epoch": 84.75510204081633, "grad_norm": 20.456933975219727, "learning_rate": 7.648979591836737e-06, "loss": 0.1235, "step": 20765 }, { "epoch": 84.7591836734694, "grad_norm": 34.87173843383789, "learning_rate": 7.646938775510204e-06, "loss": 1.227, "step": 20766 }, { "epoch": 84.76326530612245, "grad_norm": 27.308597564697266, "learning_rate": 7.644897959183674e-06, "loss": 1.7216, "step": 20767 }, { "epoch": 84.7673469387755, "grad_norm": 31.724687576293945, "learning_rate": 7.642857142857143e-06, "loss": 1.0622, "step": 20768 }, { "epoch": 84.77142857142857, "grad_norm": 28.456279754638672, "learning_rate": 7.640816326530614e-06, "loss": 1.3157, "step": 20769 }, { "epoch": 84.77551020408163, "grad_norm": 35.51018142700195, "learning_rate": 7.638775510204082e-06, "loss": 0.7133, "step": 20770 }, { "epoch": 84.7795918367347, "grad_norm": 34.66157531738281, "learning_rate": 7.636734693877551e-06, "loss": 1.3614, "step": 20771 }, { "epoch": 84.78367346938775, "grad_norm": 39.478111267089844, "learning_rate": 7.63469387755102e-06, "loss": 1.021, "step": 20772 }, { "epoch": 84.78775510204082, "grad_norm": 40.86174392700195, "learning_rate": 7.63265306122449e-06, "loss": 0.885, "step": 20773 }, { "epoch": 84.79183673469387, "grad_norm": 23.96393585205078, "learning_rate": 7.63061224489796e-06, "loss": 1.6125, "step": 20774 }, { "epoch": 84.79591836734694, "grad_norm": 25.27371597290039, "learning_rate": 7.628571428571429e-06, "loss": 1.1475, "step": 20775 }, { "epoch": 84.8, "grad_norm": 34.858489990234375, "learning_rate": 7.626530612244899e-06, "loss": 0.3982, "step": 20776 }, { "epoch": 84.80408163265307, "grad_norm": 34.28365707397461, "learning_rate": 7.6244897959183675e-06, "loss": 1.347, "step": 20777 }, { "epoch": 84.80816326530612, "grad_norm": 29.402576446533203, "learning_rate": 7.622448979591837e-06, "loss": 1.2448, "step": 20778 }, { "epoch": 84.81224489795919, "grad_norm": 33.66559600830078, "learning_rate": 7.620408163265307e-06, "loss": 0.6401, "step": 20779 }, { "epoch": 84.81632653061224, "grad_norm": 33.60127639770508, "learning_rate": 7.6183673469387765e-06, "loss": 0.2473, "step": 20780 }, { "epoch": 84.82040816326531, "grad_norm": 24.075031280517578, "learning_rate": 7.616326530612246e-06, "loss": 0.2744, "step": 20781 }, { "epoch": 84.82448979591837, "grad_norm": 53.291847229003906, "learning_rate": 7.614285714285714e-06, "loss": 1.2601, "step": 20782 }, { "epoch": 84.82857142857142, "grad_norm": 38.900047302246094, "learning_rate": 7.612244897959184e-06, "loss": 1.2852, "step": 20783 }, { "epoch": 84.83265306122449, "grad_norm": 33.07009506225586, "learning_rate": 7.6102040816326534e-06, "loss": 1.3304, "step": 20784 }, { "epoch": 84.83673469387755, "grad_norm": 30.924409866333008, "learning_rate": 7.608163265306123e-06, "loss": 0.3635, "step": 20785 }, { "epoch": 84.84081632653061, "grad_norm": 29.470117568969727, "learning_rate": 7.606122448979593e-06, "loss": 0.359, "step": 20786 }, { "epoch": 84.84489795918367, "grad_norm": 26.838340759277344, "learning_rate": 7.6040816326530624e-06, "loss": 1.9343, "step": 20787 }, { "epoch": 84.84897959183674, "grad_norm": 47.77318572998047, "learning_rate": 7.60204081632653e-06, "loss": 0.8767, "step": 20788 }, { "epoch": 84.85306122448979, "grad_norm": 29.93279266357422, "learning_rate": 7.6e-06, "loss": 0.8091, "step": 20789 }, { "epoch": 84.85714285714286, "grad_norm": 18.46976089477539, "learning_rate": 7.59795918367347e-06, "loss": 2.5988, "step": 20790 }, { "epoch": 84.86122448979592, "grad_norm": 34.45350646972656, "learning_rate": 7.595918367346939e-06, "loss": 0.8928, "step": 20791 }, { "epoch": 84.86530612244898, "grad_norm": 26.469356536865234, "learning_rate": 7.593877551020409e-06, "loss": 1.5413, "step": 20792 }, { "epoch": 84.86938775510204, "grad_norm": 31.27228355407715, "learning_rate": 7.591836734693877e-06, "loss": 0.4664, "step": 20793 }, { "epoch": 84.87346938775511, "grad_norm": 33.50174331665039, "learning_rate": 7.589795918367347e-06, "loss": 1.1802, "step": 20794 }, { "epoch": 84.87755102040816, "grad_norm": 25.60812759399414, "learning_rate": 7.587755102040816e-06, "loss": 1.3608, "step": 20795 }, { "epoch": 84.88163265306123, "grad_norm": 8.475034713745117, "learning_rate": 7.585714285714286e-06, "loss": 0.0824, "step": 20796 }, { "epoch": 84.88571428571429, "grad_norm": 24.48943328857422, "learning_rate": 7.583673469387756e-06, "loss": 1.8538, "step": 20797 }, { "epoch": 84.88979591836734, "grad_norm": 34.80611801147461, "learning_rate": 7.5816326530612245e-06, "loss": 1.3846, "step": 20798 }, { "epoch": 84.89387755102041, "grad_norm": 39.38400650024414, "learning_rate": 7.579591836734694e-06, "loss": 1.4156, "step": 20799 }, { "epoch": 84.89795918367346, "grad_norm": 33.43343734741211, "learning_rate": 7.577551020408164e-06, "loss": 1.2643, "step": 20800 }, { "epoch": 84.90204081632653, "grad_norm": 44.079010009765625, "learning_rate": 7.575510204081633e-06, "loss": 0.695, "step": 20801 }, { "epoch": 84.90612244897959, "grad_norm": 24.900901794433594, "learning_rate": 7.573469387755102e-06, "loss": 0.2571, "step": 20802 }, { "epoch": 84.91020408163266, "grad_norm": 36.6231803894043, "learning_rate": 7.571428571428572e-06, "loss": 0.326, "step": 20803 }, { "epoch": 84.91428571428571, "grad_norm": 14.91800308227539, "learning_rate": 7.569387755102041e-06, "loss": 0.1351, "step": 20804 }, { "epoch": 84.91836734693878, "grad_norm": 18.03799819946289, "learning_rate": 7.5673469387755105e-06, "loss": 2.0183, "step": 20805 }, { "epoch": 84.92244897959183, "grad_norm": 23.524784088134766, "learning_rate": 7.56530612244898e-06, "loss": 0.3221, "step": 20806 }, { "epoch": 84.9265306122449, "grad_norm": 31.58207893371582, "learning_rate": 7.56326530612245e-06, "loss": 1.3875, "step": 20807 }, { "epoch": 84.93061224489796, "grad_norm": 29.320985794067383, "learning_rate": 7.5612244897959195e-06, "loss": 0.3838, "step": 20808 }, { "epoch": 84.93469387755103, "grad_norm": 42.95953369140625, "learning_rate": 7.5591836734693875e-06, "loss": 0.5354, "step": 20809 }, { "epoch": 84.93877551020408, "grad_norm": 23.55130386352539, "learning_rate": 7.557142857142857e-06, "loss": 2.0743, "step": 20810 }, { "epoch": 84.94285714285714, "grad_norm": 32.14884948730469, "learning_rate": 7.555102040816327e-06, "loss": 0.9486, "step": 20811 }, { "epoch": 84.9469387755102, "grad_norm": 28.627967834472656, "learning_rate": 7.5530612244897965e-06, "loss": 0.3347, "step": 20812 }, { "epoch": 84.95102040816326, "grad_norm": 27.366249084472656, "learning_rate": 7.551020408163266e-06, "loss": 1.4807, "step": 20813 }, { "epoch": 84.95510204081633, "grad_norm": 36.13003158569336, "learning_rate": 7.548979591836736e-06, "loss": 0.5099, "step": 20814 }, { "epoch": 84.95918367346938, "grad_norm": 37.152679443359375, "learning_rate": 7.546938775510204e-06, "loss": 0.8302, "step": 20815 }, { "epoch": 84.96326530612245, "grad_norm": 57.52214813232422, "learning_rate": 7.5448979591836735e-06, "loss": 0.5656, "step": 20816 }, { "epoch": 84.9673469387755, "grad_norm": 39.02716827392578, "learning_rate": 7.542857142857143e-06, "loss": 0.684, "step": 20817 }, { "epoch": 84.97142857142858, "grad_norm": 37.09990692138672, "learning_rate": 7.540816326530613e-06, "loss": 1.381, "step": 20818 }, { "epoch": 84.97551020408163, "grad_norm": 34.68708419799805, "learning_rate": 7.5387755102040825e-06, "loss": 1.0503, "step": 20819 }, { "epoch": 84.9795918367347, "grad_norm": 32.39324188232422, "learning_rate": 7.5367346938775504e-06, "loss": 0.8259, "step": 20820 }, { "epoch": 84.98367346938775, "grad_norm": 25.038057327270508, "learning_rate": 7.53469387755102e-06, "loss": 1.6314, "step": 20821 }, { "epoch": 84.98775510204082, "grad_norm": 11.20328426361084, "learning_rate": 7.53265306122449e-06, "loss": 0.0867, "step": 20822 }, { "epoch": 84.99183673469388, "grad_norm": 28.90766716003418, "learning_rate": 7.5306122448979594e-06, "loss": 0.4144, "step": 20823 }, { "epoch": 84.99591836734695, "grad_norm": 25.388608932495117, "learning_rate": 7.528571428571429e-06, "loss": 0.3039, "step": 20824 }, { "epoch": 85.0, "grad_norm": 26.02895164489746, "learning_rate": 7.526530612244899e-06, "loss": 1.4447, "step": 20825 }, { "epoch": 85.00408163265305, "grad_norm": 34.870147705078125, "learning_rate": 7.524489795918368e-06, "loss": 0.6509, "step": 20826 }, { "epoch": 85.00816326530612, "grad_norm": 50.467559814453125, "learning_rate": 7.522448979591836e-06, "loss": 0.878, "step": 20827 }, { "epoch": 85.01224489795918, "grad_norm": 47.51141357421875, "learning_rate": 7.520408163265306e-06, "loss": 0.6115, "step": 20828 }, { "epoch": 85.01632653061225, "grad_norm": 27.30330467224121, "learning_rate": 7.518367346938776e-06, "loss": 1.5096, "step": 20829 }, { "epoch": 85.0204081632653, "grad_norm": 40.46978759765625, "learning_rate": 7.516326530612245e-06, "loss": 0.9532, "step": 20830 }, { "epoch": 85.02448979591837, "grad_norm": 36.97090530395508, "learning_rate": 7.514285714285714e-06, "loss": 0.9751, "step": 20831 }, { "epoch": 85.02857142857142, "grad_norm": 27.874937057495117, "learning_rate": 7.512244897959184e-06, "loss": 0.376, "step": 20832 }, { "epoch": 85.0326530612245, "grad_norm": 27.351648330688477, "learning_rate": 7.5102040816326536e-06, "loss": 1.5973, "step": 20833 }, { "epoch": 85.03673469387755, "grad_norm": 34.49925231933594, "learning_rate": 7.508163265306123e-06, "loss": 0.3913, "step": 20834 }, { "epoch": 85.04081632653062, "grad_norm": 20.32525062561035, "learning_rate": 7.506122448979593e-06, "loss": 1.9887, "step": 20835 }, { "epoch": 85.04489795918367, "grad_norm": 20.467912673950195, "learning_rate": 7.5040816326530626e-06, "loss": 1.8309, "step": 20836 }, { "epoch": 85.04897959183674, "grad_norm": 27.62213897705078, "learning_rate": 7.5020408163265305e-06, "loss": 1.3771, "step": 20837 }, { "epoch": 85.0530612244898, "grad_norm": 29.37392234802246, "learning_rate": 7.5e-06, "loss": 1.3623, "step": 20838 }, { "epoch": 85.05714285714286, "grad_norm": 28.193431854248047, "learning_rate": 7.49795918367347e-06, "loss": 1.4629, "step": 20839 }, { "epoch": 85.06122448979592, "grad_norm": 26.226137161254883, "learning_rate": 7.4959183673469395e-06, "loss": 1.9253, "step": 20840 }, { "epoch": 85.06530612244897, "grad_norm": 27.67038917541504, "learning_rate": 7.493877551020409e-06, "loss": 1.1039, "step": 20841 }, { "epoch": 85.06938775510204, "grad_norm": 17.193073272705078, "learning_rate": 7.491836734693877e-06, "loss": 0.1165, "step": 20842 }, { "epoch": 85.0734693877551, "grad_norm": 57.63032531738281, "learning_rate": 7.489795918367347e-06, "loss": 1.1753, "step": 20843 }, { "epoch": 85.07755102040817, "grad_norm": 28.71755599975586, "learning_rate": 7.4877551020408165e-06, "loss": 0.407, "step": 20844 }, { "epoch": 85.08163265306122, "grad_norm": 12.4430513381958, "learning_rate": 7.485714285714286e-06, "loss": 0.0797, "step": 20845 }, { "epoch": 85.08571428571429, "grad_norm": 35.27456283569336, "learning_rate": 7.483673469387756e-06, "loss": 0.7903, "step": 20846 }, { "epoch": 85.08979591836734, "grad_norm": 50.87868118286133, "learning_rate": 7.4816326530612255e-06, "loss": 0.4669, "step": 20847 }, { "epoch": 85.09387755102041, "grad_norm": 25.354888916015625, "learning_rate": 7.4795918367346935e-06, "loss": 1.9257, "step": 20848 }, { "epoch": 85.09795918367347, "grad_norm": 30.266103744506836, "learning_rate": 7.477551020408163e-06, "loss": 0.6312, "step": 20849 }, { "epoch": 85.10204081632654, "grad_norm": 34.90842056274414, "learning_rate": 7.475510204081633e-06, "loss": 0.2732, "step": 20850 }, { "epoch": 85.10612244897959, "grad_norm": 11.065704345703125, "learning_rate": 7.4734693877551025e-06, "loss": 0.0876, "step": 20851 }, { "epoch": 85.11020408163266, "grad_norm": 29.798202514648438, "learning_rate": 7.471428571428572e-06, "loss": 0.6997, "step": 20852 }, { "epoch": 85.11428571428571, "grad_norm": 30.104814529418945, "learning_rate": 7.46938775510204e-06, "loss": 1.1388, "step": 20853 }, { "epoch": 85.11836734693877, "grad_norm": 29.855804443359375, "learning_rate": 7.46734693877551e-06, "loss": 1.0081, "step": 20854 }, { "epoch": 85.12244897959184, "grad_norm": 31.37977409362793, "learning_rate": 7.4653061224489794e-06, "loss": 0.5762, "step": 20855 }, { "epoch": 85.12653061224489, "grad_norm": 33.21879959106445, "learning_rate": 7.463265306122449e-06, "loss": 1.0322, "step": 20856 }, { "epoch": 85.13061224489796, "grad_norm": 16.78862762451172, "learning_rate": 7.461224489795919e-06, "loss": 2.2381, "step": 20857 }, { "epoch": 85.13469387755102, "grad_norm": 33.83087158203125, "learning_rate": 7.4591836734693884e-06, "loss": 0.5358, "step": 20858 }, { "epoch": 85.13877551020408, "grad_norm": 36.588802337646484, "learning_rate": 7.457142857142857e-06, "loss": 0.6126, "step": 20859 }, { "epoch": 85.14285714285714, "grad_norm": 35.597816467285156, "learning_rate": 7.455102040816327e-06, "loss": 1.1568, "step": 20860 }, { "epoch": 85.14693877551021, "grad_norm": 32.50872802734375, "learning_rate": 7.453061224489797e-06, "loss": 1.1823, "step": 20861 }, { "epoch": 85.15102040816326, "grad_norm": 33.81364059448242, "learning_rate": 7.451020408163266e-06, "loss": 0.8482, "step": 20862 }, { "epoch": 85.15510204081633, "grad_norm": 36.06181335449219, "learning_rate": 7.448979591836736e-06, "loss": 0.6838, "step": 20863 }, { "epoch": 85.15918367346939, "grad_norm": 37.1413688659668, "learning_rate": 7.446938775510204e-06, "loss": 1.0805, "step": 20864 }, { "epoch": 85.16326530612245, "grad_norm": 34.036170959472656, "learning_rate": 7.4448979591836736e-06, "loss": 1.052, "step": 20865 }, { "epoch": 85.16734693877551, "grad_norm": 17.263328552246094, "learning_rate": 7.442857142857143e-06, "loss": 2.5529, "step": 20866 }, { "epoch": 85.17142857142858, "grad_norm": 23.715364456176758, "learning_rate": 7.440816326530613e-06, "loss": 2.0267, "step": 20867 }, { "epoch": 85.17551020408163, "grad_norm": 28.390430450439453, "learning_rate": 7.4387755102040826e-06, "loss": 1.3375, "step": 20868 }, { "epoch": 85.17959183673469, "grad_norm": 25.07941436767578, "learning_rate": 7.436734693877552e-06, "loss": 0.3597, "step": 20869 }, { "epoch": 85.18367346938776, "grad_norm": 43.76320266723633, "learning_rate": 7.43469387755102e-06, "loss": 0.8453, "step": 20870 }, { "epoch": 85.18775510204081, "grad_norm": 30.387950897216797, "learning_rate": 7.43265306122449e-06, "loss": 1.1623, "step": 20871 }, { "epoch": 85.19183673469388, "grad_norm": 43.01215744018555, "learning_rate": 7.4306122448979595e-06, "loss": 0.8862, "step": 20872 }, { "epoch": 85.19591836734693, "grad_norm": 19.732540130615234, "learning_rate": 7.428571428571429e-06, "loss": 0.1355, "step": 20873 }, { "epoch": 85.2, "grad_norm": 34.44249725341797, "learning_rate": 7.426530612244899e-06, "loss": 0.3447, "step": 20874 }, { "epoch": 85.20408163265306, "grad_norm": 41.68623733520508, "learning_rate": 7.424489795918367e-06, "loss": 0.7141, "step": 20875 }, { "epoch": 85.20816326530613, "grad_norm": 25.720178604125977, "learning_rate": 7.4224489795918365e-06, "loss": 1.4409, "step": 20876 }, { "epoch": 85.21224489795918, "grad_norm": 34.98122024536133, "learning_rate": 7.420408163265306e-06, "loss": 0.6494, "step": 20877 }, { "epoch": 85.21632653061225, "grad_norm": 30.090274810791016, "learning_rate": 7.418367346938776e-06, "loss": 0.2454, "step": 20878 }, { "epoch": 85.2204081632653, "grad_norm": 26.932586669921875, "learning_rate": 7.4163265306122455e-06, "loss": 1.5348, "step": 20879 }, { "epoch": 85.22448979591837, "grad_norm": 20.177391052246094, "learning_rate": 7.414285714285715e-06, "loss": 2.0845, "step": 20880 }, { "epoch": 85.22857142857143, "grad_norm": 35.10419845581055, "learning_rate": 7.412244897959183e-06, "loss": 1.2689, "step": 20881 }, { "epoch": 85.2326530612245, "grad_norm": 13.337406158447266, "learning_rate": 7.410204081632653e-06, "loss": 0.1346, "step": 20882 }, { "epoch": 85.23673469387755, "grad_norm": 27.570791244506836, "learning_rate": 7.4081632653061225e-06, "loss": 0.7055, "step": 20883 }, { "epoch": 85.2408163265306, "grad_norm": 40.57978820800781, "learning_rate": 7.406122448979592e-06, "loss": 1.016, "step": 20884 }, { "epoch": 85.24489795918367, "grad_norm": 27.125402450561523, "learning_rate": 7.404081632653062e-06, "loss": 1.0535, "step": 20885 }, { "epoch": 85.24897959183673, "grad_norm": 10.44369888305664, "learning_rate": 7.402040816326531e-06, "loss": 0.0813, "step": 20886 }, { "epoch": 85.2530612244898, "grad_norm": 9.14005184173584, "learning_rate": 7.4e-06, "loss": 2.9878, "step": 20887 }, { "epoch": 85.25714285714285, "grad_norm": 38.68473434448242, "learning_rate": 7.39795918367347e-06, "loss": 0.6369, "step": 20888 }, { "epoch": 85.26122448979592, "grad_norm": 18.568227767944336, "learning_rate": 7.39591836734694e-06, "loss": 0.1638, "step": 20889 }, { "epoch": 85.26530612244898, "grad_norm": 1.4582338333129883, "learning_rate": 7.393877551020409e-06, "loss": 0.0271, "step": 20890 }, { "epoch": 85.26938775510204, "grad_norm": 37.66617965698242, "learning_rate": 7.391836734693879e-06, "loss": 0.8006, "step": 20891 }, { "epoch": 85.2734693877551, "grad_norm": 27.99547576904297, "learning_rate": 7.389795918367347e-06, "loss": 0.454, "step": 20892 }, { "epoch": 85.27755102040817, "grad_norm": 31.218425750732422, "learning_rate": 7.387755102040817e-06, "loss": 0.6713, "step": 20893 }, { "epoch": 85.28163265306122, "grad_norm": 31.826847076416016, "learning_rate": 7.385714285714286e-06, "loss": 0.5481, "step": 20894 }, { "epoch": 85.28571428571429, "grad_norm": 34.75954818725586, "learning_rate": 7.383673469387756e-06, "loss": 0.7479, "step": 20895 }, { "epoch": 85.28979591836735, "grad_norm": 24.744321823120117, "learning_rate": 7.381632653061226e-06, "loss": 0.2251, "step": 20896 }, { "epoch": 85.29387755102042, "grad_norm": 37.99126434326172, "learning_rate": 7.379591836734694e-06, "loss": 1.0882, "step": 20897 }, { "epoch": 85.29795918367347, "grad_norm": 37.87036895751953, "learning_rate": 7.377551020408163e-06, "loss": 0.6062, "step": 20898 }, { "epoch": 85.30204081632652, "grad_norm": 35.48414611816406, "learning_rate": 7.375510204081633e-06, "loss": 0.933, "step": 20899 }, { "epoch": 85.3061224489796, "grad_norm": 32.17928695678711, "learning_rate": 7.373469387755103e-06, "loss": 1.2153, "step": 20900 }, { "epoch": 85.31020408163265, "grad_norm": 35.18752670288086, "learning_rate": 7.371428571428572e-06, "loss": 1.1995, "step": 20901 }, { "epoch": 85.31428571428572, "grad_norm": 47.04872131347656, "learning_rate": 7.369387755102042e-06, "loss": 0.3902, "step": 20902 }, { "epoch": 85.31836734693877, "grad_norm": 27.92269515991211, "learning_rate": 7.36734693877551e-06, "loss": 1.6432, "step": 20903 }, { "epoch": 85.32244897959184, "grad_norm": 27.13498306274414, "learning_rate": 7.3653061224489796e-06, "loss": 0.3267, "step": 20904 }, { "epoch": 85.3265306122449, "grad_norm": 5.624587535858154, "learning_rate": 7.363265306122449e-06, "loss": 0.0569, "step": 20905 }, { "epoch": 85.33061224489796, "grad_norm": 28.20528221130371, "learning_rate": 7.361224489795919e-06, "loss": 1.2065, "step": 20906 }, { "epoch": 85.33469387755102, "grad_norm": 36.374229431152344, "learning_rate": 7.3591836734693886e-06, "loss": 0.5628, "step": 20907 }, { "epoch": 85.33877551020409, "grad_norm": 37.755821228027344, "learning_rate": 7.3571428571428565e-06, "loss": 0.3713, "step": 20908 }, { "epoch": 85.34285714285714, "grad_norm": 36.264190673828125, "learning_rate": 7.355102040816326e-06, "loss": 1.3124, "step": 20909 }, { "epoch": 85.34693877551021, "grad_norm": 9.047924041748047, "learning_rate": 7.353061224489796e-06, "loss": 0.0862, "step": 20910 }, { "epoch": 85.35102040816327, "grad_norm": 37.92521667480469, "learning_rate": 7.3510204081632655e-06, "loss": 0.6356, "step": 20911 }, { "epoch": 85.35510204081632, "grad_norm": 27.501943588256836, "learning_rate": 7.348979591836735e-06, "loss": 1.5193, "step": 20912 }, { "epoch": 85.35918367346939, "grad_norm": 30.357213973999023, "learning_rate": 7.346938775510205e-06, "loss": 1.2834, "step": 20913 }, { "epoch": 85.36326530612244, "grad_norm": 28.773515701293945, "learning_rate": 7.344897959183674e-06, "loss": 0.2548, "step": 20914 }, { "epoch": 85.36734693877551, "grad_norm": 33.38176727294922, "learning_rate": 7.342857142857143e-06, "loss": 1.2225, "step": 20915 }, { "epoch": 85.37142857142857, "grad_norm": 32.250823974609375, "learning_rate": 7.340816326530613e-06, "loss": 1.3548, "step": 20916 }, { "epoch": 85.37551020408164, "grad_norm": 51.86942672729492, "learning_rate": 7.338775510204083e-06, "loss": 1.2383, "step": 20917 }, { "epoch": 85.37959183673469, "grad_norm": 32.47605514526367, "learning_rate": 7.3367346938775515e-06, "loss": 0.67, "step": 20918 }, { "epoch": 85.38367346938776, "grad_norm": 35.68844223022461, "learning_rate": 7.33469387755102e-06, "loss": 0.3542, "step": 20919 }, { "epoch": 85.38775510204081, "grad_norm": 37.94974136352539, "learning_rate": 7.33265306122449e-06, "loss": 0.5639, "step": 20920 }, { "epoch": 85.39183673469388, "grad_norm": 39.905517578125, "learning_rate": 7.33061224489796e-06, "loss": 0.4108, "step": 20921 }, { "epoch": 85.39591836734694, "grad_norm": 36.55565643310547, "learning_rate": 7.328571428571429e-06, "loss": 0.3943, "step": 20922 }, { "epoch": 85.4, "grad_norm": 28.92041015625, "learning_rate": 7.326530612244899e-06, "loss": 1.2284, "step": 20923 }, { "epoch": 85.40408163265306, "grad_norm": 28.21266746520996, "learning_rate": 7.324489795918369e-06, "loss": 1.3103, "step": 20924 }, { "epoch": 85.40816326530613, "grad_norm": 20.195438385009766, "learning_rate": 7.322448979591837e-06, "loss": 2.3303, "step": 20925 }, { "epoch": 85.41224489795918, "grad_norm": 20.821741104125977, "learning_rate": 7.320408163265306e-06, "loss": 0.162, "step": 20926 }, { "epoch": 85.41632653061224, "grad_norm": 33.31538009643555, "learning_rate": 7.318367346938776e-06, "loss": 0.9374, "step": 20927 }, { "epoch": 85.42040816326531, "grad_norm": 48.11977767944336, "learning_rate": 7.316326530612246e-06, "loss": 0.5668, "step": 20928 }, { "epoch": 85.42448979591836, "grad_norm": 38.848731994628906, "learning_rate": 7.314285714285715e-06, "loss": 0.3601, "step": 20929 }, { "epoch": 85.42857142857143, "grad_norm": 16.595129013061523, "learning_rate": 7.312244897959183e-06, "loss": 0.0779, "step": 20930 }, { "epoch": 85.43265306122449, "grad_norm": 18.818145751953125, "learning_rate": 7.310204081632653e-06, "loss": 1.9998, "step": 20931 }, { "epoch": 85.43673469387755, "grad_norm": 44.406219482421875, "learning_rate": 7.308163265306123e-06, "loss": 0.834, "step": 20932 }, { "epoch": 85.44081632653061, "grad_norm": 33.5672607421875, "learning_rate": 7.306122448979592e-06, "loss": 1.2177, "step": 20933 }, { "epoch": 85.44489795918368, "grad_norm": 25.43504524230957, "learning_rate": 7.304081632653062e-06, "loss": 1.8849, "step": 20934 }, { "epoch": 85.44897959183673, "grad_norm": 30.773773193359375, "learning_rate": 7.302040816326532e-06, "loss": 0.9563, "step": 20935 }, { "epoch": 85.4530612244898, "grad_norm": 7.264255046844482, "learning_rate": 7.2999999999999996e-06, "loss": 0.0791, "step": 20936 }, { "epoch": 85.45714285714286, "grad_norm": 28.11244773864746, "learning_rate": 7.297959183673469e-06, "loss": 1.7551, "step": 20937 }, { "epoch": 85.46122448979592, "grad_norm": 21.007360458374023, "learning_rate": 7.295918367346939e-06, "loss": 0.248, "step": 20938 }, { "epoch": 85.46530612244898, "grad_norm": 28.116785049438477, "learning_rate": 7.2938775510204086e-06, "loss": 1.518, "step": 20939 }, { "epoch": 85.46938775510205, "grad_norm": 25.624055862426758, "learning_rate": 7.291836734693878e-06, "loss": 1.9024, "step": 20940 }, { "epoch": 85.4734693877551, "grad_norm": 30.15204620361328, "learning_rate": 7.289795918367347e-06, "loss": 1.3904, "step": 20941 }, { "epoch": 85.47755102040816, "grad_norm": 29.969085693359375, "learning_rate": 7.287755102040817e-06, "loss": 1.4151, "step": 20942 }, { "epoch": 85.48163265306123, "grad_norm": 34.27853012084961, "learning_rate": 7.285714285714286e-06, "loss": 1.4141, "step": 20943 }, { "epoch": 85.48571428571428, "grad_norm": 40.9441032409668, "learning_rate": 7.283673469387756e-06, "loss": 1.0941, "step": 20944 }, { "epoch": 85.48979591836735, "grad_norm": 33.792945861816406, "learning_rate": 7.281632653061225e-06, "loss": 1.0663, "step": 20945 }, { "epoch": 85.4938775510204, "grad_norm": 31.334558486938477, "learning_rate": 7.2795918367346945e-06, "loss": 0.3087, "step": 20946 }, { "epoch": 85.49795918367347, "grad_norm": 32.89817428588867, "learning_rate": 7.277551020408163e-06, "loss": 1.0284, "step": 20947 }, { "epoch": 85.50204081632653, "grad_norm": 22.610685348510742, "learning_rate": 7.275510204081633e-06, "loss": 0.266, "step": 20948 }, { "epoch": 85.5061224489796, "grad_norm": 34.360713958740234, "learning_rate": 7.273469387755103e-06, "loss": 0.2554, "step": 20949 }, { "epoch": 85.51020408163265, "grad_norm": 27.276931762695312, "learning_rate": 7.271428571428572e-06, "loss": 1.3168, "step": 20950 }, { "epoch": 85.51428571428572, "grad_norm": 28.235349655151367, "learning_rate": 7.269387755102042e-06, "loss": 1.2068, "step": 20951 }, { "epoch": 85.51836734693877, "grad_norm": 32.64017868041992, "learning_rate": 7.26734693877551e-06, "loss": 1.6433, "step": 20952 }, { "epoch": 85.52244897959184, "grad_norm": 24.080842971801758, "learning_rate": 7.26530612244898e-06, "loss": 0.2279, "step": 20953 }, { "epoch": 85.5265306122449, "grad_norm": 31.899614334106445, "learning_rate": 7.263265306122449e-06, "loss": 0.3551, "step": 20954 }, { "epoch": 85.53061224489795, "grad_norm": 49.02216720581055, "learning_rate": 7.261224489795919e-06, "loss": 0.3877, "step": 20955 }, { "epoch": 85.53469387755102, "grad_norm": 32.08637237548828, "learning_rate": 7.259183673469389e-06, "loss": 0.2762, "step": 20956 }, { "epoch": 85.53877551020408, "grad_norm": 22.993499755859375, "learning_rate": 7.257142857142857e-06, "loss": 1.8102, "step": 20957 }, { "epoch": 85.54285714285714, "grad_norm": 34.550113677978516, "learning_rate": 7.255102040816326e-06, "loss": 0.7759, "step": 20958 }, { "epoch": 85.5469387755102, "grad_norm": 34.75956726074219, "learning_rate": 7.253061224489796e-06, "loss": 1.1824, "step": 20959 }, { "epoch": 85.55102040816327, "grad_norm": 28.268592834472656, "learning_rate": 7.251020408163266e-06, "loss": 1.3346, "step": 20960 }, { "epoch": 85.55510204081632, "grad_norm": 30.561481475830078, "learning_rate": 7.248979591836735e-06, "loss": 0.4073, "step": 20961 }, { "epoch": 85.55918367346939, "grad_norm": 37.81727981567383, "learning_rate": 7.246938775510205e-06, "loss": 0.7371, "step": 20962 }, { "epoch": 85.56326530612245, "grad_norm": 27.500667572021484, "learning_rate": 7.244897959183673e-06, "loss": 1.8566, "step": 20963 }, { "epoch": 85.56734693877551, "grad_norm": 32.39986038208008, "learning_rate": 7.242857142857143e-06, "loss": 0.7942, "step": 20964 }, { "epoch": 85.57142857142857, "grad_norm": 21.17242431640625, "learning_rate": 7.240816326530612e-06, "loss": 1.9517, "step": 20965 }, { "epoch": 85.57551020408164, "grad_norm": 43.79890823364258, "learning_rate": 7.238775510204082e-06, "loss": 1.4685, "step": 20966 }, { "epoch": 85.57959183673469, "grad_norm": 34.72604751586914, "learning_rate": 7.236734693877552e-06, "loss": 0.3103, "step": 20967 }, { "epoch": 85.58367346938776, "grad_norm": 21.19736671447754, "learning_rate": 7.2346938775510204e-06, "loss": 0.2842, "step": 20968 }, { "epoch": 85.58775510204082, "grad_norm": 33.27555847167969, "learning_rate": 7.23265306122449e-06, "loss": 0.3839, "step": 20969 }, { "epoch": 85.59183673469387, "grad_norm": 35.062103271484375, "learning_rate": 7.23061224489796e-06, "loss": 0.9241, "step": 20970 }, { "epoch": 85.59591836734694, "grad_norm": 38.0280876159668, "learning_rate": 7.228571428571429e-06, "loss": 1.2136, "step": 20971 }, { "epoch": 85.6, "grad_norm": 26.88721466064453, "learning_rate": 7.226530612244898e-06, "loss": 1.4917, "step": 20972 }, { "epoch": 85.60408163265306, "grad_norm": 34.31241989135742, "learning_rate": 7.224489795918368e-06, "loss": 0.2358, "step": 20973 }, { "epoch": 85.60816326530612, "grad_norm": 33.094398498535156, "learning_rate": 7.222448979591837e-06, "loss": 1.3658, "step": 20974 }, { "epoch": 85.61224489795919, "grad_norm": 25.121639251708984, "learning_rate": 7.220408163265306e-06, "loss": 0.2416, "step": 20975 }, { "epoch": 85.61632653061224, "grad_norm": 16.89862823486328, "learning_rate": 7.218367346938776e-06, "loss": 0.1385, "step": 20976 }, { "epoch": 85.62040816326531, "grad_norm": 23.132984161376953, "learning_rate": 7.216326530612246e-06, "loss": 1.4404, "step": 20977 }, { "epoch": 85.62448979591836, "grad_norm": 38.003116607666016, "learning_rate": 7.214285714285715e-06, "loss": 0.5071, "step": 20978 }, { "epoch": 85.62857142857143, "grad_norm": 22.293350219726562, "learning_rate": 7.212244897959183e-06, "loss": 0.2188, "step": 20979 }, { "epoch": 85.63265306122449, "grad_norm": 26.588939666748047, "learning_rate": 7.210204081632653e-06, "loss": 1.1905, "step": 20980 }, { "epoch": 85.63673469387756, "grad_norm": 22.275009155273438, "learning_rate": 7.208163265306123e-06, "loss": 1.578, "step": 20981 }, { "epoch": 85.64081632653061, "grad_norm": 23.35000228881836, "learning_rate": 7.206122448979592e-06, "loss": 2.256, "step": 20982 }, { "epoch": 85.64489795918368, "grad_norm": 35.24489212036133, "learning_rate": 7.204081632653062e-06, "loss": 1.0592, "step": 20983 }, { "epoch": 85.64897959183673, "grad_norm": 33.160606384277344, "learning_rate": 7.202040816326532e-06, "loss": 0.8037, "step": 20984 }, { "epoch": 85.65306122448979, "grad_norm": 25.11735725402832, "learning_rate": 7.2e-06, "loss": 2.0647, "step": 20985 }, { "epoch": 85.65714285714286, "grad_norm": 38.34679412841797, "learning_rate": 7.197959183673469e-06, "loss": 0.5028, "step": 20986 }, { "epoch": 85.66122448979591, "grad_norm": 25.053619384765625, "learning_rate": 7.195918367346939e-06, "loss": 1.4478, "step": 20987 }, { "epoch": 85.66530612244898, "grad_norm": 35.966102600097656, "learning_rate": 7.193877551020409e-06, "loss": 1.3556, "step": 20988 }, { "epoch": 85.66938775510204, "grad_norm": 40.09952926635742, "learning_rate": 7.191836734693878e-06, "loss": 1.2776, "step": 20989 }, { "epoch": 85.6734693877551, "grad_norm": 30.775941848754883, "learning_rate": 7.189795918367346e-06, "loss": 1.4063, "step": 20990 }, { "epoch": 85.67755102040816, "grad_norm": 21.59468650817871, "learning_rate": 7.187755102040816e-06, "loss": 2.0474, "step": 20991 }, { "epoch": 85.68163265306123, "grad_norm": 36.79556655883789, "learning_rate": 7.185714285714286e-06, "loss": 0.9578, "step": 20992 }, { "epoch": 85.68571428571428, "grad_norm": 46.21316909790039, "learning_rate": 7.183673469387755e-06, "loss": 0.6088, "step": 20993 }, { "epoch": 85.68979591836735, "grad_norm": 39.9120979309082, "learning_rate": 7.181632653061225e-06, "loss": 0.8851, "step": 20994 }, { "epoch": 85.6938775510204, "grad_norm": 39.71044158935547, "learning_rate": 7.179591836734695e-06, "loss": 0.6365, "step": 20995 }, { "epoch": 85.69795918367348, "grad_norm": 31.012611389160156, "learning_rate": 7.1775510204081635e-06, "loss": 0.3109, "step": 20996 }, { "epoch": 85.70204081632653, "grad_norm": 18.290040969848633, "learning_rate": 7.175510204081633e-06, "loss": 0.1236, "step": 20997 }, { "epoch": 85.70612244897958, "grad_norm": 37.903873443603516, "learning_rate": 7.173469387755102e-06, "loss": 0.5741, "step": 20998 }, { "epoch": 85.71020408163265, "grad_norm": 30.422046661376953, "learning_rate": 7.171428571428572e-06, "loss": 0.4024, "step": 20999 }, { "epoch": 85.71428571428571, "grad_norm": 33.78848648071289, "learning_rate": 7.169387755102041e-06, "loss": 1.3118, "step": 21000 }, { "epoch": 85.71836734693878, "grad_norm": 32.32213592529297, "learning_rate": 7.16734693877551e-06, "loss": 1.1872, "step": 21001 }, { "epoch": 85.72244897959183, "grad_norm": 26.08536720275879, "learning_rate": 7.16530612244898e-06, "loss": 1.5732, "step": 21002 }, { "epoch": 85.7265306122449, "grad_norm": 27.66866683959961, "learning_rate": 7.1632653061224494e-06, "loss": 1.4836, "step": 21003 }, { "epoch": 85.73061224489796, "grad_norm": 29.539247512817383, "learning_rate": 7.161224489795919e-06, "loss": 0.968, "step": 21004 }, { "epoch": 85.73469387755102, "grad_norm": 32.68415832519531, "learning_rate": 7.159183673469389e-06, "loss": 0.932, "step": 21005 }, { "epoch": 85.73877551020408, "grad_norm": 22.99568748474121, "learning_rate": 7.1571428571428584e-06, "loss": 1.6764, "step": 21006 }, { "epoch": 85.74285714285715, "grad_norm": 30.078123092651367, "learning_rate": 7.155102040816326e-06, "loss": 0.5183, "step": 21007 }, { "epoch": 85.7469387755102, "grad_norm": 33.379638671875, "learning_rate": 7.153061224489796e-06, "loss": 1.01, "step": 21008 }, { "epoch": 85.75102040816327, "grad_norm": 36.75910186767578, "learning_rate": 7.151020408163266e-06, "loss": 0.295, "step": 21009 }, { "epoch": 85.75510204081633, "grad_norm": 25.545339584350586, "learning_rate": 7.148979591836735e-06, "loss": 0.2356, "step": 21010 }, { "epoch": 85.7591836734694, "grad_norm": 34.197574615478516, "learning_rate": 7.146938775510205e-06, "loss": 0.7858, "step": 21011 }, { "epoch": 85.76326530612245, "grad_norm": 26.605043411254883, "learning_rate": 7.144897959183673e-06, "loss": 0.4315, "step": 21012 }, { "epoch": 85.7673469387755, "grad_norm": 33.010009765625, "learning_rate": 7.142857142857143e-06, "loss": 1.3433, "step": 21013 }, { "epoch": 85.77142857142857, "grad_norm": 32.74313735961914, "learning_rate": 7.140816326530612e-06, "loss": 0.7094, "step": 21014 }, { "epoch": 85.77551020408163, "grad_norm": 27.273588180541992, "learning_rate": 7.138775510204082e-06, "loss": 1.5799, "step": 21015 }, { "epoch": 85.7795918367347, "grad_norm": 31.278949737548828, "learning_rate": 7.136734693877552e-06, "loss": 0.2181, "step": 21016 }, { "epoch": 85.78367346938775, "grad_norm": 42.410911560058594, "learning_rate": 7.134693877551021e-06, "loss": 0.4994, "step": 21017 }, { "epoch": 85.78775510204082, "grad_norm": 24.489843368530273, "learning_rate": 7.132653061224489e-06, "loss": 1.9482, "step": 21018 }, { "epoch": 85.79183673469387, "grad_norm": 40.294246673583984, "learning_rate": 7.130612244897959e-06, "loss": 0.628, "step": 21019 }, { "epoch": 85.79591836734694, "grad_norm": 26.60700798034668, "learning_rate": 7.128571428571429e-06, "loss": 1.6095, "step": 21020 }, { "epoch": 85.8, "grad_norm": 30.578886032104492, "learning_rate": 7.126530612244898e-06, "loss": 1.4031, "step": 21021 }, { "epoch": 85.80408163265307, "grad_norm": 36.23394775390625, "learning_rate": 7.124489795918368e-06, "loss": 0.6548, "step": 21022 }, { "epoch": 85.80816326530612, "grad_norm": 28.70444679260254, "learning_rate": 7.122448979591837e-06, "loss": 0.5665, "step": 21023 }, { "epoch": 85.81224489795919, "grad_norm": 25.855348587036133, "learning_rate": 7.120408163265306e-06, "loss": 1.557, "step": 21024 }, { "epoch": 85.81632653061224, "grad_norm": 36.567955017089844, "learning_rate": 7.118367346938775e-06, "loss": 0.3573, "step": 21025 }, { "epoch": 85.82040816326531, "grad_norm": 23.947416305541992, "learning_rate": 7.116326530612245e-06, "loss": 1.9328, "step": 21026 }, { "epoch": 85.82448979591837, "grad_norm": 33.79168701171875, "learning_rate": 7.114285714285715e-06, "loss": 0.2728, "step": 21027 }, { "epoch": 85.82857142857142, "grad_norm": 29.98346710205078, "learning_rate": 7.112244897959184e-06, "loss": 0.3331, "step": 21028 }, { "epoch": 85.83265306122449, "grad_norm": 30.86415672302246, "learning_rate": 7.110204081632653e-06, "loss": 1.4655, "step": 21029 }, { "epoch": 85.83673469387755, "grad_norm": 33.10871124267578, "learning_rate": 7.108163265306123e-06, "loss": 0.9147, "step": 21030 }, { "epoch": 85.84081632653061, "grad_norm": 26.717559814453125, "learning_rate": 7.1061224489795925e-06, "loss": 0.3543, "step": 21031 }, { "epoch": 85.84489795918367, "grad_norm": 31.576229095458984, "learning_rate": 7.104081632653062e-06, "loss": 0.7678, "step": 21032 }, { "epoch": 85.84897959183674, "grad_norm": 29.51835060119629, "learning_rate": 7.102040816326532e-06, "loss": 1.0399, "step": 21033 }, { "epoch": 85.85306122448979, "grad_norm": 29.209604263305664, "learning_rate": 7.1e-06, "loss": 0.7116, "step": 21034 }, { "epoch": 85.85714285714286, "grad_norm": 36.86174011230469, "learning_rate": 7.0979591836734695e-06, "loss": 0.6793, "step": 21035 }, { "epoch": 85.86122448979592, "grad_norm": 36.76937484741211, "learning_rate": 7.095918367346939e-06, "loss": 0.6361, "step": 21036 }, { "epoch": 85.86530612244898, "grad_norm": 22.934778213500977, "learning_rate": 7.093877551020409e-06, "loss": 1.4911, "step": 21037 }, { "epoch": 85.86938775510204, "grad_norm": 26.545211791992188, "learning_rate": 7.0918367346938785e-06, "loss": 0.6057, "step": 21038 }, { "epoch": 85.87346938775511, "grad_norm": 29.808544158935547, "learning_rate": 7.089795918367348e-06, "loss": 0.316, "step": 21039 }, { "epoch": 85.87755102040816, "grad_norm": 27.731836318969727, "learning_rate": 7.087755102040816e-06, "loss": 1.4375, "step": 21040 }, { "epoch": 85.88163265306123, "grad_norm": 17.55417251586914, "learning_rate": 7.085714285714286e-06, "loss": 0.1234, "step": 21041 }, { "epoch": 85.88571428571429, "grad_norm": 30.01170539855957, "learning_rate": 7.083673469387755e-06, "loss": 0.6884, "step": 21042 }, { "epoch": 85.88979591836734, "grad_norm": 14.12015438079834, "learning_rate": 7.081632653061225e-06, "loss": 0.0989, "step": 21043 }, { "epoch": 85.89387755102041, "grad_norm": 27.091135025024414, "learning_rate": 7.079591836734695e-06, "loss": 1.5209, "step": 21044 }, { "epoch": 85.89795918367346, "grad_norm": 21.684572219848633, "learning_rate": 7.077551020408163e-06, "loss": 1.9363, "step": 21045 }, { "epoch": 85.90204081632653, "grad_norm": 24.314565658569336, "learning_rate": 7.075510204081632e-06, "loss": 1.3859, "step": 21046 }, { "epoch": 85.90612244897959, "grad_norm": 40.57086181640625, "learning_rate": 7.073469387755102e-06, "loss": 1.1872, "step": 21047 }, { "epoch": 85.91020408163266, "grad_norm": 29.835948944091797, "learning_rate": 7.071428571428572e-06, "loss": 1.3158, "step": 21048 }, { "epoch": 85.91428571428571, "grad_norm": 35.93406295776367, "learning_rate": 7.069387755102041e-06, "loss": 0.4926, "step": 21049 }, { "epoch": 85.91836734693878, "grad_norm": 29.979494094848633, "learning_rate": 7.067346938775511e-06, "loss": 0.453, "step": 21050 }, { "epoch": 85.92244897959183, "grad_norm": 19.441822052001953, "learning_rate": 7.065306122448979e-06, "loss": 0.1015, "step": 21051 }, { "epoch": 85.9265306122449, "grad_norm": 28.297292709350586, "learning_rate": 7.063265306122449e-06, "loss": 1.4682, "step": 21052 }, { "epoch": 85.93061224489796, "grad_norm": 28.811363220214844, "learning_rate": 7.061224489795918e-06, "loss": 1.1962, "step": 21053 }, { "epoch": 85.93469387755103, "grad_norm": 25.74443817138672, "learning_rate": 7.059183673469388e-06, "loss": 1.4081, "step": 21054 }, { "epoch": 85.93877551020408, "grad_norm": 42.790279388427734, "learning_rate": 7.057142857142858e-06, "loss": 0.8976, "step": 21055 }, { "epoch": 85.94285714285714, "grad_norm": 25.435291290283203, "learning_rate": 7.0551020408163265e-06, "loss": 1.2847, "step": 21056 }, { "epoch": 85.9469387755102, "grad_norm": 37.71731948852539, "learning_rate": 7.053061224489796e-06, "loss": 0.367, "step": 21057 }, { "epoch": 85.95102040816326, "grad_norm": 44.24468231201172, "learning_rate": 7.051020408163266e-06, "loss": 0.7452, "step": 21058 }, { "epoch": 85.95510204081633, "grad_norm": 32.595149993896484, "learning_rate": 7.0489795918367355e-06, "loss": 0.8188, "step": 21059 }, { "epoch": 85.95918367346938, "grad_norm": 26.881561279296875, "learning_rate": 7.046938775510205e-06, "loss": 0.7113, "step": 21060 }, { "epoch": 85.96326530612245, "grad_norm": 23.35064697265625, "learning_rate": 7.044897959183675e-06, "loss": 1.433, "step": 21061 }, { "epoch": 85.9673469387755, "grad_norm": 33.946205139160156, "learning_rate": 7.042857142857143e-06, "loss": 0.8087, "step": 21062 }, { "epoch": 85.97142857142858, "grad_norm": 34.77030563354492, "learning_rate": 7.0408163265306125e-06, "loss": 0.9465, "step": 21063 }, { "epoch": 85.97551020408163, "grad_norm": 39.314735412597656, "learning_rate": 7.038775510204082e-06, "loss": 1.1029, "step": 21064 }, { "epoch": 85.9795918367347, "grad_norm": 29.38322639465332, "learning_rate": 7.036734693877552e-06, "loss": 1.5442, "step": 21065 }, { "epoch": 85.98367346938775, "grad_norm": 30.94495964050293, "learning_rate": 7.0346938775510215e-06, "loss": 0.3206, "step": 21066 }, { "epoch": 85.98775510204082, "grad_norm": 41.06601333618164, "learning_rate": 7.0326530612244895e-06, "loss": 0.9436, "step": 21067 }, { "epoch": 85.99183673469388, "grad_norm": 26.277429580688477, "learning_rate": 7.030612244897959e-06, "loss": 0.3757, "step": 21068 }, { "epoch": 85.99591836734695, "grad_norm": 26.023706436157227, "learning_rate": 7.028571428571429e-06, "loss": 1.5585, "step": 21069 }, { "epoch": 86.0, "grad_norm": 29.07086753845215, "learning_rate": 7.0265306122448985e-06, "loss": 0.2981, "step": 21070 }, { "epoch": 86.00408163265305, "grad_norm": 25.180234909057617, "learning_rate": 7.024489795918368e-06, "loss": 1.1389, "step": 21071 }, { "epoch": 86.00816326530612, "grad_norm": 24.529666900634766, "learning_rate": 7.022448979591838e-06, "loss": 2.2221, "step": 21072 }, { "epoch": 86.01224489795918, "grad_norm": 20.325437545776367, "learning_rate": 7.020408163265306e-06, "loss": 1.9628, "step": 21073 }, { "epoch": 86.01632653061225, "grad_norm": 22.50034523010254, "learning_rate": 7.0183673469387754e-06, "loss": 2.0099, "step": 21074 }, { "epoch": 86.0204081632653, "grad_norm": 30.580707550048828, "learning_rate": 7.016326530612245e-06, "loss": 0.4554, "step": 21075 }, { "epoch": 86.02448979591837, "grad_norm": 25.104671478271484, "learning_rate": 7.014285714285715e-06, "loss": 1.8217, "step": 21076 }, { "epoch": 86.02857142857142, "grad_norm": 10.584299087524414, "learning_rate": 7.0122448979591844e-06, "loss": 0.0761, "step": 21077 }, { "epoch": 86.0326530612245, "grad_norm": 33.178558349609375, "learning_rate": 7.010204081632652e-06, "loss": 0.8008, "step": 21078 }, { "epoch": 86.03673469387755, "grad_norm": 30.776268005371094, "learning_rate": 7.008163265306122e-06, "loss": 1.1698, "step": 21079 }, { "epoch": 86.04081632653062, "grad_norm": 25.25621223449707, "learning_rate": 7.006122448979592e-06, "loss": 1.439, "step": 21080 }, { "epoch": 86.04489795918367, "grad_norm": 26.89955711364746, "learning_rate": 7.004081632653061e-06, "loss": 0.3691, "step": 21081 }, { "epoch": 86.04897959183674, "grad_norm": 26.93796157836914, "learning_rate": 7.002040816326531e-06, "loss": 1.1086, "step": 21082 }, { "epoch": 86.0530612244898, "grad_norm": 44.336517333984375, "learning_rate": 7.000000000000001e-06, "loss": 0.8468, "step": 21083 }, { "epoch": 86.05714285714286, "grad_norm": 22.784141540527344, "learning_rate": 6.9979591836734696e-06, "loss": 1.3898, "step": 21084 }, { "epoch": 86.06122448979592, "grad_norm": 22.067886352539062, "learning_rate": 6.995918367346939e-06, "loss": 0.1507, "step": 21085 }, { "epoch": 86.06530612244897, "grad_norm": 33.8648681640625, "learning_rate": 6.993877551020409e-06, "loss": 0.94, "step": 21086 }, { "epoch": 86.06938775510204, "grad_norm": 24.74382972717285, "learning_rate": 6.9918367346938786e-06, "loss": 0.668, "step": 21087 }, { "epoch": 86.0734693877551, "grad_norm": 37.980506896972656, "learning_rate": 6.989795918367348e-06, "loss": 0.6848, "step": 21088 }, { "epoch": 86.07755102040817, "grad_norm": 48.37366485595703, "learning_rate": 6.987755102040816e-06, "loss": 0.6424, "step": 21089 }, { "epoch": 86.08163265306122, "grad_norm": 31.964359283447266, "learning_rate": 6.985714285714286e-06, "loss": 0.6619, "step": 21090 }, { "epoch": 86.08571428571429, "grad_norm": 27.538259506225586, "learning_rate": 6.9836734693877555e-06, "loss": 1.0452, "step": 21091 }, { "epoch": 86.08979591836734, "grad_norm": 19.50657844543457, "learning_rate": 6.981632653061225e-06, "loss": 1.8361, "step": 21092 }, { "epoch": 86.09387755102041, "grad_norm": 25.61452865600586, "learning_rate": 6.979591836734695e-06, "loss": 0.9842, "step": 21093 }, { "epoch": 86.09795918367347, "grad_norm": 34.04849624633789, "learning_rate": 6.9775510204081645e-06, "loss": 1.2764, "step": 21094 }, { "epoch": 86.10204081632654, "grad_norm": 25.61739730834961, "learning_rate": 6.9755102040816325e-06, "loss": 0.2669, "step": 21095 }, { "epoch": 86.10612244897959, "grad_norm": 32.110557556152344, "learning_rate": 6.973469387755102e-06, "loss": 0.3647, "step": 21096 }, { "epoch": 86.11020408163266, "grad_norm": 45.22494888305664, "learning_rate": 6.971428571428572e-06, "loss": 0.5381, "step": 21097 }, { "epoch": 86.11428571428571, "grad_norm": 35.404205322265625, "learning_rate": 6.9693877551020415e-06, "loss": 0.7615, "step": 21098 }, { "epoch": 86.11836734693877, "grad_norm": 32.5792121887207, "learning_rate": 6.967346938775511e-06, "loss": 0.3339, "step": 21099 }, { "epoch": 86.12244897959184, "grad_norm": 24.672428131103516, "learning_rate": 6.965306122448979e-06, "loss": 1.3382, "step": 21100 }, { "epoch": 86.12653061224489, "grad_norm": 42.773353576660156, "learning_rate": 6.963265306122449e-06, "loss": 1.2936, "step": 21101 }, { "epoch": 86.13061224489796, "grad_norm": 35.61481857299805, "learning_rate": 6.9612244897959185e-06, "loss": 0.8587, "step": 21102 }, { "epoch": 86.13469387755102, "grad_norm": 33.907012939453125, "learning_rate": 6.959183673469388e-06, "loss": 0.3028, "step": 21103 }, { "epoch": 86.13877551020408, "grad_norm": 37.82224655151367, "learning_rate": 6.957142857142858e-06, "loss": 0.6925, "step": 21104 }, { "epoch": 86.14285714285714, "grad_norm": 24.562576293945312, "learning_rate": 6.9551020408163275e-06, "loss": 1.4424, "step": 21105 }, { "epoch": 86.14693877551021, "grad_norm": 35.18488693237305, "learning_rate": 6.9530612244897955e-06, "loss": 1.2133, "step": 21106 }, { "epoch": 86.15102040816326, "grad_norm": 34.42384719848633, "learning_rate": 6.951020408163265e-06, "loss": 0.7829, "step": 21107 }, { "epoch": 86.15510204081633, "grad_norm": 39.10007095336914, "learning_rate": 6.948979591836735e-06, "loss": 0.928, "step": 21108 }, { "epoch": 86.15918367346939, "grad_norm": 17.22317886352539, "learning_rate": 6.9469387755102045e-06, "loss": 0.113, "step": 21109 }, { "epoch": 86.16326530612245, "grad_norm": 28.470998764038086, "learning_rate": 6.944897959183674e-06, "loss": 0.2457, "step": 21110 }, { "epoch": 86.16734693877551, "grad_norm": 35.70016098022461, "learning_rate": 6.942857142857143e-06, "loss": 1.0085, "step": 21111 }, { "epoch": 86.17142857142858, "grad_norm": 21.139005661010742, "learning_rate": 6.940816326530613e-06, "loss": 1.5735, "step": 21112 }, { "epoch": 86.17551020408163, "grad_norm": 38.094886779785156, "learning_rate": 6.938775510204082e-06, "loss": 1.0502, "step": 21113 }, { "epoch": 86.17959183673469, "grad_norm": 10.078545570373535, "learning_rate": 6.936734693877552e-06, "loss": 2.9773, "step": 21114 }, { "epoch": 86.18367346938776, "grad_norm": 27.094825744628906, "learning_rate": 6.934693877551021e-06, "loss": 1.2266, "step": 21115 }, { "epoch": 86.18775510204081, "grad_norm": 37.84044647216797, "learning_rate": 6.93265306122449e-06, "loss": 0.5635, "step": 21116 }, { "epoch": 86.19183673469388, "grad_norm": 28.18417739868164, "learning_rate": 6.930612244897959e-06, "loss": 0.7807, "step": 21117 }, { "epoch": 86.19591836734693, "grad_norm": 26.30794906616211, "learning_rate": 6.928571428571429e-06, "loss": 0.6395, "step": 21118 }, { "epoch": 86.2, "grad_norm": 25.064529418945312, "learning_rate": 6.9265306122448986e-06, "loss": 1.6525, "step": 21119 }, { "epoch": 86.20408163265306, "grad_norm": 32.61547088623047, "learning_rate": 6.924489795918368e-06, "loss": 0.565, "step": 21120 }, { "epoch": 86.20816326530613, "grad_norm": 36.66412353515625, "learning_rate": 6.922448979591838e-06, "loss": 0.7875, "step": 21121 }, { "epoch": 86.21224489795918, "grad_norm": 39.56845474243164, "learning_rate": 6.920408163265306e-06, "loss": 1.3431, "step": 21122 }, { "epoch": 86.21632653061225, "grad_norm": 30.60923194885254, "learning_rate": 6.9183673469387755e-06, "loss": 0.2413, "step": 21123 }, { "epoch": 86.2204081632653, "grad_norm": 41.79567337036133, "learning_rate": 6.916326530612245e-06, "loss": 1.1828, "step": 21124 }, { "epoch": 86.22448979591837, "grad_norm": 28.253509521484375, "learning_rate": 6.914285714285715e-06, "loss": 1.4071, "step": 21125 }, { "epoch": 86.22857142857143, "grad_norm": 17.05617904663086, "learning_rate": 6.9122448979591845e-06, "loss": 2.5576, "step": 21126 }, { "epoch": 86.2326530612245, "grad_norm": 32.402896881103516, "learning_rate": 6.9102040816326525e-06, "loss": 1.0594, "step": 21127 }, { "epoch": 86.23673469387755, "grad_norm": 27.903120040893555, "learning_rate": 6.908163265306122e-06, "loss": 1.1619, "step": 21128 }, { "epoch": 86.2408163265306, "grad_norm": 24.42935562133789, "learning_rate": 6.906122448979592e-06, "loss": 0.3447, "step": 21129 }, { "epoch": 86.24489795918367, "grad_norm": 41.80379104614258, "learning_rate": 6.9040816326530615e-06, "loss": 0.2375, "step": 21130 }, { "epoch": 86.24897959183673, "grad_norm": 24.18461036682129, "learning_rate": 6.902040816326531e-06, "loss": 1.8801, "step": 21131 }, { "epoch": 86.2530612244898, "grad_norm": 36.84278106689453, "learning_rate": 6.900000000000001e-06, "loss": 1.098, "step": 21132 }, { "epoch": 86.25714285714285, "grad_norm": 37.12629699707031, "learning_rate": 6.897959183673469e-06, "loss": 0.3552, "step": 21133 }, { "epoch": 86.26122448979592, "grad_norm": 35.96422576904297, "learning_rate": 6.8959183673469385e-06, "loss": 0.5714, "step": 21134 }, { "epoch": 86.26530612244898, "grad_norm": 27.23065757751465, "learning_rate": 6.893877551020408e-06, "loss": 1.5526, "step": 21135 }, { "epoch": 86.26938775510204, "grad_norm": 23.202226638793945, "learning_rate": 6.891836734693878e-06, "loss": 1.2345, "step": 21136 }, { "epoch": 86.2734693877551, "grad_norm": 27.38322639465332, "learning_rate": 6.8897959183673475e-06, "loss": 0.7101, "step": 21137 }, { "epoch": 86.27755102040817, "grad_norm": 30.826740264892578, "learning_rate": 6.887755102040816e-06, "loss": 0.4665, "step": 21138 }, { "epoch": 86.28163265306122, "grad_norm": 28.409961700439453, "learning_rate": 6.885714285714286e-06, "loss": 1.378, "step": 21139 }, { "epoch": 86.28571428571429, "grad_norm": 28.813100814819336, "learning_rate": 6.883673469387756e-06, "loss": 1.1852, "step": 21140 }, { "epoch": 86.28979591836735, "grad_norm": 30.197874069213867, "learning_rate": 6.881632653061225e-06, "loss": 1.1388, "step": 21141 }, { "epoch": 86.29387755102042, "grad_norm": 26.287933349609375, "learning_rate": 6.879591836734694e-06, "loss": 1.3775, "step": 21142 }, { "epoch": 86.29795918367347, "grad_norm": 26.10788345336914, "learning_rate": 6.877551020408164e-06, "loss": 0.7113, "step": 21143 }, { "epoch": 86.30204081632652, "grad_norm": 26.6236515045166, "learning_rate": 6.875510204081633e-06, "loss": 1.4692, "step": 21144 }, { "epoch": 86.3061224489796, "grad_norm": 32.48633575439453, "learning_rate": 6.873469387755102e-06, "loss": 0.6645, "step": 21145 }, { "epoch": 86.31020408163265, "grad_norm": 22.520952224731445, "learning_rate": 6.871428571428572e-06, "loss": 0.1501, "step": 21146 }, { "epoch": 86.31428571428572, "grad_norm": 44.00075149536133, "learning_rate": 6.869387755102042e-06, "loss": 0.4229, "step": 21147 }, { "epoch": 86.31836734693877, "grad_norm": 33.982887268066406, "learning_rate": 6.867346938775511e-06, "loss": 0.6989, "step": 21148 }, { "epoch": 86.32244897959184, "grad_norm": 43.926578521728516, "learning_rate": 6.865306122448979e-06, "loss": 0.721, "step": 21149 }, { "epoch": 86.3265306122449, "grad_norm": 28.296772003173828, "learning_rate": 6.863265306122449e-06, "loss": 1.5388, "step": 21150 }, { "epoch": 86.33061224489796, "grad_norm": 25.19175910949707, "learning_rate": 6.861224489795919e-06, "loss": 1.2867, "step": 21151 }, { "epoch": 86.33469387755102, "grad_norm": 25.60618782043457, "learning_rate": 6.859183673469388e-06, "loss": 0.3157, "step": 21152 }, { "epoch": 86.33877551020409, "grad_norm": 32.114540100097656, "learning_rate": 6.857142857142858e-06, "loss": 0.4507, "step": 21153 }, { "epoch": 86.34285714285714, "grad_norm": 24.207666397094727, "learning_rate": 6.855102040816328e-06, "loss": 0.224, "step": 21154 }, { "epoch": 86.34693877551021, "grad_norm": 36.87100601196289, "learning_rate": 6.8530612244897956e-06, "loss": 1.0614, "step": 21155 }, { "epoch": 86.35102040816327, "grad_norm": 31.866594314575195, "learning_rate": 6.851020408163265e-06, "loss": 0.277, "step": 21156 }, { "epoch": 86.35510204081632, "grad_norm": 23.725645065307617, "learning_rate": 6.848979591836735e-06, "loss": 1.456, "step": 21157 }, { "epoch": 86.35918367346939, "grad_norm": 41.33676528930664, "learning_rate": 6.8469387755102046e-06, "loss": 1.0521, "step": 21158 }, { "epoch": 86.36326530612244, "grad_norm": 35.26179885864258, "learning_rate": 6.844897959183674e-06, "loss": 0.7797, "step": 21159 }, { "epoch": 86.36734693877551, "grad_norm": 36.3567008972168, "learning_rate": 6.842857142857142e-06, "loss": 1.1591, "step": 21160 }, { "epoch": 86.37142857142857, "grad_norm": 29.544034957885742, "learning_rate": 6.840816326530612e-06, "loss": 1.3382, "step": 21161 }, { "epoch": 86.37551020408164, "grad_norm": 23.899499893188477, "learning_rate": 6.8387755102040815e-06, "loss": 0.2778, "step": 21162 }, { "epoch": 86.37959183673469, "grad_norm": 31.14008331298828, "learning_rate": 6.836734693877551e-06, "loss": 1.6247, "step": 21163 }, { "epoch": 86.38367346938776, "grad_norm": 31.893903732299805, "learning_rate": 6.834693877551021e-06, "loss": 0.3858, "step": 21164 }, { "epoch": 86.38775510204081, "grad_norm": 36.44316101074219, "learning_rate": 6.8326530612244905e-06, "loss": 0.5464, "step": 21165 }, { "epoch": 86.39183673469388, "grad_norm": 34.83241653442383, "learning_rate": 6.830612244897959e-06, "loss": 1.1508, "step": 21166 }, { "epoch": 86.39591836734694, "grad_norm": 35.477813720703125, "learning_rate": 6.828571428571429e-06, "loss": 0.9586, "step": 21167 }, { "epoch": 86.4, "grad_norm": 20.784988403320312, "learning_rate": 6.826530612244898e-06, "loss": 2.0154, "step": 21168 }, { "epoch": 86.40408163265306, "grad_norm": 31.548519134521484, "learning_rate": 6.8244897959183675e-06, "loss": 1.2198, "step": 21169 }, { "epoch": 86.40816326530613, "grad_norm": 22.753162384033203, "learning_rate": 6.822448979591837e-06, "loss": 2.0345, "step": 21170 }, { "epoch": 86.41224489795918, "grad_norm": 20.76486587524414, "learning_rate": 6.820408163265306e-06, "loss": 1.8809, "step": 21171 }, { "epoch": 86.41632653061224, "grad_norm": 32.55733108520508, "learning_rate": 6.818367346938776e-06, "loss": 1.1986, "step": 21172 }, { "epoch": 86.42040816326531, "grad_norm": 6.426707744598389, "learning_rate": 6.816326530612245e-06, "loss": 0.0499, "step": 21173 }, { "epoch": 86.42448979591836, "grad_norm": 45.00832748413086, "learning_rate": 6.814285714285715e-06, "loss": 0.6373, "step": 21174 }, { "epoch": 86.42857142857143, "grad_norm": 31.16954231262207, "learning_rate": 6.812244897959185e-06, "loss": 1.4854, "step": 21175 }, { "epoch": 86.43265306122449, "grad_norm": 35.0080680847168, "learning_rate": 6.810204081632654e-06, "loss": 1.1729, "step": 21176 }, { "epoch": 86.43673469387755, "grad_norm": 33.499755859375, "learning_rate": 6.808163265306122e-06, "loss": 0.3278, "step": 21177 }, { "epoch": 86.44081632653061, "grad_norm": 30.138797760009766, "learning_rate": 6.806122448979592e-06, "loss": 0.7489, "step": 21178 }, { "epoch": 86.44489795918368, "grad_norm": 32.86692810058594, "learning_rate": 6.804081632653062e-06, "loss": 1.0098, "step": 21179 }, { "epoch": 86.44897959183673, "grad_norm": 28.943668365478516, "learning_rate": 6.802040816326531e-06, "loss": 1.1467, "step": 21180 }, { "epoch": 86.4530612244898, "grad_norm": 29.035030364990234, "learning_rate": 6.800000000000001e-06, "loss": 0.94, "step": 21181 }, { "epoch": 86.45714285714286, "grad_norm": 27.21944808959961, "learning_rate": 6.797959183673469e-06, "loss": 0.2404, "step": 21182 }, { "epoch": 86.46122448979592, "grad_norm": 22.503833770751953, "learning_rate": 6.795918367346939e-06, "loss": 1.9285, "step": 21183 }, { "epoch": 86.46530612244898, "grad_norm": 32.63774490356445, "learning_rate": 6.793877551020408e-06, "loss": 0.9082, "step": 21184 }, { "epoch": 86.46938775510205, "grad_norm": 41.631080627441406, "learning_rate": 6.791836734693878e-06, "loss": 1.0496, "step": 21185 }, { "epoch": 86.4734693877551, "grad_norm": 41.39558792114258, "learning_rate": 6.789795918367348e-06, "loss": 0.3916, "step": 21186 }, { "epoch": 86.47755102040816, "grad_norm": 9.585787773132324, "learning_rate": 6.787755102040817e-06, "loss": 0.1244, "step": 21187 }, { "epoch": 86.48163265306123, "grad_norm": 16.442052841186523, "learning_rate": 6.785714285714285e-06, "loss": 0.0886, "step": 21188 }, { "epoch": 86.48571428571428, "grad_norm": 30.00848960876465, "learning_rate": 6.783673469387755e-06, "loss": 1.4183, "step": 21189 }, { "epoch": 86.48979591836735, "grad_norm": 32.90331268310547, "learning_rate": 6.7816326530612246e-06, "loss": 1.0467, "step": 21190 }, { "epoch": 86.4938775510204, "grad_norm": 21.953527450561523, "learning_rate": 6.779591836734694e-06, "loss": 1.9164, "step": 21191 }, { "epoch": 86.49795918367347, "grad_norm": 27.253568649291992, "learning_rate": 6.777551020408164e-06, "loss": 0.2309, "step": 21192 }, { "epoch": 86.50204081632653, "grad_norm": 40.86273956298828, "learning_rate": 6.775510204081633e-06, "loss": 0.761, "step": 21193 }, { "epoch": 86.5061224489796, "grad_norm": 31.788618087768555, "learning_rate": 6.773469387755102e-06, "loss": 1.3948, "step": 21194 }, { "epoch": 86.51020408163265, "grad_norm": 28.498981475830078, "learning_rate": 6.771428571428571e-06, "loss": 0.3201, "step": 21195 }, { "epoch": 86.51428571428572, "grad_norm": 13.596757888793945, "learning_rate": 6.769387755102041e-06, "loss": 0.0913, "step": 21196 }, { "epoch": 86.51836734693877, "grad_norm": 33.053794860839844, "learning_rate": 6.7673469387755105e-06, "loss": 0.2378, "step": 21197 }, { "epoch": 86.52244897959184, "grad_norm": 24.271068572998047, "learning_rate": 6.76530612244898e-06, "loss": 0.25, "step": 21198 }, { "epoch": 86.5265306122449, "grad_norm": 10.015210151672363, "learning_rate": 6.763265306122449e-06, "loss": 0.0885, "step": 21199 }, { "epoch": 86.53061224489795, "grad_norm": 32.465389251708984, "learning_rate": 6.761224489795919e-06, "loss": 0.6584, "step": 21200 }, { "epoch": 86.53469387755102, "grad_norm": 32.10944366455078, "learning_rate": 6.759183673469388e-06, "loss": 0.4422, "step": 21201 }, { "epoch": 86.53877551020408, "grad_norm": 31.636035919189453, "learning_rate": 6.757142857142858e-06, "loss": 0.4543, "step": 21202 }, { "epoch": 86.54285714285714, "grad_norm": 27.805326461791992, "learning_rate": 6.755102040816328e-06, "loss": 1.5374, "step": 21203 }, { "epoch": 86.5469387755102, "grad_norm": 25.818796157836914, "learning_rate": 6.753061224489796e-06, "loss": 1.4535, "step": 21204 }, { "epoch": 86.55102040816327, "grad_norm": 11.209683418273926, "learning_rate": 6.751020408163265e-06, "loss": 0.0785, "step": 21205 }, { "epoch": 86.55510204081632, "grad_norm": 30.02271842956543, "learning_rate": 6.748979591836735e-06, "loss": 0.9051, "step": 21206 }, { "epoch": 86.55918367346939, "grad_norm": 38.084224700927734, "learning_rate": 6.746938775510205e-06, "loss": 0.9919, "step": 21207 }, { "epoch": 86.56326530612245, "grad_norm": 37.59408950805664, "learning_rate": 6.744897959183674e-06, "loss": 0.56, "step": 21208 }, { "epoch": 86.56734693877551, "grad_norm": 24.515724182128906, "learning_rate": 6.742857142857144e-06, "loss": 0.3117, "step": 21209 }, { "epoch": 86.57142857142857, "grad_norm": 32.14337158203125, "learning_rate": 6.740816326530612e-06, "loss": 0.895, "step": 21210 }, { "epoch": 86.57551020408164, "grad_norm": 41.71194076538086, "learning_rate": 6.738775510204082e-06, "loss": 0.5835, "step": 21211 }, { "epoch": 86.57959183673469, "grad_norm": 40.100257873535156, "learning_rate": 6.736734693877551e-06, "loss": 0.8447, "step": 21212 }, { "epoch": 86.58367346938776, "grad_norm": 25.73430061340332, "learning_rate": 6.734693877551021e-06, "loss": 1.4702, "step": 21213 }, { "epoch": 86.58775510204082, "grad_norm": 40.21649169921875, "learning_rate": 6.732653061224491e-06, "loss": 0.4165, "step": 21214 }, { "epoch": 86.59183673469387, "grad_norm": 43.45954132080078, "learning_rate": 6.730612244897959e-06, "loss": 0.9058, "step": 21215 }, { "epoch": 86.59591836734694, "grad_norm": 36.668766021728516, "learning_rate": 6.728571428571428e-06, "loss": 0.2619, "step": 21216 }, { "epoch": 86.6, "grad_norm": 34.4505615234375, "learning_rate": 6.726530612244898e-06, "loss": 0.3231, "step": 21217 }, { "epoch": 86.60408163265306, "grad_norm": 27.627460479736328, "learning_rate": 6.724489795918368e-06, "loss": 1.7279, "step": 21218 }, { "epoch": 86.60816326530612, "grad_norm": 34.503944396972656, "learning_rate": 6.722448979591837e-06, "loss": 0.6695, "step": 21219 }, { "epoch": 86.61224489795919, "grad_norm": 28.272748947143555, "learning_rate": 6.720408163265307e-06, "loss": 1.4211, "step": 21220 }, { "epoch": 86.61632653061224, "grad_norm": 26.12171745300293, "learning_rate": 6.718367346938775e-06, "loss": 1.5572, "step": 21221 }, { "epoch": 86.62040816326531, "grad_norm": 32.27391052246094, "learning_rate": 6.716326530612245e-06, "loss": 0.9103, "step": 21222 }, { "epoch": 86.62448979591836, "grad_norm": 31.117183685302734, "learning_rate": 6.714285714285714e-06, "loss": 0.4295, "step": 21223 }, { "epoch": 86.62857142857143, "grad_norm": 15.513822555541992, "learning_rate": 6.712244897959184e-06, "loss": 2.2437, "step": 21224 }, { "epoch": 86.63265306122449, "grad_norm": 27.248794555664062, "learning_rate": 6.710204081632654e-06, "loss": 1.4929, "step": 21225 }, { "epoch": 86.63673469387756, "grad_norm": 23.715085983276367, "learning_rate": 6.708163265306122e-06, "loss": 1.5199, "step": 21226 }, { "epoch": 86.64081632653061, "grad_norm": 37.166168212890625, "learning_rate": 6.706122448979592e-06, "loss": 1.0428, "step": 21227 }, { "epoch": 86.64489795918368, "grad_norm": 29.23478889465332, "learning_rate": 6.704081632653062e-06, "loss": 1.4026, "step": 21228 }, { "epoch": 86.64897959183673, "grad_norm": 30.241968154907227, "learning_rate": 6.702040816326531e-06, "loss": 1.2349, "step": 21229 }, { "epoch": 86.65306122448979, "grad_norm": 23.159603118896484, "learning_rate": 6.700000000000001e-06, "loss": 1.7953, "step": 21230 }, { "epoch": 86.65714285714286, "grad_norm": 46.961910247802734, "learning_rate": 6.697959183673471e-06, "loss": 0.8556, "step": 21231 }, { "epoch": 86.66122448979591, "grad_norm": 23.111526489257812, "learning_rate": 6.695918367346939e-06, "loss": 1.5965, "step": 21232 }, { "epoch": 86.66530612244898, "grad_norm": 36.217308044433594, "learning_rate": 6.693877551020408e-06, "loss": 0.5802, "step": 21233 }, { "epoch": 86.66938775510204, "grad_norm": 29.16965675354004, "learning_rate": 6.691836734693878e-06, "loss": 0.5644, "step": 21234 }, { "epoch": 86.6734693877551, "grad_norm": 26.323627471923828, "learning_rate": 6.689795918367348e-06, "loss": 1.3025, "step": 21235 }, { "epoch": 86.67755102040816, "grad_norm": 26.147626876831055, "learning_rate": 6.687755102040817e-06, "loss": 1.3749, "step": 21236 }, { "epoch": 86.68163265306123, "grad_norm": 36.190364837646484, "learning_rate": 6.685714285714285e-06, "loss": 1.0615, "step": 21237 }, { "epoch": 86.68571428571428, "grad_norm": 1.6733043193817139, "learning_rate": 6.683673469387755e-06, "loss": 0.0291, "step": 21238 }, { "epoch": 86.68979591836735, "grad_norm": 29.439008712768555, "learning_rate": 6.681632653061225e-06, "loss": 1.2255, "step": 21239 }, { "epoch": 86.6938775510204, "grad_norm": 24.261173248291016, "learning_rate": 6.679591836734694e-06, "loss": 1.8947, "step": 21240 }, { "epoch": 86.69795918367348, "grad_norm": 30.966032028198242, "learning_rate": 6.677551020408164e-06, "loss": 1.2991, "step": 21241 }, { "epoch": 86.70204081632653, "grad_norm": 14.246768951416016, "learning_rate": 6.675510204081634e-06, "loss": 0.0894, "step": 21242 }, { "epoch": 86.70612244897958, "grad_norm": 29.939424514770508, "learning_rate": 6.673469387755102e-06, "loss": 1.2758, "step": 21243 }, { "epoch": 86.71020408163265, "grad_norm": 24.78738021850586, "learning_rate": 6.671428571428571e-06, "loss": 0.135, "step": 21244 }, { "epoch": 86.71428571428571, "grad_norm": 26.722793579101562, "learning_rate": 6.669387755102041e-06, "loss": 0.2794, "step": 21245 }, { "epoch": 86.71836734693878, "grad_norm": 39.180477142333984, "learning_rate": 6.667346938775511e-06, "loss": 0.6775, "step": 21246 }, { "epoch": 86.72244897959183, "grad_norm": 41.62443542480469, "learning_rate": 6.66530612244898e-06, "loss": 0.3378, "step": 21247 }, { "epoch": 86.7265306122449, "grad_norm": 30.55994415283203, "learning_rate": 6.663265306122448e-06, "loss": 0.3359, "step": 21248 }, { "epoch": 86.73061224489796, "grad_norm": 18.499248504638672, "learning_rate": 6.661224489795918e-06, "loss": 0.1306, "step": 21249 }, { "epoch": 86.73469387755102, "grad_norm": 24.948772430419922, "learning_rate": 6.659183673469388e-06, "loss": 1.4508, "step": 21250 }, { "epoch": 86.73877551020408, "grad_norm": 56.05036926269531, "learning_rate": 6.657142857142857e-06, "loss": 1.1713, "step": 21251 }, { "epoch": 86.74285714285715, "grad_norm": 19.203384399414062, "learning_rate": 6.655102040816327e-06, "loss": 2.321, "step": 21252 }, { "epoch": 86.7469387755102, "grad_norm": 22.298450469970703, "learning_rate": 6.653061224489797e-06, "loss": 2.0415, "step": 21253 }, { "epoch": 86.75102040816327, "grad_norm": 34.01173400878906, "learning_rate": 6.6510204081632654e-06, "loss": 1.3177, "step": 21254 }, { "epoch": 86.75510204081633, "grad_norm": 36.53672409057617, "learning_rate": 6.648979591836735e-06, "loss": 0.2756, "step": 21255 }, { "epoch": 86.7591836734694, "grad_norm": 33.48301696777344, "learning_rate": 6.646938775510205e-06, "loss": 0.3711, "step": 21256 }, { "epoch": 86.76326530612245, "grad_norm": 36.1861572265625, "learning_rate": 6.6448979591836744e-06, "loss": 0.3498, "step": 21257 }, { "epoch": 86.7673469387755, "grad_norm": 56.488243103027344, "learning_rate": 6.642857142857144e-06, "loss": 0.5377, "step": 21258 }, { "epoch": 86.77142857142857, "grad_norm": 28.576148986816406, "learning_rate": 6.640816326530612e-06, "loss": 0.5963, "step": 21259 }, { "epoch": 86.77551020408163, "grad_norm": 35.15191650390625, "learning_rate": 6.638775510204082e-06, "loss": 0.8164, "step": 21260 }, { "epoch": 86.7795918367347, "grad_norm": 33.149314880371094, "learning_rate": 6.636734693877551e-06, "loss": 1.6545, "step": 21261 }, { "epoch": 86.78367346938775, "grad_norm": 23.61690902709961, "learning_rate": 6.634693877551021e-06, "loss": 0.2129, "step": 21262 }, { "epoch": 86.78775510204082, "grad_norm": 34.137115478515625, "learning_rate": 6.632653061224491e-06, "loss": 0.9429, "step": 21263 }, { "epoch": 86.79183673469387, "grad_norm": 25.87049674987793, "learning_rate": 6.63061224489796e-06, "loss": 1.9443, "step": 21264 }, { "epoch": 86.79591836734694, "grad_norm": 35.554893493652344, "learning_rate": 6.628571428571428e-06, "loss": 1.3409, "step": 21265 }, { "epoch": 86.8, "grad_norm": 34.200279235839844, "learning_rate": 6.626530612244898e-06, "loss": 0.5587, "step": 21266 }, { "epoch": 86.80408163265307, "grad_norm": 34.77204895019531, "learning_rate": 6.624489795918368e-06, "loss": 0.2694, "step": 21267 }, { "epoch": 86.80816326530612, "grad_norm": 27.864744186401367, "learning_rate": 6.622448979591837e-06, "loss": 1.5164, "step": 21268 }, { "epoch": 86.81224489795919, "grad_norm": 32.71426773071289, "learning_rate": 6.620408163265307e-06, "loss": 0.9249, "step": 21269 }, { "epoch": 86.81632653061224, "grad_norm": 24.241512298583984, "learning_rate": 6.618367346938775e-06, "loss": 0.3733, "step": 21270 }, { "epoch": 86.82040816326531, "grad_norm": 25.52692413330078, "learning_rate": 6.616326530612245e-06, "loss": 1.4069, "step": 21271 }, { "epoch": 86.82448979591837, "grad_norm": 33.9808349609375, "learning_rate": 6.614285714285714e-06, "loss": 0.7941, "step": 21272 }, { "epoch": 86.82857142857142, "grad_norm": 36.91680145263672, "learning_rate": 6.612244897959184e-06, "loss": 0.502, "step": 21273 }, { "epoch": 86.83265306122449, "grad_norm": 49.270973205566406, "learning_rate": 6.610204081632654e-06, "loss": 0.7068, "step": 21274 }, { "epoch": 86.83673469387755, "grad_norm": 16.09862518310547, "learning_rate": 6.608163265306123e-06, "loss": 0.1253, "step": 21275 }, { "epoch": 86.84081632653061, "grad_norm": 34.42929458618164, "learning_rate": 6.606122448979591e-06, "loss": 1.3158, "step": 21276 }, { "epoch": 86.84489795918367, "grad_norm": 32.67375946044922, "learning_rate": 6.604081632653061e-06, "loss": 0.3607, "step": 21277 }, { "epoch": 86.84897959183674, "grad_norm": 31.87095069885254, "learning_rate": 6.602040816326531e-06, "loss": 1.4864, "step": 21278 }, { "epoch": 86.85306122448979, "grad_norm": 29.05602264404297, "learning_rate": 6.6e-06, "loss": 0.623, "step": 21279 }, { "epoch": 86.85714285714286, "grad_norm": 28.347312927246094, "learning_rate": 6.59795918367347e-06, "loss": 1.5374, "step": 21280 }, { "epoch": 86.86122448979592, "grad_norm": 22.41014289855957, "learning_rate": 6.595918367346939e-06, "loss": 1.9346, "step": 21281 }, { "epoch": 86.86530612244898, "grad_norm": 21.083444595336914, "learning_rate": 6.5938775510204085e-06, "loss": 2.1005, "step": 21282 }, { "epoch": 86.86938775510204, "grad_norm": 31.457984924316406, "learning_rate": 6.591836734693878e-06, "loss": 0.3162, "step": 21283 }, { "epoch": 86.87346938775511, "grad_norm": 36.597496032714844, "learning_rate": 6.589795918367348e-06, "loss": 1.2393, "step": 21284 }, { "epoch": 86.87755102040816, "grad_norm": 29.322179794311523, "learning_rate": 6.5877551020408175e-06, "loss": 1.1591, "step": 21285 }, { "epoch": 86.88163265306123, "grad_norm": 28.825767517089844, "learning_rate": 6.5857142857142855e-06, "loss": 0.2497, "step": 21286 }, { "epoch": 86.88571428571429, "grad_norm": 31.838369369506836, "learning_rate": 6.583673469387755e-06, "loss": 0.5078, "step": 21287 }, { "epoch": 86.88979591836734, "grad_norm": 36.66462707519531, "learning_rate": 6.581632653061225e-06, "loss": 0.8072, "step": 21288 }, { "epoch": 86.89387755102041, "grad_norm": 32.29893493652344, "learning_rate": 6.5795918367346945e-06, "loss": 0.3347, "step": 21289 }, { "epoch": 86.89795918367346, "grad_norm": 31.93172836303711, "learning_rate": 6.577551020408164e-06, "loss": 0.9736, "step": 21290 }, { "epoch": 86.90204081632653, "grad_norm": 19.04790687561035, "learning_rate": 6.575510204081634e-06, "loss": 0.1572, "step": 21291 }, { "epoch": 86.90612244897959, "grad_norm": 36.72890853881836, "learning_rate": 6.573469387755102e-06, "loss": 0.5331, "step": 21292 }, { "epoch": 86.91020408163266, "grad_norm": 28.835773468017578, "learning_rate": 6.5714285714285714e-06, "loss": 1.9261, "step": 21293 }, { "epoch": 86.91428571428571, "grad_norm": 46.53068161010742, "learning_rate": 6.569387755102041e-06, "loss": 0.7368, "step": 21294 }, { "epoch": 86.91836734693878, "grad_norm": 27.317771911621094, "learning_rate": 6.567346938775511e-06, "loss": 0.6255, "step": 21295 }, { "epoch": 86.92244897959183, "grad_norm": 35.48251724243164, "learning_rate": 6.5653061224489804e-06, "loss": 1.3071, "step": 21296 }, { "epoch": 86.9265306122449, "grad_norm": 33.07857894897461, "learning_rate": 6.563265306122448e-06, "loss": 1.2255, "step": 21297 }, { "epoch": 86.93061224489796, "grad_norm": 40.792171478271484, "learning_rate": 6.561224489795918e-06, "loss": 0.9389, "step": 21298 }, { "epoch": 86.93469387755103, "grad_norm": 32.523651123046875, "learning_rate": 6.559183673469388e-06, "loss": 0.5608, "step": 21299 }, { "epoch": 86.93877551020408, "grad_norm": 29.661855697631836, "learning_rate": 6.557142857142857e-06, "loss": 1.6509, "step": 21300 }, { "epoch": 86.94285714285714, "grad_norm": 35.53034591674805, "learning_rate": 6.555102040816327e-06, "loss": 0.601, "step": 21301 }, { "epoch": 86.9469387755102, "grad_norm": 9.848878860473633, "learning_rate": 6.553061224489797e-06, "loss": 0.0813, "step": 21302 }, { "epoch": 86.95102040816326, "grad_norm": 24.186203002929688, "learning_rate": 6.551020408163265e-06, "loss": 0.3635, "step": 21303 }, { "epoch": 86.95510204081633, "grad_norm": 29.866668701171875, "learning_rate": 6.548979591836734e-06, "loss": 0.3459, "step": 21304 }, { "epoch": 86.95918367346938, "grad_norm": 35.5171012878418, "learning_rate": 6.546938775510204e-06, "loss": 0.7947, "step": 21305 }, { "epoch": 86.96326530612245, "grad_norm": 28.882436752319336, "learning_rate": 6.544897959183674e-06, "loss": 0.4273, "step": 21306 }, { "epoch": 86.9673469387755, "grad_norm": 25.688663482666016, "learning_rate": 6.542857142857143e-06, "loss": 1.4162, "step": 21307 }, { "epoch": 86.97142857142858, "grad_norm": 13.076349258422852, "learning_rate": 6.540816326530612e-06, "loss": 0.087, "step": 21308 }, { "epoch": 86.97551020408163, "grad_norm": 40.518497467041016, "learning_rate": 6.538775510204082e-06, "loss": 0.8681, "step": 21309 }, { "epoch": 86.9795918367347, "grad_norm": 32.02511978149414, "learning_rate": 6.5367346938775515e-06, "loss": 0.6842, "step": 21310 }, { "epoch": 86.98367346938775, "grad_norm": 28.48588752746582, "learning_rate": 6.534693877551021e-06, "loss": 0.2297, "step": 21311 }, { "epoch": 86.98775510204082, "grad_norm": 37.16130828857422, "learning_rate": 6.532653061224491e-06, "loss": 0.5678, "step": 21312 }, { "epoch": 86.99183673469388, "grad_norm": 29.94816017150879, "learning_rate": 6.53061224489796e-06, "loss": 1.4247, "step": 21313 }, { "epoch": 86.99591836734695, "grad_norm": 27.227676391601562, "learning_rate": 6.5285714285714285e-06, "loss": 1.5555, "step": 21314 }, { "epoch": 87.0, "grad_norm": 27.246732711791992, "learning_rate": 6.526530612244898e-06, "loss": 0.2936, "step": 21315 }, { "epoch": 87.00408163265305, "grad_norm": 15.167754173278809, "learning_rate": 6.524489795918368e-06, "loss": 0.1411, "step": 21316 }, { "epoch": 87.00816326530612, "grad_norm": 33.799381256103516, "learning_rate": 6.5224489795918375e-06, "loss": 1.2847, "step": 21317 }, { "epoch": 87.01224489795918, "grad_norm": 33.82020568847656, "learning_rate": 6.520408163265307e-06, "loss": 0.528, "step": 21318 }, { "epoch": 87.01632653061225, "grad_norm": 33.130271911621094, "learning_rate": 6.518367346938775e-06, "loss": 1.0115, "step": 21319 }, { "epoch": 87.0204081632653, "grad_norm": 18.58926010131836, "learning_rate": 6.516326530612245e-06, "loss": 0.0935, "step": 21320 }, { "epoch": 87.02448979591837, "grad_norm": 27.78302001953125, "learning_rate": 6.5142857142857145e-06, "loss": 1.4377, "step": 21321 }, { "epoch": 87.02857142857142, "grad_norm": 32.701778411865234, "learning_rate": 6.512244897959184e-06, "loss": 0.6165, "step": 21322 }, { "epoch": 87.0326530612245, "grad_norm": 24.921966552734375, "learning_rate": 6.510204081632654e-06, "loss": 0.327, "step": 21323 }, { "epoch": 87.03673469387755, "grad_norm": 33.582637786865234, "learning_rate": 6.5081632653061235e-06, "loss": 0.3325, "step": 21324 }, { "epoch": 87.04081632653062, "grad_norm": 38.56657409667969, "learning_rate": 6.5061224489795914e-06, "loss": 0.3992, "step": 21325 }, { "epoch": 87.04489795918367, "grad_norm": 33.92037582397461, "learning_rate": 6.504081632653061e-06, "loss": 0.8139, "step": 21326 }, { "epoch": 87.04897959183674, "grad_norm": 32.83436584472656, "learning_rate": 6.502040816326531e-06, "loss": 0.2389, "step": 21327 }, { "epoch": 87.0530612244898, "grad_norm": 39.926185607910156, "learning_rate": 6.5000000000000004e-06, "loss": 0.6249, "step": 21328 }, { "epoch": 87.05714285714286, "grad_norm": 38.03818893432617, "learning_rate": 6.49795918367347e-06, "loss": 1.1955, "step": 21329 }, { "epoch": 87.06122448979592, "grad_norm": 56.13026428222656, "learning_rate": 6.495918367346938e-06, "loss": 1.1418, "step": 21330 }, { "epoch": 87.06530612244897, "grad_norm": 34.08042907714844, "learning_rate": 6.493877551020408e-06, "loss": 1.2654, "step": 21331 }, { "epoch": 87.06938775510204, "grad_norm": 23.465282440185547, "learning_rate": 6.491836734693877e-06, "loss": 0.2007, "step": 21332 }, { "epoch": 87.0734693877551, "grad_norm": 11.689888000488281, "learning_rate": 6.489795918367347e-06, "loss": 0.104, "step": 21333 }, { "epoch": 87.07755102040817, "grad_norm": 32.04862976074219, "learning_rate": 6.487755102040817e-06, "loss": 1.272, "step": 21334 }, { "epoch": 87.08163265306122, "grad_norm": 29.39887046813965, "learning_rate": 6.485714285714286e-06, "loss": 0.4923, "step": 21335 }, { "epoch": 87.08571428571429, "grad_norm": 32.789119720458984, "learning_rate": 6.483673469387755e-06, "loss": 0.824, "step": 21336 }, { "epoch": 87.08979591836734, "grad_norm": 36.91694259643555, "learning_rate": 6.481632653061225e-06, "loss": 1.1272, "step": 21337 }, { "epoch": 87.09387755102041, "grad_norm": 24.109474182128906, "learning_rate": 6.4795918367346946e-06, "loss": 0.2171, "step": 21338 }, { "epoch": 87.09795918367347, "grad_norm": 40.887420654296875, "learning_rate": 6.477551020408163e-06, "loss": 0.2997, "step": 21339 }, { "epoch": 87.10204081632654, "grad_norm": 34.8160400390625, "learning_rate": 6.475510204081633e-06, "loss": 0.5274, "step": 21340 }, { "epoch": 87.10612244897959, "grad_norm": 8.388874053955078, "learning_rate": 6.473469387755102e-06, "loss": 0.0739, "step": 21341 }, { "epoch": 87.11020408163266, "grad_norm": 30.44306755065918, "learning_rate": 6.4714285714285715e-06, "loss": 0.2576, "step": 21342 }, { "epoch": 87.11428571428571, "grad_norm": 30.292661666870117, "learning_rate": 6.469387755102041e-06, "loss": 1.1263, "step": 21343 }, { "epoch": 87.11836734693877, "grad_norm": 28.324188232421875, "learning_rate": 6.467346938775511e-06, "loss": 1.5139, "step": 21344 }, { "epoch": 87.12244897959184, "grad_norm": 24.450101852416992, "learning_rate": 6.4653061224489805e-06, "loss": 1.6263, "step": 21345 }, { "epoch": 87.12653061224489, "grad_norm": 30.72097396850586, "learning_rate": 6.46326530612245e-06, "loss": 0.5197, "step": 21346 }, { "epoch": 87.13061224489796, "grad_norm": 34.45896530151367, "learning_rate": 6.461224489795918e-06, "loss": 0.9043, "step": 21347 }, { "epoch": 87.13469387755102, "grad_norm": 26.309532165527344, "learning_rate": 6.459183673469388e-06, "loss": 1.3956, "step": 21348 }, { "epoch": 87.13877551020408, "grad_norm": 52.639007568359375, "learning_rate": 6.4571428571428575e-06, "loss": 0.8219, "step": 21349 }, { "epoch": 87.14285714285714, "grad_norm": 18.096542358398438, "learning_rate": 6.455102040816327e-06, "loss": 0.0817, "step": 21350 }, { "epoch": 87.14693877551021, "grad_norm": 31.526119232177734, "learning_rate": 6.453061224489797e-06, "loss": 0.8872, "step": 21351 }, { "epoch": 87.15102040816326, "grad_norm": 26.87150001525879, "learning_rate": 6.451020408163265e-06, "loss": 1.3338, "step": 21352 }, { "epoch": 87.15510204081633, "grad_norm": 23.693572998046875, "learning_rate": 6.4489795918367345e-06, "loss": 1.3934, "step": 21353 }, { "epoch": 87.15918367346939, "grad_norm": 26.98136329650879, "learning_rate": 6.446938775510204e-06, "loss": 0.3291, "step": 21354 }, { "epoch": 87.16326530612245, "grad_norm": 30.57619857788086, "learning_rate": 6.444897959183674e-06, "loss": 1.3792, "step": 21355 }, { "epoch": 87.16734693877551, "grad_norm": 13.733476638793945, "learning_rate": 6.4428571428571435e-06, "loss": 0.0788, "step": 21356 }, { "epoch": 87.17142857142858, "grad_norm": 36.68247604370117, "learning_rate": 6.440816326530613e-06, "loss": 0.9259, "step": 21357 }, { "epoch": 87.17551020408163, "grad_norm": 27.602460861206055, "learning_rate": 6.438775510204081e-06, "loss": 0.2789, "step": 21358 }, { "epoch": 87.17959183673469, "grad_norm": 29.989643096923828, "learning_rate": 6.436734693877551e-06, "loss": 0.6414, "step": 21359 }, { "epoch": 87.18367346938776, "grad_norm": 15.91905689239502, "learning_rate": 6.4346938775510205e-06, "loss": 0.1074, "step": 21360 }, { "epoch": 87.18775510204081, "grad_norm": 31.403284072875977, "learning_rate": 6.43265306122449e-06, "loss": 0.6084, "step": 21361 }, { "epoch": 87.19183673469388, "grad_norm": 29.567426681518555, "learning_rate": 6.43061224489796e-06, "loss": 0.9662, "step": 21362 }, { "epoch": 87.19591836734693, "grad_norm": 31.49201774597168, "learning_rate": 6.428571428571429e-06, "loss": 0.2538, "step": 21363 }, { "epoch": 87.2, "grad_norm": 21.929059982299805, "learning_rate": 6.426530612244898e-06, "loss": 0.2063, "step": 21364 }, { "epoch": 87.20408163265306, "grad_norm": 42.267364501953125, "learning_rate": 6.424489795918368e-06, "loss": 1.0573, "step": 21365 }, { "epoch": 87.20816326530613, "grad_norm": 26.488065719604492, "learning_rate": 6.422448979591837e-06, "loss": 0.3631, "step": 21366 }, { "epoch": 87.21224489795918, "grad_norm": 39.82957458496094, "learning_rate": 6.4204081632653064e-06, "loss": 0.8823, "step": 21367 }, { "epoch": 87.21632653061225, "grad_norm": 35.27932357788086, "learning_rate": 6.418367346938776e-06, "loss": 0.5332, "step": 21368 }, { "epoch": 87.2204081632653, "grad_norm": 31.559823989868164, "learning_rate": 6.416326530612245e-06, "loss": 0.8585, "step": 21369 }, { "epoch": 87.22448979591837, "grad_norm": 35.00371551513672, "learning_rate": 6.414285714285715e-06, "loss": 0.7862, "step": 21370 }, { "epoch": 87.22857142857143, "grad_norm": 26.347957611083984, "learning_rate": 6.412244897959184e-06, "loss": 1.3035, "step": 21371 }, { "epoch": 87.2326530612245, "grad_norm": 31.696821212768555, "learning_rate": 6.410204081632654e-06, "loss": 0.9375, "step": 21372 }, { "epoch": 87.23673469387755, "grad_norm": 31.430639266967773, "learning_rate": 6.408163265306124e-06, "loss": 1.0849, "step": 21373 }, { "epoch": 87.2408163265306, "grad_norm": 40.1033821105957, "learning_rate": 6.4061224489795916e-06, "loss": 0.5593, "step": 21374 }, { "epoch": 87.24489795918367, "grad_norm": 30.464494705200195, "learning_rate": 6.404081632653061e-06, "loss": 0.3731, "step": 21375 }, { "epoch": 87.24897959183673, "grad_norm": 25.671979904174805, "learning_rate": 6.402040816326531e-06, "loss": 1.8985, "step": 21376 }, { "epoch": 87.2530612244898, "grad_norm": 30.235280990600586, "learning_rate": 6.4000000000000006e-06, "loss": 0.9853, "step": 21377 }, { "epoch": 87.25714285714285, "grad_norm": 31.40602684020996, "learning_rate": 6.39795918367347e-06, "loss": 1.3664, "step": 21378 }, { "epoch": 87.26122448979592, "grad_norm": 37.13279342651367, "learning_rate": 6.39591836734694e-06, "loss": 0.4821, "step": 21379 }, { "epoch": 87.26530612244898, "grad_norm": 34.06058120727539, "learning_rate": 6.393877551020408e-06, "loss": 0.6443, "step": 21380 }, { "epoch": 87.26938775510204, "grad_norm": 23.98294448852539, "learning_rate": 6.3918367346938775e-06, "loss": 0.2071, "step": 21381 }, { "epoch": 87.2734693877551, "grad_norm": 16.855077743530273, "learning_rate": 6.389795918367347e-06, "loss": 2.2174, "step": 21382 }, { "epoch": 87.27755102040817, "grad_norm": 29.697113037109375, "learning_rate": 6.387755102040817e-06, "loss": 0.4296, "step": 21383 }, { "epoch": 87.28163265306122, "grad_norm": 38.07300567626953, "learning_rate": 6.3857142857142865e-06, "loss": 0.2592, "step": 21384 }, { "epoch": 87.28571428571429, "grad_norm": 25.17759132385254, "learning_rate": 6.3836734693877545e-06, "loss": 1.311, "step": 21385 }, { "epoch": 87.28979591836735, "grad_norm": 28.732219696044922, "learning_rate": 6.381632653061224e-06, "loss": 0.6662, "step": 21386 }, { "epoch": 87.29387755102042, "grad_norm": 29.32667350769043, "learning_rate": 6.379591836734694e-06, "loss": 1.0625, "step": 21387 }, { "epoch": 87.29795918367347, "grad_norm": 36.03435134887695, "learning_rate": 6.3775510204081635e-06, "loss": 0.7558, "step": 21388 }, { "epoch": 87.30204081632652, "grad_norm": 27.538496017456055, "learning_rate": 6.375510204081633e-06, "loss": 0.3048, "step": 21389 }, { "epoch": 87.3061224489796, "grad_norm": 38.53044891357422, "learning_rate": 6.373469387755103e-06, "loss": 0.7931, "step": 21390 }, { "epoch": 87.31020408163265, "grad_norm": 32.073936462402344, "learning_rate": 6.371428571428572e-06, "loss": 0.3413, "step": 21391 }, { "epoch": 87.31428571428572, "grad_norm": 27.001667022705078, "learning_rate": 6.3693877551020405e-06, "loss": 1.4236, "step": 21392 }, { "epoch": 87.31836734693877, "grad_norm": 33.012794494628906, "learning_rate": 6.36734693877551e-06, "loss": 0.154, "step": 21393 }, { "epoch": 87.32244897959184, "grad_norm": 35.8425178527832, "learning_rate": 6.36530612244898e-06, "loss": 1.3733, "step": 21394 }, { "epoch": 87.3265306122449, "grad_norm": 28.513479232788086, "learning_rate": 6.3632653061224495e-06, "loss": 1.4061, "step": 21395 }, { "epoch": 87.33061224489796, "grad_norm": 32.963375091552734, "learning_rate": 6.361224489795918e-06, "loss": 0.7527, "step": 21396 }, { "epoch": 87.33469387755102, "grad_norm": 37.4954833984375, "learning_rate": 6.359183673469388e-06, "loss": 0.9308, "step": 21397 }, { "epoch": 87.33877551020409, "grad_norm": 25.505117416381836, "learning_rate": 6.357142857142858e-06, "loss": 1.4398, "step": 21398 }, { "epoch": 87.34285714285714, "grad_norm": 42.06495666503906, "learning_rate": 6.355102040816327e-06, "loss": 1.0508, "step": 21399 }, { "epoch": 87.34693877551021, "grad_norm": 21.690502166748047, "learning_rate": 6.353061224489797e-06, "loss": 1.8839, "step": 21400 }, { "epoch": 87.35102040816327, "grad_norm": 27.659709930419922, "learning_rate": 6.351020408163267e-06, "loss": 0.3951, "step": 21401 }, { "epoch": 87.35510204081632, "grad_norm": 34.948387145996094, "learning_rate": 6.348979591836735e-06, "loss": 1.4542, "step": 21402 }, { "epoch": 87.35918367346939, "grad_norm": 22.589523315429688, "learning_rate": 6.346938775510204e-06, "loss": 1.2148, "step": 21403 }, { "epoch": 87.36326530612244, "grad_norm": 30.651182174682617, "learning_rate": 6.344897959183674e-06, "loss": 1.1496, "step": 21404 }, { "epoch": 87.36734693877551, "grad_norm": 36.02526092529297, "learning_rate": 6.342857142857144e-06, "loss": 0.8956, "step": 21405 }, { "epoch": 87.37142857142857, "grad_norm": 26.062950134277344, "learning_rate": 6.340816326530613e-06, "loss": 0.264, "step": 21406 }, { "epoch": 87.37551020408164, "grad_norm": 36.948822021484375, "learning_rate": 6.338775510204081e-06, "loss": 1.0642, "step": 21407 }, { "epoch": 87.37959183673469, "grad_norm": 23.497791290283203, "learning_rate": 6.336734693877551e-06, "loss": 1.4532, "step": 21408 }, { "epoch": 87.38367346938776, "grad_norm": 22.95980453491211, "learning_rate": 6.3346938775510206e-06, "loss": 1.9013, "step": 21409 }, { "epoch": 87.38775510204081, "grad_norm": 46.462646484375, "learning_rate": 6.33265306122449e-06, "loss": 0.6859, "step": 21410 }, { "epoch": 87.39183673469388, "grad_norm": 32.38518142700195, "learning_rate": 6.33061224489796e-06, "loss": 0.8756, "step": 21411 }, { "epoch": 87.39591836734694, "grad_norm": 24.446632385253906, "learning_rate": 6.3285714285714296e-06, "loss": 1.4169, "step": 21412 }, { "epoch": 87.4, "grad_norm": 24.236513137817383, "learning_rate": 6.3265306122448975e-06, "loss": 0.295, "step": 21413 }, { "epoch": 87.40408163265306, "grad_norm": 18.345735549926758, "learning_rate": 6.324489795918367e-06, "loss": 1.9709, "step": 21414 }, { "epoch": 87.40816326530613, "grad_norm": 24.80071449279785, "learning_rate": 6.322448979591837e-06, "loss": 0.2872, "step": 21415 }, { "epoch": 87.41224489795918, "grad_norm": 29.778406143188477, "learning_rate": 6.3204081632653065e-06, "loss": 0.2835, "step": 21416 }, { "epoch": 87.41632653061224, "grad_norm": 39.912776947021484, "learning_rate": 6.318367346938776e-06, "loss": 0.8997, "step": 21417 }, { "epoch": 87.42040816326531, "grad_norm": 11.471548080444336, "learning_rate": 6.316326530612245e-06, "loss": 0.1006, "step": 21418 }, { "epoch": 87.42448979591836, "grad_norm": 38.13930130004883, "learning_rate": 6.314285714285714e-06, "loss": 0.3142, "step": 21419 }, { "epoch": 87.42857142857143, "grad_norm": 38.16625213623047, "learning_rate": 6.3122448979591835e-06, "loss": 1.0647, "step": 21420 }, { "epoch": 87.43265306122449, "grad_norm": 35.159271240234375, "learning_rate": 6.310204081632653e-06, "loss": 0.7662, "step": 21421 }, { "epoch": 87.43673469387755, "grad_norm": 27.25518226623535, "learning_rate": 6.308163265306123e-06, "loss": 0.6702, "step": 21422 }, { "epoch": 87.44081632653061, "grad_norm": 40.0946159362793, "learning_rate": 6.3061224489795925e-06, "loss": 0.6055, "step": 21423 }, { "epoch": 87.44489795918368, "grad_norm": 28.885578155517578, "learning_rate": 6.304081632653061e-06, "loss": 1.4355, "step": 21424 }, { "epoch": 87.44897959183673, "grad_norm": 37.335845947265625, "learning_rate": 6.302040816326531e-06, "loss": 0.5595, "step": 21425 }, { "epoch": 87.4530612244898, "grad_norm": 38.08585739135742, "learning_rate": 6.300000000000001e-06, "loss": 0.2609, "step": 21426 }, { "epoch": 87.45714285714286, "grad_norm": 27.927824020385742, "learning_rate": 6.29795918367347e-06, "loss": 0.6516, "step": 21427 }, { "epoch": 87.46122448979592, "grad_norm": 21.125812530517578, "learning_rate": 6.29591836734694e-06, "loss": 1.8268, "step": 21428 }, { "epoch": 87.46530612244898, "grad_norm": 40.384761810302734, "learning_rate": 6.293877551020408e-06, "loss": 1.0929, "step": 21429 }, { "epoch": 87.46938775510205, "grad_norm": 26.241928100585938, "learning_rate": 6.291836734693878e-06, "loss": 0.7018, "step": 21430 }, { "epoch": 87.4734693877551, "grad_norm": 29.251514434814453, "learning_rate": 6.289795918367347e-06, "loss": 1.1731, "step": 21431 }, { "epoch": 87.47755102040816, "grad_norm": 22.199674606323242, "learning_rate": 6.287755102040817e-06, "loss": 1.5572, "step": 21432 }, { "epoch": 87.48163265306123, "grad_norm": 30.883441925048828, "learning_rate": 6.285714285714287e-06, "loss": 0.3259, "step": 21433 }, { "epoch": 87.48571428571428, "grad_norm": 36.97721862792969, "learning_rate": 6.283673469387756e-06, "loss": 0.7904, "step": 21434 }, { "epoch": 87.48979591836735, "grad_norm": 45.131317138671875, "learning_rate": 6.281632653061224e-06, "loss": 0.8255, "step": 21435 }, { "epoch": 87.4938775510204, "grad_norm": 30.1280517578125, "learning_rate": 6.279591836734694e-06, "loss": 1.5911, "step": 21436 }, { "epoch": 87.49795918367347, "grad_norm": 22.60625457763672, "learning_rate": 6.277551020408164e-06, "loss": 0.1992, "step": 21437 }, { "epoch": 87.50204081632653, "grad_norm": 35.82577133178711, "learning_rate": 6.275510204081633e-06, "loss": 0.5063, "step": 21438 }, { "epoch": 87.5061224489796, "grad_norm": 36.531253814697266, "learning_rate": 6.273469387755103e-06, "loss": 0.6826, "step": 21439 }, { "epoch": 87.51020408163265, "grad_norm": 33.49152374267578, "learning_rate": 6.271428571428571e-06, "loss": 0.3351, "step": 21440 }, { "epoch": 87.51428571428572, "grad_norm": 31.09390640258789, "learning_rate": 6.269387755102041e-06, "loss": 1.1646, "step": 21441 }, { "epoch": 87.51836734693877, "grad_norm": 1.392350435256958, "learning_rate": 6.26734693877551e-06, "loss": 0.0257, "step": 21442 }, { "epoch": 87.52244897959184, "grad_norm": 33.76002502441406, "learning_rate": 6.26530612244898e-06, "loss": 1.1673, "step": 21443 }, { "epoch": 87.5265306122449, "grad_norm": 41.335994720458984, "learning_rate": 6.26326530612245e-06, "loss": 0.3892, "step": 21444 }, { "epoch": 87.53061224489795, "grad_norm": 28.617525100708008, "learning_rate": 6.261224489795919e-06, "loss": 1.8393, "step": 21445 }, { "epoch": 87.53469387755102, "grad_norm": 27.962629318237305, "learning_rate": 6.259183673469387e-06, "loss": 1.5972, "step": 21446 }, { "epoch": 87.53877551020408, "grad_norm": 24.580535888671875, "learning_rate": 6.257142857142857e-06, "loss": 2.2183, "step": 21447 }, { "epoch": 87.54285714285714, "grad_norm": 48.80544662475586, "learning_rate": 6.2551020408163266e-06, "loss": 0.7549, "step": 21448 }, { "epoch": 87.5469387755102, "grad_norm": 23.088489532470703, "learning_rate": 6.253061224489796e-06, "loss": 2.0326, "step": 21449 }, { "epoch": 87.55102040816327, "grad_norm": 31.862977981567383, "learning_rate": 6.251020408163266e-06, "loss": 1.3696, "step": 21450 }, { "epoch": 87.55510204081632, "grad_norm": 29.498804092407227, "learning_rate": 6.248979591836735e-06, "loss": 1.5972, "step": 21451 }, { "epoch": 87.55918367346939, "grad_norm": 30.879776000976562, "learning_rate": 6.246938775510204e-06, "loss": 1.1949, "step": 21452 }, { "epoch": 87.56326530612245, "grad_norm": 35.474666595458984, "learning_rate": 6.244897959183674e-06, "loss": 1.1106, "step": 21453 }, { "epoch": 87.56734693877551, "grad_norm": 24.37589454650879, "learning_rate": 6.242857142857144e-06, "loss": 1.9283, "step": 21454 }, { "epoch": 87.57142857142857, "grad_norm": 31.73420524597168, "learning_rate": 6.2408163265306125e-06, "loss": 1.2151, "step": 21455 }, { "epoch": 87.57551020408164, "grad_norm": 24.878108978271484, "learning_rate": 6.238775510204082e-06, "loss": 1.5486, "step": 21456 }, { "epoch": 87.57959183673469, "grad_norm": 25.615076065063477, "learning_rate": 6.236734693877552e-06, "loss": 1.5086, "step": 21457 }, { "epoch": 87.58367346938776, "grad_norm": 30.430402755737305, "learning_rate": 6.234693877551021e-06, "loss": 1.1795, "step": 21458 }, { "epoch": 87.58775510204082, "grad_norm": 33.10224151611328, "learning_rate": 6.23265306122449e-06, "loss": 0.7217, "step": 21459 }, { "epoch": 87.59183673469387, "grad_norm": 34.702945709228516, "learning_rate": 6.23061224489796e-06, "loss": 1.0302, "step": 21460 }, { "epoch": 87.59591836734694, "grad_norm": 30.57283592224121, "learning_rate": 6.228571428571429e-06, "loss": 0.5809, "step": 21461 }, { "epoch": 87.6, "grad_norm": 18.698991775512695, "learning_rate": 6.2265306122448985e-06, "loss": 0.2423, "step": 21462 }, { "epoch": 87.60408163265306, "grad_norm": 30.47454833984375, "learning_rate": 6.224489795918367e-06, "loss": 0.4389, "step": 21463 }, { "epoch": 87.60816326530612, "grad_norm": 8.079313278198242, "learning_rate": 6.222448979591837e-06, "loss": 0.0781, "step": 21464 }, { "epoch": 87.61224489795919, "grad_norm": 27.790870666503906, "learning_rate": 6.220408163265307e-06, "loss": 0.1735, "step": 21465 }, { "epoch": 87.61632653061224, "grad_norm": 40.08412551879883, "learning_rate": 6.2183673469387755e-06, "loss": 0.3877, "step": 21466 }, { "epoch": 87.62040816326531, "grad_norm": 29.175609588623047, "learning_rate": 6.216326530612245e-06, "loss": 0.4193, "step": 21467 }, { "epoch": 87.62448979591836, "grad_norm": 39.038612365722656, "learning_rate": 6.214285714285715e-06, "loss": 0.6627, "step": 21468 }, { "epoch": 87.62857142857143, "grad_norm": 27.484479904174805, "learning_rate": 6.212244897959184e-06, "loss": 1.1763, "step": 21469 }, { "epoch": 87.63265306122449, "grad_norm": 32.69994354248047, "learning_rate": 6.210204081632653e-06, "loss": 0.368, "step": 21470 }, { "epoch": 87.63673469387756, "grad_norm": 24.74477195739746, "learning_rate": 6.208163265306123e-06, "loss": 1.3478, "step": 21471 }, { "epoch": 87.64081632653061, "grad_norm": 43.04697036743164, "learning_rate": 6.206122448979592e-06, "loss": 0.3493, "step": 21472 }, { "epoch": 87.64489795918368, "grad_norm": 23.752840042114258, "learning_rate": 6.2040816326530614e-06, "loss": 0.249, "step": 21473 }, { "epoch": 87.64897959183673, "grad_norm": 36.295406341552734, "learning_rate": 6.20204081632653e-06, "loss": 0.9791, "step": 21474 }, { "epoch": 87.65306122448979, "grad_norm": 23.928836822509766, "learning_rate": 6.2e-06, "loss": 1.4016, "step": 21475 }, { "epoch": 87.65714285714286, "grad_norm": 24.081567764282227, "learning_rate": 6.19795918367347e-06, "loss": 1.5231, "step": 21476 }, { "epoch": 87.66122448979591, "grad_norm": 29.753406524658203, "learning_rate": 6.195918367346938e-06, "loss": 1.2895, "step": 21477 }, { "epoch": 87.66530612244898, "grad_norm": 46.2896614074707, "learning_rate": 6.193877551020408e-06, "loss": 0.8233, "step": 21478 }, { "epoch": 87.66938775510204, "grad_norm": 19.34056854248047, "learning_rate": 6.191836734693878e-06, "loss": 2.1008, "step": 21479 }, { "epoch": 87.6734693877551, "grad_norm": 25.435928344726562, "learning_rate": 6.189795918367347e-06, "loss": 1.8005, "step": 21480 }, { "epoch": 87.67755102040816, "grad_norm": 27.902788162231445, "learning_rate": 6.187755102040817e-06, "loss": 1.5534, "step": 21481 }, { "epoch": 87.68163265306123, "grad_norm": 24.032203674316406, "learning_rate": 6.185714285714287e-06, "loss": 0.3585, "step": 21482 }, { "epoch": 87.68571428571428, "grad_norm": 20.6923770904541, "learning_rate": 6.1836734693877556e-06, "loss": 1.9922, "step": 21483 }, { "epoch": 87.68979591836735, "grad_norm": 24.77012825012207, "learning_rate": 6.181632653061225e-06, "loss": 1.3916, "step": 21484 }, { "epoch": 87.6938775510204, "grad_norm": 30.596729278564453, "learning_rate": 6.179591836734694e-06, "loss": 1.304, "step": 21485 }, { "epoch": 87.69795918367348, "grad_norm": 46.52623748779297, "learning_rate": 6.177551020408164e-06, "loss": 0.6315, "step": 21486 }, { "epoch": 87.70204081632653, "grad_norm": 32.87615966796875, "learning_rate": 6.175510204081633e-06, "loss": 0.4852, "step": 21487 }, { "epoch": 87.70612244897958, "grad_norm": 33.805442810058594, "learning_rate": 6.173469387755102e-06, "loss": 1.0553, "step": 21488 }, { "epoch": 87.71020408163265, "grad_norm": 41.085243225097656, "learning_rate": 6.171428571428572e-06, "loss": 0.9835, "step": 21489 }, { "epoch": 87.71428571428571, "grad_norm": 31.458112716674805, "learning_rate": 6.1693877551020415e-06, "loss": 0.3251, "step": 21490 }, { "epoch": 87.71836734693878, "grad_norm": 24.1629695892334, "learning_rate": 6.16734693877551e-06, "loss": 1.8828, "step": 21491 }, { "epoch": 87.72244897959183, "grad_norm": 9.798093795776367, "learning_rate": 6.16530612244898e-06, "loss": 2.985, "step": 21492 }, { "epoch": 87.7265306122449, "grad_norm": 31.966018676757812, "learning_rate": 6.163265306122449e-06, "loss": 1.2943, "step": 21493 }, { "epoch": 87.73061224489796, "grad_norm": 32.6445426940918, "learning_rate": 6.1612244897959185e-06, "loss": 0.5425, "step": 21494 }, { "epoch": 87.73469387755102, "grad_norm": 42.12508010864258, "learning_rate": 6.159183673469388e-06, "loss": 0.9571, "step": 21495 }, { "epoch": 87.73877551020408, "grad_norm": 31.816648483276367, "learning_rate": 6.157142857142857e-06, "loss": 0.4467, "step": 21496 }, { "epoch": 87.74285714285715, "grad_norm": 31.257387161254883, "learning_rate": 6.155102040816327e-06, "loss": 0.3281, "step": 21497 }, { "epoch": 87.7469387755102, "grad_norm": 19.6864070892334, "learning_rate": 6.153061224489796e-06, "loss": 2.0099, "step": 21498 }, { "epoch": 87.75102040816327, "grad_norm": 29.428857803344727, "learning_rate": 6.151020408163265e-06, "loss": 0.5618, "step": 21499 }, { "epoch": 87.75510204081633, "grad_norm": 26.8952693939209, "learning_rate": 6.148979591836735e-06, "loss": 1.5167, "step": 21500 }, { "epoch": 87.7591836734694, "grad_norm": 37.45372009277344, "learning_rate": 6.1469387755102045e-06, "loss": 0.2731, "step": 21501 }, { "epoch": 87.76326530612245, "grad_norm": 28.379993438720703, "learning_rate": 6.144897959183673e-06, "loss": 0.2317, "step": 21502 }, { "epoch": 87.7673469387755, "grad_norm": 40.73072052001953, "learning_rate": 6.142857142857143e-06, "loss": 0.7306, "step": 21503 }, { "epoch": 87.77142857142857, "grad_norm": 32.78712844848633, "learning_rate": 6.140816326530612e-06, "loss": 0.8966, "step": 21504 }, { "epoch": 87.77551020408163, "grad_norm": 35.092628479003906, "learning_rate": 6.1387755102040815e-06, "loss": 1.2107, "step": 21505 }, { "epoch": 87.7795918367347, "grad_norm": 44.025047302246094, "learning_rate": 6.136734693877551e-06, "loss": 0.3598, "step": 21506 }, { "epoch": 87.78367346938775, "grad_norm": 37.17525863647461, "learning_rate": 6.134693877551021e-06, "loss": 0.8103, "step": 21507 }, { "epoch": 87.78775510204082, "grad_norm": 32.70189666748047, "learning_rate": 6.1326530612244905e-06, "loss": 1.5294, "step": 21508 }, { "epoch": 87.79183673469387, "grad_norm": 35.39659881591797, "learning_rate": 6.13061224489796e-06, "loss": 0.2338, "step": 21509 }, { "epoch": 87.79591836734694, "grad_norm": 30.224123001098633, "learning_rate": 6.128571428571429e-06, "loss": 0.3411, "step": 21510 }, { "epoch": 87.8, "grad_norm": 35.219505310058594, "learning_rate": 6.126530612244899e-06, "loss": 0.2808, "step": 21511 }, { "epoch": 87.80408163265307, "grad_norm": 28.933080673217773, "learning_rate": 6.124489795918368e-06, "loss": 0.6893, "step": 21512 }, { "epoch": 87.80816326530612, "grad_norm": 25.071151733398438, "learning_rate": 6.122448979591837e-06, "loss": 1.3845, "step": 21513 }, { "epoch": 87.81224489795919, "grad_norm": 25.671789169311523, "learning_rate": 6.120408163265307e-06, "loss": 1.8879, "step": 21514 }, { "epoch": 87.81632653061224, "grad_norm": 9.967184066772461, "learning_rate": 6.118367346938776e-06, "loss": 0.1249, "step": 21515 }, { "epoch": 87.82040816326531, "grad_norm": 30.86250877380371, "learning_rate": 6.116326530612245e-06, "loss": 1.5642, "step": 21516 }, { "epoch": 87.82448979591837, "grad_norm": 27.448026657104492, "learning_rate": 6.114285714285715e-06, "loss": 0.3071, "step": 21517 }, { "epoch": 87.82857142857142, "grad_norm": 37.240203857421875, "learning_rate": 6.112244897959184e-06, "loss": 1.3468, "step": 21518 }, { "epoch": 87.83265306122449, "grad_norm": 31.63663673400879, "learning_rate": 6.110204081632653e-06, "loss": 1.2108, "step": 21519 }, { "epoch": 87.83673469387755, "grad_norm": 20.917755126953125, "learning_rate": 6.108163265306123e-06, "loss": 1.8862, "step": 21520 }, { "epoch": 87.84081632653061, "grad_norm": 33.22682571411133, "learning_rate": 6.106122448979592e-06, "loss": 0.7191, "step": 21521 }, { "epoch": 87.84489795918367, "grad_norm": 36.875003814697266, "learning_rate": 6.1040816326530616e-06, "loss": 1.2262, "step": 21522 }, { "epoch": 87.84897959183674, "grad_norm": 32.539432525634766, "learning_rate": 6.102040816326531e-06, "loss": 1.2535, "step": 21523 }, { "epoch": 87.85306122448979, "grad_norm": 31.50797462463379, "learning_rate": 6.1e-06, "loss": 0.754, "step": 21524 }, { "epoch": 87.85714285714286, "grad_norm": 23.71267318725586, "learning_rate": 6.09795918367347e-06, "loss": 1.907, "step": 21525 }, { "epoch": 87.86122448979592, "grad_norm": 31.251216888427734, "learning_rate": 6.0959183673469385e-06, "loss": 0.4936, "step": 21526 }, { "epoch": 87.86530612244898, "grad_norm": 39.0860710144043, "learning_rate": 6.093877551020408e-06, "loss": 0.8566, "step": 21527 }, { "epoch": 87.86938775510204, "grad_norm": 27.211977005004883, "learning_rate": 6.091836734693878e-06, "loss": 2.058, "step": 21528 }, { "epoch": 87.87346938775511, "grad_norm": 30.744213104248047, "learning_rate": 6.089795918367347e-06, "loss": 1.6265, "step": 21529 }, { "epoch": 87.87755102040816, "grad_norm": 19.172761917114258, "learning_rate": 6.087755102040816e-06, "loss": 2.3024, "step": 21530 }, { "epoch": 87.88163265306123, "grad_norm": 12.777205467224121, "learning_rate": 6.085714285714286e-06, "loss": 0.0858, "step": 21531 }, { "epoch": 87.88571428571429, "grad_norm": 26.561986923217773, "learning_rate": 6.083673469387755e-06, "loss": 1.1102, "step": 21532 }, { "epoch": 87.88979591836734, "grad_norm": 32.51016616821289, "learning_rate": 6.0816326530612245e-06, "loss": 1.2968, "step": 21533 }, { "epoch": 87.89387755102041, "grad_norm": 33.93257141113281, "learning_rate": 6.079591836734694e-06, "loss": 0.1378, "step": 21534 }, { "epoch": 87.89795918367346, "grad_norm": 26.09267234802246, "learning_rate": 6.077551020408164e-06, "loss": 1.3872, "step": 21535 }, { "epoch": 87.90204081632653, "grad_norm": 38.41912078857422, "learning_rate": 6.075510204081633e-06, "loss": 1.1502, "step": 21536 }, { "epoch": 87.90612244897959, "grad_norm": 29.298887252807617, "learning_rate": 6.073469387755102e-06, "loss": 1.0979, "step": 21537 }, { "epoch": 87.91020408163266, "grad_norm": 33.59914016723633, "learning_rate": 6.071428571428572e-06, "loss": 0.6099, "step": 21538 }, { "epoch": 87.91428571428571, "grad_norm": 31.88381576538086, "learning_rate": 6.069387755102042e-06, "loss": 1.1556, "step": 21539 }, { "epoch": 87.91836734693878, "grad_norm": 37.03872299194336, "learning_rate": 6.0673469387755105e-06, "loss": 0.3875, "step": 21540 }, { "epoch": 87.92244897959183, "grad_norm": 33.186195373535156, "learning_rate": 6.06530612244898e-06, "loss": 0.4832, "step": 21541 }, { "epoch": 87.9265306122449, "grad_norm": 29.725116729736328, "learning_rate": 6.06326530612245e-06, "loss": 0.5638, "step": 21542 }, { "epoch": 87.93061224489796, "grad_norm": 26.338037490844727, "learning_rate": 6.061224489795919e-06, "loss": 1.4962, "step": 21543 }, { "epoch": 87.93469387755103, "grad_norm": 28.71244239807129, "learning_rate": 6.059183673469388e-06, "loss": 1.7231, "step": 21544 }, { "epoch": 87.93877551020408, "grad_norm": 35.697689056396484, "learning_rate": 6.057142857142858e-06, "loss": 0.6018, "step": 21545 }, { "epoch": 87.94285714285714, "grad_norm": 28.946809768676758, "learning_rate": 6.055102040816327e-06, "loss": 0.659, "step": 21546 }, { "epoch": 87.9469387755102, "grad_norm": 38.769710540771484, "learning_rate": 6.0530612244897964e-06, "loss": 0.7859, "step": 21547 }, { "epoch": 87.95102040816326, "grad_norm": 32.72652816772461, "learning_rate": 6.051020408163265e-06, "loss": 1.2651, "step": 21548 }, { "epoch": 87.95510204081633, "grad_norm": 34.60519790649414, "learning_rate": 6.048979591836735e-06, "loss": 1.0365, "step": 21549 }, { "epoch": 87.95918367346938, "grad_norm": 22.680870056152344, "learning_rate": 6.046938775510205e-06, "loss": 1.4655, "step": 21550 }, { "epoch": 87.96326530612245, "grad_norm": 29.53711700439453, "learning_rate": 6.044897959183673e-06, "loss": 0.7701, "step": 21551 }, { "epoch": 87.9673469387755, "grad_norm": 30.01390838623047, "learning_rate": 6.042857142857143e-06, "loss": 1.3016, "step": 21552 }, { "epoch": 87.97142857142858, "grad_norm": 29.971216201782227, "learning_rate": 6.040816326530613e-06, "loss": 1.4601, "step": 21553 }, { "epoch": 87.97551020408163, "grad_norm": 31.573904037475586, "learning_rate": 6.0387755102040816e-06, "loss": 0.4903, "step": 21554 }, { "epoch": 87.9795918367347, "grad_norm": 7.182870388031006, "learning_rate": 6.036734693877551e-06, "loss": 0.0736, "step": 21555 }, { "epoch": 87.98367346938775, "grad_norm": 17.075504302978516, "learning_rate": 6.034693877551021e-06, "loss": 2.5453, "step": 21556 }, { "epoch": 87.98775510204082, "grad_norm": 5.745710849761963, "learning_rate": 6.03265306122449e-06, "loss": 0.0518, "step": 21557 }, { "epoch": 87.99183673469388, "grad_norm": 30.19720458984375, "learning_rate": 6.030612244897959e-06, "loss": 0.5953, "step": 21558 }, { "epoch": 87.99591836734695, "grad_norm": 33.88593292236328, "learning_rate": 6.028571428571428e-06, "loss": 0.3318, "step": 21559 }, { "epoch": 88.0, "grad_norm": 26.195749282836914, "learning_rate": 6.026530612244898e-06, "loss": 0.1652, "step": 21560 }, { "epoch": 88.00408163265305, "grad_norm": 34.02967071533203, "learning_rate": 6.0244897959183675e-06, "loss": 1.1743, "step": 21561 }, { "epoch": 88.00816326530612, "grad_norm": 32.970027923583984, "learning_rate": 6.022448979591837e-06, "loss": 0.4295, "step": 21562 }, { "epoch": 88.01224489795918, "grad_norm": 29.82607078552246, "learning_rate": 6.020408163265306e-06, "loss": 0.8891, "step": 21563 }, { "epoch": 88.01632653061225, "grad_norm": 36.4228515625, "learning_rate": 6.018367346938776e-06, "loss": 0.3364, "step": 21564 }, { "epoch": 88.0204081632653, "grad_norm": 32.18647003173828, "learning_rate": 6.016326530612245e-06, "loss": 0.2539, "step": 21565 }, { "epoch": 88.02448979591837, "grad_norm": 29.026580810546875, "learning_rate": 6.014285714285715e-06, "loss": 1.7015, "step": 21566 }, { "epoch": 88.02857142857142, "grad_norm": 1.2317488193511963, "learning_rate": 6.012244897959185e-06, "loss": 0.0219, "step": 21567 }, { "epoch": 88.0326530612245, "grad_norm": 20.530275344848633, "learning_rate": 6.0102040816326535e-06, "loss": 1.9699, "step": 21568 }, { "epoch": 88.03673469387755, "grad_norm": 33.927040100097656, "learning_rate": 6.008163265306123e-06, "loss": 1.2691, "step": 21569 }, { "epoch": 88.04081632653062, "grad_norm": 29.975372314453125, "learning_rate": 6.006122448979592e-06, "loss": 1.0661, "step": 21570 }, { "epoch": 88.04489795918367, "grad_norm": 32.91726303100586, "learning_rate": 6.004081632653062e-06, "loss": 1.1346, "step": 21571 }, { "epoch": 88.04897959183674, "grad_norm": 32.2495231628418, "learning_rate": 6.002040816326531e-06, "loss": 0.7614, "step": 21572 }, { "epoch": 88.0530612244898, "grad_norm": 33.45914840698242, "learning_rate": 6e-06, "loss": 0.4426, "step": 21573 }, { "epoch": 88.05714285714286, "grad_norm": 33.89619827270508, "learning_rate": 5.99795918367347e-06, "loss": 0.5511, "step": 21574 }, { "epoch": 88.06122448979592, "grad_norm": 24.94171142578125, "learning_rate": 5.9959183673469395e-06, "loss": 1.9068, "step": 21575 }, { "epoch": 88.06530612244897, "grad_norm": 34.45795440673828, "learning_rate": 5.993877551020408e-06, "loss": 0.2945, "step": 21576 }, { "epoch": 88.06938775510204, "grad_norm": 33.089500427246094, "learning_rate": 5.991836734693878e-06, "loss": 0.8658, "step": 21577 }, { "epoch": 88.0734693877551, "grad_norm": 12.589179992675781, "learning_rate": 5.989795918367347e-06, "loss": 0.0959, "step": 21578 }, { "epoch": 88.07755102040817, "grad_norm": 25.08393096923828, "learning_rate": 5.9877551020408165e-06, "loss": 1.0021, "step": 21579 }, { "epoch": 88.08163265306122, "grad_norm": 33.590087890625, "learning_rate": 5.985714285714286e-06, "loss": 0.2686, "step": 21580 }, { "epoch": 88.08571428571429, "grad_norm": 28.431720733642578, "learning_rate": 5.983673469387755e-06, "loss": 0.6937, "step": 21581 }, { "epoch": 88.08979591836734, "grad_norm": 22.957624435424805, "learning_rate": 5.981632653061225e-06, "loss": 0.4034, "step": 21582 }, { "epoch": 88.09387755102041, "grad_norm": 39.801795959472656, "learning_rate": 5.979591836734694e-06, "loss": 0.6878, "step": 21583 }, { "epoch": 88.09795918367347, "grad_norm": 28.21201515197754, "learning_rate": 5.977551020408163e-06, "loss": 0.6666, "step": 21584 }, { "epoch": 88.10204081632654, "grad_norm": 29.27252197265625, "learning_rate": 5.975510204081633e-06, "loss": 1.1489, "step": 21585 }, { "epoch": 88.10612244897959, "grad_norm": 38.716697692871094, "learning_rate": 5.9734693877551024e-06, "loss": 0.5883, "step": 21586 }, { "epoch": 88.11020408163266, "grad_norm": 30.94137191772461, "learning_rate": 5.971428571428571e-06, "loss": 0.707, "step": 21587 }, { "epoch": 88.11428571428571, "grad_norm": 12.608786582946777, "learning_rate": 5.969387755102041e-06, "loss": 0.0793, "step": 21588 }, { "epoch": 88.11836734693877, "grad_norm": 12.108631134033203, "learning_rate": 5.96734693877551e-06, "loss": 0.1249, "step": 21589 }, { "epoch": 88.12244897959184, "grad_norm": 24.927824020385742, "learning_rate": 5.965306122448979e-06, "loss": 0.2676, "step": 21590 }, { "epoch": 88.12653061224489, "grad_norm": 36.934486389160156, "learning_rate": 5.963265306122449e-06, "loss": 1.2057, "step": 21591 }, { "epoch": 88.13061224489796, "grad_norm": 30.972309112548828, "learning_rate": 5.961224489795919e-06, "loss": 0.2945, "step": 21592 }, { "epoch": 88.13469387755102, "grad_norm": 21.04334259033203, "learning_rate": 5.959183673469388e-06, "loss": 0.2127, "step": 21593 }, { "epoch": 88.13877551020408, "grad_norm": 31.35117530822754, "learning_rate": 5.957142857142858e-06, "loss": 1.1955, "step": 21594 }, { "epoch": 88.14285714285714, "grad_norm": 35.82870101928711, "learning_rate": 5.955102040816327e-06, "loss": 0.3382, "step": 21595 }, { "epoch": 88.14693877551021, "grad_norm": 6.471056938171387, "learning_rate": 5.9530612244897966e-06, "loss": 0.0817, "step": 21596 }, { "epoch": 88.15102040816326, "grad_norm": 26.318742752075195, "learning_rate": 5.951020408163266e-06, "loss": 0.1295, "step": 21597 }, { "epoch": 88.15510204081633, "grad_norm": 46.91497802734375, "learning_rate": 5.948979591836735e-06, "loss": 0.8198, "step": 21598 }, { "epoch": 88.15918367346939, "grad_norm": 33.199974060058594, "learning_rate": 5.946938775510205e-06, "loss": 1.1732, "step": 21599 }, { "epoch": 88.16326530612245, "grad_norm": 32.867549896240234, "learning_rate": 5.9448979591836735e-06, "loss": 1.2506, "step": 21600 }, { "epoch": 88.16734693877551, "grad_norm": 19.261873245239258, "learning_rate": 5.942857142857143e-06, "loss": 1.9494, "step": 21601 }, { "epoch": 88.17142857142858, "grad_norm": 24.590126037597656, "learning_rate": 5.940816326530613e-06, "loss": 0.6672, "step": 21602 }, { "epoch": 88.17551020408163, "grad_norm": 21.973512649536133, "learning_rate": 5.938775510204082e-06, "loss": 1.5318, "step": 21603 }, { "epoch": 88.17959183673469, "grad_norm": 22.134485244750977, "learning_rate": 5.936734693877551e-06, "loss": 1.7964, "step": 21604 }, { "epoch": 88.18367346938776, "grad_norm": 30.986919403076172, "learning_rate": 5.934693877551021e-06, "loss": 0.4034, "step": 21605 }, { "epoch": 88.18775510204081, "grad_norm": 42.64622497558594, "learning_rate": 5.93265306122449e-06, "loss": 0.2711, "step": 21606 }, { "epoch": 88.19183673469388, "grad_norm": 28.45125389099121, "learning_rate": 5.9306122448979595e-06, "loss": 1.4063, "step": 21607 }, { "epoch": 88.19591836734693, "grad_norm": 28.77992057800293, "learning_rate": 5.928571428571429e-06, "loss": 0.9561, "step": 21608 }, { "epoch": 88.2, "grad_norm": 30.88871192932129, "learning_rate": 5.926530612244898e-06, "loss": 1.1713, "step": 21609 }, { "epoch": 88.20408163265306, "grad_norm": 33.673866271972656, "learning_rate": 5.924489795918368e-06, "loss": 0.9008, "step": 21610 }, { "epoch": 88.20816326530613, "grad_norm": 30.190019607543945, "learning_rate": 5.9224489795918365e-06, "loss": 0.6143, "step": 21611 }, { "epoch": 88.21224489795918, "grad_norm": 29.947757720947266, "learning_rate": 5.920408163265306e-06, "loss": 1.0574, "step": 21612 }, { "epoch": 88.21632653061225, "grad_norm": 26.44293212890625, "learning_rate": 5.918367346938776e-06, "loss": 1.4118, "step": 21613 }, { "epoch": 88.2204081632653, "grad_norm": 34.442657470703125, "learning_rate": 5.916326530612245e-06, "loss": 0.8698, "step": 21614 }, { "epoch": 88.22448979591837, "grad_norm": 28.54976463317871, "learning_rate": 5.914285714285714e-06, "loss": 0.7155, "step": 21615 }, { "epoch": 88.22857142857143, "grad_norm": 29.373950958251953, "learning_rate": 5.912244897959184e-06, "loss": 0.3551, "step": 21616 }, { "epoch": 88.2326530612245, "grad_norm": 21.739187240600586, "learning_rate": 5.910204081632653e-06, "loss": 1.972, "step": 21617 }, { "epoch": 88.23673469387755, "grad_norm": 29.908226013183594, "learning_rate": 5.9081632653061224e-06, "loss": 0.891, "step": 21618 }, { "epoch": 88.2408163265306, "grad_norm": 37.89997482299805, "learning_rate": 5.906122448979592e-06, "loss": 0.5491, "step": 21619 }, { "epoch": 88.24489795918367, "grad_norm": 30.349193572998047, "learning_rate": 5.904081632653062e-06, "loss": 1.3756, "step": 21620 }, { "epoch": 88.24897959183673, "grad_norm": 30.698020935058594, "learning_rate": 5.9020408163265314e-06, "loss": 1.3503, "step": 21621 }, { "epoch": 88.2530612244898, "grad_norm": 49.10581970214844, "learning_rate": 5.9e-06, "loss": 0.8236, "step": 21622 }, { "epoch": 88.25714285714285, "grad_norm": 35.3376579284668, "learning_rate": 5.89795918367347e-06, "loss": 0.5182, "step": 21623 }, { "epoch": 88.26122448979592, "grad_norm": 20.931392669677734, "learning_rate": 5.89591836734694e-06, "loss": 0.1978, "step": 21624 }, { "epoch": 88.26530612244898, "grad_norm": 26.097686767578125, "learning_rate": 5.893877551020408e-06, "loss": 2.0053, "step": 21625 }, { "epoch": 88.26938775510204, "grad_norm": 35.6378059387207, "learning_rate": 5.891836734693878e-06, "loss": 0.7929, "step": 21626 }, { "epoch": 88.2734693877551, "grad_norm": 23.260412216186523, "learning_rate": 5.889795918367348e-06, "loss": 1.486, "step": 21627 }, { "epoch": 88.27755102040817, "grad_norm": 33.592227935791016, "learning_rate": 5.8877551020408166e-06, "loss": 0.3449, "step": 21628 }, { "epoch": 88.28163265306122, "grad_norm": 28.556612014770508, "learning_rate": 5.885714285714286e-06, "loss": 1.2613, "step": 21629 }, { "epoch": 88.28571428571429, "grad_norm": 27.02092170715332, "learning_rate": 5.883673469387756e-06, "loss": 0.342, "step": 21630 }, { "epoch": 88.28979591836735, "grad_norm": 10.690366744995117, "learning_rate": 5.881632653061225e-06, "loss": 0.117, "step": 21631 }, { "epoch": 88.29387755102042, "grad_norm": 36.19309997558594, "learning_rate": 5.879591836734694e-06, "loss": 1.01, "step": 21632 }, { "epoch": 88.29795918367347, "grad_norm": 31.597200393676758, "learning_rate": 5.877551020408163e-06, "loss": 0.6818, "step": 21633 }, { "epoch": 88.30204081632652, "grad_norm": 23.852096557617188, "learning_rate": 5.875510204081633e-06, "loss": 1.2182, "step": 21634 }, { "epoch": 88.3061224489796, "grad_norm": 22.33576202392578, "learning_rate": 5.8734693877551025e-06, "loss": 1.3921, "step": 21635 }, { "epoch": 88.31020408163265, "grad_norm": 33.93335723876953, "learning_rate": 5.871428571428571e-06, "loss": 1.1693, "step": 21636 }, { "epoch": 88.31428571428572, "grad_norm": 37.4589958190918, "learning_rate": 5.869387755102041e-06, "loss": 0.5079, "step": 21637 }, { "epoch": 88.31836734693877, "grad_norm": 34.7199592590332, "learning_rate": 5.867346938775511e-06, "loss": 1.4232, "step": 21638 }, { "epoch": 88.32244897959184, "grad_norm": 28.460969924926758, "learning_rate": 5.8653061224489795e-06, "loss": 0.3099, "step": 21639 }, { "epoch": 88.3265306122449, "grad_norm": 11.49692153930664, "learning_rate": 5.863265306122449e-06, "loss": 0.0845, "step": 21640 }, { "epoch": 88.33061224489796, "grad_norm": 36.85008239746094, "learning_rate": 5.861224489795919e-06, "loss": 0.9981, "step": 21641 }, { "epoch": 88.33469387755102, "grad_norm": 29.907262802124023, "learning_rate": 5.859183673469388e-06, "loss": 0.2398, "step": 21642 }, { "epoch": 88.33877551020409, "grad_norm": 33.00830841064453, "learning_rate": 5.857142857142857e-06, "loss": 0.6548, "step": 21643 }, { "epoch": 88.34285714285714, "grad_norm": 24.252046585083008, "learning_rate": 5.855102040816326e-06, "loss": 1.997, "step": 21644 }, { "epoch": 88.34693877551021, "grad_norm": 29.84336280822754, "learning_rate": 5.853061224489796e-06, "loss": 0.5463, "step": 21645 }, { "epoch": 88.35102040816327, "grad_norm": 37.193580627441406, "learning_rate": 5.8510204081632655e-06, "loss": 0.8309, "step": 21646 }, { "epoch": 88.35510204081632, "grad_norm": 29.9312744140625, "learning_rate": 5.848979591836735e-06, "loss": 0.2949, "step": 21647 }, { "epoch": 88.35918367346939, "grad_norm": 42.50560760498047, "learning_rate": 5.846938775510204e-06, "loss": 0.8876, "step": 21648 }, { "epoch": 88.36326530612244, "grad_norm": 38.13081359863281, "learning_rate": 5.844897959183674e-06, "loss": 0.5179, "step": 21649 }, { "epoch": 88.36734693877551, "grad_norm": 25.437213897705078, "learning_rate": 5.842857142857143e-06, "loss": 1.4924, "step": 21650 }, { "epoch": 88.37142857142857, "grad_norm": 9.64837646484375, "learning_rate": 5.840816326530613e-06, "loss": 2.976, "step": 21651 }, { "epoch": 88.37551020408164, "grad_norm": 37.01746368408203, "learning_rate": 5.838775510204083e-06, "loss": 1.1017, "step": 21652 }, { "epoch": 88.37959183673469, "grad_norm": 31.640357971191406, "learning_rate": 5.8367346938775515e-06, "loss": 0.5914, "step": 21653 }, { "epoch": 88.38367346938776, "grad_norm": 36.700862884521484, "learning_rate": 5.834693877551021e-06, "loss": 0.9552, "step": 21654 }, { "epoch": 88.38775510204081, "grad_norm": 38.051883697509766, "learning_rate": 5.83265306122449e-06, "loss": 0.8061, "step": 21655 }, { "epoch": 88.39183673469388, "grad_norm": 36.76902770996094, "learning_rate": 5.83061224489796e-06, "loss": 0.591, "step": 21656 }, { "epoch": 88.39591836734694, "grad_norm": 27.826454162597656, "learning_rate": 5.828571428571429e-06, "loss": 1.0931, "step": 21657 }, { "epoch": 88.4, "grad_norm": 34.53862762451172, "learning_rate": 5.826530612244898e-06, "loss": 1.241, "step": 21658 }, { "epoch": 88.40408163265306, "grad_norm": 27.02831268310547, "learning_rate": 5.824489795918368e-06, "loss": 1.8809, "step": 21659 }, { "epoch": 88.40816326530613, "grad_norm": 31.696035385131836, "learning_rate": 5.8224489795918374e-06, "loss": 1.3298, "step": 21660 }, { "epoch": 88.41224489795918, "grad_norm": 40.66306686401367, "learning_rate": 5.820408163265306e-06, "loss": 0.5011, "step": 21661 }, { "epoch": 88.41632653061224, "grad_norm": 23.341875076293945, "learning_rate": 5.818367346938776e-06, "loss": 1.3196, "step": 21662 }, { "epoch": 88.42040816326531, "grad_norm": 25.44322395324707, "learning_rate": 5.816326530612245e-06, "loss": 1.6305, "step": 21663 }, { "epoch": 88.42448979591836, "grad_norm": 21.25469970703125, "learning_rate": 5.814285714285714e-06, "loss": 1.4203, "step": 21664 }, { "epoch": 88.42857142857143, "grad_norm": 23.942832946777344, "learning_rate": 5.812244897959184e-06, "loss": 1.8072, "step": 21665 }, { "epoch": 88.43265306122449, "grad_norm": 30.956941604614258, "learning_rate": 5.810204081632653e-06, "loss": 0.6141, "step": 21666 }, { "epoch": 88.43673469387755, "grad_norm": 18.178354263305664, "learning_rate": 5.8081632653061226e-06, "loss": 0.2217, "step": 21667 }, { "epoch": 88.44081632653061, "grad_norm": 27.58545684814453, "learning_rate": 5.806122448979592e-06, "loss": 0.9426, "step": 21668 }, { "epoch": 88.44489795918368, "grad_norm": 12.928504943847656, "learning_rate": 5.804081632653061e-06, "loss": 0.0825, "step": 21669 }, { "epoch": 88.44897959183673, "grad_norm": 7.439486026763916, "learning_rate": 5.802040816326531e-06, "loss": 0.066, "step": 21670 }, { "epoch": 88.4530612244898, "grad_norm": 26.736814498901367, "learning_rate": 5.8e-06, "loss": 1.9555, "step": 21671 }, { "epoch": 88.45714285714286, "grad_norm": 32.06047058105469, "learning_rate": 5.797959183673469e-06, "loss": 0.518, "step": 21672 }, { "epoch": 88.46122448979592, "grad_norm": 23.648120880126953, "learning_rate": 5.795918367346939e-06, "loss": 0.3432, "step": 21673 }, { "epoch": 88.46530612244898, "grad_norm": 30.737783432006836, "learning_rate": 5.7938775510204085e-06, "loss": 1.5755, "step": 21674 }, { "epoch": 88.46938775510205, "grad_norm": 27.09312629699707, "learning_rate": 5.791836734693877e-06, "loss": 1.489, "step": 21675 }, { "epoch": 88.4734693877551, "grad_norm": 23.509044647216797, "learning_rate": 5.789795918367347e-06, "loss": 0.2287, "step": 21676 }, { "epoch": 88.47755102040816, "grad_norm": 30.54435157775879, "learning_rate": 5.787755102040817e-06, "loss": 0.5443, "step": 21677 }, { "epoch": 88.48163265306123, "grad_norm": 31.916536331176758, "learning_rate": 5.785714285714286e-06, "loss": 0.6041, "step": 21678 }, { "epoch": 88.48571428571428, "grad_norm": 34.71826934814453, "learning_rate": 5.783673469387756e-06, "loss": 0.3337, "step": 21679 }, { "epoch": 88.48979591836735, "grad_norm": 31.663951873779297, "learning_rate": 5.781632653061225e-06, "loss": 0.8173, "step": 21680 }, { "epoch": 88.4938775510204, "grad_norm": 29.856962203979492, "learning_rate": 5.7795918367346945e-06, "loss": 1.5621, "step": 21681 }, { "epoch": 88.49795918367347, "grad_norm": 24.147775650024414, "learning_rate": 5.777551020408164e-06, "loss": 0.304, "step": 21682 }, { "epoch": 88.50204081632653, "grad_norm": 26.247390747070312, "learning_rate": 5.775510204081633e-06, "loss": 1.1372, "step": 21683 }, { "epoch": 88.5061224489796, "grad_norm": 24.815771102905273, "learning_rate": 5.773469387755103e-06, "loss": 0.182, "step": 21684 }, { "epoch": 88.51020408163265, "grad_norm": 29.914173126220703, "learning_rate": 5.7714285714285715e-06, "loss": 0.5004, "step": 21685 }, { "epoch": 88.51428571428572, "grad_norm": 34.33968734741211, "learning_rate": 5.769387755102041e-06, "loss": 0.3697, "step": 21686 }, { "epoch": 88.51836734693877, "grad_norm": 32.90266418457031, "learning_rate": 5.767346938775511e-06, "loss": 0.5499, "step": 21687 }, { "epoch": 88.52244897959184, "grad_norm": 28.059057235717773, "learning_rate": 5.76530612244898e-06, "loss": 0.4142, "step": 21688 }, { "epoch": 88.5265306122449, "grad_norm": 29.061279296875, "learning_rate": 5.763265306122449e-06, "loss": 1.3744, "step": 21689 }, { "epoch": 88.53061224489795, "grad_norm": 27.792072296142578, "learning_rate": 5.761224489795919e-06, "loss": 1.4038, "step": 21690 }, { "epoch": 88.53469387755102, "grad_norm": 35.9986686706543, "learning_rate": 5.759183673469388e-06, "loss": 0.8744, "step": 21691 }, { "epoch": 88.53877551020408, "grad_norm": 44.60609817504883, "learning_rate": 5.7571428571428574e-06, "loss": 0.7089, "step": 21692 }, { "epoch": 88.54285714285714, "grad_norm": 31.986480712890625, "learning_rate": 5.755102040816327e-06, "loss": 0.9422, "step": 21693 }, { "epoch": 88.5469387755102, "grad_norm": 8.536189079284668, "learning_rate": 5.753061224489796e-06, "loss": 0.0781, "step": 21694 }, { "epoch": 88.55102040816327, "grad_norm": 39.625396728515625, "learning_rate": 5.751020408163266e-06, "loss": 0.9736, "step": 21695 }, { "epoch": 88.55510204081632, "grad_norm": 25.499984741210938, "learning_rate": 5.748979591836734e-06, "loss": 1.8742, "step": 21696 }, { "epoch": 88.55918367346939, "grad_norm": 28.759977340698242, "learning_rate": 5.746938775510204e-06, "loss": 1.3879, "step": 21697 }, { "epoch": 88.56326530612245, "grad_norm": 45.074974060058594, "learning_rate": 5.744897959183674e-06, "loss": 0.3179, "step": 21698 }, { "epoch": 88.56734693877551, "grad_norm": 28.27936363220215, "learning_rate": 5.7428571428571426e-06, "loss": 1.5009, "step": 21699 }, { "epoch": 88.57142857142857, "grad_norm": 26.650630950927734, "learning_rate": 5.740816326530612e-06, "loss": 0.2615, "step": 21700 }, { "epoch": 88.57551020408164, "grad_norm": 34.22227478027344, "learning_rate": 5.738775510204082e-06, "loss": 0.4702, "step": 21701 }, { "epoch": 88.57959183673469, "grad_norm": 25.94367790222168, "learning_rate": 5.736734693877551e-06, "loss": 1.5758, "step": 21702 }, { "epoch": 88.58367346938776, "grad_norm": 39.6415901184082, "learning_rate": 5.73469387755102e-06, "loss": 0.7992, "step": 21703 }, { "epoch": 88.58775510204082, "grad_norm": 23.118528366088867, "learning_rate": 5.73265306122449e-06, "loss": 1.9058, "step": 21704 }, { "epoch": 88.59183673469387, "grad_norm": 28.340444564819336, "learning_rate": 5.73061224489796e-06, "loss": 1.5123, "step": 21705 }, { "epoch": 88.59591836734694, "grad_norm": 32.89254379272461, "learning_rate": 5.728571428571429e-06, "loss": 1.2558, "step": 21706 }, { "epoch": 88.6, "grad_norm": 40.14963150024414, "learning_rate": 5.726530612244898e-06, "loss": 1.1108, "step": 21707 }, { "epoch": 88.60408163265306, "grad_norm": 34.87159729003906, "learning_rate": 5.724489795918368e-06, "loss": 1.27, "step": 21708 }, { "epoch": 88.60816326530612, "grad_norm": 32.66883087158203, "learning_rate": 5.7224489795918375e-06, "loss": 1.1522, "step": 21709 }, { "epoch": 88.61224489795919, "grad_norm": 30.987991333007812, "learning_rate": 5.720408163265306e-06, "loss": 0.4896, "step": 21710 }, { "epoch": 88.61632653061224, "grad_norm": 34.021114349365234, "learning_rate": 5.718367346938776e-06, "loss": 0.8651, "step": 21711 }, { "epoch": 88.62040816326531, "grad_norm": 15.822572708129883, "learning_rate": 5.716326530612246e-06, "loss": 0.1083, "step": 21712 }, { "epoch": 88.62448979591836, "grad_norm": 17.37964630126953, "learning_rate": 5.7142857142857145e-06, "loss": 2.216, "step": 21713 }, { "epoch": 88.62857142857143, "grad_norm": 30.143543243408203, "learning_rate": 5.712244897959184e-06, "loss": 0.2179, "step": 21714 }, { "epoch": 88.63265306122449, "grad_norm": 37.5107307434082, "learning_rate": 5.710204081632654e-06, "loss": 0.6152, "step": 21715 }, { "epoch": 88.63673469387756, "grad_norm": 15.582965850830078, "learning_rate": 5.708163265306123e-06, "loss": 0.1141, "step": 21716 }, { "epoch": 88.64081632653061, "grad_norm": 28.31304359436035, "learning_rate": 5.706122448979592e-06, "loss": 0.9732, "step": 21717 }, { "epoch": 88.64489795918368, "grad_norm": 40.63117980957031, "learning_rate": 5.704081632653061e-06, "loss": 1.0685, "step": 21718 }, { "epoch": 88.64897959183673, "grad_norm": 18.012924194335938, "learning_rate": 5.702040816326531e-06, "loss": 0.1072, "step": 21719 }, { "epoch": 88.65306122448979, "grad_norm": 33.129493713378906, "learning_rate": 5.7000000000000005e-06, "loss": 1.0337, "step": 21720 }, { "epoch": 88.65714285714286, "grad_norm": 25.898923873901367, "learning_rate": 5.697959183673469e-06, "loss": 1.4649, "step": 21721 }, { "epoch": 88.66122448979591, "grad_norm": 31.10823631286621, "learning_rate": 5.695918367346939e-06, "loss": 0.6979, "step": 21722 }, { "epoch": 88.66530612244898, "grad_norm": 31.43700408935547, "learning_rate": 5.693877551020409e-06, "loss": 1.2985, "step": 21723 }, { "epoch": 88.66938775510204, "grad_norm": 23.97286033630371, "learning_rate": 5.6918367346938775e-06, "loss": 0.6144, "step": 21724 }, { "epoch": 88.6734693877551, "grad_norm": 25.670671463012695, "learning_rate": 5.689795918367347e-06, "loss": 1.5498, "step": 21725 }, { "epoch": 88.67755102040816, "grad_norm": 38.21060562133789, "learning_rate": 5.687755102040817e-06, "loss": 0.5375, "step": 21726 }, { "epoch": 88.68163265306123, "grad_norm": 31.781034469604492, "learning_rate": 5.685714285714286e-06, "loss": 0.3264, "step": 21727 }, { "epoch": 88.68571428571428, "grad_norm": 31.08612060546875, "learning_rate": 5.683673469387755e-06, "loss": 0.374, "step": 21728 }, { "epoch": 88.68979591836735, "grad_norm": 30.351722717285156, "learning_rate": 5.681632653061224e-06, "loss": 1.553, "step": 21729 }, { "epoch": 88.6938775510204, "grad_norm": 39.0578498840332, "learning_rate": 5.679591836734694e-06, "loss": 0.4143, "step": 21730 }, { "epoch": 88.69795918367348, "grad_norm": 38.68354797363281, "learning_rate": 5.6775510204081634e-06, "loss": 0.7861, "step": 21731 }, { "epoch": 88.70204081632653, "grad_norm": 33.28862762451172, "learning_rate": 5.675510204081633e-06, "loss": 1.1486, "step": 21732 }, { "epoch": 88.70612244897958, "grad_norm": 32.40489196777344, "learning_rate": 5.673469387755103e-06, "loss": 0.7451, "step": 21733 }, { "epoch": 88.71020408163265, "grad_norm": 36.32466506958008, "learning_rate": 5.671428571428572e-06, "loss": 0.4657, "step": 21734 }, { "epoch": 88.71428571428571, "grad_norm": 32.633262634277344, "learning_rate": 5.669387755102041e-06, "loss": 0.6459, "step": 21735 }, { "epoch": 88.71836734693878, "grad_norm": 32.06937026977539, "learning_rate": 5.667346938775511e-06, "loss": 0.9026, "step": 21736 }, { "epoch": 88.72244897959183, "grad_norm": 28.93990707397461, "learning_rate": 5.665306122448981e-06, "loss": 1.1395, "step": 21737 }, { "epoch": 88.7265306122449, "grad_norm": 20.580419540405273, "learning_rate": 5.663265306122449e-06, "loss": 0.2455, "step": 21738 }, { "epoch": 88.73061224489796, "grad_norm": 32.266845703125, "learning_rate": 5.661224489795919e-06, "loss": 1.1513, "step": 21739 }, { "epoch": 88.73469387755102, "grad_norm": 30.903043746948242, "learning_rate": 5.659183673469388e-06, "loss": 1.28, "step": 21740 }, { "epoch": 88.73877551020408, "grad_norm": 29.67064666748047, "learning_rate": 5.6571428571428576e-06, "loss": 1.584, "step": 21741 }, { "epoch": 88.74285714285715, "grad_norm": 33.99845504760742, "learning_rate": 5.655102040816327e-06, "loss": 0.9239, "step": 21742 }, { "epoch": 88.7469387755102, "grad_norm": 40.38273239135742, "learning_rate": 5.653061224489796e-06, "loss": 1.0182, "step": 21743 }, { "epoch": 88.75102040816327, "grad_norm": 26.2811222076416, "learning_rate": 5.651020408163266e-06, "loss": 1.8907, "step": 21744 }, { "epoch": 88.75510204081633, "grad_norm": 32.26481246948242, "learning_rate": 5.648979591836735e-06, "loss": 0.4236, "step": 21745 }, { "epoch": 88.7591836734694, "grad_norm": 39.706607818603516, "learning_rate": 5.646938775510204e-06, "loss": 0.3366, "step": 21746 }, { "epoch": 88.76326530612245, "grad_norm": 39.44765090942383, "learning_rate": 5.644897959183674e-06, "loss": 0.6485, "step": 21747 }, { "epoch": 88.7673469387755, "grad_norm": 28.04143714904785, "learning_rate": 5.642857142857143e-06, "loss": 1.4361, "step": 21748 }, { "epoch": 88.77142857142857, "grad_norm": 37.01230239868164, "learning_rate": 5.640816326530612e-06, "loss": 0.7533, "step": 21749 }, { "epoch": 88.77551020408163, "grad_norm": 20.783105850219727, "learning_rate": 5.638775510204082e-06, "loss": 0.1461, "step": 21750 }, { "epoch": 88.7795918367347, "grad_norm": 35.17771530151367, "learning_rate": 5.636734693877551e-06, "loss": 0.3129, "step": 21751 }, { "epoch": 88.78367346938775, "grad_norm": 16.453914642333984, "learning_rate": 5.6346938775510205e-06, "loss": 2.5306, "step": 21752 }, { "epoch": 88.78775510204082, "grad_norm": 28.502628326416016, "learning_rate": 5.63265306122449e-06, "loss": 1.6304, "step": 21753 }, { "epoch": 88.79183673469387, "grad_norm": 24.329072952270508, "learning_rate": 5.630612244897959e-06, "loss": 0.2375, "step": 21754 }, { "epoch": 88.79591836734694, "grad_norm": 22.354881286621094, "learning_rate": 5.628571428571429e-06, "loss": 1.3458, "step": 21755 }, { "epoch": 88.8, "grad_norm": 40.16929244995117, "learning_rate": 5.626530612244898e-06, "loss": 1.1677, "step": 21756 }, { "epoch": 88.80408163265307, "grad_norm": 22.041837692260742, "learning_rate": 5.624489795918367e-06, "loss": 0.2808, "step": 21757 }, { "epoch": 88.80816326530612, "grad_norm": 36.84705352783203, "learning_rate": 5.622448979591837e-06, "loss": 1.3028, "step": 21758 }, { "epoch": 88.81224489795919, "grad_norm": 37.930320739746094, "learning_rate": 5.6204081632653065e-06, "loss": 0.3085, "step": 21759 }, { "epoch": 88.81632653061224, "grad_norm": 27.550798416137695, "learning_rate": 5.618367346938775e-06, "loss": 1.3916, "step": 21760 }, { "epoch": 88.82040816326531, "grad_norm": 32.099029541015625, "learning_rate": 5.616326530612245e-06, "loss": 1.1485, "step": 21761 }, { "epoch": 88.82448979591837, "grad_norm": 23.867612838745117, "learning_rate": 5.614285714285715e-06, "loss": 0.3402, "step": 21762 }, { "epoch": 88.82857142857142, "grad_norm": 26.45589828491211, "learning_rate": 5.612244897959184e-06, "loss": 1.4509, "step": 21763 }, { "epoch": 88.83265306122449, "grad_norm": 25.9295711517334, "learning_rate": 5.610204081632654e-06, "loss": 1.4163, "step": 21764 }, { "epoch": 88.83673469387755, "grad_norm": 31.526344299316406, "learning_rate": 5.608163265306123e-06, "loss": 0.7469, "step": 21765 }, { "epoch": 88.84081632653061, "grad_norm": 31.015317916870117, "learning_rate": 5.6061224489795924e-06, "loss": 0.3599, "step": 21766 }, { "epoch": 88.84489795918367, "grad_norm": 27.011157989501953, "learning_rate": 5.604081632653062e-06, "loss": 1.3599, "step": 21767 }, { "epoch": 88.84897959183674, "grad_norm": 23.836149215698242, "learning_rate": 5.602040816326531e-06, "loss": 1.9035, "step": 21768 }, { "epoch": 88.85306122448979, "grad_norm": 28.947919845581055, "learning_rate": 5.600000000000001e-06, "loss": 1.4451, "step": 21769 }, { "epoch": 88.85714285714286, "grad_norm": 25.322246551513672, "learning_rate": 5.597959183673469e-06, "loss": 1.8132, "step": 21770 }, { "epoch": 88.86122448979592, "grad_norm": 29.99505043029785, "learning_rate": 5.595918367346939e-06, "loss": 1.1395, "step": 21771 }, { "epoch": 88.86530612244898, "grad_norm": 36.86590576171875, "learning_rate": 5.593877551020409e-06, "loss": 0.2127, "step": 21772 }, { "epoch": 88.86938775510204, "grad_norm": 34.74433135986328, "learning_rate": 5.5918367346938776e-06, "loss": 0.5918, "step": 21773 }, { "epoch": 88.87346938775511, "grad_norm": 30.71963882446289, "learning_rate": 5.589795918367347e-06, "loss": 0.8676, "step": 21774 }, { "epoch": 88.87755102040816, "grad_norm": 38.281837463378906, "learning_rate": 5.587755102040817e-06, "loss": 1.0183, "step": 21775 }, { "epoch": 88.88163265306123, "grad_norm": 29.483932495117188, "learning_rate": 5.585714285714286e-06, "loss": 0.324, "step": 21776 }, { "epoch": 88.88571428571429, "grad_norm": 30.475954055786133, "learning_rate": 5.583673469387755e-06, "loss": 1.3508, "step": 21777 }, { "epoch": 88.88979591836734, "grad_norm": 38.48759841918945, "learning_rate": 5.581632653061225e-06, "loss": 1.3583, "step": 21778 }, { "epoch": 88.89387755102041, "grad_norm": 55.879112243652344, "learning_rate": 5.579591836734694e-06, "loss": 1.1594, "step": 21779 }, { "epoch": 88.89795918367346, "grad_norm": 33.907936096191406, "learning_rate": 5.5775510204081635e-06, "loss": 1.0429, "step": 21780 }, { "epoch": 88.90204081632653, "grad_norm": 24.019140243530273, "learning_rate": 5.575510204081632e-06, "loss": 0.2919, "step": 21781 }, { "epoch": 88.90612244897959, "grad_norm": 26.138952255249023, "learning_rate": 5.573469387755102e-06, "loss": 1.3801, "step": 21782 }, { "epoch": 88.91020408163266, "grad_norm": 19.4923095703125, "learning_rate": 5.571428571428572e-06, "loss": 2.0703, "step": 21783 }, { "epoch": 88.91428571428571, "grad_norm": 30.81835174560547, "learning_rate": 5.5693877551020405e-06, "loss": 0.67, "step": 21784 }, { "epoch": 88.91836734693878, "grad_norm": 26.901002883911133, "learning_rate": 5.56734693877551e-06, "loss": 0.2689, "step": 21785 }, { "epoch": 88.92244897959183, "grad_norm": 49.40946578979492, "learning_rate": 5.56530612244898e-06, "loss": 0.8563, "step": 21786 }, { "epoch": 88.9265306122449, "grad_norm": 22.20785903930664, "learning_rate": 5.563265306122449e-06, "loss": 0.2015, "step": 21787 }, { "epoch": 88.93061224489796, "grad_norm": 47.19584274291992, "learning_rate": 5.561224489795918e-06, "loss": 0.7072, "step": 21788 }, { "epoch": 88.93469387755103, "grad_norm": 18.558483123779297, "learning_rate": 5.559183673469388e-06, "loss": 2.2873, "step": 21789 }, { "epoch": 88.93877551020408, "grad_norm": 26.472957611083984, "learning_rate": 5.557142857142858e-06, "loss": 1.3169, "step": 21790 }, { "epoch": 88.94285714285714, "grad_norm": 31.152076721191406, "learning_rate": 5.555102040816327e-06, "loss": 0.6352, "step": 21791 }, { "epoch": 88.9469387755102, "grad_norm": 35.34453201293945, "learning_rate": 5.553061224489796e-06, "loss": 0.3084, "step": 21792 }, { "epoch": 88.95102040816326, "grad_norm": 16.35975456237793, "learning_rate": 5.551020408163266e-06, "loss": 0.0888, "step": 21793 }, { "epoch": 88.95510204081633, "grad_norm": 38.16307067871094, "learning_rate": 5.5489795918367355e-06, "loss": 0.3423, "step": 21794 }, { "epoch": 88.95918367346938, "grad_norm": 5.601729869842529, "learning_rate": 5.546938775510204e-06, "loss": 0.054, "step": 21795 }, { "epoch": 88.96326530612245, "grad_norm": 23.863115310668945, "learning_rate": 5.544897959183674e-06, "loss": 2.2299, "step": 21796 }, { "epoch": 88.9673469387755, "grad_norm": 31.595354080200195, "learning_rate": 5.542857142857144e-06, "loss": 1.2886, "step": 21797 }, { "epoch": 88.97142857142858, "grad_norm": 26.59532356262207, "learning_rate": 5.5408163265306125e-06, "loss": 1.4216, "step": 21798 }, { "epoch": 88.97551020408163, "grad_norm": 37.40589141845703, "learning_rate": 5.538775510204082e-06, "loss": 0.5721, "step": 21799 }, { "epoch": 88.9795918367347, "grad_norm": 24.735403060913086, "learning_rate": 5.536734693877552e-06, "loss": 0.2002, "step": 21800 }, { "epoch": 88.98367346938775, "grad_norm": 42.310569763183594, "learning_rate": 5.534693877551021e-06, "loss": 0.6016, "step": 21801 }, { "epoch": 88.98775510204082, "grad_norm": 30.364248275756836, "learning_rate": 5.53265306122449e-06, "loss": 0.3801, "step": 21802 }, { "epoch": 88.99183673469388, "grad_norm": 35.509796142578125, "learning_rate": 5.530612244897959e-06, "loss": 1.0595, "step": 21803 }, { "epoch": 88.99591836734695, "grad_norm": 39.1631965637207, "learning_rate": 5.528571428571429e-06, "loss": 0.7757, "step": 21804 }, { "epoch": 89.0, "grad_norm": 21.90935707092285, "learning_rate": 5.5265306122448984e-06, "loss": 1.9014, "step": 21805 }, { "epoch": 89.00408163265305, "grad_norm": 27.805635452270508, "learning_rate": 5.524489795918367e-06, "loss": 1.3931, "step": 21806 }, { "epoch": 89.00816326530612, "grad_norm": 26.324474334716797, "learning_rate": 5.522448979591837e-06, "loss": 0.2525, "step": 21807 }, { "epoch": 89.01224489795918, "grad_norm": 33.82133483886719, "learning_rate": 5.520408163265307e-06, "loss": 0.3, "step": 21808 }, { "epoch": 89.01632653061225, "grad_norm": 27.260082244873047, "learning_rate": 5.518367346938775e-06, "loss": 1.4446, "step": 21809 }, { "epoch": 89.0204081632653, "grad_norm": 30.78543472290039, "learning_rate": 5.516326530612245e-06, "loss": 0.4922, "step": 21810 }, { "epoch": 89.02448979591837, "grad_norm": 34.68431854248047, "learning_rate": 5.514285714285715e-06, "loss": 0.9652, "step": 21811 }, { "epoch": 89.02857142857142, "grad_norm": 31.877199172973633, "learning_rate": 5.5122448979591836e-06, "loss": 1.2338, "step": 21812 }, { "epoch": 89.0326530612245, "grad_norm": 16.7484130859375, "learning_rate": 5.510204081632653e-06, "loss": 0.1027, "step": 21813 }, { "epoch": 89.03673469387755, "grad_norm": 22.863080978393555, "learning_rate": 5.508163265306122e-06, "loss": 0.2012, "step": 21814 }, { "epoch": 89.04081632653062, "grad_norm": 31.462738037109375, "learning_rate": 5.506122448979592e-06, "loss": 0.4958, "step": 21815 }, { "epoch": 89.04489795918367, "grad_norm": 28.0594425201416, "learning_rate": 5.504081632653061e-06, "loss": 0.331, "step": 21816 }, { "epoch": 89.04897959183674, "grad_norm": 38.3760986328125, "learning_rate": 5.502040816326531e-06, "loss": 0.9563, "step": 21817 }, { "epoch": 89.0530612244898, "grad_norm": 17.973344802856445, "learning_rate": 5.500000000000001e-06, "loss": 2.265, "step": 21818 }, { "epoch": 89.05714285714286, "grad_norm": 20.310293197631836, "learning_rate": 5.4979591836734695e-06, "loss": 1.8029, "step": 21819 }, { "epoch": 89.06122448979592, "grad_norm": 26.871627807617188, "learning_rate": 5.495918367346939e-06, "loss": 1.3886, "step": 21820 }, { "epoch": 89.06530612244897, "grad_norm": 33.238319396972656, "learning_rate": 5.493877551020409e-06, "loss": 0.244, "step": 21821 }, { "epoch": 89.06938775510204, "grad_norm": 27.388826370239258, "learning_rate": 5.4918367346938785e-06, "loss": 1.2658, "step": 21822 }, { "epoch": 89.0734693877551, "grad_norm": 21.54586410522461, "learning_rate": 5.489795918367347e-06, "loss": 1.9613, "step": 21823 }, { "epoch": 89.07755102040817, "grad_norm": 26.596487045288086, "learning_rate": 5.487755102040817e-06, "loss": 1.4027, "step": 21824 }, { "epoch": 89.08163265306122, "grad_norm": 20.8490047454834, "learning_rate": 5.485714285714286e-06, "loss": 1.8498, "step": 21825 }, { "epoch": 89.08571428571429, "grad_norm": 27.43206787109375, "learning_rate": 5.4836734693877555e-06, "loss": 1.3271, "step": 21826 }, { "epoch": 89.08979591836734, "grad_norm": 40.695064544677734, "learning_rate": 5.481632653061225e-06, "loss": 1.1053, "step": 21827 }, { "epoch": 89.09387755102041, "grad_norm": 39.41560745239258, "learning_rate": 5.479591836734694e-06, "loss": 1.1035, "step": 21828 }, { "epoch": 89.09795918367347, "grad_norm": 23.14141273498535, "learning_rate": 5.477551020408164e-06, "loss": 1.5322, "step": 21829 }, { "epoch": 89.10204081632654, "grad_norm": 33.72724533081055, "learning_rate": 5.475510204081633e-06, "loss": 0.7203, "step": 21830 }, { "epoch": 89.10612244897959, "grad_norm": 56.074100494384766, "learning_rate": 5.473469387755102e-06, "loss": 1.1107, "step": 21831 }, { "epoch": 89.11020408163266, "grad_norm": 27.945621490478516, "learning_rate": 5.471428571428572e-06, "loss": 0.3799, "step": 21832 }, { "epoch": 89.11428571428571, "grad_norm": 38.91241455078125, "learning_rate": 5.469387755102041e-06, "loss": 0.5586, "step": 21833 }, { "epoch": 89.11836734693877, "grad_norm": 30.5538387298584, "learning_rate": 5.46734693877551e-06, "loss": 0.2266, "step": 21834 }, { "epoch": 89.12244897959184, "grad_norm": 34.4361686706543, "learning_rate": 5.46530612244898e-06, "loss": 0.3157, "step": 21835 }, { "epoch": 89.12653061224489, "grad_norm": 37.48854064941406, "learning_rate": 5.463265306122449e-06, "loss": 0.5557, "step": 21836 }, { "epoch": 89.13061224489796, "grad_norm": 35.5894775390625, "learning_rate": 5.4612244897959184e-06, "loss": 0.3219, "step": 21837 }, { "epoch": 89.13469387755102, "grad_norm": 33.99437713623047, "learning_rate": 5.459183673469388e-06, "loss": 1.1647, "step": 21838 }, { "epoch": 89.13877551020408, "grad_norm": 11.484951972961426, "learning_rate": 5.457142857142857e-06, "loss": 0.1143, "step": 21839 }, { "epoch": 89.14285714285714, "grad_norm": 31.189350128173828, "learning_rate": 5.455102040816327e-06, "loss": 1.2387, "step": 21840 }, { "epoch": 89.14693877551021, "grad_norm": 19.943078994750977, "learning_rate": 5.453061224489796e-06, "loss": 1.8701, "step": 21841 }, { "epoch": 89.15102040816326, "grad_norm": 25.09052276611328, "learning_rate": 5.451020408163265e-06, "loss": 0.2266, "step": 21842 }, { "epoch": 89.15510204081633, "grad_norm": 13.835434913635254, "learning_rate": 5.448979591836735e-06, "loss": 0.092, "step": 21843 }, { "epoch": 89.15918367346939, "grad_norm": 30.004743576049805, "learning_rate": 5.446938775510204e-06, "loss": 0.4229, "step": 21844 }, { "epoch": 89.16326530612245, "grad_norm": 37.152565002441406, "learning_rate": 5.444897959183673e-06, "loss": 0.5164, "step": 21845 }, { "epoch": 89.16734693877551, "grad_norm": 10.90916633605957, "learning_rate": 5.442857142857143e-06, "loss": 0.0835, "step": 21846 }, { "epoch": 89.17142857142858, "grad_norm": 16.230545043945312, "learning_rate": 5.4408163265306126e-06, "loss": 0.1823, "step": 21847 }, { "epoch": 89.17551020408163, "grad_norm": 35.17704772949219, "learning_rate": 5.438775510204082e-06, "loss": 1.0975, "step": 21848 }, { "epoch": 89.17959183673469, "grad_norm": 27.360122680664062, "learning_rate": 5.436734693877552e-06, "loss": 0.2935, "step": 21849 }, { "epoch": 89.18367346938776, "grad_norm": 28.1796817779541, "learning_rate": 5.434693877551021e-06, "loss": 1.2867, "step": 21850 }, { "epoch": 89.18775510204081, "grad_norm": 30.180524826049805, "learning_rate": 5.43265306122449e-06, "loss": 1.188, "step": 21851 }, { "epoch": 89.19183673469388, "grad_norm": 40.66203689575195, "learning_rate": 5.43061224489796e-06, "loss": 0.6792, "step": 21852 }, { "epoch": 89.19591836734693, "grad_norm": 22.776874542236328, "learning_rate": 5.428571428571429e-06, "loss": 0.3101, "step": 21853 }, { "epoch": 89.2, "grad_norm": 34.2385139465332, "learning_rate": 5.4265306122448985e-06, "loss": 1.2568, "step": 21854 }, { "epoch": 89.20408163265306, "grad_norm": 32.76173782348633, "learning_rate": 5.424489795918367e-06, "loss": 0.7636, "step": 21855 }, { "epoch": 89.20816326530613, "grad_norm": 28.850372314453125, "learning_rate": 5.422448979591837e-06, "loss": 0.6633, "step": 21856 }, { "epoch": 89.21224489795918, "grad_norm": 26.585308074951172, "learning_rate": 5.420408163265307e-06, "loss": 0.5952, "step": 21857 }, { "epoch": 89.21632653061225, "grad_norm": 31.618297576904297, "learning_rate": 5.4183673469387755e-06, "loss": 1.2373, "step": 21858 }, { "epoch": 89.2204081632653, "grad_norm": 33.42047882080078, "learning_rate": 5.416326530612245e-06, "loss": 0.5706, "step": 21859 }, { "epoch": 89.22448979591837, "grad_norm": 28.835208892822266, "learning_rate": 5.414285714285715e-06, "loss": 1.6057, "step": 21860 }, { "epoch": 89.22857142857143, "grad_norm": 38.94237518310547, "learning_rate": 5.412244897959184e-06, "loss": 0.5304, "step": 21861 }, { "epoch": 89.2326530612245, "grad_norm": 32.14267349243164, "learning_rate": 5.410204081632653e-06, "loss": 0.6179, "step": 21862 }, { "epoch": 89.23673469387755, "grad_norm": 30.922094345092773, "learning_rate": 5.408163265306123e-06, "loss": 1.2502, "step": 21863 }, { "epoch": 89.2408163265306, "grad_norm": 40.09793472290039, "learning_rate": 5.406122448979592e-06, "loss": 1.356, "step": 21864 }, { "epoch": 89.24489795918367, "grad_norm": 27.47156524658203, "learning_rate": 5.4040816326530615e-06, "loss": 0.7237, "step": 21865 }, { "epoch": 89.24897959183673, "grad_norm": 32.62934875488281, "learning_rate": 5.40204081632653e-06, "loss": 0.8635, "step": 21866 }, { "epoch": 89.2530612244898, "grad_norm": 32.50440979003906, "learning_rate": 5.4e-06, "loss": 1.0041, "step": 21867 }, { "epoch": 89.25714285714285, "grad_norm": 35.79927062988281, "learning_rate": 5.39795918367347e-06, "loss": 0.3382, "step": 21868 }, { "epoch": 89.26122448979592, "grad_norm": 38.15043258666992, "learning_rate": 5.3959183673469385e-06, "loss": 0.2457, "step": 21869 }, { "epoch": 89.26530612244898, "grad_norm": 26.8432559967041, "learning_rate": 5.393877551020408e-06, "loss": 1.3656, "step": 21870 }, { "epoch": 89.26938775510204, "grad_norm": 22.343055725097656, "learning_rate": 5.391836734693878e-06, "loss": 2.1722, "step": 21871 }, { "epoch": 89.2734693877551, "grad_norm": 28.923416137695312, "learning_rate": 5.389795918367347e-06, "loss": 0.2197, "step": 21872 }, { "epoch": 89.27755102040817, "grad_norm": 24.76375961303711, "learning_rate": 5.387755102040816e-06, "loss": 0.2817, "step": 21873 }, { "epoch": 89.28163265306122, "grad_norm": 31.891145706176758, "learning_rate": 5.385714285714286e-06, "loss": 1.1411, "step": 21874 }, { "epoch": 89.28571428571429, "grad_norm": 26.849937438964844, "learning_rate": 5.383673469387756e-06, "loss": 1.0375, "step": 21875 }, { "epoch": 89.28979591836735, "grad_norm": 38.468265533447266, "learning_rate": 5.381632653061225e-06, "loss": 0.6897, "step": 21876 }, { "epoch": 89.29387755102042, "grad_norm": 23.392545700073242, "learning_rate": 5.379591836734694e-06, "loss": 0.5278, "step": 21877 }, { "epoch": 89.29795918367347, "grad_norm": 23.064823150634766, "learning_rate": 5.377551020408164e-06, "loss": 1.2027, "step": 21878 }, { "epoch": 89.30204081632652, "grad_norm": 29.205427169799805, "learning_rate": 5.3755102040816334e-06, "loss": 0.6137, "step": 21879 }, { "epoch": 89.3061224489796, "grad_norm": 25.763381958007812, "learning_rate": 5.373469387755102e-06, "loss": 1.4194, "step": 21880 }, { "epoch": 89.31020408163265, "grad_norm": 49.29548263549805, "learning_rate": 5.371428571428572e-06, "loss": 0.6907, "step": 21881 }, { "epoch": 89.31428571428572, "grad_norm": 34.832618713378906, "learning_rate": 5.369387755102042e-06, "loss": 0.4617, "step": 21882 }, { "epoch": 89.31836734693877, "grad_norm": 35.61616134643555, "learning_rate": 5.36734693877551e-06, "loss": 1.0311, "step": 21883 }, { "epoch": 89.32244897959184, "grad_norm": 32.20664978027344, "learning_rate": 5.36530612244898e-06, "loss": 1.1598, "step": 21884 }, { "epoch": 89.3265306122449, "grad_norm": 28.173870086669922, "learning_rate": 5.36326530612245e-06, "loss": 0.2652, "step": 21885 }, { "epoch": 89.33061224489796, "grad_norm": 1.5125668048858643, "learning_rate": 5.3612244897959186e-06, "loss": 0.024, "step": 21886 }, { "epoch": 89.33469387755102, "grad_norm": 26.542469024658203, "learning_rate": 5.359183673469388e-06, "loss": 0.3992, "step": 21887 }, { "epoch": 89.33877551020409, "grad_norm": 37.144290924072266, "learning_rate": 5.357142857142857e-06, "loss": 0.6254, "step": 21888 }, { "epoch": 89.34285714285714, "grad_norm": 28.464265823364258, "learning_rate": 5.355102040816327e-06, "loss": 0.2322, "step": 21889 }, { "epoch": 89.34693877551021, "grad_norm": 16.886606216430664, "learning_rate": 5.353061224489796e-06, "loss": 2.5272, "step": 21890 }, { "epoch": 89.35102040816327, "grad_norm": 28.851844787597656, "learning_rate": 5.351020408163265e-06, "loss": 0.6294, "step": 21891 }, { "epoch": 89.35510204081632, "grad_norm": 26.122623443603516, "learning_rate": 5.348979591836735e-06, "loss": 1.4949, "step": 21892 }, { "epoch": 89.35918367346939, "grad_norm": 35.15357971191406, "learning_rate": 5.3469387755102045e-06, "loss": 0.7979, "step": 21893 }, { "epoch": 89.36326530612244, "grad_norm": 22.614757537841797, "learning_rate": 5.344897959183673e-06, "loss": 2.0099, "step": 21894 }, { "epoch": 89.36734693877551, "grad_norm": 39.244903564453125, "learning_rate": 5.342857142857143e-06, "loss": 1.0157, "step": 21895 }, { "epoch": 89.37142857142857, "grad_norm": 38.68160629272461, "learning_rate": 5.340816326530613e-06, "loss": 1.0153, "step": 21896 }, { "epoch": 89.37551020408164, "grad_norm": 35.19831085205078, "learning_rate": 5.3387755102040815e-06, "loss": 0.8639, "step": 21897 }, { "epoch": 89.37959183673469, "grad_norm": 35.53704833984375, "learning_rate": 5.336734693877551e-06, "loss": 0.9084, "step": 21898 }, { "epoch": 89.38367346938776, "grad_norm": 30.570905685424805, "learning_rate": 5.33469387755102e-06, "loss": 0.2591, "step": 21899 }, { "epoch": 89.38775510204081, "grad_norm": 32.88877487182617, "learning_rate": 5.33265306122449e-06, "loss": 1.2884, "step": 21900 }, { "epoch": 89.39183673469388, "grad_norm": 8.138423919677734, "learning_rate": 5.330612244897959e-06, "loss": 0.0528, "step": 21901 }, { "epoch": 89.39591836734694, "grad_norm": 19.0426082611084, "learning_rate": 5.328571428571429e-06, "loss": 1.9652, "step": 21902 }, { "epoch": 89.4, "grad_norm": 40.899139404296875, "learning_rate": 5.326530612244899e-06, "loss": 0.5813, "step": 21903 }, { "epoch": 89.40408163265306, "grad_norm": 27.191808700561523, "learning_rate": 5.3244897959183675e-06, "loss": 0.7007, "step": 21904 }, { "epoch": 89.40816326530613, "grad_norm": 8.457588195800781, "learning_rate": 5.322448979591837e-06, "loss": 2.956, "step": 21905 }, { "epoch": 89.41224489795918, "grad_norm": 27.864408493041992, "learning_rate": 5.320408163265307e-06, "loss": 1.5835, "step": 21906 }, { "epoch": 89.41632653061224, "grad_norm": 43.40616226196289, "learning_rate": 5.318367346938776e-06, "loss": 0.7824, "step": 21907 }, { "epoch": 89.42040816326531, "grad_norm": 43.672481536865234, "learning_rate": 5.316326530612245e-06, "loss": 0.9683, "step": 21908 }, { "epoch": 89.42448979591836, "grad_norm": 25.462989807128906, "learning_rate": 5.314285714285715e-06, "loss": 2.0406, "step": 21909 }, { "epoch": 89.42857142857143, "grad_norm": 32.506412506103516, "learning_rate": 5.312244897959184e-06, "loss": 0.774, "step": 21910 }, { "epoch": 89.43265306122449, "grad_norm": 27.83167266845703, "learning_rate": 5.3102040816326534e-06, "loss": 1.3874, "step": 21911 }, { "epoch": 89.43673469387755, "grad_norm": 33.48522186279297, "learning_rate": 5.308163265306123e-06, "loss": 0.6245, "step": 21912 }, { "epoch": 89.44081632653061, "grad_norm": 36.69619369506836, "learning_rate": 5.306122448979592e-06, "loss": 0.9999, "step": 21913 }, { "epoch": 89.44489795918368, "grad_norm": 29.407489776611328, "learning_rate": 5.304081632653062e-06, "loss": 0.9523, "step": 21914 }, { "epoch": 89.44897959183673, "grad_norm": 12.182125091552734, "learning_rate": 5.302040816326531e-06, "loss": 0.0892, "step": 21915 }, { "epoch": 89.4530612244898, "grad_norm": 36.51913070678711, "learning_rate": 5.3e-06, "loss": 0.8911, "step": 21916 }, { "epoch": 89.45714285714286, "grad_norm": 27.499040603637695, "learning_rate": 5.29795918367347e-06, "loss": 1.1794, "step": 21917 }, { "epoch": 89.46122448979592, "grad_norm": 34.88279342651367, "learning_rate": 5.2959183673469386e-06, "loss": 1.1881, "step": 21918 }, { "epoch": 89.46530612244898, "grad_norm": 23.351001739501953, "learning_rate": 5.293877551020408e-06, "loss": 0.2037, "step": 21919 }, { "epoch": 89.46938775510205, "grad_norm": 28.323625564575195, "learning_rate": 5.291836734693878e-06, "loss": 1.0145, "step": 21920 }, { "epoch": 89.4734693877551, "grad_norm": 32.5029296875, "learning_rate": 5.289795918367347e-06, "loss": 0.9136, "step": 21921 }, { "epoch": 89.47755102040816, "grad_norm": 31.596567153930664, "learning_rate": 5.287755102040816e-06, "loss": 0.7555, "step": 21922 }, { "epoch": 89.48163265306123, "grad_norm": 25.60176658630371, "learning_rate": 5.285714285714286e-06, "loss": 1.8086, "step": 21923 }, { "epoch": 89.48571428571428, "grad_norm": 24.456165313720703, "learning_rate": 5.283673469387755e-06, "loss": 1.4986, "step": 21924 }, { "epoch": 89.48979591836735, "grad_norm": 42.20029067993164, "learning_rate": 5.2816326530612245e-06, "loss": 0.8759, "step": 21925 }, { "epoch": 89.4938775510204, "grad_norm": 30.788854598999023, "learning_rate": 5.279591836734694e-06, "loss": 0.7314, "step": 21926 }, { "epoch": 89.49795918367347, "grad_norm": 27.753128051757812, "learning_rate": 5.277551020408163e-06, "loss": 0.6789, "step": 21927 }, { "epoch": 89.50204081632653, "grad_norm": 31.736614227294922, "learning_rate": 5.275510204081633e-06, "loss": 0.33, "step": 21928 }, { "epoch": 89.5061224489796, "grad_norm": 30.672361373901367, "learning_rate": 5.273469387755102e-06, "loss": 0.3514, "step": 21929 }, { "epoch": 89.51020408163265, "grad_norm": 9.028285026550293, "learning_rate": 5.271428571428572e-06, "loss": 0.0767, "step": 21930 }, { "epoch": 89.51428571428572, "grad_norm": 41.32573318481445, "learning_rate": 5.269387755102041e-06, "loss": 0.7553, "step": 21931 }, { "epoch": 89.51836734693877, "grad_norm": 35.215736389160156, "learning_rate": 5.2673469387755105e-06, "loss": 1.4273, "step": 21932 }, { "epoch": 89.52244897959184, "grad_norm": 48.67347717285156, "learning_rate": 5.26530612244898e-06, "loss": 0.8116, "step": 21933 }, { "epoch": 89.5265306122449, "grad_norm": 24.937074661254883, "learning_rate": 5.26326530612245e-06, "loss": 1.9144, "step": 21934 }, { "epoch": 89.53061224489795, "grad_norm": 21.308040618896484, "learning_rate": 5.261224489795919e-06, "loss": 1.9997, "step": 21935 }, { "epoch": 89.53469387755102, "grad_norm": 30.418970108032227, "learning_rate": 5.259183673469388e-06, "loss": 0.4338, "step": 21936 }, { "epoch": 89.53877551020408, "grad_norm": 29.381444931030273, "learning_rate": 5.257142857142858e-06, "loss": 0.1913, "step": 21937 }, { "epoch": 89.54285714285714, "grad_norm": 23.540481567382812, "learning_rate": 5.255102040816327e-06, "loss": 1.4122, "step": 21938 }, { "epoch": 89.5469387755102, "grad_norm": 31.250093460083008, "learning_rate": 5.2530612244897965e-06, "loss": 0.2712, "step": 21939 }, { "epoch": 89.55102040816327, "grad_norm": 31.72203254699707, "learning_rate": 5.251020408163265e-06, "loss": 1.5975, "step": 21940 }, { "epoch": 89.55510204081632, "grad_norm": 23.79681396484375, "learning_rate": 5.248979591836735e-06, "loss": 0.2604, "step": 21941 }, { "epoch": 89.55918367346939, "grad_norm": 27.374706268310547, "learning_rate": 5.246938775510205e-06, "loss": 1.2704, "step": 21942 }, { "epoch": 89.56326530612245, "grad_norm": 10.912827491760254, "learning_rate": 5.2448979591836735e-06, "loss": 0.0923, "step": 21943 }, { "epoch": 89.56734693877551, "grad_norm": 26.633115768432617, "learning_rate": 5.242857142857143e-06, "loss": 0.3458, "step": 21944 }, { "epoch": 89.57142857142857, "grad_norm": 24.950443267822266, "learning_rate": 5.240816326530613e-06, "loss": 0.1638, "step": 21945 }, { "epoch": 89.57551020408164, "grad_norm": 31.21047019958496, "learning_rate": 5.238775510204082e-06, "loss": 1.3683, "step": 21946 }, { "epoch": 89.57959183673469, "grad_norm": 40.757877349853516, "learning_rate": 5.236734693877551e-06, "loss": 0.6062, "step": 21947 }, { "epoch": 89.58367346938776, "grad_norm": 32.093406677246094, "learning_rate": 5.234693877551021e-06, "loss": 1.287, "step": 21948 }, { "epoch": 89.58775510204082, "grad_norm": 33.231788635253906, "learning_rate": 5.23265306122449e-06, "loss": 0.4878, "step": 21949 }, { "epoch": 89.59183673469387, "grad_norm": 25.794885635375977, "learning_rate": 5.2306122448979594e-06, "loss": 1.4689, "step": 21950 }, { "epoch": 89.59591836734694, "grad_norm": 33.314632415771484, "learning_rate": 5.228571428571428e-06, "loss": 0.6435, "step": 21951 }, { "epoch": 89.6, "grad_norm": 30.12247085571289, "learning_rate": 5.226530612244898e-06, "loss": 0.3407, "step": 21952 }, { "epoch": 89.60408163265306, "grad_norm": 32.90556335449219, "learning_rate": 5.224489795918368e-06, "loss": 1.1466, "step": 21953 }, { "epoch": 89.60816326530612, "grad_norm": 29.864002227783203, "learning_rate": 5.222448979591836e-06, "loss": 1.239, "step": 21954 }, { "epoch": 89.61224489795919, "grad_norm": 36.427860260009766, "learning_rate": 5.220408163265306e-06, "loss": 1.0341, "step": 21955 }, { "epoch": 89.61632653061224, "grad_norm": 34.84946060180664, "learning_rate": 5.218367346938776e-06, "loss": 1.269, "step": 21956 }, { "epoch": 89.62040816326531, "grad_norm": 29.1054744720459, "learning_rate": 5.2163265306122445e-06, "loss": 1.1787, "step": 21957 }, { "epoch": 89.62448979591836, "grad_norm": 32.0358772277832, "learning_rate": 5.214285714285714e-06, "loss": 0.9868, "step": 21958 }, { "epoch": 89.62857142857143, "grad_norm": 16.245996475219727, "learning_rate": 5.212244897959184e-06, "loss": 0.1414, "step": 21959 }, { "epoch": 89.63265306122449, "grad_norm": 29.43889808654785, "learning_rate": 5.2102040816326535e-06, "loss": 1.1286, "step": 21960 }, { "epoch": 89.63673469387756, "grad_norm": 43.649173736572266, "learning_rate": 5.208163265306123e-06, "loss": 0.9364, "step": 21961 }, { "epoch": 89.64081632653061, "grad_norm": 27.94144630432129, "learning_rate": 5.206122448979592e-06, "loss": 1.8624, "step": 21962 }, { "epoch": 89.64489795918368, "grad_norm": 25.859573364257812, "learning_rate": 5.204081632653062e-06, "loss": 1.3816, "step": 21963 }, { "epoch": 89.64897959183673, "grad_norm": 33.77302169799805, "learning_rate": 5.202040816326531e-06, "loss": 1.1473, "step": 21964 }, { "epoch": 89.65306122448979, "grad_norm": 30.387907028198242, "learning_rate": 5.2e-06, "loss": 0.6683, "step": 21965 }, { "epoch": 89.65714285714286, "grad_norm": 28.58636474609375, "learning_rate": 5.19795918367347e-06, "loss": 1.4969, "step": 21966 }, { "epoch": 89.66122448979591, "grad_norm": 33.08909225463867, "learning_rate": 5.1959183673469395e-06, "loss": 0.7507, "step": 21967 }, { "epoch": 89.66530612244898, "grad_norm": 26.630691528320312, "learning_rate": 5.193877551020408e-06, "loss": 0.4086, "step": 21968 }, { "epoch": 89.66938775510204, "grad_norm": 29.375524520874023, "learning_rate": 5.191836734693878e-06, "loss": 1.4322, "step": 21969 }, { "epoch": 89.6734693877551, "grad_norm": 29.99639129638672, "learning_rate": 5.189795918367348e-06, "loss": 1.1274, "step": 21970 }, { "epoch": 89.67755102040816, "grad_norm": 28.051082611083984, "learning_rate": 5.1877551020408165e-06, "loss": 0.313, "step": 21971 }, { "epoch": 89.68163265306123, "grad_norm": 31.306180953979492, "learning_rate": 5.185714285714286e-06, "loss": 0.4253, "step": 21972 }, { "epoch": 89.68571428571428, "grad_norm": 28.247507095336914, "learning_rate": 5.183673469387755e-06, "loss": 1.4142, "step": 21973 }, { "epoch": 89.68979591836735, "grad_norm": 26.531747817993164, "learning_rate": 5.181632653061225e-06, "loss": 1.1453, "step": 21974 }, { "epoch": 89.6938775510204, "grad_norm": 34.318599700927734, "learning_rate": 5.179591836734694e-06, "loss": 0.3395, "step": 21975 }, { "epoch": 89.69795918367348, "grad_norm": 44.20024871826172, "learning_rate": 5.177551020408163e-06, "loss": 0.5259, "step": 21976 }, { "epoch": 89.70204081632653, "grad_norm": 31.355093002319336, "learning_rate": 5.175510204081633e-06, "loss": 1.1408, "step": 21977 }, { "epoch": 89.70612244897958, "grad_norm": 33.30439758300781, "learning_rate": 5.1734693877551025e-06, "loss": 0.8836, "step": 21978 }, { "epoch": 89.71020408163265, "grad_norm": 29.969009399414062, "learning_rate": 5.171428571428571e-06, "loss": 1.0751, "step": 21979 }, { "epoch": 89.71428571428571, "grad_norm": 19.41775131225586, "learning_rate": 5.169387755102041e-06, "loss": 0.2281, "step": 21980 }, { "epoch": 89.71836734693878, "grad_norm": 25.79597282409668, "learning_rate": 5.167346938775511e-06, "loss": 0.6917, "step": 21981 }, { "epoch": 89.72244897959183, "grad_norm": 9.0032958984375, "learning_rate": 5.1653061224489794e-06, "loss": 0.0732, "step": 21982 }, { "epoch": 89.7265306122449, "grad_norm": 26.021339416503906, "learning_rate": 5.163265306122449e-06, "loss": 1.3582, "step": 21983 }, { "epoch": 89.73061224489796, "grad_norm": 54.33291244506836, "learning_rate": 5.161224489795918e-06, "loss": 0.5425, "step": 21984 }, { "epoch": 89.73469387755102, "grad_norm": 29.943889617919922, "learning_rate": 5.159183673469388e-06, "loss": 0.3612, "step": 21985 }, { "epoch": 89.73877551020408, "grad_norm": 29.659940719604492, "learning_rate": 5.157142857142857e-06, "loss": 0.5305, "step": 21986 }, { "epoch": 89.74285714285715, "grad_norm": 29.986164093017578, "learning_rate": 5.155102040816327e-06, "loss": 0.3595, "step": 21987 }, { "epoch": 89.7469387755102, "grad_norm": 11.627750396728516, "learning_rate": 5.153061224489797e-06, "loss": 0.0687, "step": 21988 }, { "epoch": 89.75102040816327, "grad_norm": 28.6000919342041, "learning_rate": 5.151020408163266e-06, "loss": 1.484, "step": 21989 }, { "epoch": 89.75510204081633, "grad_norm": 18.13335609436035, "learning_rate": 5.148979591836735e-06, "loss": 0.2386, "step": 21990 }, { "epoch": 89.7591836734694, "grad_norm": 44.564002990722656, "learning_rate": 5.146938775510205e-06, "loss": 0.4416, "step": 21991 }, { "epoch": 89.76326530612245, "grad_norm": 19.01803207397461, "learning_rate": 5.1448979591836736e-06, "loss": 2.0639, "step": 21992 }, { "epoch": 89.7673469387755, "grad_norm": 23.57933235168457, "learning_rate": 5.142857142857143e-06, "loss": 1.635, "step": 21993 }, { "epoch": 89.77142857142857, "grad_norm": 21.910341262817383, "learning_rate": 5.140816326530613e-06, "loss": 0.2632, "step": 21994 }, { "epoch": 89.77551020408163, "grad_norm": 26.41545867919922, "learning_rate": 5.138775510204082e-06, "loss": 0.4362, "step": 21995 }, { "epoch": 89.7795918367347, "grad_norm": 27.823801040649414, "learning_rate": 5.136734693877551e-06, "loss": 1.5159, "step": 21996 }, { "epoch": 89.78367346938775, "grad_norm": 24.606056213378906, "learning_rate": 5.134693877551021e-06, "loss": 0.2965, "step": 21997 }, { "epoch": 89.78775510204082, "grad_norm": 29.02288246154785, "learning_rate": 5.13265306122449e-06, "loss": 1.5214, "step": 21998 }, { "epoch": 89.79183673469387, "grad_norm": 23.620630264282227, "learning_rate": 5.1306122448979595e-06, "loss": 1.305, "step": 21999 }, { "epoch": 89.79591836734694, "grad_norm": 8.67699146270752, "learning_rate": 5.128571428571429e-06, "loss": 0.073, "step": 22000 }, { "epoch": 89.8, "grad_norm": 30.911462783813477, "learning_rate": 5.126530612244898e-06, "loss": 1.3824, "step": 22001 }, { "epoch": 89.80408163265307, "grad_norm": 26.1380615234375, "learning_rate": 5.124489795918368e-06, "loss": 0.2893, "step": 22002 }, { "epoch": 89.80816326530612, "grad_norm": 27.988309860229492, "learning_rate": 5.1224489795918365e-06, "loss": 0.5522, "step": 22003 }, { "epoch": 89.81224489795919, "grad_norm": 24.329574584960938, "learning_rate": 5.120408163265306e-06, "loss": 0.2864, "step": 22004 }, { "epoch": 89.81632653061224, "grad_norm": 25.309673309326172, "learning_rate": 5.118367346938776e-06, "loss": 1.3935, "step": 22005 }, { "epoch": 89.82040816326531, "grad_norm": 31.592496871948242, "learning_rate": 5.116326530612245e-06, "loss": 0.7337, "step": 22006 }, { "epoch": 89.82448979591837, "grad_norm": 23.22056007385254, "learning_rate": 5.114285714285714e-06, "loss": 1.843, "step": 22007 }, { "epoch": 89.82857142857142, "grad_norm": 27.647829055786133, "learning_rate": 5.112244897959184e-06, "loss": 1.5363, "step": 22008 }, { "epoch": 89.83265306122449, "grad_norm": 34.576499938964844, "learning_rate": 5.110204081632653e-06, "loss": 0.8769, "step": 22009 }, { "epoch": 89.83673469387755, "grad_norm": 25.772077560424805, "learning_rate": 5.1081632653061225e-06, "loss": 0.2088, "step": 22010 }, { "epoch": 89.84081632653061, "grad_norm": 46.1578369140625, "learning_rate": 5.106122448979592e-06, "loss": 0.804, "step": 22011 }, { "epoch": 89.84489795918367, "grad_norm": 27.717906951904297, "learning_rate": 5.104081632653061e-06, "loss": 1.5761, "step": 22012 }, { "epoch": 89.84897959183674, "grad_norm": 35.68015670776367, "learning_rate": 5.102040816326531e-06, "loss": 0.5095, "step": 22013 }, { "epoch": 89.85306122448979, "grad_norm": 54.543575286865234, "learning_rate": 5.1e-06, "loss": 0.392, "step": 22014 }, { "epoch": 89.85714285714286, "grad_norm": 24.47296714782715, "learning_rate": 5.09795918367347e-06, "loss": 1.4445, "step": 22015 }, { "epoch": 89.86122448979592, "grad_norm": 18.158662796020508, "learning_rate": 5.095918367346939e-06, "loss": 2.2153, "step": 22016 }, { "epoch": 89.86530612244898, "grad_norm": 28.75810432434082, "learning_rate": 5.0938775510204085e-06, "loss": 1.1892, "step": 22017 }, { "epoch": 89.86938775510204, "grad_norm": 14.684412956237793, "learning_rate": 5.091836734693878e-06, "loss": 0.111, "step": 22018 }, { "epoch": 89.87346938775511, "grad_norm": 26.525426864624023, "learning_rate": 5.089795918367348e-06, "loss": 1.0692, "step": 22019 }, { "epoch": 89.87755102040816, "grad_norm": 27.7299861907959, "learning_rate": 5.087755102040817e-06, "loss": 0.5349, "step": 22020 }, { "epoch": 89.88163265306123, "grad_norm": 31.849821090698242, "learning_rate": 5.085714285714286e-06, "loss": 0.6002, "step": 22021 }, { "epoch": 89.88571428571429, "grad_norm": 32.87488555908203, "learning_rate": 5.083673469387756e-06, "loss": 0.8697, "step": 22022 }, { "epoch": 89.88979591836734, "grad_norm": 35.3375244140625, "learning_rate": 5.081632653061225e-06, "loss": 0.7784, "step": 22023 }, { "epoch": 89.89387755102041, "grad_norm": 30.687793731689453, "learning_rate": 5.079591836734694e-06, "loss": 0.7837, "step": 22024 }, { "epoch": 89.89795918367346, "grad_norm": 24.68393898010254, "learning_rate": 5.077551020408163e-06, "loss": 1.7832, "step": 22025 }, { "epoch": 89.90204081632653, "grad_norm": 21.814239501953125, "learning_rate": 5.075510204081633e-06, "loss": 1.8995, "step": 22026 }, { "epoch": 89.90612244897959, "grad_norm": 32.23088455200195, "learning_rate": 5.0734693877551026e-06, "loss": 0.3678, "step": 22027 }, { "epoch": 89.91020408163266, "grad_norm": 28.952991485595703, "learning_rate": 5.071428571428571e-06, "loss": 1.3876, "step": 22028 }, { "epoch": 89.91428571428571, "grad_norm": 30.864582061767578, "learning_rate": 5.069387755102041e-06, "loss": 0.9656, "step": 22029 }, { "epoch": 89.91836734693878, "grad_norm": 32.64577865600586, "learning_rate": 5.067346938775511e-06, "loss": 0.2518, "step": 22030 }, { "epoch": 89.92244897959183, "grad_norm": 26.72112464904785, "learning_rate": 5.0653061224489795e-06, "loss": 1.3381, "step": 22031 }, { "epoch": 89.9265306122449, "grad_norm": 30.49853515625, "learning_rate": 5.063265306122449e-06, "loss": 0.6512, "step": 22032 }, { "epoch": 89.93061224489796, "grad_norm": 35.115291595458984, "learning_rate": 5.061224489795919e-06, "loss": 0.3648, "step": 22033 }, { "epoch": 89.93469387755103, "grad_norm": 34.706642150878906, "learning_rate": 5.059183673469388e-06, "loss": 0.8874, "step": 22034 }, { "epoch": 89.93877551020408, "grad_norm": 37.173187255859375, "learning_rate": 5.057142857142857e-06, "loss": 0.5715, "step": 22035 }, { "epoch": 89.94285714285714, "grad_norm": 27.374996185302734, "learning_rate": 5.055102040816326e-06, "loss": 1.9046, "step": 22036 }, { "epoch": 89.9469387755102, "grad_norm": 29.233840942382812, "learning_rate": 5.053061224489796e-06, "loss": 0.2164, "step": 22037 }, { "epoch": 89.95102040816326, "grad_norm": 35.47037124633789, "learning_rate": 5.0510204081632655e-06, "loss": 0.6668, "step": 22038 }, { "epoch": 89.95510204081633, "grad_norm": 26.412199020385742, "learning_rate": 5.048979591836734e-06, "loss": 0.2851, "step": 22039 }, { "epoch": 89.95918367346938, "grad_norm": 32.50576400756836, "learning_rate": 5.046938775510204e-06, "loss": 0.8922, "step": 22040 }, { "epoch": 89.96326530612245, "grad_norm": 19.123544692993164, "learning_rate": 5.044897959183674e-06, "loss": 0.1332, "step": 22041 }, { "epoch": 89.9673469387755, "grad_norm": 36.14970779418945, "learning_rate": 5.042857142857143e-06, "loss": 0.6311, "step": 22042 }, { "epoch": 89.97142857142858, "grad_norm": 23.989927291870117, "learning_rate": 5.040816326530612e-06, "loss": 1.8974, "step": 22043 }, { "epoch": 89.97551020408163, "grad_norm": 8.530379295349121, "learning_rate": 5.038775510204082e-06, "loss": 0.071, "step": 22044 }, { "epoch": 89.9795918367347, "grad_norm": 28.40422821044922, "learning_rate": 5.0367346938775515e-06, "loss": 1.7533, "step": 22045 }, { "epoch": 89.98367346938775, "grad_norm": 28.889183044433594, "learning_rate": 5.034693877551021e-06, "loss": 1.1069, "step": 22046 }, { "epoch": 89.98775510204082, "grad_norm": 34.62544631958008, "learning_rate": 5.03265306122449e-06, "loss": 1.0114, "step": 22047 }, { "epoch": 89.99183673469388, "grad_norm": 26.498571395874023, "learning_rate": 5.03061224489796e-06, "loss": 0.3715, "step": 22048 }, { "epoch": 89.99591836734695, "grad_norm": 14.165949821472168, "learning_rate": 5.028571428571429e-06, "loss": 0.1303, "step": 22049 }, { "epoch": 90.0, "grad_norm": 34.18460464477539, "learning_rate": 5.026530612244898e-06, "loss": 1.3695, "step": 22050 }, { "epoch": 90.00408163265305, "grad_norm": 36.58049774169922, "learning_rate": 5.024489795918368e-06, "loss": 1.0185, "step": 22051 }, { "epoch": 90.00816326530612, "grad_norm": 29.937715530395508, "learning_rate": 5.0224489795918375e-06, "loss": 0.2298, "step": 22052 }, { "epoch": 90.01224489795918, "grad_norm": 34.12527084350586, "learning_rate": 5.020408163265306e-06, "loss": 1.1721, "step": 22053 }, { "epoch": 90.01632653061225, "grad_norm": 29.577665328979492, "learning_rate": 5.018367346938776e-06, "loss": 0.815, "step": 22054 }, { "epoch": 90.0204081632653, "grad_norm": 30.214378356933594, "learning_rate": 5.016326530612246e-06, "loss": 1.3463, "step": 22055 }, { "epoch": 90.02448979591837, "grad_norm": 11.211374282836914, "learning_rate": 5.0142857142857144e-06, "loss": 0.0817, "step": 22056 }, { "epoch": 90.02857142857142, "grad_norm": 21.626964569091797, "learning_rate": 5.012244897959184e-06, "loss": 0.2694, "step": 22057 }, { "epoch": 90.0326530612245, "grad_norm": 28.95152473449707, "learning_rate": 5.010204081632653e-06, "loss": 0.256, "step": 22058 }, { "epoch": 90.03673469387755, "grad_norm": 24.375368118286133, "learning_rate": 5.008163265306123e-06, "loss": 1.7659, "step": 22059 }, { "epoch": 90.04081632653062, "grad_norm": 28.467716217041016, "learning_rate": 5.006122448979592e-06, "loss": 1.6865, "step": 22060 }, { "epoch": 90.04489795918367, "grad_norm": 27.950632095336914, "learning_rate": 5.004081632653061e-06, "loss": 1.8553, "step": 22061 }, { "epoch": 90.04897959183674, "grad_norm": 21.849681854248047, "learning_rate": 5.002040816326531e-06, "loss": 0.2593, "step": 22062 }, { "epoch": 90.0530612244898, "grad_norm": 32.4722785949707, "learning_rate": 5e-06, "loss": 0.9868, "step": 22063 }, { "epoch": 90.05714285714286, "grad_norm": 36.49258804321289, "learning_rate": 4.997959183673469e-06, "loss": 1.4284, "step": 22064 }, { "epoch": 90.06122448979592, "grad_norm": 36.322227478027344, "learning_rate": 4.995918367346939e-06, "loss": 1.1711, "step": 22065 }, { "epoch": 90.06530612244897, "grad_norm": 23.543880462646484, "learning_rate": 4.9938775510204086e-06, "loss": 1.3739, "step": 22066 }, { "epoch": 90.06938775510204, "grad_norm": 34.79906463623047, "learning_rate": 4.991836734693877e-06, "loss": 0.9515, "step": 22067 }, { "epoch": 90.0734693877551, "grad_norm": 22.33803367614746, "learning_rate": 4.989795918367347e-06, "loss": 1.7908, "step": 22068 }, { "epoch": 90.07755102040817, "grad_norm": 25.179487228393555, "learning_rate": 4.987755102040816e-06, "loss": 0.2912, "step": 22069 }, { "epoch": 90.08163265306122, "grad_norm": 30.94430923461914, "learning_rate": 4.9857142857142855e-06, "loss": 0.4919, "step": 22070 }, { "epoch": 90.08571428571429, "grad_norm": 6.521045207977295, "learning_rate": 4.983673469387755e-06, "loss": 0.0681, "step": 22071 }, { "epoch": 90.08979591836734, "grad_norm": 14.919842720031738, "learning_rate": 4.981632653061225e-06, "loss": 0.0756, "step": 22072 }, { "epoch": 90.09387755102041, "grad_norm": 40.898681640625, "learning_rate": 4.9795918367346945e-06, "loss": 0.6743, "step": 22073 }, { "epoch": 90.09795918367347, "grad_norm": 31.959854125976562, "learning_rate": 4.977551020408164e-06, "loss": 0.209, "step": 22074 }, { "epoch": 90.10204081632654, "grad_norm": 36.07087707519531, "learning_rate": 4.975510204081633e-06, "loss": 0.9736, "step": 22075 }, { "epoch": 90.10612244897959, "grad_norm": 26.991043090820312, "learning_rate": 4.973469387755103e-06, "loss": 0.5618, "step": 22076 }, { "epoch": 90.11020408163266, "grad_norm": 28.584854125976562, "learning_rate": 4.9714285714285715e-06, "loss": 0.3291, "step": 22077 }, { "epoch": 90.11428571428571, "grad_norm": 26.39954376220703, "learning_rate": 4.969387755102041e-06, "loss": 0.3017, "step": 22078 }, { "epoch": 90.11836734693877, "grad_norm": 35.9512939453125, "learning_rate": 4.967346938775511e-06, "loss": 1.1142, "step": 22079 }, { "epoch": 90.12244897959184, "grad_norm": 37.028076171875, "learning_rate": 4.96530612244898e-06, "loss": 0.514, "step": 22080 }, { "epoch": 90.12653061224489, "grad_norm": 30.008331298828125, "learning_rate": 4.963265306122449e-06, "loss": 1.2674, "step": 22081 }, { "epoch": 90.13061224489796, "grad_norm": 25.068002700805664, "learning_rate": 4.961224489795919e-06, "loss": 1.3579, "step": 22082 }, { "epoch": 90.13469387755102, "grad_norm": 35.455299377441406, "learning_rate": 4.959183673469388e-06, "loss": 0.7519, "step": 22083 }, { "epoch": 90.13877551020408, "grad_norm": 38.65890884399414, "learning_rate": 4.9571428571428575e-06, "loss": 0.5869, "step": 22084 }, { "epoch": 90.14285714285714, "grad_norm": 31.787933349609375, "learning_rate": 4.955102040816327e-06, "loss": 0.5249, "step": 22085 }, { "epoch": 90.14693877551021, "grad_norm": 35.72534942626953, "learning_rate": 4.953061224489796e-06, "loss": 0.4394, "step": 22086 }, { "epoch": 90.15102040816326, "grad_norm": 35.95665740966797, "learning_rate": 4.951020408163266e-06, "loss": 0.5288, "step": 22087 }, { "epoch": 90.15510204081633, "grad_norm": 29.14141082763672, "learning_rate": 4.9489795918367345e-06, "loss": 1.0739, "step": 22088 }, { "epoch": 90.15918367346939, "grad_norm": 21.995634078979492, "learning_rate": 4.946938775510204e-06, "loss": 0.2415, "step": 22089 }, { "epoch": 90.16326530612245, "grad_norm": 28.932403564453125, "learning_rate": 4.944897959183674e-06, "loss": 0.6112, "step": 22090 }, { "epoch": 90.16734693877551, "grad_norm": 30.352380752563477, "learning_rate": 4.942857142857143e-06, "loss": 0.2135, "step": 22091 }, { "epoch": 90.17142857142858, "grad_norm": 33.72749328613281, "learning_rate": 4.940816326530612e-06, "loss": 1.0033, "step": 22092 }, { "epoch": 90.17551020408163, "grad_norm": 28.138105392456055, "learning_rate": 4.938775510204082e-06, "loss": 0.3995, "step": 22093 }, { "epoch": 90.17959183673469, "grad_norm": 35.19187927246094, "learning_rate": 4.936734693877551e-06, "loss": 0.5951, "step": 22094 }, { "epoch": 90.18367346938776, "grad_norm": 21.866592407226562, "learning_rate": 4.93469387755102e-06, "loss": 0.211, "step": 22095 }, { "epoch": 90.18775510204081, "grad_norm": 26.054954528808594, "learning_rate": 4.93265306122449e-06, "loss": 0.3961, "step": 22096 }, { "epoch": 90.19183673469388, "grad_norm": 46.44245910644531, "learning_rate": 4.930612244897959e-06, "loss": 0.8048, "step": 22097 }, { "epoch": 90.19591836734693, "grad_norm": 32.100154876708984, "learning_rate": 4.9285714285714286e-06, "loss": 0.3517, "step": 22098 }, { "epoch": 90.2, "grad_norm": 20.66657066345215, "learning_rate": 4.926530612244898e-06, "loss": 2.2922, "step": 22099 }, { "epoch": 90.20408163265306, "grad_norm": 34.47219467163086, "learning_rate": 4.924489795918368e-06, "loss": 0.5473, "step": 22100 }, { "epoch": 90.20816326530613, "grad_norm": 24.09064483642578, "learning_rate": 4.922448979591837e-06, "loss": 1.751, "step": 22101 }, { "epoch": 90.21224489795918, "grad_norm": 26.68292999267578, "learning_rate": 4.920408163265306e-06, "loss": 0.3716, "step": 22102 }, { "epoch": 90.21632653061225, "grad_norm": 30.94671058654785, "learning_rate": 4.918367346938776e-06, "loss": 1.1173, "step": 22103 }, { "epoch": 90.2204081632653, "grad_norm": 25.384798049926758, "learning_rate": 4.916326530612246e-06, "loss": 1.8583, "step": 22104 }, { "epoch": 90.22448979591837, "grad_norm": 23.78243064880371, "learning_rate": 4.9142857142857145e-06, "loss": 2.1744, "step": 22105 }, { "epoch": 90.22857142857143, "grad_norm": 29.071054458618164, "learning_rate": 4.912244897959184e-06, "loss": 1.1097, "step": 22106 }, { "epoch": 90.2326530612245, "grad_norm": 30.897541046142578, "learning_rate": 4.910204081632654e-06, "loss": 0.1949, "step": 22107 }, { "epoch": 90.23673469387755, "grad_norm": 29.393110275268555, "learning_rate": 4.908163265306123e-06, "loss": 1.4519, "step": 22108 }, { "epoch": 90.2408163265306, "grad_norm": 27.550186157226562, "learning_rate": 4.906122448979592e-06, "loss": 1.2984, "step": 22109 }, { "epoch": 90.24489795918367, "grad_norm": 26.377553939819336, "learning_rate": 4.904081632653061e-06, "loss": 1.8111, "step": 22110 }, { "epoch": 90.24897959183673, "grad_norm": 31.65735626220703, "learning_rate": 4.902040816326531e-06, "loss": 1.5565, "step": 22111 }, { "epoch": 90.2530612244898, "grad_norm": 20.80910301208496, "learning_rate": 4.9000000000000005e-06, "loss": 1.2927, "step": 22112 }, { "epoch": 90.25714285714285, "grad_norm": 34.939247131347656, "learning_rate": 4.897959183673469e-06, "loss": 1.1569, "step": 22113 }, { "epoch": 90.26122448979592, "grad_norm": 24.091737747192383, "learning_rate": 4.895918367346939e-06, "loss": 1.4818, "step": 22114 }, { "epoch": 90.26530612244898, "grad_norm": 26.741300582885742, "learning_rate": 4.893877551020409e-06, "loss": 1.3437, "step": 22115 }, { "epoch": 90.26938775510204, "grad_norm": 29.835216522216797, "learning_rate": 4.8918367346938775e-06, "loss": 0.6923, "step": 22116 }, { "epoch": 90.2734693877551, "grad_norm": 23.131420135498047, "learning_rate": 4.889795918367347e-06, "loss": 1.2135, "step": 22117 }, { "epoch": 90.27755102040817, "grad_norm": 37.20534133911133, "learning_rate": 4.887755102040817e-06, "loss": 1.0608, "step": 22118 }, { "epoch": 90.28163265306122, "grad_norm": 30.021026611328125, "learning_rate": 4.885714285714286e-06, "loss": 1.3328, "step": 22119 }, { "epoch": 90.28571428571429, "grad_norm": 26.70537567138672, "learning_rate": 4.883673469387755e-06, "loss": 0.522, "step": 22120 }, { "epoch": 90.28979591836735, "grad_norm": 21.39785385131836, "learning_rate": 4.881632653061224e-06, "loss": 2.0483, "step": 22121 }, { "epoch": 90.29387755102042, "grad_norm": 31.357295989990234, "learning_rate": 4.879591836734694e-06, "loss": 0.8688, "step": 22122 }, { "epoch": 90.29795918367347, "grad_norm": 27.310707092285156, "learning_rate": 4.8775510204081635e-06, "loss": 0.3309, "step": 22123 }, { "epoch": 90.30204081632652, "grad_norm": 38.68431854248047, "learning_rate": 4.875510204081632e-06, "loss": 0.9367, "step": 22124 }, { "epoch": 90.3061224489796, "grad_norm": 30.380285263061523, "learning_rate": 4.873469387755102e-06, "loss": 0.6503, "step": 22125 }, { "epoch": 90.31020408163265, "grad_norm": 20.17241668701172, "learning_rate": 4.871428571428572e-06, "loss": 1.9641, "step": 22126 }, { "epoch": 90.31428571428572, "grad_norm": 34.233009338378906, "learning_rate": 4.869387755102041e-06, "loss": 0.3196, "step": 22127 }, { "epoch": 90.31836734693877, "grad_norm": 36.85541915893555, "learning_rate": 4.86734693877551e-06, "loss": 0.3162, "step": 22128 }, { "epoch": 90.32244897959184, "grad_norm": 29.06331443786621, "learning_rate": 4.86530612244898e-06, "loss": 1.2263, "step": 22129 }, { "epoch": 90.3265306122449, "grad_norm": 13.620471954345703, "learning_rate": 4.8632653061224494e-06, "loss": 0.0854, "step": 22130 }, { "epoch": 90.33061224489796, "grad_norm": 33.83921813964844, "learning_rate": 4.861224489795919e-06, "loss": 0.5689, "step": 22131 }, { "epoch": 90.33469387755102, "grad_norm": 29.24842643737793, "learning_rate": 4.859183673469388e-06, "loss": 0.2599, "step": 22132 }, { "epoch": 90.33877551020409, "grad_norm": 37.01545715332031, "learning_rate": 4.857142857142858e-06, "loss": 0.8058, "step": 22133 }, { "epoch": 90.34285714285714, "grad_norm": 31.243906021118164, "learning_rate": 4.855102040816327e-06, "loss": 1.1673, "step": 22134 }, { "epoch": 90.34693877551021, "grad_norm": 10.192605018615723, "learning_rate": 4.853061224489796e-06, "loss": 0.0762, "step": 22135 }, { "epoch": 90.35102040816327, "grad_norm": 26.834394454956055, "learning_rate": 4.851020408163266e-06, "loss": 1.4445, "step": 22136 }, { "epoch": 90.35510204081632, "grad_norm": 30.06655502319336, "learning_rate": 4.848979591836735e-06, "loss": 1.0708, "step": 22137 }, { "epoch": 90.35918367346939, "grad_norm": 36.42354202270508, "learning_rate": 4.846938775510204e-06, "loss": 0.8652, "step": 22138 }, { "epoch": 90.36326530612244, "grad_norm": 29.74523162841797, "learning_rate": 4.844897959183674e-06, "loss": 0.2941, "step": 22139 }, { "epoch": 90.36734693877551, "grad_norm": 23.12662124633789, "learning_rate": 4.8428571428571436e-06, "loss": 1.8514, "step": 22140 }, { "epoch": 90.37142857142857, "grad_norm": 28.578706741333008, "learning_rate": 4.840816326530612e-06, "loss": 1.4795, "step": 22141 }, { "epoch": 90.37551020408164, "grad_norm": 33.10789108276367, "learning_rate": 4.838775510204082e-06, "loss": 0.8936, "step": 22142 }, { "epoch": 90.37959183673469, "grad_norm": 22.1899471282959, "learning_rate": 4.836734693877551e-06, "loss": 0.228, "step": 22143 }, { "epoch": 90.38367346938776, "grad_norm": 31.761171340942383, "learning_rate": 4.8346938775510205e-06, "loss": 1.4231, "step": 22144 }, { "epoch": 90.38775510204081, "grad_norm": 35.13112258911133, "learning_rate": 4.83265306122449e-06, "loss": 0.4428, "step": 22145 }, { "epoch": 90.39183673469388, "grad_norm": 18.046798706054688, "learning_rate": 4.830612244897959e-06, "loss": 1.9521, "step": 22146 }, { "epoch": 90.39591836734694, "grad_norm": 38.214534759521484, "learning_rate": 4.828571428571429e-06, "loss": 0.3279, "step": 22147 }, { "epoch": 90.4, "grad_norm": 45.110469818115234, "learning_rate": 4.826530612244898e-06, "loss": 0.8031, "step": 22148 }, { "epoch": 90.40408163265306, "grad_norm": 35.37863540649414, "learning_rate": 4.824489795918367e-06, "loss": 1.258, "step": 22149 }, { "epoch": 90.40816326530613, "grad_norm": 26.524559020996094, "learning_rate": 4.822448979591837e-06, "loss": 1.5722, "step": 22150 }, { "epoch": 90.41224489795918, "grad_norm": 29.53715705871582, "learning_rate": 4.8204081632653065e-06, "loss": 0.3378, "step": 22151 }, { "epoch": 90.41632653061224, "grad_norm": 24.0660457611084, "learning_rate": 4.818367346938775e-06, "loss": 1.3713, "step": 22152 }, { "epoch": 90.42040816326531, "grad_norm": 26.682926177978516, "learning_rate": 4.816326530612245e-06, "loss": 1.2861, "step": 22153 }, { "epoch": 90.42448979591836, "grad_norm": 22.523149490356445, "learning_rate": 4.814285714285714e-06, "loss": 0.3213, "step": 22154 }, { "epoch": 90.42857142857143, "grad_norm": 30.61031723022461, "learning_rate": 4.8122448979591835e-06, "loss": 1.5169, "step": 22155 }, { "epoch": 90.43265306122449, "grad_norm": 27.323640823364258, "learning_rate": 4.810204081632653e-06, "loss": 0.3328, "step": 22156 }, { "epoch": 90.43673469387755, "grad_norm": 28.20888328552246, "learning_rate": 4.808163265306123e-06, "loss": 0.6395, "step": 22157 }, { "epoch": 90.44081632653061, "grad_norm": 9.482953071594238, "learning_rate": 4.8061224489795925e-06, "loss": 0.0786, "step": 22158 }, { "epoch": 90.44489795918368, "grad_norm": 29.97400665283203, "learning_rate": 4.804081632653062e-06, "loss": 0.2302, "step": 22159 }, { "epoch": 90.44897959183673, "grad_norm": 28.561748504638672, "learning_rate": 4.802040816326531e-06, "loss": 1.3065, "step": 22160 }, { "epoch": 90.4530612244898, "grad_norm": 25.5396785736084, "learning_rate": 4.800000000000001e-06, "loss": 1.3488, "step": 22161 }, { "epoch": 90.45714285714286, "grad_norm": 30.381378173828125, "learning_rate": 4.7979591836734694e-06, "loss": 0.2953, "step": 22162 }, { "epoch": 90.46122448979592, "grad_norm": 39.2273063659668, "learning_rate": 4.795918367346939e-06, "loss": 1.1018, "step": 22163 }, { "epoch": 90.46530612244898, "grad_norm": 9.065681457519531, "learning_rate": 4.793877551020409e-06, "loss": 0.1127, "step": 22164 }, { "epoch": 90.46938775510205, "grad_norm": 37.5741081237793, "learning_rate": 4.791836734693878e-06, "loss": 0.8992, "step": 22165 }, { "epoch": 90.4734693877551, "grad_norm": 21.809768676757812, "learning_rate": 4.789795918367347e-06, "loss": 1.8952, "step": 22166 }, { "epoch": 90.47755102040816, "grad_norm": 28.230697631835938, "learning_rate": 4.787755102040817e-06, "loss": 0.2332, "step": 22167 }, { "epoch": 90.48163265306123, "grad_norm": 11.991625785827637, "learning_rate": 4.785714285714286e-06, "loss": 0.1185, "step": 22168 }, { "epoch": 90.48571428571428, "grad_norm": 42.41271209716797, "learning_rate": 4.783673469387755e-06, "loss": 0.4848, "step": 22169 }, { "epoch": 90.48979591836735, "grad_norm": 23.799671173095703, "learning_rate": 4.781632653061225e-06, "loss": 1.3503, "step": 22170 }, { "epoch": 90.4938775510204, "grad_norm": 21.555768966674805, "learning_rate": 4.779591836734694e-06, "loss": 0.1903, "step": 22171 }, { "epoch": 90.49795918367347, "grad_norm": 30.216285705566406, "learning_rate": 4.7775510204081636e-06, "loss": 0.4468, "step": 22172 }, { "epoch": 90.50204081632653, "grad_norm": 26.777345657348633, "learning_rate": 4.775510204081632e-06, "loss": 1.3775, "step": 22173 }, { "epoch": 90.5061224489796, "grad_norm": 28.62704086303711, "learning_rate": 4.773469387755102e-06, "loss": 0.2833, "step": 22174 }, { "epoch": 90.51020408163265, "grad_norm": 35.102989196777344, "learning_rate": 4.771428571428572e-06, "loss": 1.1067, "step": 22175 }, { "epoch": 90.51428571428572, "grad_norm": 39.77308654785156, "learning_rate": 4.7693877551020405e-06, "loss": 0.5211, "step": 22176 }, { "epoch": 90.51836734693877, "grad_norm": 26.531282424926758, "learning_rate": 4.76734693877551e-06, "loss": 0.2664, "step": 22177 }, { "epoch": 90.52244897959184, "grad_norm": 19.286161422729492, "learning_rate": 4.76530612244898e-06, "loss": 0.1558, "step": 22178 }, { "epoch": 90.5265306122449, "grad_norm": 26.090682983398438, "learning_rate": 4.763265306122449e-06, "loss": 0.446, "step": 22179 }, { "epoch": 90.53061224489795, "grad_norm": 24.220121383666992, "learning_rate": 4.761224489795918e-06, "loss": 0.2191, "step": 22180 }, { "epoch": 90.53469387755102, "grad_norm": 24.289478302001953, "learning_rate": 4.759183673469388e-06, "loss": 0.6401, "step": 22181 }, { "epoch": 90.53877551020408, "grad_norm": 21.86332130432129, "learning_rate": 4.757142857142857e-06, "loss": 1.8634, "step": 22182 }, { "epoch": 90.54285714285714, "grad_norm": 29.0224666595459, "learning_rate": 4.7551020408163265e-06, "loss": 1.2446, "step": 22183 }, { "epoch": 90.5469387755102, "grad_norm": 27.070724487304688, "learning_rate": 4.753061224489796e-06, "loss": 0.4544, "step": 22184 }, { "epoch": 90.55102040816327, "grad_norm": 21.456594467163086, "learning_rate": 4.751020408163266e-06, "loss": 1.9964, "step": 22185 }, { "epoch": 90.55510204081632, "grad_norm": 25.929157257080078, "learning_rate": 4.7489795918367355e-06, "loss": 0.2938, "step": 22186 }, { "epoch": 90.55918367346939, "grad_norm": 28.59903907775879, "learning_rate": 4.746938775510204e-06, "loss": 1.0665, "step": 22187 }, { "epoch": 90.56326530612245, "grad_norm": 31.750762939453125, "learning_rate": 4.744897959183674e-06, "loss": 0.9726, "step": 22188 }, { "epoch": 90.56734693877551, "grad_norm": 33.16468811035156, "learning_rate": 4.742857142857144e-06, "loss": 0.6282, "step": 22189 }, { "epoch": 90.57142857142857, "grad_norm": 87.48820495605469, "learning_rate": 4.7408163265306125e-06, "loss": 0.4546, "step": 22190 }, { "epoch": 90.57551020408164, "grad_norm": 28.603260040283203, "learning_rate": 4.738775510204082e-06, "loss": 1.39, "step": 22191 }, { "epoch": 90.57959183673469, "grad_norm": 26.6204891204834, "learning_rate": 4.736734693877552e-06, "loss": 1.4402, "step": 22192 }, { "epoch": 90.58367346938776, "grad_norm": 31.474164962768555, "learning_rate": 4.734693877551021e-06, "loss": 0.2243, "step": 22193 }, { "epoch": 90.58775510204082, "grad_norm": 9.419135093688965, "learning_rate": 4.73265306122449e-06, "loss": 2.9592, "step": 22194 }, { "epoch": 90.59183673469387, "grad_norm": 30.803363800048828, "learning_rate": 4.730612244897959e-06, "loss": 0.9162, "step": 22195 }, { "epoch": 90.59591836734694, "grad_norm": 34.1041374206543, "learning_rate": 4.728571428571429e-06, "loss": 0.4202, "step": 22196 }, { "epoch": 90.6, "grad_norm": 24.0887451171875, "learning_rate": 4.7265306122448985e-06, "loss": 1.9304, "step": 22197 }, { "epoch": 90.60408163265306, "grad_norm": 33.77213668823242, "learning_rate": 4.724489795918367e-06, "loss": 1.1462, "step": 22198 }, { "epoch": 90.60816326530612, "grad_norm": 27.22393226623535, "learning_rate": 4.722448979591837e-06, "loss": 1.4563, "step": 22199 }, { "epoch": 90.61224489795919, "grad_norm": 39.714202880859375, "learning_rate": 4.720408163265307e-06, "loss": 1.1293, "step": 22200 }, { "epoch": 90.61632653061224, "grad_norm": 37.25507736206055, "learning_rate": 4.7183673469387754e-06, "loss": 0.5189, "step": 22201 }, { "epoch": 90.62040816326531, "grad_norm": 28.826980590820312, "learning_rate": 4.716326530612245e-06, "loss": 0.6506, "step": 22202 }, { "epoch": 90.62448979591836, "grad_norm": 25.795635223388672, "learning_rate": 4.714285714285715e-06, "loss": 1.3163, "step": 22203 }, { "epoch": 90.62857142857143, "grad_norm": 23.556398391723633, "learning_rate": 4.712244897959184e-06, "loss": 1.846, "step": 22204 }, { "epoch": 90.63265306122449, "grad_norm": 25.820852279663086, "learning_rate": 4.710204081632653e-06, "loss": 1.348, "step": 22205 }, { "epoch": 90.63673469387756, "grad_norm": 28.058794021606445, "learning_rate": 4.708163265306122e-06, "loss": 0.406, "step": 22206 }, { "epoch": 90.64081632653061, "grad_norm": 34.68992233276367, "learning_rate": 4.706122448979592e-06, "loss": 1.1763, "step": 22207 }, { "epoch": 90.64489795918368, "grad_norm": 24.71141242980957, "learning_rate": 4.704081632653061e-06, "loss": 1.4226, "step": 22208 }, { "epoch": 90.64897959183673, "grad_norm": 35.414730072021484, "learning_rate": 4.70204081632653e-06, "loss": 0.5891, "step": 22209 }, { "epoch": 90.65306122448979, "grad_norm": 36.6071891784668, "learning_rate": 4.7e-06, "loss": 0.7554, "step": 22210 }, { "epoch": 90.65714285714286, "grad_norm": 27.07956886291504, "learning_rate": 4.6979591836734696e-06, "loss": 0.4648, "step": 22211 }, { "epoch": 90.66122448979591, "grad_norm": 28.979368209838867, "learning_rate": 4.695918367346939e-06, "loss": 0.5806, "step": 22212 }, { "epoch": 90.66530612244898, "grad_norm": 28.582788467407227, "learning_rate": 4.693877551020408e-06, "loss": 1.3276, "step": 22213 }, { "epoch": 90.66938775510204, "grad_norm": 28.03139877319336, "learning_rate": 4.691836734693878e-06, "loss": 0.5105, "step": 22214 }, { "epoch": 90.6734693877551, "grad_norm": 33.36091613769531, "learning_rate": 4.689795918367347e-06, "loss": 0.738, "step": 22215 }, { "epoch": 90.67755102040816, "grad_norm": 29.40159034729004, "learning_rate": 4.687755102040817e-06, "loss": 0.9503, "step": 22216 }, { "epoch": 90.68163265306123, "grad_norm": 26.86735725402832, "learning_rate": 4.685714285714286e-06, "loss": 1.4966, "step": 22217 }, { "epoch": 90.68571428571428, "grad_norm": 34.45166778564453, "learning_rate": 4.6836734693877555e-06, "loss": 0.7364, "step": 22218 }, { "epoch": 90.68979591836735, "grad_norm": 14.485536575317383, "learning_rate": 4.681632653061225e-06, "loss": 0.103, "step": 22219 }, { "epoch": 90.6938775510204, "grad_norm": 40.279884338378906, "learning_rate": 4.679591836734694e-06, "loss": 0.5777, "step": 22220 }, { "epoch": 90.69795918367348, "grad_norm": 30.06777572631836, "learning_rate": 4.677551020408164e-06, "loss": 0.3497, "step": 22221 }, { "epoch": 90.70204081632653, "grad_norm": 37.20753479003906, "learning_rate": 4.675510204081633e-06, "loss": 0.3241, "step": 22222 }, { "epoch": 90.70612244897958, "grad_norm": 33.203060150146484, "learning_rate": 4.673469387755102e-06, "loss": 0.7213, "step": 22223 }, { "epoch": 90.71020408163265, "grad_norm": 40.54397201538086, "learning_rate": 4.671428571428572e-06, "loss": 0.4069, "step": 22224 }, { "epoch": 90.71428571428571, "grad_norm": 27.509815216064453, "learning_rate": 4.6693877551020415e-06, "loss": 0.5003, "step": 22225 }, { "epoch": 90.71836734693878, "grad_norm": 17.641948699951172, "learning_rate": 4.66734693877551e-06, "loss": 2.5317, "step": 22226 }, { "epoch": 90.72244897959183, "grad_norm": 34.18061828613281, "learning_rate": 4.66530612244898e-06, "loss": 1.2659, "step": 22227 }, { "epoch": 90.7265306122449, "grad_norm": 42.647159576416016, "learning_rate": 4.663265306122449e-06, "loss": 1.2972, "step": 22228 }, { "epoch": 90.73061224489796, "grad_norm": 25.294891357421875, "learning_rate": 4.6612244897959185e-06, "loss": 2.0211, "step": 22229 }, { "epoch": 90.73469387755102, "grad_norm": 30.570833206176758, "learning_rate": 4.659183673469388e-06, "loss": 0.4356, "step": 22230 }, { "epoch": 90.73877551020408, "grad_norm": 19.543237686157227, "learning_rate": 4.657142857142857e-06, "loss": 0.2438, "step": 22231 }, { "epoch": 90.74285714285715, "grad_norm": 30.9814453125, "learning_rate": 4.655102040816327e-06, "loss": 0.3624, "step": 22232 }, { "epoch": 90.7469387755102, "grad_norm": 29.719738006591797, "learning_rate": 4.653061224489796e-06, "loss": 0.5915, "step": 22233 }, { "epoch": 90.75102040816327, "grad_norm": 30.873170852661133, "learning_rate": 4.651020408163265e-06, "loss": 1.2433, "step": 22234 }, { "epoch": 90.75510204081633, "grad_norm": 24.16229248046875, "learning_rate": 4.648979591836735e-06, "loss": 0.2796, "step": 22235 }, { "epoch": 90.7591836734694, "grad_norm": 22.37993621826172, "learning_rate": 4.6469387755102044e-06, "loss": 1.534, "step": 22236 }, { "epoch": 90.76326530612245, "grad_norm": 25.45604133605957, "learning_rate": 4.644897959183673e-06, "loss": 0.6417, "step": 22237 }, { "epoch": 90.7673469387755, "grad_norm": 32.9085807800293, "learning_rate": 4.642857142857143e-06, "loss": 0.8993, "step": 22238 }, { "epoch": 90.77142857142857, "grad_norm": 27.1348934173584, "learning_rate": 4.640816326530613e-06, "loss": 1.1607, "step": 22239 }, { "epoch": 90.77551020408163, "grad_norm": 22.409509658813477, "learning_rate": 4.638775510204081e-06, "loss": 0.3442, "step": 22240 }, { "epoch": 90.7795918367347, "grad_norm": 28.271886825561523, "learning_rate": 4.636734693877551e-06, "loss": 1.4699, "step": 22241 }, { "epoch": 90.78367346938775, "grad_norm": 29.62704849243164, "learning_rate": 4.634693877551021e-06, "loss": 0.8766, "step": 22242 }, { "epoch": 90.78775510204082, "grad_norm": 33.82392883300781, "learning_rate": 4.63265306122449e-06, "loss": 1.1199, "step": 22243 }, { "epoch": 90.79183673469387, "grad_norm": 1.6685775518417358, "learning_rate": 4.63061224489796e-06, "loss": 0.0274, "step": 22244 }, { "epoch": 90.79591836734694, "grad_norm": 37.288883209228516, "learning_rate": 4.628571428571429e-06, "loss": 0.8126, "step": 22245 }, { "epoch": 90.8, "grad_norm": NaN, "learning_rate": 4.6265306122448986e-06, "loss": 0.9071, "step": 22246 }, { "epoch": 90.80408163265307, "grad_norm": 48.9190673828125, "learning_rate": 4.6265306122448986e-06, "loss": 0.8245, "step": 22247 }, { "epoch": 90.80816326530612, "grad_norm": 28.51238250732422, "learning_rate": 4.624489795918367e-06, "loss": 0.7391, "step": 22248 }, { "epoch": 90.81224489795919, "grad_norm": 10.233481407165527, "learning_rate": 4.622448979591837e-06, "loss": 0.1185, "step": 22249 }, { "epoch": 90.81632653061224, "grad_norm": 30.857933044433594, "learning_rate": 4.620408163265307e-06, "loss": 0.9928, "step": 22250 }, { "epoch": 90.82040816326531, "grad_norm": 43.85835266113281, "learning_rate": 4.6183673469387755e-06, "loss": 0.2786, "step": 22251 }, { "epoch": 90.82448979591837, "grad_norm": 24.938356399536133, "learning_rate": 4.616326530612245e-06, "loss": 0.1955, "step": 22252 }, { "epoch": 90.82857142857142, "grad_norm": 34.16277313232422, "learning_rate": 4.614285714285715e-06, "loss": 0.7533, "step": 22253 }, { "epoch": 90.83265306122449, "grad_norm": 37.45033645629883, "learning_rate": 4.612244897959184e-06, "loss": 0.9447, "step": 22254 }, { "epoch": 90.83673469387755, "grad_norm": 26.614709854125977, "learning_rate": 4.610204081632653e-06, "loss": 1.0077, "step": 22255 }, { "epoch": 90.84081632653061, "grad_norm": 30.625104904174805, "learning_rate": 4.608163265306123e-06, "loss": 1.1665, "step": 22256 }, { "epoch": 90.84489795918367, "grad_norm": 35.70811462402344, "learning_rate": 4.606122448979592e-06, "loss": 0.2578, "step": 22257 }, { "epoch": 90.84897959183674, "grad_norm": 35.52216339111328, "learning_rate": 4.6040816326530615e-06, "loss": 0.6898, "step": 22258 }, { "epoch": 90.85306122448979, "grad_norm": 7.440247535705566, "learning_rate": 4.60204081632653e-06, "loss": 0.065, "step": 22259 }, { "epoch": 90.85714285714286, "grad_norm": 24.53506851196289, "learning_rate": 4.6e-06, "loss": 1.5323, "step": 22260 }, { "epoch": 90.86122448979592, "grad_norm": 25.374059677124023, "learning_rate": 4.59795918367347e-06, "loss": 1.5082, "step": 22261 }, { "epoch": 90.86530612244898, "grad_norm": 33.93872833251953, "learning_rate": 4.5959183673469385e-06, "loss": 0.8031, "step": 22262 }, { "epoch": 90.86938775510204, "grad_norm": 27.787845611572266, "learning_rate": 4.593877551020408e-06, "loss": 1.2339, "step": 22263 }, { "epoch": 90.87346938775511, "grad_norm": 39.92091751098633, "learning_rate": 4.591836734693878e-06, "loss": 1.3425, "step": 22264 }, { "epoch": 90.87755102040816, "grad_norm": 27.702526092529297, "learning_rate": 4.589795918367347e-06, "loss": 1.1639, "step": 22265 }, { "epoch": 90.88163265306123, "grad_norm": 35.04043197631836, "learning_rate": 4.587755102040816e-06, "loss": 0.7764, "step": 22266 }, { "epoch": 90.88571428571429, "grad_norm": 26.840225219726562, "learning_rate": 4.585714285714286e-06, "loss": 0.6557, "step": 22267 }, { "epoch": 90.88979591836734, "grad_norm": 27.190927505493164, "learning_rate": 4.583673469387755e-06, "loss": 1.4201, "step": 22268 }, { "epoch": 90.89387755102041, "grad_norm": 30.38950538635254, "learning_rate": 4.5816326530612245e-06, "loss": 0.5973, "step": 22269 }, { "epoch": 90.89795918367346, "grad_norm": 24.094457626342773, "learning_rate": 4.579591836734694e-06, "loss": 1.4897, "step": 22270 }, { "epoch": 90.90204081632653, "grad_norm": 20.129409790039062, "learning_rate": 4.577551020408164e-06, "loss": 1.9698, "step": 22271 }, { "epoch": 90.90612244897959, "grad_norm": 30.525604248046875, "learning_rate": 4.5755102040816335e-06, "loss": 0.2582, "step": 22272 }, { "epoch": 90.91020408163266, "grad_norm": 28.615537643432617, "learning_rate": 4.573469387755102e-06, "loss": 1.2561, "step": 22273 }, { "epoch": 90.91428571428571, "grad_norm": 32.37730407714844, "learning_rate": 4.571428571428572e-06, "loss": 0.8287, "step": 22274 }, { "epoch": 90.91836734693878, "grad_norm": 31.113554000854492, "learning_rate": 4.569387755102042e-06, "loss": 0.7138, "step": 22275 }, { "epoch": 90.92244897959183, "grad_norm": 44.967384338378906, "learning_rate": 4.5673469387755104e-06, "loss": 0.7009, "step": 22276 }, { "epoch": 90.9265306122449, "grad_norm": 16.024948120117188, "learning_rate": 4.56530612244898e-06, "loss": 2.2151, "step": 22277 }, { "epoch": 90.93061224489796, "grad_norm": 4.1197428703308105, "learning_rate": 4.56326530612245e-06, "loss": 0.0503, "step": 22278 }, { "epoch": 90.93469387755103, "grad_norm": 31.7801456451416, "learning_rate": 4.561224489795919e-06, "loss": 0.6614, "step": 22279 }, { "epoch": 90.93877551020408, "grad_norm": 35.96879577636719, "learning_rate": 4.559183673469388e-06, "loss": 1.0166, "step": 22280 }, { "epoch": 90.94285714285714, "grad_norm": 23.4974365234375, "learning_rate": 4.557142857142857e-06, "loss": 1.6372, "step": 22281 }, { "epoch": 90.9469387755102, "grad_norm": 26.626829147338867, "learning_rate": 4.555102040816327e-06, "loss": 1.49, "step": 22282 }, { "epoch": 90.95102040816326, "grad_norm": 31.452407836914062, "learning_rate": 4.553061224489796e-06, "loss": 0.7692, "step": 22283 }, { "epoch": 90.95510204081633, "grad_norm": 33.386226654052734, "learning_rate": 4.551020408163265e-06, "loss": 0.5944, "step": 22284 }, { "epoch": 90.95918367346938, "grad_norm": 18.838577270507812, "learning_rate": 4.548979591836735e-06, "loss": 0.1273, "step": 22285 }, { "epoch": 90.96326530612245, "grad_norm": 17.1682071685791, "learning_rate": 4.5469387755102046e-06, "loss": 0.1095, "step": 22286 }, { "epoch": 90.9673469387755, "grad_norm": 30.85658073425293, "learning_rate": 4.544897959183673e-06, "loss": 0.9707, "step": 22287 }, { "epoch": 90.97142857142858, "grad_norm": 35.633506774902344, "learning_rate": 4.542857142857143e-06, "loss": 1.0298, "step": 22288 }, { "epoch": 90.97551020408163, "grad_norm": 32.21323013305664, "learning_rate": 4.540816326530613e-06, "loss": 1.0831, "step": 22289 }, { "epoch": 90.9795918367347, "grad_norm": 7.524223327636719, "learning_rate": 4.5387755102040815e-06, "loss": 0.0735, "step": 22290 }, { "epoch": 90.98367346938775, "grad_norm": 25.801958084106445, "learning_rate": 4.536734693877551e-06, "loss": 1.2591, "step": 22291 }, { "epoch": 90.98775510204082, "grad_norm": 31.68606185913086, "learning_rate": 4.53469387755102e-06, "loss": 0.2874, "step": 22292 }, { "epoch": 90.99183673469388, "grad_norm": 41.28125, "learning_rate": 4.53265306122449e-06, "loss": 1.0166, "step": 22293 }, { "epoch": 90.99591836734695, "grad_norm": 19.417644500732422, "learning_rate": 4.530612244897959e-06, "loss": 0.1038, "step": 22294 }, { "epoch": 91.0, "grad_norm": 45.939266204833984, "learning_rate": 4.528571428571428e-06, "loss": 1.0831, "step": 22295 }, { "epoch": 91.00408163265305, "grad_norm": 30.095539093017578, "learning_rate": 4.526530612244898e-06, "loss": 0.794, "step": 22296 }, { "epoch": 91.00816326530612, "grad_norm": 21.89320945739746, "learning_rate": 4.5244897959183675e-06, "loss": 0.1969, "step": 22297 }, { "epoch": 91.01224489795918, "grad_norm": 24.27119255065918, "learning_rate": 4.522448979591837e-06, "loss": 0.3217, "step": 22298 }, { "epoch": 91.01632653061225, "grad_norm": 33.87977981567383, "learning_rate": 4.520408163265307e-06, "loss": 0.3426, "step": 22299 }, { "epoch": 91.0204081632653, "grad_norm": 28.14420509338379, "learning_rate": 4.518367346938776e-06, "loss": 1.3974, "step": 22300 }, { "epoch": 91.02448979591837, "grad_norm": 45.039119720458984, "learning_rate": 4.516326530612245e-06, "loss": 0.5014, "step": 22301 }, { "epoch": 91.02857142857142, "grad_norm": 26.004230499267578, "learning_rate": 4.514285714285715e-06, "loss": 0.2812, "step": 22302 }, { "epoch": 91.0326530612245, "grad_norm": 48.32766342163086, "learning_rate": 4.512244897959184e-06, "loss": 0.7856, "step": 22303 }, { "epoch": 91.03673469387755, "grad_norm": 27.42278289794922, "learning_rate": 4.5102040816326535e-06, "loss": 0.5792, "step": 22304 }, { "epoch": 91.04081632653062, "grad_norm": 26.310331344604492, "learning_rate": 4.508163265306123e-06, "loss": 1.8807, "step": 22305 }, { "epoch": 91.04489795918367, "grad_norm": 38.721038818359375, "learning_rate": 4.506122448979592e-06, "loss": 0.5458, "step": 22306 }, { "epoch": 91.04897959183674, "grad_norm": 32.469635009765625, "learning_rate": 4.504081632653062e-06, "loss": 0.9656, "step": 22307 }, { "epoch": 91.0530612244898, "grad_norm": 9.456855773925781, "learning_rate": 4.502040816326531e-06, "loss": 2.9527, "step": 22308 }, { "epoch": 91.05714285714286, "grad_norm": 17.407424926757812, "learning_rate": 4.5e-06, "loss": 2.4854, "step": 22309 }, { "epoch": 91.06122448979592, "grad_norm": 29.86870765686035, "learning_rate": 4.49795918367347e-06, "loss": 0.3251, "step": 22310 }, { "epoch": 91.06530612244897, "grad_norm": 20.078832626342773, "learning_rate": 4.4959183673469394e-06, "loss": 0.1232, "step": 22311 }, { "epoch": 91.06938775510204, "grad_norm": 18.801319122314453, "learning_rate": 4.493877551020408e-06, "loss": 1.8984, "step": 22312 }, { "epoch": 91.0734693877551, "grad_norm": 34.007118225097656, "learning_rate": 4.491836734693878e-06, "loss": 0.7265, "step": 22313 }, { "epoch": 91.07755102040817, "grad_norm": 39.86275100708008, "learning_rate": 4.489795918367347e-06, "loss": 0.5614, "step": 22314 }, { "epoch": 91.08163265306122, "grad_norm": 23.060791015625, "learning_rate": 4.487755102040816e-06, "loss": 1.3089, "step": 22315 }, { "epoch": 91.08571428571429, "grad_norm": 27.617998123168945, "learning_rate": 4.485714285714286e-06, "loss": 1.4816, "step": 22316 }, { "epoch": 91.08979591836734, "grad_norm": 37.883480072021484, "learning_rate": 4.483673469387755e-06, "loss": 0.9413, "step": 22317 }, { "epoch": 91.09387755102041, "grad_norm": 35.32096481323242, "learning_rate": 4.4816326530612246e-06, "loss": 0.905, "step": 22318 }, { "epoch": 91.09795918367347, "grad_norm": 25.63558006286621, "learning_rate": 4.479591836734694e-06, "loss": 0.2344, "step": 22319 }, { "epoch": 91.10204081632654, "grad_norm": 28.860319137573242, "learning_rate": 4.477551020408163e-06, "loss": 0.6427, "step": 22320 }, { "epoch": 91.10612244897959, "grad_norm": 27.291824340820312, "learning_rate": 4.475510204081633e-06, "loss": 1.2731, "step": 22321 }, { "epoch": 91.11020408163266, "grad_norm": 23.045391082763672, "learning_rate": 4.4734693877551015e-06, "loss": 1.4571, "step": 22322 }, { "epoch": 91.11428571428571, "grad_norm": 30.38811683654785, "learning_rate": 4.471428571428571e-06, "loss": 1.3144, "step": 22323 }, { "epoch": 91.11836734693877, "grad_norm": 33.49137878417969, "learning_rate": 4.469387755102041e-06, "loss": 0.5684, "step": 22324 }, { "epoch": 91.12244897959184, "grad_norm": 24.4415225982666, "learning_rate": 4.4673469387755105e-06, "loss": 0.217, "step": 22325 }, { "epoch": 91.12653061224489, "grad_norm": 37.651554107666016, "learning_rate": 4.465306122448979e-06, "loss": 0.6546, "step": 22326 }, { "epoch": 91.13061224489796, "grad_norm": 9.99918270111084, "learning_rate": 4.463265306122449e-06, "loss": 0.1168, "step": 22327 }, { "epoch": 91.13469387755102, "grad_norm": 8.596738815307617, "learning_rate": 4.461224489795919e-06, "loss": 0.0708, "step": 22328 }, { "epoch": 91.13877551020408, "grad_norm": 21.924943923950195, "learning_rate": 4.459183673469388e-06, "loss": 1.8648, "step": 22329 }, { "epoch": 91.14285714285714, "grad_norm": 26.40488624572754, "learning_rate": 4.457142857142858e-06, "loss": 0.9842, "step": 22330 }, { "epoch": 91.14693877551021, "grad_norm": 33.73012161254883, "learning_rate": 4.455102040816327e-06, "loss": 0.8532, "step": 22331 }, { "epoch": 91.15102040816326, "grad_norm": 25.42445182800293, "learning_rate": 4.4530612244897965e-06, "loss": 0.5362, "step": 22332 }, { "epoch": 91.15510204081633, "grad_norm": 12.995975494384766, "learning_rate": 4.451020408163265e-06, "loss": 0.1196, "step": 22333 }, { "epoch": 91.15918367346939, "grad_norm": 24.277257919311523, "learning_rate": 4.448979591836735e-06, "loss": 1.317, "step": 22334 }, { "epoch": 91.16326530612245, "grad_norm": 33.70728302001953, "learning_rate": 4.446938775510205e-06, "loss": 1.0492, "step": 22335 }, { "epoch": 91.16734693877551, "grad_norm": 30.80092430114746, "learning_rate": 4.4448979591836735e-06, "loss": 0.55, "step": 22336 }, { "epoch": 91.17142857142858, "grad_norm": 17.950584411621094, "learning_rate": 4.442857142857143e-06, "loss": 0.1825, "step": 22337 }, { "epoch": 91.17551020408163, "grad_norm": 34.097373962402344, "learning_rate": 4.440816326530613e-06, "loss": 1.1546, "step": 22338 }, { "epoch": 91.17959183673469, "grad_norm": 22.033252716064453, "learning_rate": 4.438775510204082e-06, "loss": 1.8456, "step": 22339 }, { "epoch": 91.18367346938776, "grad_norm": 26.78746795654297, "learning_rate": 4.436734693877551e-06, "loss": 0.2726, "step": 22340 }, { "epoch": 91.18775510204081, "grad_norm": 19.336158752441406, "learning_rate": 4.434693877551021e-06, "loss": 0.0883, "step": 22341 }, { "epoch": 91.19183673469388, "grad_norm": 29.887418746948242, "learning_rate": 4.43265306122449e-06, "loss": 1.1062, "step": 22342 }, { "epoch": 91.19591836734693, "grad_norm": 4.011760234832764, "learning_rate": 4.4306122448979595e-06, "loss": 0.042, "step": 22343 }, { "epoch": 91.2, "grad_norm": 33.16244125366211, "learning_rate": 4.428571428571428e-06, "loss": 0.8431, "step": 22344 }, { "epoch": 91.20408163265306, "grad_norm": 26.027599334716797, "learning_rate": 4.426530612244898e-06, "loss": 0.3837, "step": 22345 }, { "epoch": 91.20816326530613, "grad_norm": 30.767261505126953, "learning_rate": 4.424489795918368e-06, "loss": 0.8602, "step": 22346 }, { "epoch": 91.21224489795918, "grad_norm": 33.034053802490234, "learning_rate": 4.4224489795918364e-06, "loss": 0.9698, "step": 22347 }, { "epoch": 91.21632653061225, "grad_norm": 24.809675216674805, "learning_rate": 4.420408163265306e-06, "loss": 2.1728, "step": 22348 }, { "epoch": 91.2204081632653, "grad_norm": 28.23029327392578, "learning_rate": 4.418367346938776e-06, "loss": 1.7977, "step": 22349 }, { "epoch": 91.22448979591837, "grad_norm": 26.947546005249023, "learning_rate": 4.416326530612245e-06, "loss": 0.6695, "step": 22350 }, { "epoch": 91.22857142857143, "grad_norm": 39.09749221801758, "learning_rate": 4.414285714285714e-06, "loss": 1.1476, "step": 22351 }, { "epoch": 91.2326530612245, "grad_norm": 29.80929946899414, "learning_rate": 4.412244897959184e-06, "loss": 0.4875, "step": 22352 }, { "epoch": 91.23673469387755, "grad_norm": 26.432851791381836, "learning_rate": 4.410204081632653e-06, "loss": 1.826, "step": 22353 }, { "epoch": 91.2408163265306, "grad_norm": 23.94459342956543, "learning_rate": 4.408163265306122e-06, "loss": 0.2272, "step": 22354 }, { "epoch": 91.24489795918367, "grad_norm": 25.3232479095459, "learning_rate": 4.406122448979592e-06, "loss": 1.6132, "step": 22355 }, { "epoch": 91.24897959183673, "grad_norm": 35.76069259643555, "learning_rate": 4.404081632653062e-06, "loss": 0.5166, "step": 22356 }, { "epoch": 91.2530612244898, "grad_norm": 33.749107360839844, "learning_rate": 4.402040816326531e-06, "loss": 0.4811, "step": 22357 }, { "epoch": 91.25714285714285, "grad_norm": 35.07527160644531, "learning_rate": 4.4e-06, "loss": 0.7944, "step": 22358 }, { "epoch": 91.26122448979592, "grad_norm": 31.074968338012695, "learning_rate": 4.39795918367347e-06, "loss": 1.0968, "step": 22359 }, { "epoch": 91.26530612244898, "grad_norm": 28.81693458557129, "learning_rate": 4.3959183673469396e-06, "loss": 1.3543, "step": 22360 }, { "epoch": 91.26938775510204, "grad_norm": 34.79488754272461, "learning_rate": 4.393877551020408e-06, "loss": 1.0815, "step": 22361 }, { "epoch": 91.2734693877551, "grad_norm": 33.700035095214844, "learning_rate": 4.391836734693878e-06, "loss": 0.6368, "step": 22362 }, { "epoch": 91.27755102040817, "grad_norm": 27.947834014892578, "learning_rate": 4.389795918367348e-06, "loss": 1.304, "step": 22363 }, { "epoch": 91.28163265306122, "grad_norm": 28.346704483032227, "learning_rate": 4.3877551020408165e-06, "loss": 0.3312, "step": 22364 }, { "epoch": 91.28571428571429, "grad_norm": 32.6120719909668, "learning_rate": 4.385714285714286e-06, "loss": 0.4534, "step": 22365 }, { "epoch": 91.28979591836735, "grad_norm": 27.195354461669922, "learning_rate": 4.383673469387755e-06, "loss": 1.3543, "step": 22366 }, { "epoch": 91.29387755102042, "grad_norm": 36.39936065673828, "learning_rate": 4.381632653061225e-06, "loss": 0.7485, "step": 22367 }, { "epoch": 91.29795918367347, "grad_norm": 26.191652297973633, "learning_rate": 4.379591836734694e-06, "loss": 1.7608, "step": 22368 }, { "epoch": 91.30204081632652, "grad_norm": 14.642556190490723, "learning_rate": 4.377551020408163e-06, "loss": 0.1052, "step": 22369 }, { "epoch": 91.3061224489796, "grad_norm": 39.29911422729492, "learning_rate": 4.375510204081633e-06, "loss": 0.4831, "step": 22370 }, { "epoch": 91.31020408163265, "grad_norm": 26.74260139465332, "learning_rate": 4.3734693877551025e-06, "loss": 1.1035, "step": 22371 }, { "epoch": 91.31428571428572, "grad_norm": 28.69528579711914, "learning_rate": 4.371428571428571e-06, "loss": 0.2648, "step": 22372 }, { "epoch": 91.31836734693877, "grad_norm": 36.58736038208008, "learning_rate": 4.369387755102041e-06, "loss": 0.704, "step": 22373 }, { "epoch": 91.32244897959184, "grad_norm": 13.07031536102295, "learning_rate": 4.367346938775511e-06, "loss": 0.0937, "step": 22374 }, { "epoch": 91.3265306122449, "grad_norm": 25.394134521484375, "learning_rate": 4.3653061224489795e-06, "loss": 1.4155, "step": 22375 }, { "epoch": 91.33061224489796, "grad_norm": 17.22372055053711, "learning_rate": 4.363265306122449e-06, "loss": 0.2138, "step": 22376 }, { "epoch": 91.33469387755102, "grad_norm": 33.45912170410156, "learning_rate": 4.361224489795918e-06, "loss": 0.7159, "step": 22377 }, { "epoch": 91.33877551020409, "grad_norm": 24.63184928894043, "learning_rate": 4.359183673469388e-06, "loss": 1.3837, "step": 22378 }, { "epoch": 91.34285714285714, "grad_norm": 20.21237564086914, "learning_rate": 4.357142857142857e-06, "loss": 2.0297, "step": 22379 }, { "epoch": 91.34693877551021, "grad_norm": 29.739656448364258, "learning_rate": 4.355102040816326e-06, "loss": 0.3283, "step": 22380 }, { "epoch": 91.35102040816327, "grad_norm": 37.16730499267578, "learning_rate": 4.353061224489796e-06, "loss": 0.9342, "step": 22381 }, { "epoch": 91.35510204081632, "grad_norm": 25.746135711669922, "learning_rate": 4.3510204081632654e-06, "loss": 1.3206, "step": 22382 }, { "epoch": 91.35918367346939, "grad_norm": 26.955970764160156, "learning_rate": 4.348979591836735e-06, "loss": 1.3492, "step": 22383 }, { "epoch": 91.36326530612244, "grad_norm": 24.834440231323242, "learning_rate": 4.346938775510205e-06, "loss": 1.8405, "step": 22384 }, { "epoch": 91.36734693877551, "grad_norm": 28.678449630737305, "learning_rate": 4.344897959183674e-06, "loss": 0.2253, "step": 22385 }, { "epoch": 91.37142857142857, "grad_norm": 34.873416900634766, "learning_rate": 4.342857142857143e-06, "loss": 0.5197, "step": 22386 }, { "epoch": 91.37551020408164, "grad_norm": 25.35062599182129, "learning_rate": 4.340816326530613e-06, "loss": 0.8985, "step": 22387 }, { "epoch": 91.37959183673469, "grad_norm": 26.67339324951172, "learning_rate": 4.338775510204082e-06, "loss": 0.2762, "step": 22388 }, { "epoch": 91.38367346938776, "grad_norm": 31.83460235595703, "learning_rate": 4.336734693877551e-06, "loss": 0.5958, "step": 22389 }, { "epoch": 91.38775510204081, "grad_norm": 34.50571823120117, "learning_rate": 4.334693877551021e-06, "loss": 0.7696, "step": 22390 }, { "epoch": 91.39183673469388, "grad_norm": 21.8891544342041, "learning_rate": 4.33265306122449e-06, "loss": 1.9729, "step": 22391 }, { "epoch": 91.39591836734694, "grad_norm": 44.246307373046875, "learning_rate": 4.3306122448979596e-06, "loss": 1.1306, "step": 22392 }, { "epoch": 91.4, "grad_norm": 43.10311508178711, "learning_rate": 4.328571428571429e-06, "loss": 0.7825, "step": 22393 }, { "epoch": 91.40408163265306, "grad_norm": 32.19963073730469, "learning_rate": 4.326530612244898e-06, "loss": 0.3224, "step": 22394 }, { "epoch": 91.40816326530613, "grad_norm": 21.95501708984375, "learning_rate": 4.324489795918368e-06, "loss": 1.267, "step": 22395 }, { "epoch": 91.41224489795918, "grad_norm": 21.190855026245117, "learning_rate": 4.322448979591837e-06, "loss": 0.2534, "step": 22396 }, { "epoch": 91.41632653061224, "grad_norm": 17.47842025756836, "learning_rate": 4.320408163265306e-06, "loss": 2.1863, "step": 22397 }, { "epoch": 91.42040816326531, "grad_norm": 19.838666915893555, "learning_rate": 4.318367346938776e-06, "loss": 0.1901, "step": 22398 }, { "epoch": 91.42448979591836, "grad_norm": 21.508729934692383, "learning_rate": 4.316326530612245e-06, "loss": 1.8133, "step": 22399 }, { "epoch": 91.42857142857143, "grad_norm": 35.369022369384766, "learning_rate": 4.314285714285714e-06, "loss": 1.5621, "step": 22400 }, { "epoch": 91.43265306122449, "grad_norm": 33.82431411743164, "learning_rate": 4.312244897959184e-06, "loss": 1.0779, "step": 22401 }, { "epoch": 91.43673469387755, "grad_norm": 48.507041931152344, "learning_rate": 4.310204081632653e-06, "loss": 0.8116, "step": 22402 }, { "epoch": 91.44081632653061, "grad_norm": 29.73541831970215, "learning_rate": 4.3081632653061225e-06, "loss": 0.9119, "step": 22403 }, { "epoch": 91.44489795918368, "grad_norm": 34.5367546081543, "learning_rate": 4.306122448979592e-06, "loss": 0.5159, "step": 22404 }, { "epoch": 91.44897959183673, "grad_norm": 29.774309158325195, "learning_rate": 4.304081632653061e-06, "loss": 1.1451, "step": 22405 }, { "epoch": 91.4530612244898, "grad_norm": 30.656124114990234, "learning_rate": 4.302040816326531e-06, "loss": 0.6999, "step": 22406 }, { "epoch": 91.45714285714286, "grad_norm": 21.99430274963379, "learning_rate": 4.2999999999999995e-06, "loss": 1.4125, "step": 22407 }, { "epoch": 91.46122448979592, "grad_norm": 8.225893020629883, "learning_rate": 4.297959183673469e-06, "loss": 0.073, "step": 22408 }, { "epoch": 91.46530612244898, "grad_norm": 27.260881423950195, "learning_rate": 4.295918367346939e-06, "loss": 1.4365, "step": 22409 }, { "epoch": 91.46938775510205, "grad_norm": 24.89099884033203, "learning_rate": 4.2938775510204085e-06, "loss": 1.4155, "step": 22410 }, { "epoch": 91.4734693877551, "grad_norm": 26.38926887512207, "learning_rate": 4.291836734693878e-06, "loss": 0.2848, "step": 22411 }, { "epoch": 91.47755102040816, "grad_norm": 35.910003662109375, "learning_rate": 4.289795918367347e-06, "loss": 0.9915, "step": 22412 }, { "epoch": 91.48163265306123, "grad_norm": 26.651588439941406, "learning_rate": 4.287755102040817e-06, "loss": 1.3711, "step": 22413 }, { "epoch": 91.48571428571428, "grad_norm": 30.64471435546875, "learning_rate": 4.285714285714286e-06, "loss": 0.3333, "step": 22414 }, { "epoch": 91.48979591836735, "grad_norm": 29.569063186645508, "learning_rate": 4.283673469387756e-06, "loss": 1.701, "step": 22415 }, { "epoch": 91.4938775510204, "grad_norm": 23.17371940612793, "learning_rate": 4.281632653061225e-06, "loss": 0.6143, "step": 22416 }, { "epoch": 91.49795918367347, "grad_norm": 31.920530319213867, "learning_rate": 4.2795918367346945e-06, "loss": 0.9875, "step": 22417 }, { "epoch": 91.50204081632653, "grad_norm": 31.341699600219727, "learning_rate": 4.277551020408163e-06, "loss": 0.3413, "step": 22418 }, { "epoch": 91.5061224489796, "grad_norm": 10.283018112182617, "learning_rate": 4.275510204081633e-06, "loss": 0.0726, "step": 22419 }, { "epoch": 91.51020408163265, "grad_norm": 26.79490852355957, "learning_rate": 4.273469387755103e-06, "loss": 1.4341, "step": 22420 }, { "epoch": 91.51428571428572, "grad_norm": 32.93092346191406, "learning_rate": 4.2714285714285714e-06, "loss": 0.6524, "step": 22421 }, { "epoch": 91.51836734693877, "grad_norm": 37.92375564575195, "learning_rate": 4.269387755102041e-06, "loss": 1.0059, "step": 22422 }, { "epoch": 91.52244897959184, "grad_norm": 11.013467788696289, "learning_rate": 4.267346938775511e-06, "loss": 0.1077, "step": 22423 }, { "epoch": 91.5265306122449, "grad_norm": 23.97286033630371, "learning_rate": 4.26530612244898e-06, "loss": 1.8711, "step": 22424 }, { "epoch": 91.53061224489795, "grad_norm": 33.72587966918945, "learning_rate": 4.263265306122449e-06, "loss": 0.9838, "step": 22425 }, { "epoch": 91.53469387755102, "grad_norm": 27.653112411499023, "learning_rate": 4.261224489795919e-06, "loss": 0.183, "step": 22426 }, { "epoch": 91.53877551020408, "grad_norm": 28.640785217285156, "learning_rate": 4.259183673469388e-06, "loss": 0.2259, "step": 22427 }, { "epoch": 91.54285714285714, "grad_norm": 24.744840621948242, "learning_rate": 4.257142857142857e-06, "loss": 0.3441, "step": 22428 }, { "epoch": 91.5469387755102, "grad_norm": 41.632080078125, "learning_rate": 4.255102040816326e-06, "loss": 0.8316, "step": 22429 }, { "epoch": 91.55102040816327, "grad_norm": 26.762544631958008, "learning_rate": 4.253061224489796e-06, "loss": 1.1379, "step": 22430 }, { "epoch": 91.55510204081632, "grad_norm": 30.06505012512207, "learning_rate": 4.2510204081632656e-06, "loss": 1.1193, "step": 22431 }, { "epoch": 91.55918367346939, "grad_norm": 31.006938934326172, "learning_rate": 4.248979591836734e-06, "loss": 1.5183, "step": 22432 }, { "epoch": 91.56326530612245, "grad_norm": 35.436561584472656, "learning_rate": 4.246938775510204e-06, "loss": 0.6698, "step": 22433 }, { "epoch": 91.56734693877551, "grad_norm": 25.691999435424805, "learning_rate": 4.244897959183674e-06, "loss": 1.4964, "step": 22434 }, { "epoch": 91.57142857142857, "grad_norm": 31.836034774780273, "learning_rate": 4.2428571428571425e-06, "loss": 1.2454, "step": 22435 }, { "epoch": 91.57551020408164, "grad_norm": 33.050132751464844, "learning_rate": 4.240816326530612e-06, "loss": 1.3857, "step": 22436 }, { "epoch": 91.57959183673469, "grad_norm": 30.729841232299805, "learning_rate": 4.238775510204082e-06, "loss": 1.2526, "step": 22437 }, { "epoch": 91.58367346938776, "grad_norm": 39.628170013427734, "learning_rate": 4.236734693877551e-06, "loss": 0.5939, "step": 22438 }, { "epoch": 91.58775510204082, "grad_norm": 23.698225021362305, "learning_rate": 4.23469387755102e-06, "loss": 1.3872, "step": 22439 }, { "epoch": 91.59183673469387, "grad_norm": 27.688432693481445, "learning_rate": 4.23265306122449e-06, "loss": 1.3171, "step": 22440 }, { "epoch": 91.59591836734694, "grad_norm": 30.069963455200195, "learning_rate": 4.23061224489796e-06, "loss": 0.2978, "step": 22441 }, { "epoch": 91.6, "grad_norm": 22.114831924438477, "learning_rate": 4.228571428571429e-06, "loss": 0.2728, "step": 22442 }, { "epoch": 91.60408163265306, "grad_norm": 55.210025787353516, "learning_rate": 4.226530612244898e-06, "loss": 1.1143, "step": 22443 }, { "epoch": 91.60816326530612, "grad_norm": 23.559200286865234, "learning_rate": 4.224489795918368e-06, "loss": 1.8841, "step": 22444 }, { "epoch": 91.61224489795919, "grad_norm": 31.023300170898438, "learning_rate": 4.2224489795918375e-06, "loss": 1.1398, "step": 22445 }, { "epoch": 91.61632653061224, "grad_norm": 18.04226303100586, "learning_rate": 4.220408163265306e-06, "loss": 2.2722, "step": 22446 }, { "epoch": 91.62040816326531, "grad_norm": 28.759706497192383, "learning_rate": 4.218367346938776e-06, "loss": 1.5008, "step": 22447 }, { "epoch": 91.62448979591836, "grad_norm": 1.6236622333526611, "learning_rate": 4.216326530612246e-06, "loss": 0.0264, "step": 22448 }, { "epoch": 91.62857142857143, "grad_norm": 35.1522216796875, "learning_rate": 4.2142857142857145e-06, "loss": 0.4995, "step": 22449 }, { "epoch": 91.63265306122449, "grad_norm": 35.27406311035156, "learning_rate": 4.212244897959184e-06, "loss": 1.0979, "step": 22450 }, { "epoch": 91.63673469387756, "grad_norm": 32.58064270019531, "learning_rate": 4.210204081632653e-06, "loss": 0.4232, "step": 22451 }, { "epoch": 91.64081632653061, "grad_norm": 10.309732437133789, "learning_rate": 4.208163265306123e-06, "loss": 0.0931, "step": 22452 }, { "epoch": 91.64489795918368, "grad_norm": 48.7807731628418, "learning_rate": 4.206122448979592e-06, "loss": 0.702, "step": 22453 }, { "epoch": 91.64897959183673, "grad_norm": 31.70227813720703, "learning_rate": 4.204081632653061e-06, "loss": 0.4301, "step": 22454 }, { "epoch": 91.65306122448979, "grad_norm": 38.385162353515625, "learning_rate": 4.202040816326531e-06, "loss": 1.0185, "step": 22455 }, { "epoch": 91.65714285714286, "grad_norm": 22.930095672607422, "learning_rate": 4.2000000000000004e-06, "loss": 0.2013, "step": 22456 }, { "epoch": 91.66122448979591, "grad_norm": 30.216320037841797, "learning_rate": 4.197959183673469e-06, "loss": 1.0539, "step": 22457 }, { "epoch": 91.66530612244898, "grad_norm": 35.737518310546875, "learning_rate": 4.195918367346939e-06, "loss": 0.2207, "step": 22458 }, { "epoch": 91.66938775510204, "grad_norm": 26.43971824645996, "learning_rate": 4.193877551020409e-06, "loss": 0.207, "step": 22459 }, { "epoch": 91.6734693877551, "grad_norm": 27.76667594909668, "learning_rate": 4.191836734693877e-06, "loss": 0.6057, "step": 22460 }, { "epoch": 91.67755102040816, "grad_norm": 45.37702178955078, "learning_rate": 4.189795918367347e-06, "loss": 0.4094, "step": 22461 }, { "epoch": 91.68163265306123, "grad_norm": 27.3701114654541, "learning_rate": 4.187755102040816e-06, "loss": 0.6551, "step": 22462 }, { "epoch": 91.68571428571428, "grad_norm": 31.817556381225586, "learning_rate": 4.1857142857142856e-06, "loss": 0.6338, "step": 22463 }, { "epoch": 91.68979591836735, "grad_norm": 31.380773544311523, "learning_rate": 4.183673469387755e-06, "loss": 0.9012, "step": 22464 }, { "epoch": 91.6938775510204, "grad_norm": 29.677043914794922, "learning_rate": 4.181632653061224e-06, "loss": 1.584, "step": 22465 }, { "epoch": 91.69795918367348, "grad_norm": 33.8902702331543, "learning_rate": 4.179591836734694e-06, "loss": 0.5611, "step": 22466 }, { "epoch": 91.70204081632653, "grad_norm": 24.01772117614746, "learning_rate": 4.177551020408163e-06, "loss": 1.3569, "step": 22467 }, { "epoch": 91.70612244897958, "grad_norm": 26.385147094726562, "learning_rate": 4.175510204081633e-06, "loss": 0.2868, "step": 22468 }, { "epoch": 91.71020408163265, "grad_norm": 34.730316162109375, "learning_rate": 4.173469387755103e-06, "loss": 0.4288, "step": 22469 }, { "epoch": 91.71428571428571, "grad_norm": 36.257652282714844, "learning_rate": 4.1714285714285715e-06, "loss": 1.138, "step": 22470 }, { "epoch": 91.71836734693878, "grad_norm": 34.04145050048828, "learning_rate": 4.169387755102041e-06, "loss": 0.5069, "step": 22471 }, { "epoch": 91.72244897959183, "grad_norm": 19.01264190673828, "learning_rate": 4.167346938775511e-06, "loss": 1.9331, "step": 22472 }, { "epoch": 91.7265306122449, "grad_norm": 31.181310653686523, "learning_rate": 4.16530612244898e-06, "loss": 0.438, "step": 22473 }, { "epoch": 91.73061224489796, "grad_norm": 36.57078552246094, "learning_rate": 4.163265306122449e-06, "loss": 0.6369, "step": 22474 }, { "epoch": 91.73469387755102, "grad_norm": 39.43867492675781, "learning_rate": 4.161224489795919e-06, "loss": 1.3327, "step": 22475 }, { "epoch": 91.73877551020408, "grad_norm": 10.474029541015625, "learning_rate": 4.159183673469388e-06, "loss": 0.0831, "step": 22476 }, { "epoch": 91.74285714285715, "grad_norm": 24.225744247436523, "learning_rate": 4.1571428571428575e-06, "loss": 0.3167, "step": 22477 }, { "epoch": 91.7469387755102, "grad_norm": 22.004274368286133, "learning_rate": 4.155102040816327e-06, "loss": 1.5089, "step": 22478 }, { "epoch": 91.75102040816327, "grad_norm": 33.305118560791016, "learning_rate": 4.153061224489796e-06, "loss": 0.8788, "step": 22479 }, { "epoch": 91.75510204081633, "grad_norm": 34.7811393737793, "learning_rate": 4.151020408163266e-06, "loss": 1.249, "step": 22480 }, { "epoch": 91.7591836734694, "grad_norm": 30.283483505249023, "learning_rate": 4.148979591836735e-06, "loss": 1.2194, "step": 22481 }, { "epoch": 91.76326530612245, "grad_norm": 21.46730613708496, "learning_rate": 4.146938775510204e-06, "loss": 0.2598, "step": 22482 }, { "epoch": 91.7673469387755, "grad_norm": 27.814594268798828, "learning_rate": 4.144897959183674e-06, "loss": 1.0688, "step": 22483 }, { "epoch": 91.77142857142857, "grad_norm": 38.10470199584961, "learning_rate": 4.142857142857143e-06, "loss": 0.9589, "step": 22484 }, { "epoch": 91.77551020408163, "grad_norm": 32.983036041259766, "learning_rate": 4.140816326530612e-06, "loss": 0.6326, "step": 22485 }, { "epoch": 91.7795918367347, "grad_norm": 37.679988861083984, "learning_rate": 4.138775510204082e-06, "loss": 0.8851, "step": 22486 }, { "epoch": 91.78367346938775, "grad_norm": 18.1754150390625, "learning_rate": 4.136734693877551e-06, "loss": 0.1498, "step": 22487 }, { "epoch": 91.78775510204082, "grad_norm": 31.048742294311523, "learning_rate": 4.1346938775510205e-06, "loss": 1.2735, "step": 22488 }, { "epoch": 91.79183673469387, "grad_norm": 28.361553192138672, "learning_rate": 4.13265306122449e-06, "loss": 1.3261, "step": 22489 }, { "epoch": 91.79591836734694, "grad_norm": 24.107526779174805, "learning_rate": 4.130612244897959e-06, "loss": 1.2909, "step": 22490 }, { "epoch": 91.8, "grad_norm": 34.24395751953125, "learning_rate": 4.128571428571429e-06, "loss": 0.7198, "step": 22491 }, { "epoch": 91.80408163265307, "grad_norm": 30.897233963012695, "learning_rate": 4.1265306122448974e-06, "loss": 1.0865, "step": 22492 }, { "epoch": 91.80816326530612, "grad_norm": 29.566965103149414, "learning_rate": 4.124489795918367e-06, "loss": 0.3284, "step": 22493 }, { "epoch": 91.81224489795919, "grad_norm": 24.85450553894043, "learning_rate": 4.122448979591837e-06, "loss": 2.0058, "step": 22494 }, { "epoch": 91.81632653061224, "grad_norm": 35.07566452026367, "learning_rate": 4.1204081632653064e-06, "loss": 0.7299, "step": 22495 }, { "epoch": 91.82040816326531, "grad_norm": 23.044601440429688, "learning_rate": 4.118367346938776e-06, "loss": 1.9897, "step": 22496 }, { "epoch": 91.82448979591837, "grad_norm": 27.334749221801758, "learning_rate": 4.116326530612245e-06, "loss": 1.4529, "step": 22497 }, { "epoch": 91.82857142857142, "grad_norm": 27.285905838012695, "learning_rate": 4.114285714285715e-06, "loss": 0.625, "step": 22498 }, { "epoch": 91.83265306122449, "grad_norm": 26.276952743530273, "learning_rate": 4.112244897959184e-06, "loss": 0.2926, "step": 22499 }, { "epoch": 91.83673469387755, "grad_norm": 38.49363327026367, "learning_rate": 4.110204081632654e-06, "loss": 0.6704, "step": 22500 }, { "epoch": 91.84081632653061, "grad_norm": 30.554845809936523, "learning_rate": 4.108163265306123e-06, "loss": 1.0871, "step": 22501 }, { "epoch": 91.84489795918367, "grad_norm": 33.834625244140625, "learning_rate": 4.106122448979592e-06, "loss": 0.3235, "step": 22502 }, { "epoch": 91.84897959183674, "grad_norm": 12.997001647949219, "learning_rate": 4.104081632653061e-06, "loss": 0.0818, "step": 22503 }, { "epoch": 91.85306122448979, "grad_norm": 32.38065719604492, "learning_rate": 4.102040816326531e-06, "loss": 1.2493, "step": 22504 }, { "epoch": 91.85714285714286, "grad_norm": 32.558170318603516, "learning_rate": 4.1000000000000006e-06, "loss": 0.2572, "step": 22505 }, { "epoch": 91.86122448979592, "grad_norm": 34.02097702026367, "learning_rate": 4.097959183673469e-06, "loss": 1.3337, "step": 22506 }, { "epoch": 91.86530612244898, "grad_norm": 28.539499282836914, "learning_rate": 4.095918367346939e-06, "loss": 0.2489, "step": 22507 }, { "epoch": 91.86938775510204, "grad_norm": 23.12394905090332, "learning_rate": 4.093877551020409e-06, "loss": 0.2732, "step": 22508 }, { "epoch": 91.87346938775511, "grad_norm": 26.03020668029785, "learning_rate": 4.0918367346938775e-06, "loss": 1.8257, "step": 22509 }, { "epoch": 91.87755102040816, "grad_norm": 37.88867950439453, "learning_rate": 4.089795918367347e-06, "loss": 1.2871, "step": 22510 }, { "epoch": 91.88163265306123, "grad_norm": 25.802988052368164, "learning_rate": 4.087755102040817e-06, "loss": 1.2365, "step": 22511 }, { "epoch": 91.88571428571429, "grad_norm": 22.465654373168945, "learning_rate": 4.085714285714286e-06, "loss": 1.1932, "step": 22512 }, { "epoch": 91.88979591836734, "grad_norm": 30.54570770263672, "learning_rate": 4.083673469387755e-06, "loss": 0.4892, "step": 22513 }, { "epoch": 91.89387755102041, "grad_norm": 30.82163429260254, "learning_rate": 4.081632653061224e-06, "loss": 1.1281, "step": 22514 }, { "epoch": 91.89795918367346, "grad_norm": 32.02678680419922, "learning_rate": 4.079591836734694e-06, "loss": 0.2386, "step": 22515 }, { "epoch": 91.90204081632653, "grad_norm": 39.264442443847656, "learning_rate": 4.0775510204081635e-06, "loss": 0.3275, "step": 22516 }, { "epoch": 91.90612244897959, "grad_norm": 27.073833465576172, "learning_rate": 4.075510204081632e-06, "loss": 1.4523, "step": 22517 }, { "epoch": 91.91020408163266, "grad_norm": 30.939172744750977, "learning_rate": 4.073469387755102e-06, "loss": 0.7192, "step": 22518 }, { "epoch": 91.91428571428571, "grad_norm": 30.48075294494629, "learning_rate": 4.071428571428572e-06, "loss": 1.5333, "step": 22519 }, { "epoch": 91.91836734693878, "grad_norm": 35.636234283447266, "learning_rate": 4.0693877551020405e-06, "loss": 0.5843, "step": 22520 }, { "epoch": 91.92244897959183, "grad_norm": 21.78333282470703, "learning_rate": 4.06734693877551e-06, "loss": 0.1898, "step": 22521 }, { "epoch": 91.9265306122449, "grad_norm": 10.120954513549805, "learning_rate": 4.06530612244898e-06, "loss": 0.0742, "step": 22522 }, { "epoch": 91.93061224489796, "grad_norm": 33.80149841308594, "learning_rate": 4.063265306122449e-06, "loss": 0.9232, "step": 22523 }, { "epoch": 91.93469387755103, "grad_norm": 28.02924919128418, "learning_rate": 4.061224489795918e-06, "loss": 1.2266, "step": 22524 }, { "epoch": 91.93877551020408, "grad_norm": 30.86322784423828, "learning_rate": 4.059183673469388e-06, "loss": 1.129, "step": 22525 }, { "epoch": 91.94285714285714, "grad_norm": 30.663150787353516, "learning_rate": 4.057142857142858e-06, "loss": 0.5601, "step": 22526 }, { "epoch": 91.9469387755102, "grad_norm": 32.227989196777344, "learning_rate": 4.055102040816327e-06, "loss": 0.3097, "step": 22527 }, { "epoch": 91.95102040816326, "grad_norm": 38.201087951660156, "learning_rate": 4.053061224489796e-06, "loss": 0.7483, "step": 22528 }, { "epoch": 91.95510204081633, "grad_norm": 33.9932746887207, "learning_rate": 4.051020408163266e-06, "loss": 0.98, "step": 22529 }, { "epoch": 91.95918367346938, "grad_norm": 28.067617416381836, "learning_rate": 4.0489795918367354e-06, "loss": 0.5229, "step": 22530 }, { "epoch": 91.96326530612245, "grad_norm": 38.796302795410156, "learning_rate": 4.046938775510204e-06, "loss": 0.9005, "step": 22531 }, { "epoch": 91.9673469387755, "grad_norm": 36.80316162109375, "learning_rate": 4.044897959183674e-06, "loss": 0.3557, "step": 22532 }, { "epoch": 91.97142857142858, "grad_norm": 28.301530838012695, "learning_rate": 4.042857142857144e-06, "loss": 0.4039, "step": 22533 }, { "epoch": 91.97551020408163, "grad_norm": 28.131668090820312, "learning_rate": 4.040816326530612e-06, "loss": 1.2094, "step": 22534 }, { "epoch": 91.9795918367347, "grad_norm": 32.67884826660156, "learning_rate": 4.038775510204082e-06, "loss": 0.7956, "step": 22535 }, { "epoch": 91.98367346938775, "grad_norm": 23.61098289489746, "learning_rate": 4.036734693877551e-06, "loss": 1.5422, "step": 22536 }, { "epoch": 91.98775510204082, "grad_norm": 29.386783599853516, "learning_rate": 4.0346938775510206e-06, "loss": 0.2393, "step": 22537 }, { "epoch": 91.99183673469388, "grad_norm": 27.711681365966797, "learning_rate": 4.03265306122449e-06, "loss": 0.4635, "step": 22538 }, { "epoch": 91.99591836734695, "grad_norm": 36.0572624206543, "learning_rate": 4.030612244897959e-06, "loss": 0.9056, "step": 22539 }, { "epoch": 92.0, "grad_norm": 7.452199935913086, "learning_rate": 4.028571428571429e-06, "loss": 0.0674, "step": 22540 }, { "epoch": 92.00408163265305, "grad_norm": 28.00029182434082, "learning_rate": 4.026530612244898e-06, "loss": 0.6263, "step": 22541 }, { "epoch": 92.00816326530612, "grad_norm": 31.92091178894043, "learning_rate": 4.024489795918367e-06, "loss": 1.1236, "step": 22542 }, { "epoch": 92.01224489795918, "grad_norm": 27.518239974975586, "learning_rate": 4.022448979591837e-06, "loss": 1.2399, "step": 22543 }, { "epoch": 92.01632653061225, "grad_norm": 30.130088806152344, "learning_rate": 4.0204081632653065e-06, "loss": 1.4853, "step": 22544 }, { "epoch": 92.0204081632653, "grad_norm": 36.783565521240234, "learning_rate": 4.018367346938775e-06, "loss": 0.9846, "step": 22545 }, { "epoch": 92.02448979591837, "grad_norm": 18.954917907714844, "learning_rate": 4.016326530612245e-06, "loss": 1.9227, "step": 22546 }, { "epoch": 92.02857142857142, "grad_norm": 19.07746124267578, "learning_rate": 4.014285714285714e-06, "loss": 2.5036, "step": 22547 }, { "epoch": 92.0326530612245, "grad_norm": 24.45583152770996, "learning_rate": 4.0122448979591835e-06, "loss": 1.515, "step": 22548 }, { "epoch": 92.03673469387755, "grad_norm": 35.25061798095703, "learning_rate": 4.010204081632653e-06, "loss": 0.5226, "step": 22549 }, { "epoch": 92.04081632653062, "grad_norm": 35.39876937866211, "learning_rate": 4.008163265306122e-06, "loss": 1.0925, "step": 22550 }, { "epoch": 92.04489795918367, "grad_norm": 27.857147216796875, "learning_rate": 4.006122448979592e-06, "loss": 0.3362, "step": 22551 }, { "epoch": 92.04897959183674, "grad_norm": 33.41561508178711, "learning_rate": 4.004081632653061e-06, "loss": 0.8156, "step": 22552 }, { "epoch": 92.0530612244898, "grad_norm": 33.59263229370117, "learning_rate": 4.002040816326531e-06, "loss": 1.0449, "step": 22553 }, { "epoch": 92.05714285714286, "grad_norm": 24.555309295654297, "learning_rate": 4.000000000000001e-06, "loss": 0.2464, "step": 22554 }, { "epoch": 92.06122448979592, "grad_norm": 23.90071678161621, "learning_rate": 3.99795918367347e-06, "loss": 0.2544, "step": 22555 }, { "epoch": 92.06530612244897, "grad_norm": 20.485820770263672, "learning_rate": 3.995918367346939e-06, "loss": 0.1695, "step": 22556 }, { "epoch": 92.06938775510204, "grad_norm": 12.492083549499512, "learning_rate": 3.993877551020409e-06, "loss": 0.1183, "step": 22557 }, { "epoch": 92.0734693877551, "grad_norm": 40.158748626708984, "learning_rate": 3.991836734693878e-06, "loss": 0.9399, "step": 22558 }, { "epoch": 92.07755102040817, "grad_norm": 22.574478149414062, "learning_rate": 3.989795918367347e-06, "loss": 1.9361, "step": 22559 }, { "epoch": 92.08163265306122, "grad_norm": 31.23272132873535, "learning_rate": 3.987755102040817e-06, "loss": 1.2617, "step": 22560 }, { "epoch": 92.08571428571429, "grad_norm": 23.48436737060547, "learning_rate": 3.985714285714286e-06, "loss": 0.3567, "step": 22561 }, { "epoch": 92.08979591836734, "grad_norm": 33.559566497802734, "learning_rate": 3.9836734693877555e-06, "loss": 1.2029, "step": 22562 }, { "epoch": 92.09387755102041, "grad_norm": 36.66489028930664, "learning_rate": 3.981632653061225e-06, "loss": 0.7271, "step": 22563 }, { "epoch": 92.09795918367347, "grad_norm": 27.549732208251953, "learning_rate": 3.979591836734694e-06, "loss": 0.2159, "step": 22564 }, { "epoch": 92.10204081632654, "grad_norm": 35.002811431884766, "learning_rate": 3.977551020408164e-06, "loss": 1.135, "step": 22565 }, { "epoch": 92.10612244897959, "grad_norm": 36.336029052734375, "learning_rate": 3.975510204081633e-06, "loss": 0.621, "step": 22566 }, { "epoch": 92.11020408163266, "grad_norm": 27.990842819213867, "learning_rate": 3.973469387755102e-06, "loss": 1.4722, "step": 22567 }, { "epoch": 92.11428571428571, "grad_norm": 32.05941390991211, "learning_rate": 3.971428571428572e-06, "loss": 0.9237, "step": 22568 }, { "epoch": 92.11836734693877, "grad_norm": 26.712051391601562, "learning_rate": 3.969387755102041e-06, "loss": 0.4848, "step": 22569 }, { "epoch": 92.12244897959184, "grad_norm": 25.071372985839844, "learning_rate": 3.96734693877551e-06, "loss": 1.3343, "step": 22570 }, { "epoch": 92.12653061224489, "grad_norm": 28.909387588500977, "learning_rate": 3.96530612244898e-06, "loss": 1.2089, "step": 22571 }, { "epoch": 92.13061224489796, "grad_norm": 37.38102722167969, "learning_rate": 3.963265306122449e-06, "loss": 0.3447, "step": 22572 }, { "epoch": 92.13469387755102, "grad_norm": 26.713146209716797, "learning_rate": 3.961224489795918e-06, "loss": 1.6592, "step": 22573 }, { "epoch": 92.13877551020408, "grad_norm": 31.50176239013672, "learning_rate": 3.959183673469388e-06, "loss": 0.6219, "step": 22574 }, { "epoch": 92.14285714285714, "grad_norm": 27.325145721435547, "learning_rate": 3.957142857142857e-06, "loss": 1.3996, "step": 22575 }, { "epoch": 92.14693877551021, "grad_norm": 25.8061580657959, "learning_rate": 3.9551020408163266e-06, "loss": 1.3153, "step": 22576 }, { "epoch": 92.15102040816326, "grad_norm": 29.599794387817383, "learning_rate": 3.953061224489795e-06, "loss": 1.275, "step": 22577 }, { "epoch": 92.15510204081633, "grad_norm": 36.110843658447266, "learning_rate": 3.951020408163265e-06, "loss": 1.0105, "step": 22578 }, { "epoch": 92.15918367346939, "grad_norm": 22.09431266784668, "learning_rate": 3.948979591836735e-06, "loss": 1.2642, "step": 22579 }, { "epoch": 92.16326530612245, "grad_norm": 28.56050682067871, "learning_rate": 3.946938775510204e-06, "loss": 0.3694, "step": 22580 }, { "epoch": 92.16734693877551, "grad_norm": 35.67316818237305, "learning_rate": 3.944897959183674e-06, "loss": 0.5863, "step": 22581 }, { "epoch": 92.17142857142858, "grad_norm": 21.13184356689453, "learning_rate": 3.942857142857143e-06, "loss": 0.2532, "step": 22582 }, { "epoch": 92.17551020408163, "grad_norm": 44.92765808105469, "learning_rate": 3.9408163265306125e-06, "loss": 0.7667, "step": 22583 }, { "epoch": 92.17959183673469, "grad_norm": 36.390296936035156, "learning_rate": 3.938775510204082e-06, "loss": 0.8731, "step": 22584 }, { "epoch": 92.18367346938776, "grad_norm": 25.648813247680664, "learning_rate": 3.936734693877552e-06, "loss": 1.3332, "step": 22585 }, { "epoch": 92.18775510204081, "grad_norm": 34.636878967285156, "learning_rate": 3.934693877551021e-06, "loss": 0.6025, "step": 22586 }, { "epoch": 92.19183673469388, "grad_norm": 34.48518371582031, "learning_rate": 3.93265306122449e-06, "loss": 0.9141, "step": 22587 }, { "epoch": 92.19591836734693, "grad_norm": 31.700183868408203, "learning_rate": 3.930612244897959e-06, "loss": 1.0987, "step": 22588 }, { "epoch": 92.2, "grad_norm": 35.05195999145508, "learning_rate": 3.928571428571429e-06, "loss": 0.7766, "step": 22589 }, { "epoch": 92.20408163265306, "grad_norm": 27.196369171142578, "learning_rate": 3.9265306122448985e-06, "loss": 1.4497, "step": 22590 }, { "epoch": 92.20816326530613, "grad_norm": 26.656326293945312, "learning_rate": 3.924489795918367e-06, "loss": 0.2874, "step": 22591 }, { "epoch": 92.21224489795918, "grad_norm": 41.10078811645508, "learning_rate": 3.922448979591837e-06, "loss": 1.1156, "step": 22592 }, { "epoch": 92.21632653061225, "grad_norm": 28.48894500732422, "learning_rate": 3.920408163265307e-06, "loss": 0.294, "step": 22593 }, { "epoch": 92.2204081632653, "grad_norm": 24.769454956054688, "learning_rate": 3.9183673469387755e-06, "loss": 1.7411, "step": 22594 }, { "epoch": 92.22448979591837, "grad_norm": 28.324317932128906, "learning_rate": 3.916326530612245e-06, "loss": 1.3068, "step": 22595 }, { "epoch": 92.22857142857143, "grad_norm": 31.25990104675293, "learning_rate": 3.914285714285715e-06, "loss": 0.8516, "step": 22596 }, { "epoch": 92.2326530612245, "grad_norm": 15.795306205749512, "learning_rate": 3.912244897959184e-06, "loss": 2.1663, "step": 22597 }, { "epoch": 92.23673469387755, "grad_norm": 29.591127395629883, "learning_rate": 3.910204081632653e-06, "loss": 0.3447, "step": 22598 }, { "epoch": 92.2408163265306, "grad_norm": 32.80335998535156, "learning_rate": 3.908163265306122e-06, "loss": 0.5118, "step": 22599 }, { "epoch": 92.24489795918367, "grad_norm": 19.034706115722656, "learning_rate": 3.906122448979592e-06, "loss": 2.2411, "step": 22600 }, { "epoch": 92.24897959183673, "grad_norm": 52.576751708984375, "learning_rate": 3.9040816326530614e-06, "loss": 1.0891, "step": 22601 }, { "epoch": 92.2530612244898, "grad_norm": 30.269067764282227, "learning_rate": 3.90204081632653e-06, "loss": 1.2241, "step": 22602 }, { "epoch": 92.25714285714285, "grad_norm": 33.19803237915039, "learning_rate": 3.9e-06, "loss": 0.8344, "step": 22603 }, { "epoch": 92.26122448979592, "grad_norm": 29.26217269897461, "learning_rate": 3.89795918367347e-06, "loss": 0.2617, "step": 22604 }, { "epoch": 92.26530612244898, "grad_norm": 33.632442474365234, "learning_rate": 3.895918367346938e-06, "loss": 0.5188, "step": 22605 }, { "epoch": 92.26938775510204, "grad_norm": 26.987293243408203, "learning_rate": 3.893877551020408e-06, "loss": 1.0141, "step": 22606 }, { "epoch": 92.2734693877551, "grad_norm": 33.857330322265625, "learning_rate": 3.891836734693878e-06, "loss": 0.8542, "step": 22607 }, { "epoch": 92.27755102040817, "grad_norm": 21.739831924438477, "learning_rate": 3.889795918367347e-06, "loss": 1.8341, "step": 22608 }, { "epoch": 92.28163265306122, "grad_norm": 24.85552215576172, "learning_rate": 3.887755102040816e-06, "loss": 1.3362, "step": 22609 }, { "epoch": 92.28571428571429, "grad_norm": 32.71833038330078, "learning_rate": 3.885714285714286e-06, "loss": 0.7245, "step": 22610 }, { "epoch": 92.28979591836735, "grad_norm": 28.14426612854004, "learning_rate": 3.8836734693877556e-06, "loss": 1.5718, "step": 22611 }, { "epoch": 92.29387755102042, "grad_norm": 22.53605842590332, "learning_rate": 3.881632653061225e-06, "loss": 1.1805, "step": 22612 }, { "epoch": 92.29795918367347, "grad_norm": 21.59986114501953, "learning_rate": 3.879591836734694e-06, "loss": 0.1858, "step": 22613 }, { "epoch": 92.30204081632652, "grad_norm": 29.12561798095703, "learning_rate": 3.877551020408164e-06, "loss": 0.6654, "step": 22614 }, { "epoch": 92.3061224489796, "grad_norm": 25.63936424255371, "learning_rate": 3.875510204081633e-06, "loss": 0.2766, "step": 22615 }, { "epoch": 92.31020408163265, "grad_norm": 29.66420555114746, "learning_rate": 3.873469387755102e-06, "loss": 0.5822, "step": 22616 }, { "epoch": 92.31428571428572, "grad_norm": 14.770633697509766, "learning_rate": 3.871428571428572e-06, "loss": 0.1261, "step": 22617 }, { "epoch": 92.31836734693877, "grad_norm": 27.812063217163086, "learning_rate": 3.8693877551020415e-06, "loss": 1.4366, "step": 22618 }, { "epoch": 92.32244897959184, "grad_norm": 34.57500457763672, "learning_rate": 3.86734693877551e-06, "loss": 1.1341, "step": 22619 }, { "epoch": 92.3265306122449, "grad_norm": 25.480182647705078, "learning_rate": 3.86530612244898e-06, "loss": 0.2775, "step": 22620 }, { "epoch": 92.33061224489796, "grad_norm": 21.261371612548828, "learning_rate": 3.863265306122449e-06, "loss": 0.1951, "step": 22621 }, { "epoch": 92.33469387755102, "grad_norm": 28.384462356567383, "learning_rate": 3.8612244897959185e-06, "loss": 0.3096, "step": 22622 }, { "epoch": 92.33877551020409, "grad_norm": 36.9482536315918, "learning_rate": 3.859183673469388e-06, "loss": 0.5819, "step": 22623 }, { "epoch": 92.34285714285714, "grad_norm": 30.979637145996094, "learning_rate": 3.857142857142857e-06, "loss": 0.5019, "step": 22624 }, { "epoch": 92.34693877551021, "grad_norm": 20.850238800048828, "learning_rate": 3.855102040816327e-06, "loss": 1.9142, "step": 22625 }, { "epoch": 92.35102040816327, "grad_norm": 23.39177131652832, "learning_rate": 3.853061224489796e-06, "loss": 1.5948, "step": 22626 }, { "epoch": 92.35510204081632, "grad_norm": 31.155500411987305, "learning_rate": 3.851020408163265e-06, "loss": 0.4692, "step": 22627 }, { "epoch": 92.35918367346939, "grad_norm": 25.838638305664062, "learning_rate": 3.848979591836735e-06, "loss": 1.507, "step": 22628 }, { "epoch": 92.36326530612244, "grad_norm": 33.60810470581055, "learning_rate": 3.8469387755102045e-06, "loss": 0.484, "step": 22629 }, { "epoch": 92.36734693877551, "grad_norm": 33.1680793762207, "learning_rate": 3.844897959183673e-06, "loss": 0.9686, "step": 22630 }, { "epoch": 92.37142857142857, "grad_norm": 29.7479248046875, "learning_rate": 3.842857142857143e-06, "loss": 0.3281, "step": 22631 }, { "epoch": 92.37551020408164, "grad_norm": 23.11958122253418, "learning_rate": 3.840816326530612e-06, "loss": 1.8602, "step": 22632 }, { "epoch": 92.37959183673469, "grad_norm": 37.14545440673828, "learning_rate": 3.8387755102040815e-06, "loss": 0.5554, "step": 22633 }, { "epoch": 92.38367346938776, "grad_norm": 41.19419860839844, "learning_rate": 3.836734693877551e-06, "loss": 0.8385, "step": 22634 }, { "epoch": 92.38775510204081, "grad_norm": 28.571075439453125, "learning_rate": 3.83469387755102e-06, "loss": 1.2149, "step": 22635 }, { "epoch": 92.39183673469388, "grad_norm": 25.042829513549805, "learning_rate": 3.83265306122449e-06, "loss": 0.3802, "step": 22636 }, { "epoch": 92.39591836734694, "grad_norm": 27.496623992919922, "learning_rate": 3.830612244897959e-06, "loss": 0.2636, "step": 22637 }, { "epoch": 92.4, "grad_norm": 24.942121505737305, "learning_rate": 3.828571428571429e-06, "loss": 0.2765, "step": 22638 }, { "epoch": 92.40408163265306, "grad_norm": 34.324134826660156, "learning_rate": 3.826530612244899e-06, "loss": 1.4094, "step": 22639 }, { "epoch": 92.40816326530613, "grad_norm": 19.32002830505371, "learning_rate": 3.824489795918368e-06, "loss": 0.18, "step": 22640 }, { "epoch": 92.41224489795918, "grad_norm": 23.10127830505371, "learning_rate": 3.822448979591837e-06, "loss": 0.2443, "step": 22641 }, { "epoch": 92.41632653061224, "grad_norm": 26.71088981628418, "learning_rate": 3.820408163265307e-06, "loss": 0.3274, "step": 22642 }, { "epoch": 92.42040816326531, "grad_norm": 12.039801597595215, "learning_rate": 3.818367346938776e-06, "loss": 0.0755, "step": 22643 }, { "epoch": 92.42448979591836, "grad_norm": 22.679100036621094, "learning_rate": 3.816326530612245e-06, "loss": 2.0539, "step": 22644 }, { "epoch": 92.42857142857143, "grad_norm": 22.758642196655273, "learning_rate": 3.8142857142857145e-06, "loss": 1.9698, "step": 22645 }, { "epoch": 92.43265306122449, "grad_norm": 28.725608825683594, "learning_rate": 3.8122448979591837e-06, "loss": 1.3508, "step": 22646 }, { "epoch": 92.43673469387755, "grad_norm": 25.753681182861328, "learning_rate": 3.8102040816326534e-06, "loss": 1.8507, "step": 22647 }, { "epoch": 92.44081632653061, "grad_norm": 30.971633911132812, "learning_rate": 3.808163265306123e-06, "loss": 0.3222, "step": 22648 }, { "epoch": 92.44489795918368, "grad_norm": 32.83533477783203, "learning_rate": 3.806122448979592e-06, "loss": 0.4479, "step": 22649 }, { "epoch": 92.44897959183673, "grad_norm": 5.602814674377441, "learning_rate": 3.8040816326530616e-06, "loss": 0.066, "step": 22650 }, { "epoch": 92.4530612244898, "grad_norm": 4.35518741607666, "learning_rate": 3.8020408163265312e-06, "loss": 0.048, "step": 22651 }, { "epoch": 92.45714285714286, "grad_norm": 32.41624069213867, "learning_rate": 3.8e-06, "loss": 0.2357, "step": 22652 }, { "epoch": 92.46122448979592, "grad_norm": 29.372413635253906, "learning_rate": 3.7979591836734697e-06, "loss": 0.3029, "step": 22653 }, { "epoch": 92.46530612244898, "grad_norm": 34.32888412475586, "learning_rate": 3.7959183673469385e-06, "loss": 1.1628, "step": 22654 }, { "epoch": 92.46938775510205, "grad_norm": 26.04012107849121, "learning_rate": 3.793877551020408e-06, "loss": 1.4222, "step": 22655 }, { "epoch": 92.4734693877551, "grad_norm": 34.588645935058594, "learning_rate": 3.791836734693878e-06, "loss": 0.6523, "step": 22656 }, { "epoch": 92.47755102040816, "grad_norm": 28.006481170654297, "learning_rate": 3.789795918367347e-06, "loss": 0.2221, "step": 22657 }, { "epoch": 92.48163265306123, "grad_norm": 22.814546585083008, "learning_rate": 3.7877551020408163e-06, "loss": 1.3458, "step": 22658 }, { "epoch": 92.48571428571428, "grad_norm": 21.690196990966797, "learning_rate": 3.785714285714286e-06, "loss": 0.2599, "step": 22659 }, { "epoch": 92.48979591836735, "grad_norm": 21.742652893066406, "learning_rate": 3.7836734693877553e-06, "loss": 1.4884, "step": 22660 }, { "epoch": 92.4938775510204, "grad_norm": 36.48516845703125, "learning_rate": 3.781632653061225e-06, "loss": 0.6103, "step": 22661 }, { "epoch": 92.49795918367347, "grad_norm": 27.903413772583008, "learning_rate": 3.7795918367346937e-06, "loss": 0.5483, "step": 22662 }, { "epoch": 92.50204081632653, "grad_norm": 34.87689971923828, "learning_rate": 3.7775510204081634e-06, "loss": 0.7053, "step": 22663 }, { "epoch": 92.5061224489796, "grad_norm": 21.855548858642578, "learning_rate": 3.775510204081633e-06, "loss": 0.2078, "step": 22664 }, { "epoch": 92.51020408163265, "grad_norm": 26.546720504760742, "learning_rate": 3.773469387755102e-06, "loss": 1.3205, "step": 22665 }, { "epoch": 92.51428571428572, "grad_norm": 34.072139739990234, "learning_rate": 3.7714285714285716e-06, "loss": 1.1325, "step": 22666 }, { "epoch": 92.51836734693877, "grad_norm": 22.28685760498047, "learning_rate": 3.7693877551020412e-06, "loss": 1.8387, "step": 22667 }, { "epoch": 92.52244897959184, "grad_norm": 34.62971496582031, "learning_rate": 3.76734693877551e-06, "loss": 0.5502, "step": 22668 }, { "epoch": 92.5265306122449, "grad_norm": 34.94862747192383, "learning_rate": 3.7653061224489797e-06, "loss": 0.265, "step": 22669 }, { "epoch": 92.53061224489795, "grad_norm": 28.28910255432129, "learning_rate": 3.7632653061224494e-06, "loss": 1.5032, "step": 22670 }, { "epoch": 92.53469387755102, "grad_norm": 33.70848083496094, "learning_rate": 3.761224489795918e-06, "loss": 0.7074, "step": 22671 }, { "epoch": 92.53877551020408, "grad_norm": 48.99936294555664, "learning_rate": 3.759183673469388e-06, "loss": 0.4497, "step": 22672 }, { "epoch": 92.54285714285714, "grad_norm": 36.79753112792969, "learning_rate": 3.757142857142857e-06, "loss": 0.3701, "step": 22673 }, { "epoch": 92.5469387755102, "grad_norm": 35.105201721191406, "learning_rate": 3.7551020408163268e-06, "loss": 0.8561, "step": 22674 }, { "epoch": 92.55102040816327, "grad_norm": 38.4354133605957, "learning_rate": 3.7530612244897964e-06, "loss": 0.9964, "step": 22675 }, { "epoch": 92.55510204081632, "grad_norm": 32.44223403930664, "learning_rate": 3.7510204081632653e-06, "loss": 0.4306, "step": 22676 }, { "epoch": 92.55918367346939, "grad_norm": 23.965360641479492, "learning_rate": 3.748979591836735e-06, "loss": 1.3017, "step": 22677 }, { "epoch": 92.56326530612245, "grad_norm": 8.293532371520996, "learning_rate": 3.7469387755102046e-06, "loss": 0.0639, "step": 22678 }, { "epoch": 92.56734693877551, "grad_norm": 7.785334587097168, "learning_rate": 3.7448979591836734e-06, "loss": 0.1128, "step": 22679 }, { "epoch": 92.57142857142857, "grad_norm": 34.558197021484375, "learning_rate": 3.742857142857143e-06, "loss": 0.9325, "step": 22680 }, { "epoch": 92.57551020408164, "grad_norm": 35.45734786987305, "learning_rate": 3.7408163265306127e-06, "loss": 0.8369, "step": 22681 }, { "epoch": 92.57959183673469, "grad_norm": 11.86469554901123, "learning_rate": 3.7387755102040816e-06, "loss": 0.0909, "step": 22682 }, { "epoch": 92.58367346938776, "grad_norm": 35.517242431640625, "learning_rate": 3.7367346938775512e-06, "loss": 0.5563, "step": 22683 }, { "epoch": 92.58775510204082, "grad_norm": 39.01277160644531, "learning_rate": 3.73469387755102e-06, "loss": 0.7456, "step": 22684 }, { "epoch": 92.59183673469387, "grad_norm": 31.554311752319336, "learning_rate": 3.7326530612244897e-06, "loss": 0.8982, "step": 22685 }, { "epoch": 92.59591836734694, "grad_norm": 20.676069259643555, "learning_rate": 3.7306122448979594e-06, "loss": 0.2121, "step": 22686 }, { "epoch": 92.6, "grad_norm": 25.386425018310547, "learning_rate": 3.7285714285714286e-06, "loss": 2.1694, "step": 22687 }, { "epoch": 92.60408163265306, "grad_norm": 26.35421371459961, "learning_rate": 3.7265306122448983e-06, "loss": 0.2876, "step": 22688 }, { "epoch": 92.60816326530612, "grad_norm": 16.93839454650879, "learning_rate": 3.724489795918368e-06, "loss": 0.1428, "step": 22689 }, { "epoch": 92.61224489795919, "grad_norm": 28.94819450378418, "learning_rate": 3.7224489795918368e-06, "loss": 0.7124, "step": 22690 }, { "epoch": 92.61632653061224, "grad_norm": 27.832294464111328, "learning_rate": 3.7204081632653064e-06, "loss": 1.0993, "step": 22691 }, { "epoch": 92.62040816326531, "grad_norm": 22.01202392578125, "learning_rate": 3.718367346938776e-06, "loss": 0.2274, "step": 22692 }, { "epoch": 92.62448979591836, "grad_norm": 25.53139877319336, "learning_rate": 3.716326530612245e-06, "loss": 1.3551, "step": 22693 }, { "epoch": 92.62857142857143, "grad_norm": 6.955847263336182, "learning_rate": 3.7142857142857146e-06, "loss": 0.0678, "step": 22694 }, { "epoch": 92.63265306122449, "grad_norm": 12.20418930053711, "learning_rate": 3.7122448979591834e-06, "loss": 0.0931, "step": 22695 }, { "epoch": 92.63673469387756, "grad_norm": 12.574142456054688, "learning_rate": 3.710204081632653e-06, "loss": 0.1082, "step": 22696 }, { "epoch": 92.64081632653061, "grad_norm": 33.66033172607422, "learning_rate": 3.7081632653061228e-06, "loss": 1.0093, "step": 22697 }, { "epoch": 92.64489795918368, "grad_norm": 31.44647789001465, "learning_rate": 3.7061224489795916e-06, "loss": 0.2381, "step": 22698 }, { "epoch": 92.64897959183673, "grad_norm": 22.01313018798828, "learning_rate": 3.7040816326530612e-06, "loss": 1.977, "step": 22699 }, { "epoch": 92.65306122448979, "grad_norm": 31.8377628326416, "learning_rate": 3.702040816326531e-06, "loss": 0.8325, "step": 22700 }, { "epoch": 92.65714285714286, "grad_norm": 31.71206283569336, "learning_rate": 3.7e-06, "loss": 1.1074, "step": 22701 }, { "epoch": 92.66122448979591, "grad_norm": 40.003684997558594, "learning_rate": 3.69795918367347e-06, "loss": 1.3297, "step": 22702 }, { "epoch": 92.66530612244898, "grad_norm": 24.742958068847656, "learning_rate": 3.6959183673469395e-06, "loss": 0.3186, "step": 22703 }, { "epoch": 92.66938775510204, "grad_norm": 27.10252571105957, "learning_rate": 3.6938775510204083e-06, "loss": 1.8597, "step": 22704 }, { "epoch": 92.6734693877551, "grad_norm": 38.80109786987305, "learning_rate": 3.691836734693878e-06, "loss": 0.7434, "step": 22705 }, { "epoch": 92.67755102040816, "grad_norm": 6.282304286956787, "learning_rate": 3.689795918367347e-06, "loss": 0.0587, "step": 22706 }, { "epoch": 92.68163265306123, "grad_norm": 26.75885581970215, "learning_rate": 3.6877551020408165e-06, "loss": 1.8502, "step": 22707 }, { "epoch": 92.68571428571428, "grad_norm": 28.314456939697266, "learning_rate": 3.685714285714286e-06, "loss": 0.6055, "step": 22708 }, { "epoch": 92.68979591836735, "grad_norm": 24.06072998046875, "learning_rate": 3.683673469387755e-06, "loss": 0.2128, "step": 22709 }, { "epoch": 92.6938775510204, "grad_norm": 31.463703155517578, "learning_rate": 3.6816326530612246e-06, "loss": 1.3219, "step": 22710 }, { "epoch": 92.69795918367348, "grad_norm": 24.761295318603516, "learning_rate": 3.6795918367346943e-06, "loss": 1.3828, "step": 22711 }, { "epoch": 92.70204081632653, "grad_norm": 28.251890182495117, "learning_rate": 3.677551020408163e-06, "loss": 0.709, "step": 22712 }, { "epoch": 92.70612244897958, "grad_norm": 27.351411819458008, "learning_rate": 3.6755102040816328e-06, "loss": 1.1463, "step": 22713 }, { "epoch": 92.71020408163265, "grad_norm": 31.757768630981445, "learning_rate": 3.6734693877551024e-06, "loss": 0.8299, "step": 22714 }, { "epoch": 92.71428571428571, "grad_norm": 34.19134521484375, "learning_rate": 3.6714285714285717e-06, "loss": 0.4099, "step": 22715 }, { "epoch": 92.71836734693878, "grad_norm": 25.83687973022461, "learning_rate": 3.6693877551020413e-06, "loss": 0.2684, "step": 22716 }, { "epoch": 92.72244897959183, "grad_norm": 43.12625503540039, "learning_rate": 3.66734693877551e-06, "loss": 0.6653, "step": 22717 }, { "epoch": 92.7265306122449, "grad_norm": 38.31399154663086, "learning_rate": 3.66530612244898e-06, "loss": 0.6579, "step": 22718 }, { "epoch": 92.73061224489796, "grad_norm": 28.932674407958984, "learning_rate": 3.6632653061224495e-06, "loss": 0.9385, "step": 22719 }, { "epoch": 92.73469387755102, "grad_norm": 25.346435546875, "learning_rate": 3.6612244897959183e-06, "loss": 0.5068, "step": 22720 }, { "epoch": 92.73877551020408, "grad_norm": 36.65147399902344, "learning_rate": 3.659183673469388e-06, "loss": 1.2636, "step": 22721 }, { "epoch": 92.74285714285715, "grad_norm": 32.76803970336914, "learning_rate": 3.6571428571428576e-06, "loss": 0.4827, "step": 22722 }, { "epoch": 92.7469387755102, "grad_norm": 30.009990692138672, "learning_rate": 3.6551020408163265e-06, "loss": 0.4458, "step": 22723 }, { "epoch": 92.75102040816327, "grad_norm": 27.844511032104492, "learning_rate": 3.653061224489796e-06, "loss": 1.2172, "step": 22724 }, { "epoch": 92.75510204081633, "grad_norm": 27.002817153930664, "learning_rate": 3.651020408163266e-06, "loss": 1.035, "step": 22725 }, { "epoch": 92.7591836734694, "grad_norm": 31.984142303466797, "learning_rate": 3.6489795918367346e-06, "loss": 0.917, "step": 22726 }, { "epoch": 92.76326530612245, "grad_norm": 38.381534576416016, "learning_rate": 3.6469387755102043e-06, "loss": 0.3263, "step": 22727 }, { "epoch": 92.7673469387755, "grad_norm": 29.641433715820312, "learning_rate": 3.6448979591836735e-06, "loss": 1.4066, "step": 22728 }, { "epoch": 92.77142857142857, "grad_norm": 34.26869583129883, "learning_rate": 3.642857142857143e-06, "loss": 1.0499, "step": 22729 }, { "epoch": 92.77551020408163, "grad_norm": 30.51038360595703, "learning_rate": 3.6408163265306124e-06, "loss": 0.9308, "step": 22730 }, { "epoch": 92.7795918367347, "grad_norm": 38.50910949707031, "learning_rate": 3.6387755102040817e-06, "loss": 0.7517, "step": 22731 }, { "epoch": 92.78367346938775, "grad_norm": 28.104528427124023, "learning_rate": 3.6367346938775513e-06, "loss": 1.0839, "step": 22732 }, { "epoch": 92.78775510204082, "grad_norm": 32.76591873168945, "learning_rate": 3.634693877551021e-06, "loss": 0.3319, "step": 22733 }, { "epoch": 92.79183673469387, "grad_norm": 25.477195739746094, "learning_rate": 3.63265306122449e-06, "loss": 0.6003, "step": 22734 }, { "epoch": 92.79591836734694, "grad_norm": 8.208632469177246, "learning_rate": 3.6306122448979595e-06, "loss": 2.9491, "step": 22735 }, { "epoch": 92.8, "grad_norm": 27.03207015991211, "learning_rate": 3.6285714285714283e-06, "loss": 0.6429, "step": 22736 }, { "epoch": 92.80408163265307, "grad_norm": 38.84916305541992, "learning_rate": 3.626530612244898e-06, "loss": 1.0303, "step": 22737 }, { "epoch": 92.80816326530612, "grad_norm": 33.804866790771484, "learning_rate": 3.6244897959183676e-06, "loss": 0.6228, "step": 22738 }, { "epoch": 92.81224489795919, "grad_norm": 25.800622940063477, "learning_rate": 3.6224489795918365e-06, "loss": 0.4899, "step": 22739 }, { "epoch": 92.81632653061224, "grad_norm": 31.109712600708008, "learning_rate": 3.620408163265306e-06, "loss": 1.2431, "step": 22740 }, { "epoch": 92.82040816326531, "grad_norm": 33.12800979614258, "learning_rate": 3.618367346938776e-06, "loss": 1.0993, "step": 22741 }, { "epoch": 92.82448979591837, "grad_norm": 35.195186614990234, "learning_rate": 3.616326530612245e-06, "loss": 0.58, "step": 22742 }, { "epoch": 92.82857142857142, "grad_norm": 30.41033935546875, "learning_rate": 3.6142857142857143e-06, "loss": 0.412, "step": 22743 }, { "epoch": 92.83265306122449, "grad_norm": 28.436664581298828, "learning_rate": 3.612244897959184e-06, "loss": 1.4903, "step": 22744 }, { "epoch": 92.83673469387755, "grad_norm": 28.788681030273438, "learning_rate": 3.610204081632653e-06, "loss": 0.2121, "step": 22745 }, { "epoch": 92.84081632653061, "grad_norm": 26.794389724731445, "learning_rate": 3.608163265306123e-06, "loss": 1.4286, "step": 22746 }, { "epoch": 92.84489795918367, "grad_norm": 35.44424057006836, "learning_rate": 3.6061224489795917e-06, "loss": 1.2581, "step": 22747 }, { "epoch": 92.84897959183674, "grad_norm": 22.386030197143555, "learning_rate": 3.6040816326530614e-06, "loss": 1.3992, "step": 22748 }, { "epoch": 92.85306122448979, "grad_norm": 33.60004425048828, "learning_rate": 3.602040816326531e-06, "loss": 0.4769, "step": 22749 }, { "epoch": 92.85714285714286, "grad_norm": 6.905677318572998, "learning_rate": 3.6e-06, "loss": 0.0815, "step": 22750 }, { "epoch": 92.86122448979592, "grad_norm": 34.53861999511719, "learning_rate": 3.5979591836734695e-06, "loss": 0.7536, "step": 22751 }, { "epoch": 92.86530612244898, "grad_norm": 41.31667709350586, "learning_rate": 3.595918367346939e-06, "loss": 0.5233, "step": 22752 }, { "epoch": 92.86938775510204, "grad_norm": 31.417444229125977, "learning_rate": 3.593877551020408e-06, "loss": 1.2264, "step": 22753 }, { "epoch": 92.87346938775511, "grad_norm": 24.385541915893555, "learning_rate": 3.5918367346938777e-06, "loss": 0.2408, "step": 22754 }, { "epoch": 92.87755102040816, "grad_norm": 16.545217514038086, "learning_rate": 3.5897959183673473e-06, "loss": 0.1035, "step": 22755 }, { "epoch": 92.88163265306123, "grad_norm": 47.46965789794922, "learning_rate": 3.5877551020408166e-06, "loss": 0.2848, "step": 22756 }, { "epoch": 92.88571428571429, "grad_norm": 33.51509475708008, "learning_rate": 3.585714285714286e-06, "loss": 0.9717, "step": 22757 }, { "epoch": 92.88979591836734, "grad_norm": 32.435482025146484, "learning_rate": 3.583673469387755e-06, "loss": 1.574, "step": 22758 }, { "epoch": 92.89387755102041, "grad_norm": 29.704498291015625, "learning_rate": 3.5816326530612247e-06, "loss": 0.4414, "step": 22759 }, { "epoch": 92.89795918367346, "grad_norm": 37.0106315612793, "learning_rate": 3.5795918367346944e-06, "loss": 0.7139, "step": 22760 }, { "epoch": 92.90204081632653, "grad_norm": 22.292428970336914, "learning_rate": 3.577551020408163e-06, "loss": 1.4363, "step": 22761 }, { "epoch": 92.90612244897959, "grad_norm": 1.7652634382247925, "learning_rate": 3.575510204081633e-06, "loss": 0.0266, "step": 22762 }, { "epoch": 92.91020408163266, "grad_norm": 33.09067916870117, "learning_rate": 3.5734693877551025e-06, "loss": 0.387, "step": 22763 }, { "epoch": 92.91428571428571, "grad_norm": 23.99039077758789, "learning_rate": 3.5714285714285714e-06, "loss": 1.9329, "step": 22764 }, { "epoch": 92.91836734693878, "grad_norm": 29.045442581176758, "learning_rate": 3.569387755102041e-06, "loss": 1.1382, "step": 22765 }, { "epoch": 92.92244897959183, "grad_norm": 32.93110656738281, "learning_rate": 3.5673469387755107e-06, "loss": 0.5907, "step": 22766 }, { "epoch": 92.9265306122449, "grad_norm": 28.691377639770508, "learning_rate": 3.5653061224489795e-06, "loss": 0.9777, "step": 22767 }, { "epoch": 92.93061224489796, "grad_norm": 32.27131271362305, "learning_rate": 3.563265306122449e-06, "loss": 0.2983, "step": 22768 }, { "epoch": 92.93469387755103, "grad_norm": 26.854190826416016, "learning_rate": 3.5612244897959184e-06, "loss": 1.1127, "step": 22769 }, { "epoch": 92.93877551020408, "grad_norm": 27.931732177734375, "learning_rate": 3.5591836734693877e-06, "loss": 1.7755, "step": 22770 }, { "epoch": 92.94285714285714, "grad_norm": 36.988887786865234, "learning_rate": 3.5571428571428573e-06, "loss": 0.9026, "step": 22771 }, { "epoch": 92.9469387755102, "grad_norm": 20.577590942382812, "learning_rate": 3.5551020408163266e-06, "loss": 1.7722, "step": 22772 }, { "epoch": 92.95102040816326, "grad_norm": 32.507965087890625, "learning_rate": 3.5530612244897962e-06, "loss": 0.7844, "step": 22773 }, { "epoch": 92.95510204081633, "grad_norm": 8.498994827270508, "learning_rate": 3.551020408163266e-06, "loss": 0.0768, "step": 22774 }, { "epoch": 92.95918367346938, "grad_norm": 32.121238708496094, "learning_rate": 3.5489795918367347e-06, "loss": 1.2013, "step": 22775 }, { "epoch": 92.96326530612245, "grad_norm": 30.370229721069336, "learning_rate": 3.5469387755102044e-06, "loss": 0.1861, "step": 22776 }, { "epoch": 92.9673469387755, "grad_norm": 27.59951400756836, "learning_rate": 3.544897959183674e-06, "loss": 1.4952, "step": 22777 }, { "epoch": 92.97142857142858, "grad_norm": 32.211692810058594, "learning_rate": 3.542857142857143e-06, "loss": 1.0917, "step": 22778 }, { "epoch": 92.97551020408163, "grad_norm": 26.176605224609375, "learning_rate": 3.5408163265306125e-06, "loss": 1.3618, "step": 22779 }, { "epoch": 92.9795918367347, "grad_norm": 24.395606994628906, "learning_rate": 3.5387755102040814e-06, "loss": 1.8196, "step": 22780 }, { "epoch": 92.98367346938775, "grad_norm": 34.31190490722656, "learning_rate": 3.536734693877551e-06, "loss": 0.7176, "step": 22781 }, { "epoch": 92.98775510204082, "grad_norm": 25.748035430908203, "learning_rate": 3.5346938775510207e-06, "loss": 1.3019, "step": 22782 }, { "epoch": 92.99183673469388, "grad_norm": 25.61810874938965, "learning_rate": 3.5326530612244895e-06, "loss": 0.6389, "step": 22783 }, { "epoch": 92.99591836734695, "grad_norm": 42.10578918457031, "learning_rate": 3.530612244897959e-06, "loss": 0.4861, "step": 22784 }, { "epoch": 93.0, "grad_norm": 11.155320167541504, "learning_rate": 3.528571428571429e-06, "loss": 0.0738, "step": 22785 }, { "epoch": 93.00408163265305, "grad_norm": 32.807552337646484, "learning_rate": 3.526530612244898e-06, "loss": 1.2338, "step": 22786 }, { "epoch": 93.00816326530612, "grad_norm": 25.565448760986328, "learning_rate": 3.5244897959183678e-06, "loss": 1.2975, "step": 22787 }, { "epoch": 93.01224489795918, "grad_norm": 17.818784713745117, "learning_rate": 3.5224489795918374e-06, "loss": 0.0649, "step": 22788 }, { "epoch": 93.01632653061225, "grad_norm": 38.47727584838867, "learning_rate": 3.5204081632653062e-06, "loss": 0.4684, "step": 22789 }, { "epoch": 93.0204081632653, "grad_norm": 25.817813873291016, "learning_rate": 3.518367346938776e-06, "loss": 0.3778, "step": 22790 }, { "epoch": 93.02448979591837, "grad_norm": 30.50065803527832, "learning_rate": 3.5163265306122447e-06, "loss": 0.7103, "step": 22791 }, { "epoch": 93.02857142857142, "grad_norm": 25.758729934692383, "learning_rate": 3.5142857142857144e-06, "loss": 1.4066, "step": 22792 }, { "epoch": 93.0326530612245, "grad_norm": 25.951887130737305, "learning_rate": 3.512244897959184e-06, "loss": 1.3745, "step": 22793 }, { "epoch": 93.03673469387755, "grad_norm": 30.177305221557617, "learning_rate": 3.510204081632653e-06, "loss": 0.8163, "step": 22794 }, { "epoch": 93.04081632653062, "grad_norm": 12.910552024841309, "learning_rate": 3.5081632653061226e-06, "loss": 0.108, "step": 22795 }, { "epoch": 93.04489795918367, "grad_norm": 39.42361068725586, "learning_rate": 3.5061224489795922e-06, "loss": 0.6232, "step": 22796 }, { "epoch": 93.04897959183674, "grad_norm": 20.412248611450195, "learning_rate": 3.504081632653061e-06, "loss": 0.25, "step": 22797 }, { "epoch": 93.0530612244898, "grad_norm": 31.804868698120117, "learning_rate": 3.5020408163265307e-06, "loss": 1.2386, "step": 22798 }, { "epoch": 93.05714285714286, "grad_norm": 35.72072982788086, "learning_rate": 3.5000000000000004e-06, "loss": 0.3257, "step": 22799 }, { "epoch": 93.06122448979592, "grad_norm": 34.751930236816406, "learning_rate": 3.4979591836734696e-06, "loss": 0.8961, "step": 22800 }, { "epoch": 93.06530612244897, "grad_norm": 32.906951904296875, "learning_rate": 3.4959183673469393e-06, "loss": 1.229, "step": 22801 }, { "epoch": 93.06938775510204, "grad_norm": 44.46818542480469, "learning_rate": 3.493877551020408e-06, "loss": 0.7456, "step": 22802 }, { "epoch": 93.0734693877551, "grad_norm": 29.98122787475586, "learning_rate": 3.4918367346938778e-06, "loss": 0.8471, "step": 22803 }, { "epoch": 93.07755102040817, "grad_norm": 17.069059371948242, "learning_rate": 3.4897959183673474e-06, "loss": 0.0846, "step": 22804 }, { "epoch": 93.08163265306122, "grad_norm": 29.48521614074707, "learning_rate": 3.4877551020408163e-06, "loss": 0.5221, "step": 22805 }, { "epoch": 93.08571428571429, "grad_norm": 28.219327926635742, "learning_rate": 3.485714285714286e-06, "loss": 1.5417, "step": 22806 }, { "epoch": 93.08979591836734, "grad_norm": 7.631813049316406, "learning_rate": 3.4836734693877556e-06, "loss": 0.0653, "step": 22807 }, { "epoch": 93.09387755102041, "grad_norm": 38.32086944580078, "learning_rate": 3.4816326530612244e-06, "loss": 0.2572, "step": 22808 }, { "epoch": 93.09795918367347, "grad_norm": 20.842060089111328, "learning_rate": 3.479591836734694e-06, "loss": 0.3159, "step": 22809 }, { "epoch": 93.10204081632654, "grad_norm": 25.253887176513672, "learning_rate": 3.4775510204081637e-06, "loss": 1.338, "step": 22810 }, { "epoch": 93.10612244897959, "grad_norm": 34.1801872253418, "learning_rate": 3.4755102040816326e-06, "loss": 0.6895, "step": 22811 }, { "epoch": 93.11020408163266, "grad_norm": 25.637184143066406, "learning_rate": 3.4734693877551022e-06, "loss": 1.4588, "step": 22812 }, { "epoch": 93.11428571428571, "grad_norm": 22.37339973449707, "learning_rate": 3.4714285714285715e-06, "loss": 1.4766, "step": 22813 }, { "epoch": 93.11836734693877, "grad_norm": 23.459674835205078, "learning_rate": 3.469387755102041e-06, "loss": 1.6086, "step": 22814 }, { "epoch": 93.12244897959184, "grad_norm": 37.03349685668945, "learning_rate": 3.4673469387755104e-06, "loss": 0.2521, "step": 22815 }, { "epoch": 93.12653061224489, "grad_norm": 22.34792709350586, "learning_rate": 3.4653061224489796e-06, "loss": 0.2713, "step": 22816 }, { "epoch": 93.13061224489796, "grad_norm": 36.258644104003906, "learning_rate": 3.4632653061224493e-06, "loss": 0.9447, "step": 22817 }, { "epoch": 93.13469387755102, "grad_norm": 27.737424850463867, "learning_rate": 3.461224489795919e-06, "loss": 1.2684, "step": 22818 }, { "epoch": 93.13877551020408, "grad_norm": 53.10580825805664, "learning_rate": 3.4591836734693878e-06, "loss": 1.0677, "step": 22819 }, { "epoch": 93.14285714285714, "grad_norm": 27.744672775268555, "learning_rate": 3.4571428571428574e-06, "loss": 0.4963, "step": 22820 }, { "epoch": 93.14693877551021, "grad_norm": 17.94585418701172, "learning_rate": 3.4551020408163263e-06, "loss": 2.4941, "step": 22821 }, { "epoch": 93.15102040816326, "grad_norm": 40.2024040222168, "learning_rate": 3.453061224489796e-06, "loss": 0.9705, "step": 22822 }, { "epoch": 93.15510204081633, "grad_norm": 23.177391052246094, "learning_rate": 3.4510204081632656e-06, "loss": 1.8077, "step": 22823 }, { "epoch": 93.15918367346939, "grad_norm": 21.595144271850586, "learning_rate": 3.4489795918367344e-06, "loss": 1.8264, "step": 22824 }, { "epoch": 93.16326530612245, "grad_norm": 31.404497146606445, "learning_rate": 3.446938775510204e-06, "loss": 1.2938, "step": 22825 }, { "epoch": 93.16734693877551, "grad_norm": 28.103540420532227, "learning_rate": 3.4448979591836737e-06, "loss": 1.4024, "step": 22826 }, { "epoch": 93.17142857142858, "grad_norm": 34.16627502441406, "learning_rate": 3.442857142857143e-06, "loss": 1.044, "step": 22827 }, { "epoch": 93.17551020408163, "grad_norm": 29.919475555419922, "learning_rate": 3.4408163265306127e-06, "loss": 0.2593, "step": 22828 }, { "epoch": 93.17959183673469, "grad_norm": 23.73268699645996, "learning_rate": 3.438775510204082e-06, "loss": 0.6179, "step": 22829 }, { "epoch": 93.18367346938776, "grad_norm": 34.06480026245117, "learning_rate": 3.436734693877551e-06, "loss": 1.1009, "step": 22830 }, { "epoch": 93.18775510204081, "grad_norm": 28.677852630615234, "learning_rate": 3.434693877551021e-06, "loss": 0.5785, "step": 22831 }, { "epoch": 93.19183673469388, "grad_norm": 34.985931396484375, "learning_rate": 3.4326530612244896e-06, "loss": 0.8386, "step": 22832 }, { "epoch": 93.19591836734693, "grad_norm": 36.50253677368164, "learning_rate": 3.4306122448979593e-06, "loss": 0.726, "step": 22833 }, { "epoch": 93.2, "grad_norm": 18.988964080810547, "learning_rate": 3.428571428571429e-06, "loss": 0.1243, "step": 22834 }, { "epoch": 93.20408163265306, "grad_norm": 22.705129623413086, "learning_rate": 3.4265306122448978e-06, "loss": 1.3769, "step": 22835 }, { "epoch": 93.20816326530613, "grad_norm": 30.862823486328125, "learning_rate": 3.4244897959183674e-06, "loss": 0.2649, "step": 22836 }, { "epoch": 93.21224489795918, "grad_norm": 32.5590705871582, "learning_rate": 3.422448979591837e-06, "loss": 0.6087, "step": 22837 }, { "epoch": 93.21632653061225, "grad_norm": 31.500591278076172, "learning_rate": 3.420408163265306e-06, "loss": 0.7718, "step": 22838 }, { "epoch": 93.2204081632653, "grad_norm": 27.84967803955078, "learning_rate": 3.4183673469387756e-06, "loss": 1.4243, "step": 22839 }, { "epoch": 93.22448979591837, "grad_norm": 36.175132751464844, "learning_rate": 3.4163265306122453e-06, "loss": 0.9294, "step": 22840 }, { "epoch": 93.22857142857143, "grad_norm": 32.49205780029297, "learning_rate": 3.4142857142857145e-06, "loss": 0.5926, "step": 22841 }, { "epoch": 93.2326530612245, "grad_norm": 24.919435501098633, "learning_rate": 3.4122448979591838e-06, "loss": 2.1279, "step": 22842 }, { "epoch": 93.23673469387755, "grad_norm": 30.158063888549805, "learning_rate": 3.410204081632653e-06, "loss": 1.1043, "step": 22843 }, { "epoch": 93.2408163265306, "grad_norm": 27.08176040649414, "learning_rate": 3.4081632653061227e-06, "loss": 1.3409, "step": 22844 }, { "epoch": 93.24489795918367, "grad_norm": 29.18752098083496, "learning_rate": 3.4061224489795923e-06, "loss": 0.6446, "step": 22845 }, { "epoch": 93.24897959183673, "grad_norm": 28.348255157470703, "learning_rate": 3.404081632653061e-06, "loss": 1.1974, "step": 22846 }, { "epoch": 93.2530612244898, "grad_norm": 25.858919143676758, "learning_rate": 3.402040816326531e-06, "loss": 0.5166, "step": 22847 }, { "epoch": 93.25714285714285, "grad_norm": 34.86771011352539, "learning_rate": 3.4000000000000005e-06, "loss": 1.1285, "step": 22848 }, { "epoch": 93.26122448979592, "grad_norm": 38.75572204589844, "learning_rate": 3.3979591836734693e-06, "loss": 1.0885, "step": 22849 }, { "epoch": 93.26530612244898, "grad_norm": 22.263046264648438, "learning_rate": 3.395918367346939e-06, "loss": 1.9576, "step": 22850 }, { "epoch": 93.26938775510204, "grad_norm": 30.25115394592285, "learning_rate": 3.3938775510204086e-06, "loss": 0.3275, "step": 22851 }, { "epoch": 93.2734693877551, "grad_norm": 26.955175399780273, "learning_rate": 3.3918367346938775e-06, "loss": 1.321, "step": 22852 }, { "epoch": 93.27755102040817, "grad_norm": 21.804344177246094, "learning_rate": 3.389795918367347e-06, "loss": 1.8759, "step": 22853 }, { "epoch": 93.28163265306122, "grad_norm": 34.352622985839844, "learning_rate": 3.3877551020408164e-06, "loss": 1.2063, "step": 22854 }, { "epoch": 93.28571428571429, "grad_norm": 30.36517906188965, "learning_rate": 3.3857142857142856e-06, "loss": 0.8905, "step": 22855 }, { "epoch": 93.28979591836735, "grad_norm": 22.083560943603516, "learning_rate": 3.3836734693877553e-06, "loss": 0.2516, "step": 22856 }, { "epoch": 93.29387755102042, "grad_norm": 10.663612365722656, "learning_rate": 3.3816326530612245e-06, "loss": 0.0735, "step": 22857 }, { "epoch": 93.29795918367347, "grad_norm": 29.74944496154785, "learning_rate": 3.379591836734694e-06, "loss": 1.1873, "step": 22858 }, { "epoch": 93.30204081632652, "grad_norm": 26.411455154418945, "learning_rate": 3.377551020408164e-06, "loss": 1.109, "step": 22859 }, { "epoch": 93.3061224489796, "grad_norm": 28.46229362487793, "learning_rate": 3.3755102040816327e-06, "loss": 0.2952, "step": 22860 }, { "epoch": 93.31020408163265, "grad_norm": 36.820274353027344, "learning_rate": 3.3734693877551023e-06, "loss": 0.4351, "step": 22861 }, { "epoch": 93.31428571428572, "grad_norm": 19.142406463623047, "learning_rate": 3.371428571428572e-06, "loss": 0.187, "step": 22862 }, { "epoch": 93.31836734693877, "grad_norm": 38.83650207519531, "learning_rate": 3.369387755102041e-06, "loss": 1.0348, "step": 22863 }, { "epoch": 93.32244897959184, "grad_norm": 20.281160354614258, "learning_rate": 3.3673469387755105e-06, "loss": 2.034, "step": 22864 }, { "epoch": 93.3265306122449, "grad_norm": 33.6756591796875, "learning_rate": 3.3653061224489793e-06, "loss": 0.9902, "step": 22865 }, { "epoch": 93.33061224489796, "grad_norm": 24.660423278808594, "learning_rate": 3.363265306122449e-06, "loss": 0.2856, "step": 22866 }, { "epoch": 93.33469387755102, "grad_norm": 31.346773147583008, "learning_rate": 3.3612244897959186e-06, "loss": 0.9385, "step": 22867 }, { "epoch": 93.33877551020409, "grad_norm": 24.80362892150879, "learning_rate": 3.3591836734693875e-06, "loss": 1.2546, "step": 22868 }, { "epoch": 93.34285714285714, "grad_norm": 31.731430053710938, "learning_rate": 3.357142857142857e-06, "loss": 0.454, "step": 22869 }, { "epoch": 93.34693877551021, "grad_norm": 26.5831298828125, "learning_rate": 3.355102040816327e-06, "loss": 1.3357, "step": 22870 }, { "epoch": 93.35102040816327, "grad_norm": 28.11775016784668, "learning_rate": 3.353061224489796e-06, "loss": 0.3323, "step": 22871 }, { "epoch": 93.35510204081632, "grad_norm": 31.3896541595459, "learning_rate": 3.3510204081632657e-06, "loss": 0.4023, "step": 22872 }, { "epoch": 93.35918367346939, "grad_norm": 15.526422500610352, "learning_rate": 3.3489795918367354e-06, "loss": 0.1017, "step": 22873 }, { "epoch": 93.36326530612244, "grad_norm": 23.376995086669922, "learning_rate": 3.346938775510204e-06, "loss": 1.7691, "step": 22874 }, { "epoch": 93.36734693877551, "grad_norm": 29.534072875976562, "learning_rate": 3.344897959183674e-06, "loss": 1.0726, "step": 22875 }, { "epoch": 93.37142857142857, "grad_norm": 8.913848876953125, "learning_rate": 3.3428571428571427e-06, "loss": 0.082, "step": 22876 }, { "epoch": 93.37551020408164, "grad_norm": 29.223737716674805, "learning_rate": 3.3408163265306123e-06, "loss": 0.9459, "step": 22877 }, { "epoch": 93.37959183673469, "grad_norm": 28.118709564208984, "learning_rate": 3.338775510204082e-06, "loss": 1.0982, "step": 22878 }, { "epoch": 93.38367346938776, "grad_norm": 16.252756118774414, "learning_rate": 3.336734693877551e-06, "loss": 0.0764, "step": 22879 }, { "epoch": 93.38775510204081, "grad_norm": 28.80005645751953, "learning_rate": 3.3346938775510205e-06, "loss": 0.1814, "step": 22880 }, { "epoch": 93.39183673469388, "grad_norm": 24.74579429626465, "learning_rate": 3.33265306122449e-06, "loss": 0.2779, "step": 22881 }, { "epoch": 93.39591836734694, "grad_norm": 26.93122100830078, "learning_rate": 3.330612244897959e-06, "loss": 1.4783, "step": 22882 }, { "epoch": 93.4, "grad_norm": 22.18575668334961, "learning_rate": 3.3285714285714286e-06, "loss": 0.1912, "step": 22883 }, { "epoch": 93.40408163265306, "grad_norm": 34.23999786376953, "learning_rate": 3.3265306122448983e-06, "loss": 0.9429, "step": 22884 }, { "epoch": 93.40816326530613, "grad_norm": 41.36410140991211, "learning_rate": 3.3244897959183676e-06, "loss": 1.2436, "step": 22885 }, { "epoch": 93.41224489795918, "grad_norm": 30.100448608398438, "learning_rate": 3.3224489795918372e-06, "loss": 0.5299, "step": 22886 }, { "epoch": 93.41632653061224, "grad_norm": 28.63858985900879, "learning_rate": 3.320408163265306e-06, "loss": 1.6483, "step": 22887 }, { "epoch": 93.42040816326531, "grad_norm": 29.14950180053711, "learning_rate": 3.3183673469387757e-06, "loss": 0.2703, "step": 22888 }, { "epoch": 93.42448979591836, "grad_norm": 39.127376556396484, "learning_rate": 3.3163265306122454e-06, "loss": 0.3077, "step": 22889 }, { "epoch": 93.42857142857143, "grad_norm": 25.024856567382812, "learning_rate": 3.314285714285714e-06, "loss": 1.457, "step": 22890 }, { "epoch": 93.43265306122449, "grad_norm": 29.058134078979492, "learning_rate": 3.312244897959184e-06, "loss": 1.1036, "step": 22891 }, { "epoch": 93.43673469387755, "grad_norm": 36.403358459472656, "learning_rate": 3.3102040816326535e-06, "loss": 0.7895, "step": 22892 }, { "epoch": 93.44081632653061, "grad_norm": 32.79904556274414, "learning_rate": 3.3081632653061223e-06, "loss": 1.1059, "step": 22893 }, { "epoch": 93.44489795918368, "grad_norm": 24.528316497802734, "learning_rate": 3.306122448979592e-06, "loss": 0.2073, "step": 22894 }, { "epoch": 93.44897959183673, "grad_norm": 30.40880012512207, "learning_rate": 3.3040816326530617e-06, "loss": 0.6702, "step": 22895 }, { "epoch": 93.4530612244898, "grad_norm": 42.889732360839844, "learning_rate": 3.3020408163265305e-06, "loss": 0.7442, "step": 22896 }, { "epoch": 93.45714285714286, "grad_norm": 16.087278366088867, "learning_rate": 3.3e-06, "loss": 2.1796, "step": 22897 }, { "epoch": 93.46122448979592, "grad_norm": 29.712533950805664, "learning_rate": 3.2979591836734694e-06, "loss": 0.8294, "step": 22898 }, { "epoch": 93.46530612244898, "grad_norm": 25.219425201416016, "learning_rate": 3.295918367346939e-06, "loss": 0.4244, "step": 22899 }, { "epoch": 93.46938775510205, "grad_norm": 30.568777084350586, "learning_rate": 3.2938775510204087e-06, "loss": 1.2062, "step": 22900 }, { "epoch": 93.4734693877551, "grad_norm": 22.323205947875977, "learning_rate": 3.2918367346938776e-06, "loss": 0.2987, "step": 22901 }, { "epoch": 93.47755102040816, "grad_norm": 26.377452850341797, "learning_rate": 3.2897959183673472e-06, "loss": 0.2199, "step": 22902 }, { "epoch": 93.48163265306123, "grad_norm": 21.015687942504883, "learning_rate": 3.287755102040817e-06, "loss": 0.1559, "step": 22903 }, { "epoch": 93.48571428571428, "grad_norm": 30.40610694885254, "learning_rate": 3.2857142857142857e-06, "loss": 1.4085, "step": 22904 }, { "epoch": 93.48979591836735, "grad_norm": 35.094390869140625, "learning_rate": 3.2836734693877554e-06, "loss": 0.7308, "step": 22905 }, { "epoch": 93.4938775510204, "grad_norm": 33.03846740722656, "learning_rate": 3.281632653061224e-06, "loss": 0.9073, "step": 22906 }, { "epoch": 93.49795918367347, "grad_norm": 30.86581802368164, "learning_rate": 3.279591836734694e-06, "loss": 1.1104, "step": 22907 }, { "epoch": 93.50204081632653, "grad_norm": 31.486318588256836, "learning_rate": 3.2775510204081635e-06, "loss": 0.3292, "step": 22908 }, { "epoch": 93.5061224489796, "grad_norm": 35.33695983886719, "learning_rate": 3.2755102040816324e-06, "loss": 0.2845, "step": 22909 }, { "epoch": 93.51020408163265, "grad_norm": 44.80403137207031, "learning_rate": 3.273469387755102e-06, "loss": 0.637, "step": 22910 }, { "epoch": 93.51428571428572, "grad_norm": 16.9776668548584, "learning_rate": 3.2714285714285717e-06, "loss": 0.2188, "step": 22911 }, { "epoch": 93.51836734693877, "grad_norm": 33.32655715942383, "learning_rate": 3.269387755102041e-06, "loss": 1.4008, "step": 22912 }, { "epoch": 93.52244897959184, "grad_norm": 27.611040115356445, "learning_rate": 3.2673469387755106e-06, "loss": 0.3069, "step": 22913 }, { "epoch": 93.5265306122449, "grad_norm": 50.56499099731445, "learning_rate": 3.26530612244898e-06, "loss": 0.4538, "step": 22914 }, { "epoch": 93.53061224489795, "grad_norm": 25.590551376342773, "learning_rate": 3.263265306122449e-06, "loss": 1.8455, "step": 22915 }, { "epoch": 93.53469387755102, "grad_norm": 34.534549713134766, "learning_rate": 3.2612244897959188e-06, "loss": 1.0986, "step": 22916 }, { "epoch": 93.53877551020408, "grad_norm": 27.619998931884766, "learning_rate": 3.2591836734693876e-06, "loss": 0.2793, "step": 22917 }, { "epoch": 93.54285714285714, "grad_norm": 29.081457138061523, "learning_rate": 3.2571428571428572e-06, "loss": 0.3383, "step": 22918 }, { "epoch": 93.5469387755102, "grad_norm": 16.203807830810547, "learning_rate": 3.255102040816327e-06, "loss": 0.1002, "step": 22919 }, { "epoch": 93.55102040816327, "grad_norm": 26.84233283996582, "learning_rate": 3.2530612244897957e-06, "loss": 1.2185, "step": 22920 }, { "epoch": 93.55510204081632, "grad_norm": 6.172253131866455, "learning_rate": 3.2510204081632654e-06, "loss": 0.0629, "step": 22921 }, { "epoch": 93.55918367346939, "grad_norm": 33.364322662353516, "learning_rate": 3.248979591836735e-06, "loss": 1.2733, "step": 22922 }, { "epoch": 93.56326530612245, "grad_norm": 23.681640625, "learning_rate": 3.246938775510204e-06, "loss": 0.3362, "step": 22923 }, { "epoch": 93.56734693877551, "grad_norm": 35.57304763793945, "learning_rate": 3.2448979591836735e-06, "loss": 0.8513, "step": 22924 }, { "epoch": 93.57142857142857, "grad_norm": 28.536888122558594, "learning_rate": 3.242857142857143e-06, "loss": 0.9847, "step": 22925 }, { "epoch": 93.57551020408164, "grad_norm": 34.31535339355469, "learning_rate": 3.2408163265306125e-06, "loss": 0.7687, "step": 22926 }, { "epoch": 93.57959183673469, "grad_norm": 37.599029541015625, "learning_rate": 3.2387755102040817e-06, "loss": 0.6977, "step": 22927 }, { "epoch": 93.58367346938776, "grad_norm": 30.453617095947266, "learning_rate": 3.236734693877551e-06, "loss": 0.6612, "step": 22928 }, { "epoch": 93.58775510204082, "grad_norm": 24.945911407470703, "learning_rate": 3.2346938775510206e-06, "loss": 0.3128, "step": 22929 }, { "epoch": 93.59183673469387, "grad_norm": 30.628599166870117, "learning_rate": 3.2326530612244903e-06, "loss": 0.6284, "step": 22930 }, { "epoch": 93.59591836734694, "grad_norm": 32.74762725830078, "learning_rate": 3.230612244897959e-06, "loss": 0.54, "step": 22931 }, { "epoch": 93.6, "grad_norm": 32.92903518676758, "learning_rate": 3.2285714285714288e-06, "loss": 0.7289, "step": 22932 }, { "epoch": 93.60408163265306, "grad_norm": 29.69245147705078, "learning_rate": 3.2265306122448984e-06, "loss": 0.6166, "step": 22933 }, { "epoch": 93.60816326530612, "grad_norm": 30.94261932373047, "learning_rate": 3.2244897959183672e-06, "loss": 0.2199, "step": 22934 }, { "epoch": 93.61224489795919, "grad_norm": 22.49113655090332, "learning_rate": 3.222448979591837e-06, "loss": 1.8501, "step": 22935 }, { "epoch": 93.61632653061224, "grad_norm": 32.66101837158203, "learning_rate": 3.2204081632653066e-06, "loss": 0.5506, "step": 22936 }, { "epoch": 93.62040816326531, "grad_norm": 32.5565299987793, "learning_rate": 3.2183673469387754e-06, "loss": 0.3336, "step": 22937 }, { "epoch": 93.62448979591836, "grad_norm": 26.40835189819336, "learning_rate": 3.216326530612245e-06, "loss": 1.3326, "step": 22938 }, { "epoch": 93.62857142857143, "grad_norm": 21.52910804748535, "learning_rate": 3.2142857142857143e-06, "loss": 1.2502, "step": 22939 }, { "epoch": 93.63265306122449, "grad_norm": 28.579771041870117, "learning_rate": 3.212244897959184e-06, "loss": 0.4798, "step": 22940 }, { "epoch": 93.63673469387756, "grad_norm": 25.857070922851562, "learning_rate": 3.2102040816326532e-06, "loss": 1.1281, "step": 22941 }, { "epoch": 93.64081632653061, "grad_norm": 1.365087628364563, "learning_rate": 3.2081632653061225e-06, "loss": 0.0244, "step": 22942 }, { "epoch": 93.64489795918368, "grad_norm": 35.0186882019043, "learning_rate": 3.206122448979592e-06, "loss": 0.8541, "step": 22943 }, { "epoch": 93.64897959183673, "grad_norm": 34.73614501953125, "learning_rate": 3.204081632653062e-06, "loss": 0.4429, "step": 22944 }, { "epoch": 93.65306122448979, "grad_norm": 32.77031326293945, "learning_rate": 3.2020408163265306e-06, "loss": 0.2491, "step": 22945 }, { "epoch": 93.65714285714286, "grad_norm": 29.56477928161621, "learning_rate": 3.2000000000000003e-06, "loss": 1.0774, "step": 22946 }, { "epoch": 93.66122448979591, "grad_norm": 34.45256042480469, "learning_rate": 3.19795918367347e-06, "loss": 0.4998, "step": 22947 }, { "epoch": 93.66530612244898, "grad_norm": 21.72498893737793, "learning_rate": 3.1959183673469388e-06, "loss": 0.1897, "step": 22948 }, { "epoch": 93.66938775510204, "grad_norm": 34.475460052490234, "learning_rate": 3.1938775510204084e-06, "loss": 0.7271, "step": 22949 }, { "epoch": 93.6734693877551, "grad_norm": 23.58928108215332, "learning_rate": 3.1918367346938773e-06, "loss": 1.729, "step": 22950 }, { "epoch": 93.67755102040816, "grad_norm": 25.030244827270508, "learning_rate": 3.189795918367347e-06, "loss": 0.1997, "step": 22951 }, { "epoch": 93.68163265306123, "grad_norm": 4.615606784820557, "learning_rate": 3.1877551020408166e-06, "loss": 0.0436, "step": 22952 }, { "epoch": 93.68571428571428, "grad_norm": 16.31661605834961, "learning_rate": 3.185714285714286e-06, "loss": 0.1027, "step": 22953 }, { "epoch": 93.68979591836735, "grad_norm": 26.654781341552734, "learning_rate": 3.183673469387755e-06, "loss": 0.4274, "step": 22954 }, { "epoch": 93.6938775510204, "grad_norm": 29.111955642700195, "learning_rate": 3.1816326530612247e-06, "loss": 1.1047, "step": 22955 }, { "epoch": 93.69795918367348, "grad_norm": 24.221715927124023, "learning_rate": 3.179591836734694e-06, "loss": 1.3149, "step": 22956 }, { "epoch": 93.70204081632653, "grad_norm": 31.753021240234375, "learning_rate": 3.1775510204081636e-06, "loss": 0.8402, "step": 22957 }, { "epoch": 93.70612244897958, "grad_norm": 19.955717086791992, "learning_rate": 3.1755102040816333e-06, "loss": 0.1961, "step": 22958 }, { "epoch": 93.71020408163265, "grad_norm": 23.944154739379883, "learning_rate": 3.173469387755102e-06, "loss": 1.4508, "step": 22959 }, { "epoch": 93.71428571428571, "grad_norm": 27.063491821289062, "learning_rate": 3.171428571428572e-06, "loss": 1.4785, "step": 22960 }, { "epoch": 93.71836734693878, "grad_norm": 9.043512344360352, "learning_rate": 3.1693877551020406e-06, "loss": 0.0723, "step": 22961 }, { "epoch": 93.72244897959183, "grad_norm": 29.486083984375, "learning_rate": 3.1673469387755103e-06, "loss": 1.0345, "step": 22962 }, { "epoch": 93.7265306122449, "grad_norm": 35.71508026123047, "learning_rate": 3.16530612244898e-06, "loss": 1.1548, "step": 22963 }, { "epoch": 93.73061224489796, "grad_norm": 32.96908187866211, "learning_rate": 3.1632653061224488e-06, "loss": 0.4782, "step": 22964 }, { "epoch": 93.73469387755102, "grad_norm": 31.811967849731445, "learning_rate": 3.1612244897959184e-06, "loss": 0.5886, "step": 22965 }, { "epoch": 93.73877551020408, "grad_norm": 24.72572135925293, "learning_rate": 3.159183673469388e-06, "loss": 0.6182, "step": 22966 }, { "epoch": 93.74285714285715, "grad_norm": 26.564607620239258, "learning_rate": 3.157142857142857e-06, "loss": 1.7574, "step": 22967 }, { "epoch": 93.7469387755102, "grad_norm": 28.248842239379883, "learning_rate": 3.1551020408163266e-06, "loss": 0.2738, "step": 22968 }, { "epoch": 93.75102040816327, "grad_norm": 50.14771270751953, "learning_rate": 3.1530612244897963e-06, "loss": 0.7902, "step": 22969 }, { "epoch": 93.75510204081633, "grad_norm": 30.942771911621094, "learning_rate": 3.1510204081632655e-06, "loss": 1.2787, "step": 22970 }, { "epoch": 93.7591836734694, "grad_norm": 31.34417152404785, "learning_rate": 3.148979591836735e-06, "loss": 1.5373, "step": 22971 }, { "epoch": 93.76326530612245, "grad_norm": 25.799985885620117, "learning_rate": 3.146938775510204e-06, "loss": 1.8334, "step": 22972 }, { "epoch": 93.7673469387755, "grad_norm": 34.594398498535156, "learning_rate": 3.1448979591836737e-06, "loss": 0.5975, "step": 22973 }, { "epoch": 93.77142857142857, "grad_norm": 28.74675750732422, "learning_rate": 3.1428571428571433e-06, "loss": 0.3756, "step": 22974 }, { "epoch": 93.77551020408163, "grad_norm": 36.50832748413086, "learning_rate": 3.140816326530612e-06, "loss": 0.6709, "step": 22975 }, { "epoch": 93.7795918367347, "grad_norm": 33.907020568847656, "learning_rate": 3.138775510204082e-06, "loss": 1.2173, "step": 22976 }, { "epoch": 93.78367346938775, "grad_norm": 37.645145416259766, "learning_rate": 3.1367346938775515e-06, "loss": 1.046, "step": 22977 }, { "epoch": 93.78775510204082, "grad_norm": 31.479597091674805, "learning_rate": 3.1346938775510203e-06, "loss": 0.4016, "step": 22978 }, { "epoch": 93.79183673469387, "grad_norm": 29.281015396118164, "learning_rate": 3.13265306122449e-06, "loss": 0.5656, "step": 22979 }, { "epoch": 93.79591836734694, "grad_norm": 30.672508239746094, "learning_rate": 3.1306122448979596e-06, "loss": 1.0373, "step": 22980 }, { "epoch": 93.8, "grad_norm": 36.6727294921875, "learning_rate": 3.1285714285714284e-06, "loss": 0.8513, "step": 22981 }, { "epoch": 93.80408163265307, "grad_norm": 21.91058921813965, "learning_rate": 3.126530612244898e-06, "loss": 1.1567, "step": 22982 }, { "epoch": 93.80816326530612, "grad_norm": 27.647628784179688, "learning_rate": 3.1244897959183674e-06, "loss": 1.5333, "step": 22983 }, { "epoch": 93.81224489795919, "grad_norm": 33.00954818725586, "learning_rate": 3.122448979591837e-06, "loss": 0.237, "step": 22984 }, { "epoch": 93.81632653061224, "grad_norm": 18.268882751464844, "learning_rate": 3.1204081632653063e-06, "loss": 1.9154, "step": 22985 }, { "epoch": 93.82040816326531, "grad_norm": 27.55037498474121, "learning_rate": 3.118367346938776e-06, "loss": 0.8682, "step": 22986 }, { "epoch": 93.82448979591837, "grad_norm": 35.92533874511719, "learning_rate": 3.116326530612245e-06, "loss": 0.5107, "step": 22987 }, { "epoch": 93.82857142857142, "grad_norm": 36.94715118408203, "learning_rate": 3.1142857142857144e-06, "loss": 0.631, "step": 22988 }, { "epoch": 93.83265306122449, "grad_norm": 27.40676498413086, "learning_rate": 3.1122448979591837e-06, "loss": 0.607, "step": 22989 }, { "epoch": 93.83673469387755, "grad_norm": 24.580772399902344, "learning_rate": 3.1102040816326533e-06, "loss": 1.3645, "step": 22990 }, { "epoch": 93.84081632653061, "grad_norm": 25.044713973999023, "learning_rate": 3.1081632653061226e-06, "loss": 1.545, "step": 22991 }, { "epoch": 93.84489795918367, "grad_norm": 27.381826400756836, "learning_rate": 3.106122448979592e-06, "loss": 1.9891, "step": 22992 }, { "epoch": 93.84897959183674, "grad_norm": 13.886775970458984, "learning_rate": 3.1040816326530615e-06, "loss": 0.1295, "step": 22993 }, { "epoch": 93.85306122448979, "grad_norm": 25.606313705444336, "learning_rate": 3.1020408163265307e-06, "loss": 1.0248, "step": 22994 }, { "epoch": 93.85714285714286, "grad_norm": 22.330812454223633, "learning_rate": 3.1e-06, "loss": 1.8452, "step": 22995 }, { "epoch": 93.86122448979592, "grad_norm": 29.611421585083008, "learning_rate": 3.097959183673469e-06, "loss": 0.2077, "step": 22996 }, { "epoch": 93.86530612244898, "grad_norm": 45.915435791015625, "learning_rate": 3.095918367346939e-06, "loss": 0.5926, "step": 22997 }, { "epoch": 93.86938775510204, "grad_norm": 18.222900390625, "learning_rate": 3.0938775510204085e-06, "loss": 2.2405, "step": 22998 }, { "epoch": 93.87346938775511, "grad_norm": 22.49323272705078, "learning_rate": 3.0918367346938778e-06, "loss": 1.3692, "step": 22999 }, { "epoch": 93.87755102040816, "grad_norm": 26.6422176361084, "learning_rate": 3.089795918367347e-06, "loss": 0.386, "step": 23000 }, { "epoch": 93.88163265306123, "grad_norm": 27.731727600097656, "learning_rate": 3.0877551020408167e-06, "loss": 0.3918, "step": 23001 }, { "epoch": 93.88571428571429, "grad_norm": 28.429170608520508, "learning_rate": 3.085714285714286e-06, "loss": 1.109, "step": 23002 }, { "epoch": 93.88979591836734, "grad_norm": 36.66922378540039, "learning_rate": 3.083673469387755e-06, "loss": 0.7307, "step": 23003 }, { "epoch": 93.89387755102041, "grad_norm": 36.621219635009766, "learning_rate": 3.0816326530612244e-06, "loss": 0.5455, "step": 23004 }, { "epoch": 93.89795918367346, "grad_norm": 32.30215072631836, "learning_rate": 3.079591836734694e-06, "loss": 0.4805, "step": 23005 }, { "epoch": 93.90204081632653, "grad_norm": 30.55442237854004, "learning_rate": 3.0775510204081633e-06, "loss": 0.484, "step": 23006 }, { "epoch": 93.90612244897959, "grad_norm": 29.62714195251465, "learning_rate": 3.0755102040816326e-06, "loss": 1.4322, "step": 23007 }, { "epoch": 93.91020408163266, "grad_norm": 34.98099899291992, "learning_rate": 3.0734693877551022e-06, "loss": 0.5175, "step": 23008 }, { "epoch": 93.91428571428571, "grad_norm": 18.73402214050293, "learning_rate": 3.0714285714285715e-06, "loss": 0.2263, "step": 23009 }, { "epoch": 93.91836734693878, "grad_norm": 24.111726760864258, "learning_rate": 3.0693877551020407e-06, "loss": 0.2671, "step": 23010 }, { "epoch": 93.92244897959183, "grad_norm": 22.517375946044922, "learning_rate": 3.0673469387755104e-06, "loss": 1.9287, "step": 23011 }, { "epoch": 93.9265306122449, "grad_norm": 39.03537368774414, "learning_rate": 3.06530612244898e-06, "loss": 0.8721, "step": 23012 }, { "epoch": 93.93061224489796, "grad_norm": 34.90469741821289, "learning_rate": 3.0632653061224493e-06, "loss": 0.3287, "step": 23013 }, { "epoch": 93.93469387755103, "grad_norm": 37.7038688659668, "learning_rate": 3.0612244897959185e-06, "loss": 0.6474, "step": 23014 }, { "epoch": 93.93877551020408, "grad_norm": 28.689672470092773, "learning_rate": 3.059183673469388e-06, "loss": 1.3031, "step": 23015 }, { "epoch": 93.94285714285714, "grad_norm": 28.330564498901367, "learning_rate": 3.0571428571428575e-06, "loss": 0.2952, "step": 23016 }, { "epoch": 93.9469387755102, "grad_norm": 21.960372924804688, "learning_rate": 3.0551020408163267e-06, "loss": 1.7855, "step": 23017 }, { "epoch": 93.95102040816326, "grad_norm": 20.57086753845215, "learning_rate": 3.053061224489796e-06, "loss": 1.9433, "step": 23018 }, { "epoch": 93.95510204081633, "grad_norm": 16.647117614746094, "learning_rate": 3.0510204081632656e-06, "loss": 0.128, "step": 23019 }, { "epoch": 93.95918367346938, "grad_norm": 20.38213348388672, "learning_rate": 3.048979591836735e-06, "loss": 0.2185, "step": 23020 }, { "epoch": 93.96326530612245, "grad_norm": 33.67715072631836, "learning_rate": 3.046938775510204e-06, "loss": 0.5716, "step": 23021 }, { "epoch": 93.9673469387755, "grad_norm": 23.407882690429688, "learning_rate": 3.0448979591836733e-06, "loss": 1.3503, "step": 23022 }, { "epoch": 93.97142857142858, "grad_norm": 25.525449752807617, "learning_rate": 3.042857142857143e-06, "loss": 1.2535, "step": 23023 }, { "epoch": 93.97551020408163, "grad_norm": 37.04789733886719, "learning_rate": 3.0408163265306122e-06, "loss": 0.9953, "step": 23024 }, { "epoch": 93.9795918367347, "grad_norm": 27.76584243774414, "learning_rate": 3.038775510204082e-06, "loss": 0.9044, "step": 23025 }, { "epoch": 93.98367346938775, "grad_norm": 9.935532569885254, "learning_rate": 3.036734693877551e-06, "loss": 2.949, "step": 23026 }, { "epoch": 93.98775510204082, "grad_norm": 30.117473602294922, "learning_rate": 3.034693877551021e-06, "loss": 1.3414, "step": 23027 }, { "epoch": 93.99183673469388, "grad_norm": 28.750003814697266, "learning_rate": 3.03265306122449e-06, "loss": 1.3461, "step": 23028 }, { "epoch": 93.99591836734695, "grad_norm": 28.75522232055664, "learning_rate": 3.0306122448979593e-06, "loss": 0.9379, "step": 23029 }, { "epoch": 94.0, "grad_norm": 25.14839744567871, "learning_rate": 3.028571428571429e-06, "loss": 0.1887, "step": 23030 }, { "epoch": 94.00408163265305, "grad_norm": 28.00276756286621, "learning_rate": 3.0265306122448982e-06, "loss": 1.1074, "step": 23031 }, { "epoch": 94.00816326530612, "grad_norm": 22.826772689819336, "learning_rate": 3.0244897959183675e-06, "loss": 1.7074, "step": 23032 }, { "epoch": 94.01224489795918, "grad_norm": 20.254732131958008, "learning_rate": 3.0224489795918367e-06, "loss": 0.1817, "step": 23033 }, { "epoch": 94.01632653061225, "grad_norm": 12.511171340942383, "learning_rate": 3.0204081632653064e-06, "loss": 0.1133, "step": 23034 }, { "epoch": 94.0204081632653, "grad_norm": 23.47007942199707, "learning_rate": 3.0183673469387756e-06, "loss": 1.4282, "step": 23035 }, { "epoch": 94.02448979591837, "grad_norm": 22.54642105102539, "learning_rate": 3.016326530612245e-06, "loss": 1.4095, "step": 23036 }, { "epoch": 94.02857142857142, "grad_norm": 28.143260955810547, "learning_rate": 3.014285714285714e-06, "loss": 1.4238, "step": 23037 }, { "epoch": 94.0326530612245, "grad_norm": 20.117265701293945, "learning_rate": 3.0122448979591838e-06, "loss": 2.0064, "step": 23038 }, { "epoch": 94.03673469387755, "grad_norm": 30.218229293823242, "learning_rate": 3.010204081632653e-06, "loss": 0.6004, "step": 23039 }, { "epoch": 94.04081632653062, "grad_norm": 32.879608154296875, "learning_rate": 3.0081632653061227e-06, "loss": 0.4459, "step": 23040 }, { "epoch": 94.04489795918367, "grad_norm": 26.887487411499023, "learning_rate": 3.0061224489795923e-06, "loss": 1.3291, "step": 23041 }, { "epoch": 94.04897959183674, "grad_norm": 33.166385650634766, "learning_rate": 3.0040816326530616e-06, "loss": 0.827, "step": 23042 }, { "epoch": 94.0530612244898, "grad_norm": 22.741273880004883, "learning_rate": 3.002040816326531e-06, "loss": 1.1602, "step": 23043 }, { "epoch": 94.05714285714286, "grad_norm": 28.403837203979492, "learning_rate": 3e-06, "loss": 0.3781, "step": 23044 }, { "epoch": 94.06122448979592, "grad_norm": 31.632709503173828, "learning_rate": 2.9979591836734697e-06, "loss": 0.8328, "step": 23045 }, { "epoch": 94.06530612244897, "grad_norm": 31.741748809814453, "learning_rate": 2.995918367346939e-06, "loss": 0.686, "step": 23046 }, { "epoch": 94.06938775510204, "grad_norm": 6.937929630279541, "learning_rate": 2.9938775510204082e-06, "loss": 0.0605, "step": 23047 }, { "epoch": 94.0734693877551, "grad_norm": 28.960283279418945, "learning_rate": 2.9918367346938775e-06, "loss": 0.7801, "step": 23048 }, { "epoch": 94.07755102040817, "grad_norm": 29.437969207763672, "learning_rate": 2.989795918367347e-06, "loss": 1.2936, "step": 23049 }, { "epoch": 94.08163265306122, "grad_norm": 30.049665451049805, "learning_rate": 2.9877551020408164e-06, "loss": 0.9489, "step": 23050 }, { "epoch": 94.08571428571429, "grad_norm": 22.955284118652344, "learning_rate": 2.9857142857142856e-06, "loss": 0.5914, "step": 23051 }, { "epoch": 94.08979591836734, "grad_norm": 29.720518112182617, "learning_rate": 2.983673469387755e-06, "loss": 0.3374, "step": 23052 }, { "epoch": 94.09387755102041, "grad_norm": 33.71552658081055, "learning_rate": 2.9816326530612245e-06, "loss": 0.7045, "step": 23053 }, { "epoch": 94.09795918367347, "grad_norm": 34.40045928955078, "learning_rate": 2.979591836734694e-06, "loss": 0.2562, "step": 23054 }, { "epoch": 94.10204081632654, "grad_norm": 39.655723571777344, "learning_rate": 2.9775510204081634e-06, "loss": 0.7518, "step": 23055 }, { "epoch": 94.10612244897959, "grad_norm": 22.834468841552734, "learning_rate": 2.975510204081633e-06, "loss": 0.203, "step": 23056 }, { "epoch": 94.11020408163266, "grad_norm": 26.857316970825195, "learning_rate": 2.9734693877551024e-06, "loss": 0.2531, "step": 23057 }, { "epoch": 94.11428571428571, "grad_norm": 22.914241790771484, "learning_rate": 2.9714285714285716e-06, "loss": 1.7731, "step": 23058 }, { "epoch": 94.11836734693877, "grad_norm": 33.01636505126953, "learning_rate": 2.969387755102041e-06, "loss": 0.8952, "step": 23059 }, { "epoch": 94.12244897959184, "grad_norm": 25.37397003173828, "learning_rate": 2.9673469387755105e-06, "loss": 2.1642, "step": 23060 }, { "epoch": 94.12653061224489, "grad_norm": 25.73552703857422, "learning_rate": 2.9653061224489797e-06, "loss": 1.8039, "step": 23061 }, { "epoch": 94.13061224489796, "grad_norm": 46.56794357299805, "learning_rate": 2.963265306122449e-06, "loss": 1.042, "step": 23062 }, { "epoch": 94.13469387755102, "grad_norm": 29.776443481445312, "learning_rate": 2.9612244897959182e-06, "loss": 1.1778, "step": 23063 }, { "epoch": 94.13877551020408, "grad_norm": 19.321184158325195, "learning_rate": 2.959183673469388e-06, "loss": 1.8837, "step": 23064 }, { "epoch": 94.14285714285714, "grad_norm": 18.389381408691406, "learning_rate": 2.957142857142857e-06, "loss": 0.2356, "step": 23065 }, { "epoch": 94.14693877551021, "grad_norm": 28.206151962280273, "learning_rate": 2.9551020408163264e-06, "loss": 1.2695, "step": 23066 }, { "epoch": 94.15102040816326, "grad_norm": 24.836557388305664, "learning_rate": 2.953061224489796e-06, "loss": 1.3801, "step": 23067 }, { "epoch": 94.15510204081633, "grad_norm": 27.306360244750977, "learning_rate": 2.9510204081632657e-06, "loss": 0.5546, "step": 23068 }, { "epoch": 94.15918367346939, "grad_norm": 30.673442840576172, "learning_rate": 2.948979591836735e-06, "loss": 0.941, "step": 23069 }, { "epoch": 94.16326530612245, "grad_norm": 26.800151824951172, "learning_rate": 2.946938775510204e-06, "loss": 1.413, "step": 23070 }, { "epoch": 94.16734693877551, "grad_norm": 30.88016128540039, "learning_rate": 2.944897959183674e-06, "loss": 0.3827, "step": 23071 }, { "epoch": 94.17142857142858, "grad_norm": 21.51521110534668, "learning_rate": 2.942857142857143e-06, "loss": 1.252, "step": 23072 }, { "epoch": 94.17551020408163, "grad_norm": 32.727783203125, "learning_rate": 2.9408163265306124e-06, "loss": 0.4903, "step": 23073 }, { "epoch": 94.17959183673469, "grad_norm": 38.18198776245117, "learning_rate": 2.9387755102040816e-06, "loss": 1.2795, "step": 23074 }, { "epoch": 94.18367346938776, "grad_norm": 29.368541717529297, "learning_rate": 2.9367346938775513e-06, "loss": 1.3004, "step": 23075 }, { "epoch": 94.18775510204081, "grad_norm": 18.135082244873047, "learning_rate": 2.9346938775510205e-06, "loss": 2.4728, "step": 23076 }, { "epoch": 94.19183673469388, "grad_norm": 37.75331497192383, "learning_rate": 2.9326530612244898e-06, "loss": 0.8506, "step": 23077 }, { "epoch": 94.19591836734693, "grad_norm": 33.87290954589844, "learning_rate": 2.9306122448979594e-06, "loss": 1.1997, "step": 23078 }, { "epoch": 94.2, "grad_norm": 22.71634864807129, "learning_rate": 2.9285714285714287e-06, "loss": 1.8273, "step": 23079 }, { "epoch": 94.20408163265306, "grad_norm": 36.272586822509766, "learning_rate": 2.926530612244898e-06, "loss": 0.292, "step": 23080 }, { "epoch": 94.20816326530613, "grad_norm": 32.53080368041992, "learning_rate": 2.9244897959183676e-06, "loss": 0.4591, "step": 23081 }, { "epoch": 94.21224489795918, "grad_norm": 28.672203063964844, "learning_rate": 2.922448979591837e-06, "loss": 1.4783, "step": 23082 }, { "epoch": 94.21632653061225, "grad_norm": 28.418432235717773, "learning_rate": 2.9204081632653065e-06, "loss": 1.0912, "step": 23083 }, { "epoch": 94.2204081632653, "grad_norm": 24.942054748535156, "learning_rate": 2.9183673469387757e-06, "loss": 1.515, "step": 23084 }, { "epoch": 94.22448979591837, "grad_norm": 9.942456245422363, "learning_rate": 2.916326530612245e-06, "loss": 0.075, "step": 23085 }, { "epoch": 94.22857142857143, "grad_norm": 36.759765625, "learning_rate": 2.9142857142857146e-06, "loss": 1.0033, "step": 23086 }, { "epoch": 94.2326530612245, "grad_norm": 12.918180465698242, "learning_rate": 2.912244897959184e-06, "loss": 0.1107, "step": 23087 }, { "epoch": 94.23673469387755, "grad_norm": 32.94419479370117, "learning_rate": 2.910204081632653e-06, "loss": 0.5134, "step": 23088 }, { "epoch": 94.2408163265306, "grad_norm": 23.583452224731445, "learning_rate": 2.9081632653061224e-06, "loss": 1.955, "step": 23089 }, { "epoch": 94.24489795918367, "grad_norm": 33.934051513671875, "learning_rate": 2.906122448979592e-06, "loss": 1.3756, "step": 23090 }, { "epoch": 94.24897959183673, "grad_norm": 28.635944366455078, "learning_rate": 2.9040816326530613e-06, "loss": 0.9007, "step": 23091 }, { "epoch": 94.2530612244898, "grad_norm": 21.52613639831543, "learning_rate": 2.9020408163265305e-06, "loss": 0.2038, "step": 23092 }, { "epoch": 94.25714285714285, "grad_norm": 32.514259338378906, "learning_rate": 2.9e-06, "loss": 0.6388, "step": 23093 }, { "epoch": 94.26122448979592, "grad_norm": 25.002426147460938, "learning_rate": 2.8979591836734694e-06, "loss": 0.3035, "step": 23094 }, { "epoch": 94.26530612244898, "grad_norm": 27.469223022460938, "learning_rate": 2.8959183673469387e-06, "loss": 0.4388, "step": 23095 }, { "epoch": 94.26938775510204, "grad_norm": 35.55043411254883, "learning_rate": 2.8938775510204083e-06, "loss": 1.0464, "step": 23096 }, { "epoch": 94.2734693877551, "grad_norm": 27.63892936706543, "learning_rate": 2.891836734693878e-06, "loss": 0.6061, "step": 23097 }, { "epoch": 94.27755102040817, "grad_norm": 27.423873901367188, "learning_rate": 2.8897959183673472e-06, "loss": 1.4507, "step": 23098 }, { "epoch": 94.28163265306122, "grad_norm": 19.64322280883789, "learning_rate": 2.8877551020408165e-06, "loss": 0.1482, "step": 23099 }, { "epoch": 94.28571428571429, "grad_norm": 37.8699836730957, "learning_rate": 2.8857142857142857e-06, "loss": 0.9666, "step": 23100 }, { "epoch": 94.28979591836735, "grad_norm": 30.920398712158203, "learning_rate": 2.8836734693877554e-06, "loss": 0.5896, "step": 23101 }, { "epoch": 94.29387755102042, "grad_norm": 30.532499313354492, "learning_rate": 2.8816326530612246e-06, "loss": 1.1219, "step": 23102 }, { "epoch": 94.29795918367347, "grad_norm": 31.443227767944336, "learning_rate": 2.879591836734694e-06, "loss": 0.2063, "step": 23103 }, { "epoch": 94.30204081632652, "grad_norm": 26.948238372802734, "learning_rate": 2.8775510204081636e-06, "loss": 0.656, "step": 23104 }, { "epoch": 94.3061224489796, "grad_norm": 35.50039291381836, "learning_rate": 2.875510204081633e-06, "loss": 0.7064, "step": 23105 }, { "epoch": 94.31020408163265, "grad_norm": 28.876638412475586, "learning_rate": 2.873469387755102e-06, "loss": 1.7394, "step": 23106 }, { "epoch": 94.31428571428572, "grad_norm": 30.256759643554688, "learning_rate": 2.8714285714285713e-06, "loss": 0.4499, "step": 23107 }, { "epoch": 94.31836734693877, "grad_norm": 18.68490982055664, "learning_rate": 2.869387755102041e-06, "loss": 2.2239, "step": 23108 }, { "epoch": 94.32244897959184, "grad_norm": 20.208126068115234, "learning_rate": 2.86734693877551e-06, "loss": 1.9186, "step": 23109 }, { "epoch": 94.3265306122449, "grad_norm": 30.162960052490234, "learning_rate": 2.86530612244898e-06, "loss": 0.3194, "step": 23110 }, { "epoch": 94.33061224489796, "grad_norm": 26.521554946899414, "learning_rate": 2.863265306122449e-06, "loss": 0.4997, "step": 23111 }, { "epoch": 94.33469387755102, "grad_norm": 37.300167083740234, "learning_rate": 2.8612244897959188e-06, "loss": 0.5981, "step": 23112 }, { "epoch": 94.33877551020409, "grad_norm": 28.734411239624023, "learning_rate": 2.859183673469388e-06, "loss": 0.4879, "step": 23113 }, { "epoch": 94.34285714285714, "grad_norm": 30.4775333404541, "learning_rate": 2.8571428571428573e-06, "loss": 0.7177, "step": 23114 }, { "epoch": 94.34693877551021, "grad_norm": 29.92630958557129, "learning_rate": 2.855102040816327e-06, "loss": 1.203, "step": 23115 }, { "epoch": 94.35102040816327, "grad_norm": 33.22977066040039, "learning_rate": 2.853061224489796e-06, "loss": 0.691, "step": 23116 }, { "epoch": 94.35510204081632, "grad_norm": 39.84629440307617, "learning_rate": 2.8510204081632654e-06, "loss": 1.0856, "step": 23117 }, { "epoch": 94.35918367346939, "grad_norm": 35.45761489868164, "learning_rate": 2.8489795918367347e-06, "loss": 1.1113, "step": 23118 }, { "epoch": 94.36326530612244, "grad_norm": 20.46270751953125, "learning_rate": 2.8469387755102043e-06, "loss": 1.7529, "step": 23119 }, { "epoch": 94.36734693877551, "grad_norm": 26.08152961730957, "learning_rate": 2.8448979591836736e-06, "loss": 1.4291, "step": 23120 }, { "epoch": 94.37142857142857, "grad_norm": 11.96626091003418, "learning_rate": 2.842857142857143e-06, "loss": 0.1145, "step": 23121 }, { "epoch": 94.37551020408164, "grad_norm": 25.56197166442871, "learning_rate": 2.840816326530612e-06, "loss": 1.3091, "step": 23122 }, { "epoch": 94.37959183673469, "grad_norm": 35.473915100097656, "learning_rate": 2.8387755102040817e-06, "loss": 0.5659, "step": 23123 }, { "epoch": 94.38367346938776, "grad_norm": 38.17460250854492, "learning_rate": 2.8367346938775514e-06, "loss": 0.7979, "step": 23124 }, { "epoch": 94.38775510204081, "grad_norm": 26.45880889892578, "learning_rate": 2.8346938775510206e-06, "loss": 1.3568, "step": 23125 }, { "epoch": 94.39183673469388, "grad_norm": 31.615154266357422, "learning_rate": 2.8326530612244903e-06, "loss": 0.9377, "step": 23126 }, { "epoch": 94.39591836734694, "grad_norm": 22.970993041992188, "learning_rate": 2.8306122448979595e-06, "loss": 1.3065, "step": 23127 }, { "epoch": 94.4, "grad_norm": 29.550565719604492, "learning_rate": 2.8285714285714288e-06, "loss": 0.2129, "step": 23128 }, { "epoch": 94.40408163265306, "grad_norm": 24.94659423828125, "learning_rate": 2.826530612244898e-06, "loss": 0.2665, "step": 23129 }, { "epoch": 94.40816326530613, "grad_norm": 29.38606071472168, "learning_rate": 2.8244897959183677e-06, "loss": 1.6686, "step": 23130 }, { "epoch": 94.41224489795918, "grad_norm": 34.0284309387207, "learning_rate": 2.822448979591837e-06, "loss": 0.6141, "step": 23131 }, { "epoch": 94.41632653061224, "grad_norm": 10.107185363769531, "learning_rate": 2.820408163265306e-06, "loss": 0.0734, "step": 23132 }, { "epoch": 94.42040816326531, "grad_norm": 33.050140380859375, "learning_rate": 2.8183673469387754e-06, "loss": 1.0733, "step": 23133 }, { "epoch": 94.42448979591836, "grad_norm": 31.085119247436523, "learning_rate": 2.816326530612245e-06, "loss": 1.0807, "step": 23134 }, { "epoch": 94.42857142857143, "grad_norm": 23.447866439819336, "learning_rate": 2.8142857142857143e-06, "loss": 0.2353, "step": 23135 }, { "epoch": 94.43265306122449, "grad_norm": 30.615018844604492, "learning_rate": 2.8122448979591836e-06, "loss": 0.8278, "step": 23136 }, { "epoch": 94.43673469387755, "grad_norm": 35.83748245239258, "learning_rate": 2.8102040816326532e-06, "loss": 0.6278, "step": 23137 }, { "epoch": 94.44081632653061, "grad_norm": 30.568639755249023, "learning_rate": 2.8081632653061225e-06, "loss": 0.2791, "step": 23138 }, { "epoch": 94.44489795918368, "grad_norm": 30.986116409301758, "learning_rate": 2.806122448979592e-06, "loss": 0.5908, "step": 23139 }, { "epoch": 94.44897959183673, "grad_norm": 45.14247131347656, "learning_rate": 2.8040816326530614e-06, "loss": 0.7816, "step": 23140 }, { "epoch": 94.4530612244898, "grad_norm": 33.656471252441406, "learning_rate": 2.802040816326531e-06, "loss": 0.2423, "step": 23141 }, { "epoch": 94.45714285714286, "grad_norm": 26.775461196899414, "learning_rate": 2.8000000000000003e-06, "loss": 1.1881, "step": 23142 }, { "epoch": 94.46122448979592, "grad_norm": 27.39755630493164, "learning_rate": 2.7979591836734695e-06, "loss": 1.0229, "step": 23143 }, { "epoch": 94.46530612244898, "grad_norm": 32.07508850097656, "learning_rate": 2.7959183673469388e-06, "loss": 0.6717, "step": 23144 }, { "epoch": 94.46938775510205, "grad_norm": 24.303247451782227, "learning_rate": 2.7938775510204084e-06, "loss": 1.4395, "step": 23145 }, { "epoch": 94.4734693877551, "grad_norm": 22.059188842773438, "learning_rate": 2.7918367346938777e-06, "loss": 0.1995, "step": 23146 }, { "epoch": 94.47755102040816, "grad_norm": 30.182703018188477, "learning_rate": 2.789795918367347e-06, "loss": 1.0682, "step": 23147 }, { "epoch": 94.48163265306123, "grad_norm": 28.61298179626465, "learning_rate": 2.787755102040816e-06, "loss": 0.4401, "step": 23148 }, { "epoch": 94.48571428571428, "grad_norm": 26.68018341064453, "learning_rate": 2.785714285714286e-06, "loss": 1.0754, "step": 23149 }, { "epoch": 94.48979591836735, "grad_norm": 32.75001525878906, "learning_rate": 2.783673469387755e-06, "loss": 1.2255, "step": 23150 }, { "epoch": 94.4938775510204, "grad_norm": 11.063802719116211, "learning_rate": 2.7816326530612243e-06, "loss": 0.0851, "step": 23151 }, { "epoch": 94.49795918367347, "grad_norm": 26.063976287841797, "learning_rate": 2.779591836734694e-06, "loss": 1.2483, "step": 23152 }, { "epoch": 94.50204081632653, "grad_norm": 27.361177444458008, "learning_rate": 2.7775510204081637e-06, "loss": 0.3115, "step": 23153 }, { "epoch": 94.5061224489796, "grad_norm": 31.38945770263672, "learning_rate": 2.775510204081633e-06, "loss": 0.2686, "step": 23154 }, { "epoch": 94.51020408163265, "grad_norm": 27.23166847229004, "learning_rate": 2.773469387755102e-06, "loss": 1.8456, "step": 23155 }, { "epoch": 94.51428571428572, "grad_norm": 39.07242965698242, "learning_rate": 2.771428571428572e-06, "loss": 0.9592, "step": 23156 }, { "epoch": 94.51836734693877, "grad_norm": 29.37395668029785, "learning_rate": 2.769387755102041e-06, "loss": 0.5421, "step": 23157 }, { "epoch": 94.52244897959184, "grad_norm": 38.79014587402344, "learning_rate": 2.7673469387755103e-06, "loss": 0.8566, "step": 23158 }, { "epoch": 94.5265306122449, "grad_norm": 24.97527503967285, "learning_rate": 2.7653061224489795e-06, "loss": 0.1647, "step": 23159 }, { "epoch": 94.53061224489795, "grad_norm": 35.502288818359375, "learning_rate": 2.7632653061224492e-06, "loss": 0.9796, "step": 23160 }, { "epoch": 94.53469387755102, "grad_norm": 15.35811710357666, "learning_rate": 2.7612244897959185e-06, "loss": 0.0778, "step": 23161 }, { "epoch": 94.53877551020408, "grad_norm": 25.646739959716797, "learning_rate": 2.7591836734693877e-06, "loss": 1.2775, "step": 23162 }, { "epoch": 94.54285714285714, "grad_norm": 31.788877487182617, "learning_rate": 2.7571428571428574e-06, "loss": 0.6833, "step": 23163 }, { "epoch": 94.5469387755102, "grad_norm": 31.908321380615234, "learning_rate": 2.7551020408163266e-06, "loss": 0.8044, "step": 23164 }, { "epoch": 94.55102040816327, "grad_norm": 16.69303321838379, "learning_rate": 2.753061224489796e-06, "loss": 2.1699, "step": 23165 }, { "epoch": 94.55510204081632, "grad_norm": 32.351043701171875, "learning_rate": 2.7510204081632655e-06, "loss": 0.2879, "step": 23166 }, { "epoch": 94.55918367346939, "grad_norm": 30.960168838500977, "learning_rate": 2.7489795918367348e-06, "loss": 0.8439, "step": 23167 }, { "epoch": 94.56326530612245, "grad_norm": 24.129785537719727, "learning_rate": 2.7469387755102044e-06, "loss": 1.3725, "step": 23168 }, { "epoch": 94.56734693877551, "grad_norm": 27.73480224609375, "learning_rate": 2.7448979591836737e-06, "loss": 1.3465, "step": 23169 }, { "epoch": 94.57142857142857, "grad_norm": 35.311973571777344, "learning_rate": 2.742857142857143e-06, "loss": 0.9693, "step": 23170 }, { "epoch": 94.57551020408164, "grad_norm": 24.501415252685547, "learning_rate": 2.7408163265306126e-06, "loss": 1.6098, "step": 23171 }, { "epoch": 94.57959183673469, "grad_norm": 35.0910530090332, "learning_rate": 2.738775510204082e-06, "loss": 0.295, "step": 23172 }, { "epoch": 94.58367346938776, "grad_norm": 31.163286209106445, "learning_rate": 2.736734693877551e-06, "loss": 0.334, "step": 23173 }, { "epoch": 94.58775510204082, "grad_norm": 23.781227111816406, "learning_rate": 2.7346938775510203e-06, "loss": 1.3067, "step": 23174 }, { "epoch": 94.59183673469387, "grad_norm": 27.5137882232666, "learning_rate": 2.73265306122449e-06, "loss": 0.24, "step": 23175 }, { "epoch": 94.59591836734694, "grad_norm": 31.447498321533203, "learning_rate": 2.7306122448979592e-06, "loss": 0.9289, "step": 23176 }, { "epoch": 94.6, "grad_norm": 31.145967483520508, "learning_rate": 2.7285714285714285e-06, "loss": 1.3652, "step": 23177 }, { "epoch": 94.60408163265306, "grad_norm": 32.04595947265625, "learning_rate": 2.726530612244898e-06, "loss": 0.3042, "step": 23178 }, { "epoch": 94.60816326530612, "grad_norm": 29.02985382080078, "learning_rate": 2.7244897959183674e-06, "loss": 0.995, "step": 23179 }, { "epoch": 94.61224489795919, "grad_norm": 25.47909164428711, "learning_rate": 2.7224489795918366e-06, "loss": 0.371, "step": 23180 }, { "epoch": 94.61632653061224, "grad_norm": 27.512184143066406, "learning_rate": 2.7204081632653063e-06, "loss": 0.2827, "step": 23181 }, { "epoch": 94.62040816326531, "grad_norm": 28.22821617126465, "learning_rate": 2.718367346938776e-06, "loss": 1.4143, "step": 23182 }, { "epoch": 94.62448979591836, "grad_norm": 22.70351791381836, "learning_rate": 2.716326530612245e-06, "loss": 1.8541, "step": 23183 }, { "epoch": 94.62857142857143, "grad_norm": 19.996545791625977, "learning_rate": 2.7142857142857144e-06, "loss": 0.2289, "step": 23184 }, { "epoch": 94.63265306122449, "grad_norm": 25.814149856567383, "learning_rate": 2.7122448979591837e-06, "loss": 1.4865, "step": 23185 }, { "epoch": 94.63673469387756, "grad_norm": 38.33202362060547, "learning_rate": 2.7102040816326533e-06, "loss": 0.7122, "step": 23186 }, { "epoch": 94.64081632653061, "grad_norm": 26.136425018310547, "learning_rate": 2.7081632653061226e-06, "loss": 0.37, "step": 23187 }, { "epoch": 94.64489795918368, "grad_norm": 34.5958251953125, "learning_rate": 2.706122448979592e-06, "loss": 0.7423, "step": 23188 }, { "epoch": 94.64897959183673, "grad_norm": 44.36152267456055, "learning_rate": 2.7040816326530615e-06, "loss": 0.4112, "step": 23189 }, { "epoch": 94.65306122448979, "grad_norm": 32.2003059387207, "learning_rate": 2.7020408163265307e-06, "loss": 0.5869, "step": 23190 }, { "epoch": 94.65714285714286, "grad_norm": 10.016663551330566, "learning_rate": 2.7e-06, "loss": 0.0747, "step": 23191 }, { "epoch": 94.66122448979591, "grad_norm": 36.171146392822266, "learning_rate": 2.6979591836734692e-06, "loss": 0.432, "step": 23192 }, { "epoch": 94.66530612244898, "grad_norm": 24.051666259765625, "learning_rate": 2.695918367346939e-06, "loss": 1.9674, "step": 23193 }, { "epoch": 94.66938775510204, "grad_norm": 33.18512725830078, "learning_rate": 2.693877551020408e-06, "loss": 1.1048, "step": 23194 }, { "epoch": 94.6734693877551, "grad_norm": 25.794082641601562, "learning_rate": 2.691836734693878e-06, "loss": 1.8312, "step": 23195 }, { "epoch": 94.67755102040816, "grad_norm": 28.133028030395508, "learning_rate": 2.689795918367347e-06, "loss": 0.3158, "step": 23196 }, { "epoch": 94.68163265306123, "grad_norm": 27.48488998413086, "learning_rate": 2.6877551020408167e-06, "loss": 1.0173, "step": 23197 }, { "epoch": 94.68571428571428, "grad_norm": 47.444801330566406, "learning_rate": 2.685714285714286e-06, "loss": 0.6487, "step": 23198 }, { "epoch": 94.68979591836735, "grad_norm": 21.522335052490234, "learning_rate": 2.683673469387755e-06, "loss": 0.3313, "step": 23199 }, { "epoch": 94.6938775510204, "grad_norm": 36.48334884643555, "learning_rate": 2.681632653061225e-06, "loss": 0.8911, "step": 23200 }, { "epoch": 94.69795918367348, "grad_norm": 6.326722145080566, "learning_rate": 2.679591836734694e-06, "loss": 0.0699, "step": 23201 }, { "epoch": 94.70204081632653, "grad_norm": 38.186702728271484, "learning_rate": 2.6775510204081634e-06, "loss": 1.1418, "step": 23202 }, { "epoch": 94.70612244897958, "grad_norm": 28.531526565551758, "learning_rate": 2.6755102040816326e-06, "loss": 1.2019, "step": 23203 }, { "epoch": 94.71020408163265, "grad_norm": 29.36676597595215, "learning_rate": 2.6734693877551023e-06, "loss": 0.3843, "step": 23204 }, { "epoch": 94.71428571428571, "grad_norm": 27.864437103271484, "learning_rate": 2.6714285714285715e-06, "loss": 0.1401, "step": 23205 }, { "epoch": 94.71836734693878, "grad_norm": 21.53768539428711, "learning_rate": 2.6693877551020407e-06, "loss": 1.8628, "step": 23206 }, { "epoch": 94.72244897959183, "grad_norm": 28.17512321472168, "learning_rate": 2.66734693877551e-06, "loss": 1.1437, "step": 23207 }, { "epoch": 94.7265306122449, "grad_norm": 27.616657257080078, "learning_rate": 2.6653061224489797e-06, "loss": 1.3827, "step": 23208 }, { "epoch": 94.73061224489796, "grad_norm": 36.19099807739258, "learning_rate": 2.6632653061224493e-06, "loss": 0.4788, "step": 23209 }, { "epoch": 94.73469387755102, "grad_norm": 34.5426025390625, "learning_rate": 2.6612244897959186e-06, "loss": 0.7049, "step": 23210 }, { "epoch": 94.73877551020408, "grad_norm": 30.621292114257812, "learning_rate": 2.659183673469388e-06, "loss": 1.2637, "step": 23211 }, { "epoch": 94.74285714285715, "grad_norm": 27.1766414642334, "learning_rate": 2.6571428571428575e-06, "loss": 0.6105, "step": 23212 }, { "epoch": 94.7469387755102, "grad_norm": 27.055076599121094, "learning_rate": 2.6551020408163267e-06, "loss": 1.218, "step": 23213 }, { "epoch": 94.75102040816327, "grad_norm": 25.218563079833984, "learning_rate": 2.653061224489796e-06, "loss": 1.3173, "step": 23214 }, { "epoch": 94.75510204081633, "grad_norm": 23.013280868530273, "learning_rate": 2.6510204081632656e-06, "loss": 1.8756, "step": 23215 }, { "epoch": 94.7591836734694, "grad_norm": 29.411022186279297, "learning_rate": 2.648979591836735e-06, "loss": 0.2178, "step": 23216 }, { "epoch": 94.76326530612245, "grad_norm": 36.011966705322266, "learning_rate": 2.646938775510204e-06, "loss": 0.7886, "step": 23217 }, { "epoch": 94.7673469387755, "grad_norm": 41.538307189941406, "learning_rate": 2.6448979591836734e-06, "loss": 0.5782, "step": 23218 }, { "epoch": 94.77142857142857, "grad_norm": 23.5874080657959, "learning_rate": 2.642857142857143e-06, "loss": 0.2422, "step": 23219 }, { "epoch": 94.77551020408163, "grad_norm": 35.230709075927734, "learning_rate": 2.6408163265306123e-06, "loss": 0.5177, "step": 23220 }, { "epoch": 94.7795918367347, "grad_norm": 30.37588882446289, "learning_rate": 2.6387755102040815e-06, "loss": 0.4562, "step": 23221 }, { "epoch": 94.78367346938775, "grad_norm": 22.77313995361328, "learning_rate": 2.636734693877551e-06, "loss": 0.2105, "step": 23222 }, { "epoch": 94.78775510204082, "grad_norm": 15.555718421936035, "learning_rate": 2.6346938775510204e-06, "loss": 0.1611, "step": 23223 }, { "epoch": 94.79183673469387, "grad_norm": 28.612133026123047, "learning_rate": 2.63265306122449e-06, "loss": 1.1218, "step": 23224 }, { "epoch": 94.79591836734694, "grad_norm": 32.11656951904297, "learning_rate": 2.6306122448979593e-06, "loss": 1.2284, "step": 23225 }, { "epoch": 94.8, "grad_norm": 34.52381134033203, "learning_rate": 2.628571428571429e-06, "loss": 0.9371, "step": 23226 }, { "epoch": 94.80408163265307, "grad_norm": 30.349754333496094, "learning_rate": 2.6265306122448982e-06, "loss": 0.579, "step": 23227 }, { "epoch": 94.80816326530612, "grad_norm": 25.601598739624023, "learning_rate": 2.6244897959183675e-06, "loss": 0.2396, "step": 23228 }, { "epoch": 94.81224489795919, "grad_norm": 32.0107421875, "learning_rate": 2.6224489795918367e-06, "loss": 1.0773, "step": 23229 }, { "epoch": 94.81632653061224, "grad_norm": 8.020620346069336, "learning_rate": 2.6204081632653064e-06, "loss": 0.0646, "step": 23230 }, { "epoch": 94.82040816326531, "grad_norm": 40.6572265625, "learning_rate": 2.6183673469387756e-06, "loss": 0.9792, "step": 23231 }, { "epoch": 94.82448979591837, "grad_norm": 19.58705711364746, "learning_rate": 2.616326530612245e-06, "loss": 1.9289, "step": 23232 }, { "epoch": 94.82857142857142, "grad_norm": 33.00175094604492, "learning_rate": 2.614285714285714e-06, "loss": 1.299, "step": 23233 }, { "epoch": 94.83265306122449, "grad_norm": 24.85205078125, "learning_rate": 2.612244897959184e-06, "loss": 1.2805, "step": 23234 }, { "epoch": 94.83673469387755, "grad_norm": 1.3455978631973267, "learning_rate": 2.610204081632653e-06, "loss": 0.0253, "step": 23235 }, { "epoch": 94.84081632653061, "grad_norm": 25.022104263305664, "learning_rate": 2.6081632653061223e-06, "loss": 0.4917, "step": 23236 }, { "epoch": 94.84489795918367, "grad_norm": 8.08792495727539, "learning_rate": 2.606122448979592e-06, "loss": 0.0694, "step": 23237 }, { "epoch": 94.84897959183674, "grad_norm": 28.519386291503906, "learning_rate": 2.6040816326530616e-06, "loss": 1.557, "step": 23238 }, { "epoch": 94.85306122448979, "grad_norm": 32.9332160949707, "learning_rate": 2.602040816326531e-06, "loss": 0.6706, "step": 23239 }, { "epoch": 94.85714285714286, "grad_norm": 13.209905624389648, "learning_rate": 2.6e-06, "loss": 0.09, "step": 23240 }, { "epoch": 94.86122448979592, "grad_norm": 33.28447341918945, "learning_rate": 2.5979591836734698e-06, "loss": 0.4011, "step": 23241 }, { "epoch": 94.86530612244898, "grad_norm": 19.933685302734375, "learning_rate": 2.595918367346939e-06, "loss": 0.1673, "step": 23242 }, { "epoch": 94.86938775510204, "grad_norm": 33.23905563354492, "learning_rate": 2.5938775510204082e-06, "loss": 0.4741, "step": 23243 }, { "epoch": 94.87346938775511, "grad_norm": 12.832666397094727, "learning_rate": 2.5918367346938775e-06, "loss": 0.0959, "step": 23244 }, { "epoch": 94.87755102040816, "grad_norm": 17.363401412963867, "learning_rate": 2.589795918367347e-06, "loss": 0.2126, "step": 23245 }, { "epoch": 94.88163265306123, "grad_norm": 25.70039176940918, "learning_rate": 2.5877551020408164e-06, "loss": 0.262, "step": 23246 }, { "epoch": 94.88571428571429, "grad_norm": 29.331735610961914, "learning_rate": 2.5857142857142856e-06, "loss": 0.8873, "step": 23247 }, { "epoch": 94.88979591836734, "grad_norm": 44.13553237915039, "learning_rate": 2.5836734693877553e-06, "loss": 0.7358, "step": 23248 }, { "epoch": 94.89387755102041, "grad_norm": 33.80923080444336, "learning_rate": 2.5816326530612246e-06, "loss": 0.8951, "step": 23249 }, { "epoch": 94.89795918367346, "grad_norm": 26.755964279174805, "learning_rate": 2.579591836734694e-06, "loss": 1.4694, "step": 23250 }, { "epoch": 94.90204081632653, "grad_norm": 32.834808349609375, "learning_rate": 2.5775510204081635e-06, "loss": 0.5977, "step": 23251 }, { "epoch": 94.90612244897959, "grad_norm": 24.410741806030273, "learning_rate": 2.575510204081633e-06, "loss": 0.6084, "step": 23252 }, { "epoch": 94.91020408163266, "grad_norm": 20.16120719909668, "learning_rate": 2.5734693877551024e-06, "loss": 0.1974, "step": 23253 }, { "epoch": 94.91428571428571, "grad_norm": 21.508033752441406, "learning_rate": 2.5714285714285716e-06, "loss": 1.4905, "step": 23254 }, { "epoch": 94.91836734693878, "grad_norm": 37.53347396850586, "learning_rate": 2.569387755102041e-06, "loss": 0.5569, "step": 23255 }, { "epoch": 94.92244897959183, "grad_norm": 30.718048095703125, "learning_rate": 2.5673469387755105e-06, "loss": 0.3035, "step": 23256 }, { "epoch": 94.9265306122449, "grad_norm": 31.871950149536133, "learning_rate": 2.5653061224489798e-06, "loss": 1.2253, "step": 23257 }, { "epoch": 94.93061224489796, "grad_norm": 27.73357391357422, "learning_rate": 2.563265306122449e-06, "loss": 0.2643, "step": 23258 }, { "epoch": 94.93469387755103, "grad_norm": 30.03602409362793, "learning_rate": 2.5612244897959183e-06, "loss": 1.1926, "step": 23259 }, { "epoch": 94.93877551020408, "grad_norm": 8.493170738220215, "learning_rate": 2.559183673469388e-06, "loss": 2.9492, "step": 23260 }, { "epoch": 94.94285714285714, "grad_norm": 22.117517471313477, "learning_rate": 2.557142857142857e-06, "loss": 0.3182, "step": 23261 }, { "epoch": 94.9469387755102, "grad_norm": 27.302213668823242, "learning_rate": 2.5551020408163264e-06, "loss": 0.3085, "step": 23262 }, { "epoch": 94.95102040816326, "grad_norm": 32.15421676635742, "learning_rate": 2.553061224489796e-06, "loss": 0.4844, "step": 23263 }, { "epoch": 94.95510204081633, "grad_norm": 23.88387680053711, "learning_rate": 2.5510204081632653e-06, "loss": 1.3323, "step": 23264 }, { "epoch": 94.95918367346938, "grad_norm": 31.014875411987305, "learning_rate": 2.548979591836735e-06, "loss": 0.8337, "step": 23265 }, { "epoch": 94.96326530612245, "grad_norm": 4.395205497741699, "learning_rate": 2.5469387755102042e-06, "loss": 0.0441, "step": 23266 }, { "epoch": 94.9673469387755, "grad_norm": 39.486175537109375, "learning_rate": 2.544897959183674e-06, "loss": 0.5028, "step": 23267 }, { "epoch": 94.97142857142858, "grad_norm": 27.87151527404785, "learning_rate": 2.542857142857143e-06, "loss": 1.3871, "step": 23268 }, { "epoch": 94.97551020408163, "grad_norm": 31.308425903320312, "learning_rate": 2.5408163265306124e-06, "loss": 1.0146, "step": 23269 }, { "epoch": 94.9795918367347, "grad_norm": 27.227779388427734, "learning_rate": 2.5387755102040816e-06, "loss": 0.2466, "step": 23270 }, { "epoch": 94.98367346938775, "grad_norm": 26.314208984375, "learning_rate": 2.5367346938775513e-06, "loss": 0.2702, "step": 23271 }, { "epoch": 94.98775510204082, "grad_norm": 29.46263313293457, "learning_rate": 2.5346938775510205e-06, "loss": 1.0898, "step": 23272 }, { "epoch": 94.99183673469388, "grad_norm": 24.359960556030273, "learning_rate": 2.5326530612244898e-06, "loss": 0.3007, "step": 23273 }, { "epoch": 94.99591836734695, "grad_norm": 28.415449142456055, "learning_rate": 2.5306122448979594e-06, "loss": 0.5648, "step": 23274 }, { "epoch": 95.0, "grad_norm": 29.185653686523438, "learning_rate": 2.5285714285714287e-06, "loss": 0.9851, "step": 23275 }, { "epoch": 95.00408163265305, "grad_norm": 31.908781051635742, "learning_rate": 2.526530612244898e-06, "loss": 0.4285, "step": 23276 }, { "epoch": 95.00816326530612, "grad_norm": 35.30718994140625, "learning_rate": 2.524489795918367e-06, "loss": 0.8148, "step": 23277 }, { "epoch": 95.01224489795918, "grad_norm": 28.27425193786621, "learning_rate": 2.522448979591837e-06, "loss": 0.2829, "step": 23278 }, { "epoch": 95.01632653061225, "grad_norm": 28.92582893371582, "learning_rate": 2.520408163265306e-06, "loss": 0.5473, "step": 23279 }, { "epoch": 95.0204081632653, "grad_norm": 29.53354835510254, "learning_rate": 2.5183673469387757e-06, "loss": 0.9929, "step": 23280 }, { "epoch": 95.02448979591837, "grad_norm": 35.49972915649414, "learning_rate": 2.516326530612245e-06, "loss": 1.2304, "step": 23281 }, { "epoch": 95.02857142857142, "grad_norm": 28.005014419555664, "learning_rate": 2.5142857142857147e-06, "loss": 1.2453, "step": 23282 }, { "epoch": 95.0326530612245, "grad_norm": 36.60420608520508, "learning_rate": 2.512244897959184e-06, "loss": 0.6009, "step": 23283 }, { "epoch": 95.03673469387755, "grad_norm": 25.896400451660156, "learning_rate": 2.510204081632653e-06, "loss": 1.0638, "step": 23284 }, { "epoch": 95.04081632653062, "grad_norm": 35.984554290771484, "learning_rate": 2.508163265306123e-06, "loss": 0.7313, "step": 23285 }, { "epoch": 95.04489795918367, "grad_norm": 30.090576171875, "learning_rate": 2.506122448979592e-06, "loss": 0.9213, "step": 23286 }, { "epoch": 95.04897959183674, "grad_norm": 26.408145904541016, "learning_rate": 2.5040816326530613e-06, "loss": 1.4512, "step": 23287 }, { "epoch": 95.0530612244898, "grad_norm": 17.373117446899414, "learning_rate": 2.5020408163265305e-06, "loss": 0.1434, "step": 23288 }, { "epoch": 95.05714285714286, "grad_norm": 26.37169075012207, "learning_rate": 2.5e-06, "loss": 0.2193, "step": 23289 }, { "epoch": 95.06122448979592, "grad_norm": 29.114824295043945, "learning_rate": 2.4979591836734694e-06, "loss": 0.6445, "step": 23290 }, { "epoch": 95.06530612244897, "grad_norm": 25.534910202026367, "learning_rate": 2.4959183673469387e-06, "loss": 0.9531, "step": 23291 }, { "epoch": 95.06938775510204, "grad_norm": 35.993263244628906, "learning_rate": 2.493877551020408e-06, "loss": 0.7874, "step": 23292 }, { "epoch": 95.0734693877551, "grad_norm": 26.301937103271484, "learning_rate": 2.4918367346938776e-06, "loss": 0.2852, "step": 23293 }, { "epoch": 95.07755102040817, "grad_norm": 6.935793876647949, "learning_rate": 2.4897959183673473e-06, "loss": 0.0595, "step": 23294 }, { "epoch": 95.08163265306122, "grad_norm": 34.1974983215332, "learning_rate": 2.4877551020408165e-06, "loss": 0.7241, "step": 23295 }, { "epoch": 95.08571428571429, "grad_norm": 20.549367904663086, "learning_rate": 2.4857142857142858e-06, "loss": 0.1864, "step": 23296 }, { "epoch": 95.08979591836734, "grad_norm": 40.05323791503906, "learning_rate": 2.4836734693877554e-06, "loss": 0.9854, "step": 23297 }, { "epoch": 95.09387755102041, "grad_norm": 7.870858669281006, "learning_rate": 2.4816326530612247e-06, "loss": 0.0734, "step": 23298 }, { "epoch": 95.09795918367347, "grad_norm": 29.311134338378906, "learning_rate": 2.479591836734694e-06, "loss": 0.7704, "step": 23299 }, { "epoch": 95.10204081632654, "grad_norm": 27.3523006439209, "learning_rate": 2.4775510204081636e-06, "loss": 1.0965, "step": 23300 }, { "epoch": 95.10612244897959, "grad_norm": 28.684307098388672, "learning_rate": 2.475510204081633e-06, "loss": 0.3332, "step": 23301 }, { "epoch": 95.11020408163266, "grad_norm": 11.226624488830566, "learning_rate": 2.473469387755102e-06, "loss": 0.0924, "step": 23302 }, { "epoch": 95.11428571428571, "grad_norm": 29.861391067504883, "learning_rate": 2.4714285714285713e-06, "loss": 0.797, "step": 23303 }, { "epoch": 95.11836734693877, "grad_norm": 26.83383560180664, "learning_rate": 2.469387755102041e-06, "loss": 0.5325, "step": 23304 }, { "epoch": 95.12244897959184, "grad_norm": 27.79264259338379, "learning_rate": 2.46734693877551e-06, "loss": 1.2918, "step": 23305 }, { "epoch": 95.12653061224489, "grad_norm": 29.253116607666016, "learning_rate": 2.4653061224489795e-06, "loss": 1.0442, "step": 23306 }, { "epoch": 95.13061224489796, "grad_norm": 23.266098022460938, "learning_rate": 2.463265306122449e-06, "loss": 1.9526, "step": 23307 }, { "epoch": 95.13469387755102, "grad_norm": 27.81352424621582, "learning_rate": 2.4612244897959184e-06, "loss": 1.2838, "step": 23308 }, { "epoch": 95.13877551020408, "grad_norm": 32.680702209472656, "learning_rate": 2.459183673469388e-06, "loss": 1.0226, "step": 23309 }, { "epoch": 95.14285714285714, "grad_norm": 27.55391502380371, "learning_rate": 2.4571428571428573e-06, "loss": 0.4266, "step": 23310 }, { "epoch": 95.14693877551021, "grad_norm": 64.74987030029297, "learning_rate": 2.455102040816327e-06, "loss": 0.5767, "step": 23311 }, { "epoch": 95.15102040816326, "grad_norm": 18.422000885009766, "learning_rate": 2.453061224489796e-06, "loss": 1.8788, "step": 23312 }, { "epoch": 95.15510204081633, "grad_norm": 31.243318557739258, "learning_rate": 2.4510204081632654e-06, "loss": 0.8265, "step": 23313 }, { "epoch": 95.15918367346939, "grad_norm": 22.147796630859375, "learning_rate": 2.4489795918367347e-06, "loss": 1.2398, "step": 23314 }, { "epoch": 95.16326530612245, "grad_norm": 27.776622772216797, "learning_rate": 2.4469387755102043e-06, "loss": 0.2722, "step": 23315 }, { "epoch": 95.16734693877551, "grad_norm": 32.6453971862793, "learning_rate": 2.4448979591836736e-06, "loss": 0.9291, "step": 23316 }, { "epoch": 95.17142857142858, "grad_norm": 31.82051658630371, "learning_rate": 2.442857142857143e-06, "loss": 0.573, "step": 23317 }, { "epoch": 95.17551020408163, "grad_norm": 16.722618103027344, "learning_rate": 2.440816326530612e-06, "loss": 2.1508, "step": 23318 }, { "epoch": 95.17959183673469, "grad_norm": 31.598445892333984, "learning_rate": 2.4387755102040817e-06, "loss": 1.311, "step": 23319 }, { "epoch": 95.18367346938776, "grad_norm": 31.306644439697266, "learning_rate": 2.436734693877551e-06, "loss": 0.4328, "step": 23320 }, { "epoch": 95.18775510204081, "grad_norm": 28.028850555419922, "learning_rate": 2.4346938775510206e-06, "loss": 0.3757, "step": 23321 }, { "epoch": 95.19183673469388, "grad_norm": 25.090429306030273, "learning_rate": 2.43265306122449e-06, "loss": 0.2068, "step": 23322 }, { "epoch": 95.19591836734693, "grad_norm": 26.292076110839844, "learning_rate": 2.4306122448979596e-06, "loss": 0.2517, "step": 23323 }, { "epoch": 95.2, "grad_norm": 35.382476806640625, "learning_rate": 2.428571428571429e-06, "loss": 0.8352, "step": 23324 }, { "epoch": 95.20408163265306, "grad_norm": 27.91771125793457, "learning_rate": 2.426530612244898e-06, "loss": 1.4566, "step": 23325 }, { "epoch": 95.20816326530613, "grad_norm": 27.2406063079834, "learning_rate": 2.4244897959183677e-06, "loss": 0.3022, "step": 23326 }, { "epoch": 95.21224489795918, "grad_norm": 29.787952423095703, "learning_rate": 2.422448979591837e-06, "loss": 1.2773, "step": 23327 }, { "epoch": 95.21632653061225, "grad_norm": 30.340856552124023, "learning_rate": 2.420408163265306e-06, "loss": 0.3006, "step": 23328 }, { "epoch": 95.2204081632653, "grad_norm": 35.45641326904297, "learning_rate": 2.4183673469387754e-06, "loss": 0.9586, "step": 23329 }, { "epoch": 95.22448979591837, "grad_norm": 30.84255599975586, "learning_rate": 2.416326530612245e-06, "loss": 0.3308, "step": 23330 }, { "epoch": 95.22857142857143, "grad_norm": 29.6276912689209, "learning_rate": 2.4142857142857143e-06, "loss": 1.3085, "step": 23331 }, { "epoch": 95.2326530612245, "grad_norm": 22.462215423583984, "learning_rate": 2.4122448979591836e-06, "loss": 0.1725, "step": 23332 }, { "epoch": 95.23673469387755, "grad_norm": 24.05550193786621, "learning_rate": 2.4102040816326533e-06, "loss": 1.8098, "step": 23333 }, { "epoch": 95.2408163265306, "grad_norm": 33.530052185058594, "learning_rate": 2.4081632653061225e-06, "loss": 0.7359, "step": 23334 }, { "epoch": 95.24489795918367, "grad_norm": 24.772205352783203, "learning_rate": 2.4061224489795917e-06, "loss": 1.7688, "step": 23335 }, { "epoch": 95.24897959183673, "grad_norm": 28.354887008666992, "learning_rate": 2.4040816326530614e-06, "loss": 0.3404, "step": 23336 }, { "epoch": 95.2530612244898, "grad_norm": 31.27756118774414, "learning_rate": 2.402040816326531e-06, "loss": 1.4238, "step": 23337 }, { "epoch": 95.25714285714285, "grad_norm": 27.612682342529297, "learning_rate": 2.4000000000000003e-06, "loss": 0.2782, "step": 23338 }, { "epoch": 95.26122448979592, "grad_norm": 20.57991600036621, "learning_rate": 2.3979591836734696e-06, "loss": 1.3518, "step": 23339 }, { "epoch": 95.26530612244898, "grad_norm": 32.1195068359375, "learning_rate": 2.395918367346939e-06, "loss": 0.2858, "step": 23340 }, { "epoch": 95.26938775510204, "grad_norm": 20.51203155517578, "learning_rate": 2.3938775510204085e-06, "loss": 0.1825, "step": 23341 }, { "epoch": 95.2734693877551, "grad_norm": 26.091108322143555, "learning_rate": 2.3918367346938777e-06, "loss": 1.428, "step": 23342 }, { "epoch": 95.27755102040817, "grad_norm": 36.30695343017578, "learning_rate": 2.389795918367347e-06, "loss": 0.2435, "step": 23343 }, { "epoch": 95.28163265306122, "grad_norm": 41.425567626953125, "learning_rate": 2.387755102040816e-06, "loss": 0.7326, "step": 23344 }, { "epoch": 95.28571428571429, "grad_norm": 30.385318756103516, "learning_rate": 2.385714285714286e-06, "loss": 0.5697, "step": 23345 }, { "epoch": 95.28979591836735, "grad_norm": 27.58322525024414, "learning_rate": 2.383673469387755e-06, "loss": 1.719, "step": 23346 }, { "epoch": 95.29387755102042, "grad_norm": 34.67782974243164, "learning_rate": 2.3816326530612243e-06, "loss": 0.8742, "step": 23347 }, { "epoch": 95.29795918367347, "grad_norm": 24.300785064697266, "learning_rate": 2.379591836734694e-06, "loss": 1.3016, "step": 23348 }, { "epoch": 95.30204081632652, "grad_norm": 25.061073303222656, "learning_rate": 2.3775510204081633e-06, "loss": 1.7889, "step": 23349 }, { "epoch": 95.3061224489796, "grad_norm": 23.362112045288086, "learning_rate": 2.375510204081633e-06, "loss": 1.8322, "step": 23350 }, { "epoch": 95.31020408163265, "grad_norm": 35.42208480834961, "learning_rate": 2.373469387755102e-06, "loss": 0.7049, "step": 23351 }, { "epoch": 95.31428571428572, "grad_norm": 8.567094802856445, "learning_rate": 2.371428571428572e-06, "loss": 0.1106, "step": 23352 }, { "epoch": 95.31836734693877, "grad_norm": 34.69744873046875, "learning_rate": 2.369387755102041e-06, "loss": 0.5015, "step": 23353 }, { "epoch": 95.32244897959184, "grad_norm": 32.14778137207031, "learning_rate": 2.3673469387755103e-06, "loss": 0.2657, "step": 23354 }, { "epoch": 95.3265306122449, "grad_norm": 30.49242401123047, "learning_rate": 2.3653061224489796e-06, "loss": 0.9351, "step": 23355 }, { "epoch": 95.33061224489796, "grad_norm": 27.4268741607666, "learning_rate": 2.3632653061224492e-06, "loss": 0.5779, "step": 23356 }, { "epoch": 95.33469387755102, "grad_norm": 19.343374252319336, "learning_rate": 2.3612244897959185e-06, "loss": 0.1927, "step": 23357 }, { "epoch": 95.33877551020409, "grad_norm": 24.48826026916504, "learning_rate": 2.3591836734693877e-06, "loss": 0.3614, "step": 23358 }, { "epoch": 95.34285714285714, "grad_norm": 26.78609275817871, "learning_rate": 2.3571428571428574e-06, "loss": 0.2861, "step": 23359 }, { "epoch": 95.34693877551021, "grad_norm": 25.90130043029785, "learning_rate": 2.3551020408163266e-06, "loss": 1.7455, "step": 23360 }, { "epoch": 95.35102040816327, "grad_norm": 23.231985092163086, "learning_rate": 2.353061224489796e-06, "loss": 0.3497, "step": 23361 }, { "epoch": 95.35510204081632, "grad_norm": 32.073524475097656, "learning_rate": 2.351020408163265e-06, "loss": 0.8072, "step": 23362 }, { "epoch": 95.35918367346939, "grad_norm": 25.010408401489258, "learning_rate": 2.3489795918367348e-06, "loss": 1.3487, "step": 23363 }, { "epoch": 95.36326530612244, "grad_norm": 32.16973876953125, "learning_rate": 2.346938775510204e-06, "loss": 0.4406, "step": 23364 }, { "epoch": 95.36734693877551, "grad_norm": 41.13865280151367, "learning_rate": 2.3448979591836737e-06, "loss": 0.7225, "step": 23365 }, { "epoch": 95.37142857142857, "grad_norm": 33.56492614746094, "learning_rate": 2.342857142857143e-06, "loss": 1.0067, "step": 23366 }, { "epoch": 95.37551020408164, "grad_norm": 39.76310348510742, "learning_rate": 2.3408163265306126e-06, "loss": 0.4454, "step": 23367 }, { "epoch": 95.37959183673469, "grad_norm": 26.98660659790039, "learning_rate": 2.338775510204082e-06, "loss": 0.2147, "step": 23368 }, { "epoch": 95.38367346938776, "grad_norm": 31.156238555908203, "learning_rate": 2.336734693877551e-06, "loss": 0.7905, "step": 23369 }, { "epoch": 95.38775510204081, "grad_norm": 30.024547576904297, "learning_rate": 2.3346938775510208e-06, "loss": 1.5158, "step": 23370 }, { "epoch": 95.39183673469388, "grad_norm": 31.3876953125, "learning_rate": 2.33265306122449e-06, "loss": 1.1993, "step": 23371 }, { "epoch": 95.39591836734694, "grad_norm": 28.921876907348633, "learning_rate": 2.3306122448979592e-06, "loss": 0.6127, "step": 23372 }, { "epoch": 95.4, "grad_norm": 31.727073669433594, "learning_rate": 2.3285714285714285e-06, "loss": 0.9487, "step": 23373 }, { "epoch": 95.40408163265306, "grad_norm": 22.31960678100586, "learning_rate": 2.326530612244898e-06, "loss": 1.4045, "step": 23374 }, { "epoch": 95.40816326530613, "grad_norm": 43.97077560424805, "learning_rate": 2.3244897959183674e-06, "loss": 0.7545, "step": 23375 }, { "epoch": 95.41224489795918, "grad_norm": 23.309642791748047, "learning_rate": 2.3224489795918366e-06, "loss": 1.3039, "step": 23376 }, { "epoch": 95.41632653061224, "grad_norm": 27.293724060058594, "learning_rate": 2.3204081632653063e-06, "loss": 1.5154, "step": 23377 }, { "epoch": 95.42040816326531, "grad_norm": 38.436859130859375, "learning_rate": 2.3183673469387755e-06, "loss": 0.6957, "step": 23378 }, { "epoch": 95.42448979591836, "grad_norm": 33.975852966308594, "learning_rate": 2.316326530612245e-06, "loss": 0.4614, "step": 23379 }, { "epoch": 95.42857142857143, "grad_norm": 25.618736267089844, "learning_rate": 2.3142857142857145e-06, "loss": 0.5684, "step": 23380 }, { "epoch": 95.43265306122449, "grad_norm": 27.3006534576416, "learning_rate": 2.3122448979591837e-06, "loss": 0.2732, "step": 23381 }, { "epoch": 95.43673469387755, "grad_norm": 8.806422233581543, "learning_rate": 2.3102040816326534e-06, "loss": 0.0659, "step": 23382 }, { "epoch": 95.44081632653061, "grad_norm": 43.87021255493164, "learning_rate": 2.3081632653061226e-06, "loss": 1.3008, "step": 23383 }, { "epoch": 95.44489795918368, "grad_norm": 30.008283615112305, "learning_rate": 2.306122448979592e-06, "loss": 0.4611, "step": 23384 }, { "epoch": 95.44897959183673, "grad_norm": 30.01980209350586, "learning_rate": 2.3040816326530615e-06, "loss": 1.0633, "step": 23385 }, { "epoch": 95.4530612244898, "grad_norm": 32.812347412109375, "learning_rate": 2.3020408163265308e-06, "loss": 1.1357, "step": 23386 }, { "epoch": 95.45714285714286, "grad_norm": 26.994224548339844, "learning_rate": 2.3e-06, "loss": 0.4177, "step": 23387 }, { "epoch": 95.46122448979592, "grad_norm": 30.4890193939209, "learning_rate": 2.2979591836734692e-06, "loss": 1.0337, "step": 23388 }, { "epoch": 95.46530612244898, "grad_norm": 39.87975311279297, "learning_rate": 2.295918367346939e-06, "loss": 1.1289, "step": 23389 }, { "epoch": 95.46938775510205, "grad_norm": 32.60346221923828, "learning_rate": 2.293877551020408e-06, "loss": 1.0633, "step": 23390 }, { "epoch": 95.4734693877551, "grad_norm": 31.19955062866211, "learning_rate": 2.2918367346938774e-06, "loss": 1.2221, "step": 23391 }, { "epoch": 95.47755102040816, "grad_norm": 28.772581100463867, "learning_rate": 2.289795918367347e-06, "loss": 0.4321, "step": 23392 }, { "epoch": 95.48163265306123, "grad_norm": 24.59521484375, "learning_rate": 2.2877551020408167e-06, "loss": 1.8038, "step": 23393 }, { "epoch": 95.48571428571428, "grad_norm": 29.916133880615234, "learning_rate": 2.285714285714286e-06, "loss": 0.5462, "step": 23394 }, { "epoch": 95.48979591836735, "grad_norm": 29.075231552124023, "learning_rate": 2.2836734693877552e-06, "loss": 0.6571, "step": 23395 }, { "epoch": 95.4938775510204, "grad_norm": 27.356189727783203, "learning_rate": 2.281632653061225e-06, "loss": 0.246, "step": 23396 }, { "epoch": 95.49795918367347, "grad_norm": 34.38303756713867, "learning_rate": 2.279591836734694e-06, "loss": 0.6845, "step": 23397 }, { "epoch": 95.50204081632653, "grad_norm": 30.194002151489258, "learning_rate": 2.2775510204081634e-06, "loss": 0.454, "step": 23398 }, { "epoch": 95.5061224489796, "grad_norm": 44.33366394042969, "learning_rate": 2.2755102040816326e-06, "loss": 0.9689, "step": 23399 }, { "epoch": 95.51020408163265, "grad_norm": 24.833017349243164, "learning_rate": 2.2734693877551023e-06, "loss": 0.2612, "step": 23400 }, { "epoch": 95.51428571428572, "grad_norm": 21.454713821411133, "learning_rate": 2.2714285714285715e-06, "loss": 1.7297, "step": 23401 }, { "epoch": 95.51836734693877, "grad_norm": 37.99565505981445, "learning_rate": 2.2693877551020408e-06, "loss": 0.8204, "step": 23402 }, { "epoch": 95.52244897959184, "grad_norm": 25.929298400878906, "learning_rate": 2.26734693877551e-06, "loss": 1.5093, "step": 23403 }, { "epoch": 95.5265306122449, "grad_norm": 25.02754783630371, "learning_rate": 2.2653061224489797e-06, "loss": 1.3936, "step": 23404 }, { "epoch": 95.53061224489795, "grad_norm": 23.39006996154785, "learning_rate": 2.263265306122449e-06, "loss": 0.5938, "step": 23405 }, { "epoch": 95.53469387755102, "grad_norm": 36.400753021240234, "learning_rate": 2.2612244897959186e-06, "loss": 0.6624, "step": 23406 }, { "epoch": 95.53877551020408, "grad_norm": 26.99115753173828, "learning_rate": 2.259183673469388e-06, "loss": 0.2708, "step": 23407 }, { "epoch": 95.54285714285714, "grad_norm": 1.2929946184158325, "learning_rate": 2.2571428571428575e-06, "loss": 0.0216, "step": 23408 }, { "epoch": 95.5469387755102, "grad_norm": 24.35270118713379, "learning_rate": 2.2551020408163267e-06, "loss": 0.3255, "step": 23409 }, { "epoch": 95.55102040816327, "grad_norm": 27.272790908813477, "learning_rate": 2.253061224489796e-06, "loss": 0.4293, "step": 23410 }, { "epoch": 95.55510204081632, "grad_norm": 26.141504287719727, "learning_rate": 2.2510204081632656e-06, "loss": 1.1702, "step": 23411 }, { "epoch": 95.55918367346939, "grad_norm": 27.556108474731445, "learning_rate": 2.248979591836735e-06, "loss": 1.3461, "step": 23412 }, { "epoch": 95.56326530612245, "grad_norm": 28.098508834838867, "learning_rate": 2.246938775510204e-06, "loss": 1.0208, "step": 23413 }, { "epoch": 95.56734693877551, "grad_norm": 28.4710636138916, "learning_rate": 2.2448979591836734e-06, "loss": 1.0869, "step": 23414 }, { "epoch": 95.57142857142857, "grad_norm": 31.933332443237305, "learning_rate": 2.242857142857143e-06, "loss": 1.2053, "step": 23415 }, { "epoch": 95.57551020408164, "grad_norm": 26.661685943603516, "learning_rate": 2.2408163265306123e-06, "loss": 0.2523, "step": 23416 }, { "epoch": 95.57959183673469, "grad_norm": 26.57062530517578, "learning_rate": 2.2387755102040815e-06, "loss": 1.4582, "step": 23417 }, { "epoch": 95.58367346938776, "grad_norm": 4.483138561248779, "learning_rate": 2.2367346938775508e-06, "loss": 0.0427, "step": 23418 }, { "epoch": 95.58775510204082, "grad_norm": 31.095321655273438, "learning_rate": 2.2346938775510204e-06, "loss": 0.869, "step": 23419 }, { "epoch": 95.59183673469387, "grad_norm": 35.37538146972656, "learning_rate": 2.2326530612244897e-06, "loss": 0.5516, "step": 23420 }, { "epoch": 95.59591836734694, "grad_norm": 33.81618118286133, "learning_rate": 2.2306122448979593e-06, "loss": 1.1837, "step": 23421 }, { "epoch": 95.6, "grad_norm": 19.1984920501709, "learning_rate": 2.228571428571429e-06, "loss": 2.2244, "step": 23422 }, { "epoch": 95.60408163265306, "grad_norm": 27.770606994628906, "learning_rate": 2.2265306122448983e-06, "loss": 1.1931, "step": 23423 }, { "epoch": 95.60816326530612, "grad_norm": 22.45844078063965, "learning_rate": 2.2244897959183675e-06, "loss": 1.1668, "step": 23424 }, { "epoch": 95.61224489795919, "grad_norm": 45.75031661987305, "learning_rate": 2.2224489795918367e-06, "loss": 0.6335, "step": 23425 }, { "epoch": 95.61632653061224, "grad_norm": 33.647830963134766, "learning_rate": 2.2204081632653064e-06, "loss": 0.9314, "step": 23426 }, { "epoch": 95.62040816326531, "grad_norm": 34.61021041870117, "learning_rate": 2.2183673469387757e-06, "loss": 1.2437, "step": 23427 }, { "epoch": 95.62448979591836, "grad_norm": 24.338947296142578, "learning_rate": 2.216326530612245e-06, "loss": 1.4516, "step": 23428 }, { "epoch": 95.62857142857143, "grad_norm": 10.585935592651367, "learning_rate": 2.214285714285714e-06, "loss": 0.0744, "step": 23429 }, { "epoch": 95.63265306122449, "grad_norm": 9.517684936523438, "learning_rate": 2.212244897959184e-06, "loss": 2.9494, "step": 23430 }, { "epoch": 95.63673469387756, "grad_norm": 5.965230464935303, "learning_rate": 2.210204081632653e-06, "loss": 0.0693, "step": 23431 }, { "epoch": 95.64081632653061, "grad_norm": 20.12194061279297, "learning_rate": 2.2081632653061223e-06, "loss": 0.2014, "step": 23432 }, { "epoch": 95.64489795918368, "grad_norm": 35.526649475097656, "learning_rate": 2.206122448979592e-06, "loss": 1.1096, "step": 23433 }, { "epoch": 95.64897959183673, "grad_norm": 15.807902336120605, "learning_rate": 2.204081632653061e-06, "loss": 0.0821, "step": 23434 }, { "epoch": 95.65306122448979, "grad_norm": 29.5152587890625, "learning_rate": 2.202040816326531e-06, "loss": 0.5598, "step": 23435 }, { "epoch": 95.65714285714286, "grad_norm": 23.798534393310547, "learning_rate": 2.2e-06, "loss": 0.2452, "step": 23436 }, { "epoch": 95.66122448979591, "grad_norm": 27.928926467895508, "learning_rate": 2.1979591836734698e-06, "loss": 0.313, "step": 23437 }, { "epoch": 95.66530612244898, "grad_norm": 27.153146743774414, "learning_rate": 2.195918367346939e-06, "loss": 1.338, "step": 23438 }, { "epoch": 95.66938775510204, "grad_norm": 31.550718307495117, "learning_rate": 2.1938775510204083e-06, "loss": 1.0539, "step": 23439 }, { "epoch": 95.6734693877551, "grad_norm": 27.563444137573242, "learning_rate": 2.1918367346938775e-06, "loss": 0.8871, "step": 23440 }, { "epoch": 95.67755102040816, "grad_norm": 33.959407806396484, "learning_rate": 2.189795918367347e-06, "loss": 0.6796, "step": 23441 }, { "epoch": 95.68163265306123, "grad_norm": 32.930267333984375, "learning_rate": 2.1877551020408164e-06, "loss": 0.5737, "step": 23442 }, { "epoch": 95.68571428571428, "grad_norm": 32.18071365356445, "learning_rate": 2.1857142857142857e-06, "loss": 0.7089, "step": 23443 }, { "epoch": 95.68979591836735, "grad_norm": 37.31532669067383, "learning_rate": 2.1836734693877553e-06, "loss": 1.0809, "step": 23444 }, { "epoch": 95.6938775510204, "grad_norm": 23.07710838317871, "learning_rate": 2.1816326530612246e-06, "loss": 1.3496, "step": 23445 }, { "epoch": 95.69795918367348, "grad_norm": 19.977216720581055, "learning_rate": 2.179591836734694e-06, "loss": 1.931, "step": 23446 }, { "epoch": 95.70204081632653, "grad_norm": 28.138643264770508, "learning_rate": 2.177551020408163e-06, "loss": 1.6775, "step": 23447 }, { "epoch": 95.70612244897958, "grad_norm": 29.355854034423828, "learning_rate": 2.1755102040816327e-06, "loss": 0.3674, "step": 23448 }, { "epoch": 95.71020408163265, "grad_norm": 24.09303855895996, "learning_rate": 2.1734693877551024e-06, "loss": 0.3181, "step": 23449 }, { "epoch": 95.71428571428571, "grad_norm": 26.123245239257812, "learning_rate": 2.1714285714285716e-06, "loss": 0.6257, "step": 23450 }, { "epoch": 95.71836734693878, "grad_norm": 24.911462783813477, "learning_rate": 2.169387755102041e-06, "loss": 1.562, "step": 23451 }, { "epoch": 95.72244897959183, "grad_norm": 28.2983341217041, "learning_rate": 2.1673469387755105e-06, "loss": 0.2735, "step": 23452 }, { "epoch": 95.7265306122449, "grad_norm": 29.852956771850586, "learning_rate": 2.1653061224489798e-06, "loss": 0.8952, "step": 23453 }, { "epoch": 95.73061224489796, "grad_norm": 28.175758361816406, "learning_rate": 2.163265306122449e-06, "loss": 1.2861, "step": 23454 }, { "epoch": 95.73469387755102, "grad_norm": 32.450992584228516, "learning_rate": 2.1612244897959187e-06, "loss": 0.6265, "step": 23455 }, { "epoch": 95.73877551020408, "grad_norm": 19.10245704650879, "learning_rate": 2.159183673469388e-06, "loss": 1.9807, "step": 23456 }, { "epoch": 95.74285714285715, "grad_norm": 24.927400588989258, "learning_rate": 2.157142857142857e-06, "loss": 1.8583, "step": 23457 }, { "epoch": 95.7469387755102, "grad_norm": 37.687740325927734, "learning_rate": 2.1551020408163264e-06, "loss": 0.5941, "step": 23458 }, { "epoch": 95.75102040816327, "grad_norm": 22.637670516967773, "learning_rate": 2.153061224489796e-06, "loss": 0.2323, "step": 23459 }, { "epoch": 95.75510204081633, "grad_norm": 34.435672760009766, "learning_rate": 2.1510204081632653e-06, "loss": 0.7182, "step": 23460 }, { "epoch": 95.7591836734694, "grad_norm": 29.039287567138672, "learning_rate": 2.1489795918367346e-06, "loss": 0.5783, "step": 23461 }, { "epoch": 95.76326530612245, "grad_norm": 28.629140853881836, "learning_rate": 2.1469387755102042e-06, "loss": 1.3252, "step": 23462 }, { "epoch": 95.7673469387755, "grad_norm": 38.69459533691406, "learning_rate": 2.1448979591836735e-06, "loss": 1.0791, "step": 23463 }, { "epoch": 95.77142857142857, "grad_norm": 30.746789932250977, "learning_rate": 2.142857142857143e-06, "loss": 1.0919, "step": 23464 }, { "epoch": 95.77551020408163, "grad_norm": 26.63893699645996, "learning_rate": 2.1408163265306124e-06, "loss": 1.3062, "step": 23465 }, { "epoch": 95.7795918367347, "grad_norm": 5.740665435791016, "learning_rate": 2.1387755102040816e-06, "loss": 0.064, "step": 23466 }, { "epoch": 95.78367346938775, "grad_norm": 31.981115341186523, "learning_rate": 2.1367346938775513e-06, "loss": 0.876, "step": 23467 }, { "epoch": 95.78775510204082, "grad_norm": 20.428403854370117, "learning_rate": 2.1346938775510205e-06, "loss": 1.903, "step": 23468 }, { "epoch": 95.79183673469387, "grad_norm": 31.735559463500977, "learning_rate": 2.13265306122449e-06, "loss": 0.6754, "step": 23469 }, { "epoch": 95.79591836734694, "grad_norm": 16.43824005126953, "learning_rate": 2.1306122448979595e-06, "loss": 0.1013, "step": 23470 }, { "epoch": 95.8, "grad_norm": 14.916719436645508, "learning_rate": 2.1285714285714287e-06, "loss": 0.1179, "step": 23471 }, { "epoch": 95.80408163265307, "grad_norm": 33.80746078491211, "learning_rate": 2.126530612244898e-06, "loss": 0.5225, "step": 23472 }, { "epoch": 95.80816326530612, "grad_norm": 39.05326843261719, "learning_rate": 2.124489795918367e-06, "loss": 0.9269, "step": 23473 }, { "epoch": 95.81224489795919, "grad_norm": 32.343536376953125, "learning_rate": 2.122448979591837e-06, "loss": 1.2278, "step": 23474 }, { "epoch": 95.81632653061224, "grad_norm": 23.336896896362305, "learning_rate": 2.120408163265306e-06, "loss": 0.2334, "step": 23475 }, { "epoch": 95.82040816326531, "grad_norm": 21.180356979370117, "learning_rate": 2.1183673469387753e-06, "loss": 1.8498, "step": 23476 }, { "epoch": 95.82448979591837, "grad_norm": 41.58824157714844, "learning_rate": 2.116326530612245e-06, "loss": 0.3344, "step": 23477 }, { "epoch": 95.82857142857142, "grad_norm": 24.80278205871582, "learning_rate": 2.1142857142857147e-06, "loss": 0.2772, "step": 23478 }, { "epoch": 95.83265306122449, "grad_norm": 17.819374084472656, "learning_rate": 2.112244897959184e-06, "loss": 2.493, "step": 23479 }, { "epoch": 95.83673469387755, "grad_norm": 33.83475875854492, "learning_rate": 2.110204081632653e-06, "loss": 0.8563, "step": 23480 }, { "epoch": 95.84081632653061, "grad_norm": 26.70689582824707, "learning_rate": 2.108163265306123e-06, "loss": 1.4059, "step": 23481 }, { "epoch": 95.84489795918367, "grad_norm": 31.819459915161133, "learning_rate": 2.106122448979592e-06, "loss": 1.1153, "step": 23482 }, { "epoch": 95.84897959183674, "grad_norm": 31.16569709777832, "learning_rate": 2.1040816326530613e-06, "loss": 1.1741, "step": 23483 }, { "epoch": 95.85306122448979, "grad_norm": 30.522415161132812, "learning_rate": 2.1020408163265306e-06, "loss": 0.2418, "step": 23484 }, { "epoch": 95.85714285714286, "grad_norm": 25.0111141204834, "learning_rate": 2.1000000000000002e-06, "loss": 0.2484, "step": 23485 }, { "epoch": 95.86122448979592, "grad_norm": 28.35240936279297, "learning_rate": 2.0979591836734695e-06, "loss": 0.5009, "step": 23486 }, { "epoch": 95.86530612244898, "grad_norm": 31.058727264404297, "learning_rate": 2.0959183673469387e-06, "loss": 0.9047, "step": 23487 }, { "epoch": 95.86938775510204, "grad_norm": 16.126720428466797, "learning_rate": 2.093877551020408e-06, "loss": 0.1091, "step": 23488 }, { "epoch": 95.87346938775511, "grad_norm": 26.71100425720215, "learning_rate": 2.0918367346938776e-06, "loss": 1.3245, "step": 23489 }, { "epoch": 95.87755102040816, "grad_norm": 26.40463638305664, "learning_rate": 2.089795918367347e-06, "loss": 1.2574, "step": 23490 }, { "epoch": 95.88163265306123, "grad_norm": 27.177520751953125, "learning_rate": 2.0877551020408165e-06, "loss": 1.2349, "step": 23491 }, { "epoch": 95.88571428571429, "grad_norm": 25.73961067199707, "learning_rate": 2.0857142857142858e-06, "loss": 0.2062, "step": 23492 }, { "epoch": 95.88979591836734, "grad_norm": 26.544658660888672, "learning_rate": 2.0836734693877554e-06, "loss": 1.9794, "step": 23493 }, { "epoch": 95.89387755102041, "grad_norm": 15.63172435760498, "learning_rate": 2.0816326530612247e-06, "loss": 0.2069, "step": 23494 }, { "epoch": 95.89795918367346, "grad_norm": 32.49201583862305, "learning_rate": 2.079591836734694e-06, "loss": 0.4965, "step": 23495 }, { "epoch": 95.90204081632653, "grad_norm": 34.211456298828125, "learning_rate": 2.0775510204081636e-06, "loss": 1.4143, "step": 23496 }, { "epoch": 95.90612244897959, "grad_norm": 22.674291610717773, "learning_rate": 2.075510204081633e-06, "loss": 2.1188, "step": 23497 }, { "epoch": 95.91020408163266, "grad_norm": 50.925289154052734, "learning_rate": 2.073469387755102e-06, "loss": 1.0435, "step": 23498 }, { "epoch": 95.91428571428571, "grad_norm": 27.286914825439453, "learning_rate": 2.0714285714285713e-06, "loss": 1.335, "step": 23499 }, { "epoch": 95.91836734693878, "grad_norm": 31.933712005615234, "learning_rate": 2.069387755102041e-06, "loss": 1.0924, "step": 23500 }, { "epoch": 95.92244897959183, "grad_norm": 7.7584099769592285, "learning_rate": 2.0673469387755102e-06, "loss": 0.0721, "step": 23501 }, { "epoch": 95.9265306122449, "grad_norm": 37.81672668457031, "learning_rate": 2.0653061224489795e-06, "loss": 0.3804, "step": 23502 }, { "epoch": 95.93061224489796, "grad_norm": 29.08929443359375, "learning_rate": 2.0632653061224487e-06, "loss": 1.39, "step": 23503 }, { "epoch": 95.93469387755103, "grad_norm": 36.26927185058594, "learning_rate": 2.0612244897959184e-06, "loss": 0.225, "step": 23504 }, { "epoch": 95.93877551020408, "grad_norm": 19.90019989013672, "learning_rate": 2.059183673469388e-06, "loss": 0.1712, "step": 23505 }, { "epoch": 95.94285714285714, "grad_norm": 8.914188385009766, "learning_rate": 2.0571428571428573e-06, "loss": 0.104, "step": 23506 }, { "epoch": 95.9469387755102, "grad_norm": 28.723983764648438, "learning_rate": 2.055102040816327e-06, "loss": 0.3998, "step": 23507 }, { "epoch": 95.95102040816326, "grad_norm": 28.302093505859375, "learning_rate": 2.053061224489796e-06, "loss": 0.6824, "step": 23508 }, { "epoch": 95.95510204081633, "grad_norm": 37.07375717163086, "learning_rate": 2.0510204081632654e-06, "loss": 0.6084, "step": 23509 }, { "epoch": 95.95918367346938, "grad_norm": 29.694982528686523, "learning_rate": 2.0489795918367347e-06, "loss": 1.0717, "step": 23510 }, { "epoch": 95.96326530612245, "grad_norm": 22.6269474029541, "learning_rate": 2.0469387755102044e-06, "loss": 1.4691, "step": 23511 }, { "epoch": 95.9673469387755, "grad_norm": 27.52776527404785, "learning_rate": 2.0448979591836736e-06, "loss": 0.4708, "step": 23512 }, { "epoch": 95.97142857142858, "grad_norm": 26.317707061767578, "learning_rate": 2.042857142857143e-06, "loss": 0.1684, "step": 23513 }, { "epoch": 95.97551020408163, "grad_norm": 28.510950088500977, "learning_rate": 2.040816326530612e-06, "loss": 0.3852, "step": 23514 }, { "epoch": 95.9795918367347, "grad_norm": 15.629609107971191, "learning_rate": 2.0387755102040817e-06, "loss": 0.0912, "step": 23515 }, { "epoch": 95.98367346938775, "grad_norm": 22.32317543029785, "learning_rate": 2.036734693877551e-06, "loss": 1.8332, "step": 23516 }, { "epoch": 95.98775510204082, "grad_norm": 25.045854568481445, "learning_rate": 2.0346938775510202e-06, "loss": 0.4923, "step": 23517 }, { "epoch": 95.99183673469388, "grad_norm": 29.032100677490234, "learning_rate": 2.03265306122449e-06, "loss": 0.1981, "step": 23518 }, { "epoch": 95.99591836734695, "grad_norm": 19.980464935302734, "learning_rate": 2.030612244897959e-06, "loss": 0.3009, "step": 23519 }, { "epoch": 96.0, "grad_norm": 28.273605346679688, "learning_rate": 2.028571428571429e-06, "loss": 1.3603, "step": 23520 }, { "epoch": 96.00408163265305, "grad_norm": 16.24798011779785, "learning_rate": 2.026530612244898e-06, "loss": 0.1438, "step": 23521 }, { "epoch": 96.00816326530612, "grad_norm": 23.93828582763672, "learning_rate": 2.0244897959183677e-06, "loss": 1.275, "step": 23522 }, { "epoch": 96.01224489795918, "grad_norm": 33.35209655761719, "learning_rate": 2.022448979591837e-06, "loss": 0.4608, "step": 23523 }, { "epoch": 96.01632653061225, "grad_norm": 26.09809112548828, "learning_rate": 2.020408163265306e-06, "loss": 0.2286, "step": 23524 }, { "epoch": 96.0204081632653, "grad_norm": 33.29530715942383, "learning_rate": 2.0183673469387755e-06, "loss": 0.5334, "step": 23525 }, { "epoch": 96.02448979591837, "grad_norm": 29.37764549255371, "learning_rate": 2.016326530612245e-06, "loss": 1.0472, "step": 23526 }, { "epoch": 96.02857142857142, "grad_norm": 24.731765747070312, "learning_rate": 2.0142857142857144e-06, "loss": 1.4227, "step": 23527 }, { "epoch": 96.0326530612245, "grad_norm": 27.737340927124023, "learning_rate": 2.0122448979591836e-06, "loss": 0.2695, "step": 23528 }, { "epoch": 96.03673469387755, "grad_norm": 34.303375244140625, "learning_rate": 2.0102040816326533e-06, "loss": 0.9061, "step": 23529 }, { "epoch": 96.04081632653062, "grad_norm": 30.53012466430664, "learning_rate": 2.0081632653061225e-06, "loss": 1.0636, "step": 23530 }, { "epoch": 96.04489795918367, "grad_norm": 37.644248962402344, "learning_rate": 2.0061224489795918e-06, "loss": 0.7534, "step": 23531 }, { "epoch": 96.04897959183674, "grad_norm": 30.77113914489746, "learning_rate": 2.004081632653061e-06, "loss": 0.3887, "step": 23532 }, { "epoch": 96.0530612244898, "grad_norm": 11.068793296813965, "learning_rate": 2.0020408163265307e-06, "loss": 0.0871, "step": 23533 }, { "epoch": 96.05714285714286, "grad_norm": 31.5708065032959, "learning_rate": 2.0000000000000003e-06, "loss": 0.3101, "step": 23534 }, { "epoch": 96.06122448979592, "grad_norm": 34.6336669921875, "learning_rate": 1.9979591836734696e-06, "loss": 0.6702, "step": 23535 }, { "epoch": 96.06530612244897, "grad_norm": 27.60454750061035, "learning_rate": 1.995918367346939e-06, "loss": 1.2093, "step": 23536 }, { "epoch": 96.06938775510204, "grad_norm": 25.509653091430664, "learning_rate": 1.9938775510204085e-06, "loss": 0.2716, "step": 23537 }, { "epoch": 96.0734693877551, "grad_norm": 26.38058090209961, "learning_rate": 1.9918367346938777e-06, "loss": 1.2807, "step": 23538 }, { "epoch": 96.07755102040817, "grad_norm": 24.760570526123047, "learning_rate": 1.989795918367347e-06, "loss": 1.8793, "step": 23539 }, { "epoch": 96.08163265306122, "grad_norm": 28.53217124938965, "learning_rate": 1.9877551020408166e-06, "loss": 1.1065, "step": 23540 }, { "epoch": 96.08571428571429, "grad_norm": 21.483905792236328, "learning_rate": 1.985714285714286e-06, "loss": 1.7486, "step": 23541 }, { "epoch": 96.08979591836734, "grad_norm": 30.158935546875, "learning_rate": 1.983673469387755e-06, "loss": 0.4663, "step": 23542 }, { "epoch": 96.09387755102041, "grad_norm": 33.08941650390625, "learning_rate": 1.9816326530612244e-06, "loss": 0.804, "step": 23543 }, { "epoch": 96.09795918367347, "grad_norm": 26.352359771728516, "learning_rate": 1.979591836734694e-06, "loss": 1.1785, "step": 23544 }, { "epoch": 96.10204081632654, "grad_norm": 35.10564041137695, "learning_rate": 1.9775510204081633e-06, "loss": 0.6083, "step": 23545 }, { "epoch": 96.10612244897959, "grad_norm": 17.630788803100586, "learning_rate": 1.9755102040816325e-06, "loss": 0.1816, "step": 23546 }, { "epoch": 96.11020408163266, "grad_norm": 29.206398010253906, "learning_rate": 1.973469387755102e-06, "loss": 1.4482, "step": 23547 }, { "epoch": 96.11428571428571, "grad_norm": 28.492687225341797, "learning_rate": 1.9714285714285714e-06, "loss": 1.4052, "step": 23548 }, { "epoch": 96.11836734693877, "grad_norm": 23.032005310058594, "learning_rate": 1.969387755102041e-06, "loss": 1.9329, "step": 23549 }, { "epoch": 96.12244897959184, "grad_norm": 30.00362777709961, "learning_rate": 1.9673469387755103e-06, "loss": 1.3793, "step": 23550 }, { "epoch": 96.12653061224489, "grad_norm": 43.844970703125, "learning_rate": 1.9653061224489796e-06, "loss": 0.9453, "step": 23551 }, { "epoch": 96.13061224489796, "grad_norm": 32.655784606933594, "learning_rate": 1.9632653061224492e-06, "loss": 1.05, "step": 23552 }, { "epoch": 96.13469387755102, "grad_norm": 24.275129318237305, "learning_rate": 1.9612244897959185e-06, "loss": 1.5073, "step": 23553 }, { "epoch": 96.13877551020408, "grad_norm": 25.93522071838379, "learning_rate": 1.9591836734693877e-06, "loss": 0.601, "step": 23554 }, { "epoch": 96.14285714285714, "grad_norm": 27.743940353393555, "learning_rate": 1.9571428571428574e-06, "loss": 1.3281, "step": 23555 }, { "epoch": 96.14693877551021, "grad_norm": 39.35095977783203, "learning_rate": 1.9551020408163266e-06, "loss": 0.7386, "step": 23556 }, { "epoch": 96.15102040816326, "grad_norm": 24.52863883972168, "learning_rate": 1.953061224489796e-06, "loss": 1.2991, "step": 23557 }, { "epoch": 96.15510204081633, "grad_norm": 36.16164016723633, "learning_rate": 1.951020408163265e-06, "loss": 0.7977, "step": 23558 }, { "epoch": 96.15918367346939, "grad_norm": 29.472278594970703, "learning_rate": 1.948979591836735e-06, "loss": 0.5821, "step": 23559 }, { "epoch": 96.16326530612245, "grad_norm": 34.75483703613281, "learning_rate": 1.946938775510204e-06, "loss": 1.0485, "step": 23560 }, { "epoch": 96.16734693877551, "grad_norm": 26.767925262451172, "learning_rate": 1.9448979591836737e-06, "loss": 0.5033, "step": 23561 }, { "epoch": 96.17142857142858, "grad_norm": 33.22970962524414, "learning_rate": 1.942857142857143e-06, "loss": 1.2331, "step": 23562 }, { "epoch": 96.17551020408163, "grad_norm": 26.42277717590332, "learning_rate": 1.9408163265306126e-06, "loss": 0.7502, "step": 23563 }, { "epoch": 96.17959183673469, "grad_norm": 1.353479266166687, "learning_rate": 1.938775510204082e-06, "loss": 0.0225, "step": 23564 }, { "epoch": 96.18367346938776, "grad_norm": 25.97024917602539, "learning_rate": 1.936734693877551e-06, "loss": 1.2908, "step": 23565 }, { "epoch": 96.18775510204081, "grad_norm": 30.857784271240234, "learning_rate": 1.9346938775510208e-06, "loss": 0.8518, "step": 23566 }, { "epoch": 96.19183673469388, "grad_norm": 5.187882423400879, "learning_rate": 1.93265306122449e-06, "loss": 0.0443, "step": 23567 }, { "epoch": 96.19591836734693, "grad_norm": 24.264041900634766, "learning_rate": 1.9306122448979593e-06, "loss": 0.3194, "step": 23568 }, { "epoch": 96.2, "grad_norm": 26.79075813293457, "learning_rate": 1.9285714285714285e-06, "loss": 1.4703, "step": 23569 }, { "epoch": 96.20408163265306, "grad_norm": 24.849267959594727, "learning_rate": 1.926530612244898e-06, "loss": 0.4552, "step": 23570 }, { "epoch": 96.20816326530613, "grad_norm": 27.473613739013672, "learning_rate": 1.9244897959183674e-06, "loss": 1.4507, "step": 23571 }, { "epoch": 96.21224489795918, "grad_norm": 5.986854553222656, "learning_rate": 1.9224489795918367e-06, "loss": 0.0615, "step": 23572 }, { "epoch": 96.21632653061225, "grad_norm": 26.737810134887695, "learning_rate": 1.920408163265306e-06, "loss": 1.2999, "step": 23573 }, { "epoch": 96.2204081632653, "grad_norm": 38.162784576416016, "learning_rate": 1.9183673469387756e-06, "loss": 0.7122, "step": 23574 }, { "epoch": 96.22448979591837, "grad_norm": 28.212047576904297, "learning_rate": 1.916326530612245e-06, "loss": 0.3825, "step": 23575 }, { "epoch": 96.22857142857143, "grad_norm": 18.37622833251953, "learning_rate": 1.9142857142857145e-06, "loss": 0.1134, "step": 23576 }, { "epoch": 96.2326530612245, "grad_norm": 22.223854064941406, "learning_rate": 1.912244897959184e-06, "loss": 1.8214, "step": 23577 }, { "epoch": 96.23673469387755, "grad_norm": 32.91822814941406, "learning_rate": 1.9102040816326534e-06, "loss": 0.693, "step": 23578 }, { "epoch": 96.2408163265306, "grad_norm": 27.106595993041992, "learning_rate": 1.9081632653061226e-06, "loss": 1.4447, "step": 23579 }, { "epoch": 96.24489795918367, "grad_norm": 32.45755386352539, "learning_rate": 1.9061224489795919e-06, "loss": 0.6853, "step": 23580 }, { "epoch": 96.24897959183673, "grad_norm": 8.119019508361816, "learning_rate": 1.9040816326530615e-06, "loss": 0.0741, "step": 23581 }, { "epoch": 96.2530612244898, "grad_norm": 28.45063018798828, "learning_rate": 1.9020408163265308e-06, "loss": 0.2951, "step": 23582 }, { "epoch": 96.25714285714285, "grad_norm": 31.69072723388672, "learning_rate": 1.9e-06, "loss": 0.6683, "step": 23583 }, { "epoch": 96.26122448979592, "grad_norm": 7.200850009918213, "learning_rate": 1.8979591836734693e-06, "loss": 0.0599, "step": 23584 }, { "epoch": 96.26530612244898, "grad_norm": 26.89657974243164, "learning_rate": 1.895918367346939e-06, "loss": 0.3145, "step": 23585 }, { "epoch": 96.26938775510204, "grad_norm": 32.27576446533203, "learning_rate": 1.8938775510204082e-06, "loss": 0.8999, "step": 23586 }, { "epoch": 96.2734693877551, "grad_norm": 45.117942810058594, "learning_rate": 1.8918367346938776e-06, "loss": 0.7175, "step": 23587 }, { "epoch": 96.27755102040817, "grad_norm": 25.016815185546875, "learning_rate": 1.8897959183673469e-06, "loss": 0.1915, "step": 23588 }, { "epoch": 96.28163265306122, "grad_norm": 25.989469528198242, "learning_rate": 1.8877551020408165e-06, "loss": 0.1781, "step": 23589 }, { "epoch": 96.28571428571429, "grad_norm": 29.892358779907227, "learning_rate": 1.8857142857142858e-06, "loss": 1.1464, "step": 23590 }, { "epoch": 96.28979591836735, "grad_norm": 35.80937194824219, "learning_rate": 1.883673469387755e-06, "loss": 1.0962, "step": 23591 }, { "epoch": 96.29387755102042, "grad_norm": 21.08190155029297, "learning_rate": 1.8816326530612247e-06, "loss": 0.1849, "step": 23592 }, { "epoch": 96.29795918367347, "grad_norm": 34.564762115478516, "learning_rate": 1.879591836734694e-06, "loss": 1.0894, "step": 23593 }, { "epoch": 96.30204081632652, "grad_norm": 23.541088104248047, "learning_rate": 1.8775510204081634e-06, "loss": 1.3545, "step": 23594 }, { "epoch": 96.3061224489796, "grad_norm": 31.089706420898438, "learning_rate": 1.8755102040816326e-06, "loss": 0.8069, "step": 23595 }, { "epoch": 96.31020408163265, "grad_norm": 30.765399932861328, "learning_rate": 1.8734693877551023e-06, "loss": 0.2506, "step": 23596 }, { "epoch": 96.31428571428572, "grad_norm": 30.17063331604004, "learning_rate": 1.8714285714285715e-06, "loss": 0.6004, "step": 23597 }, { "epoch": 96.31836734693877, "grad_norm": 30.140615463256836, "learning_rate": 1.8693877551020408e-06, "loss": 0.7932, "step": 23598 }, { "epoch": 96.32244897959184, "grad_norm": 34.60991287231445, "learning_rate": 1.86734693877551e-06, "loss": 0.827, "step": 23599 }, { "epoch": 96.3265306122449, "grad_norm": 44.10660171508789, "learning_rate": 1.8653061224489797e-06, "loss": 0.3943, "step": 23600 }, { "epoch": 96.33061224489796, "grad_norm": 32.69308853149414, "learning_rate": 1.8632653061224491e-06, "loss": 0.5965, "step": 23601 }, { "epoch": 96.33469387755102, "grad_norm": 27.079179763793945, "learning_rate": 1.8612244897959184e-06, "loss": 0.4071, "step": 23602 }, { "epoch": 96.33877551020409, "grad_norm": 36.94057846069336, "learning_rate": 1.859183673469388e-06, "loss": 0.5613, "step": 23603 }, { "epoch": 96.34285714285714, "grad_norm": 19.861507415771484, "learning_rate": 1.8571428571428573e-06, "loss": 1.8826, "step": 23604 }, { "epoch": 96.34693877551021, "grad_norm": 26.179094314575195, "learning_rate": 1.8551020408163265e-06, "loss": 1.7905, "step": 23605 }, { "epoch": 96.35102040816327, "grad_norm": 19.129093170166016, "learning_rate": 1.8530612244897958e-06, "loss": 1.8926, "step": 23606 }, { "epoch": 96.35510204081632, "grad_norm": 25.073183059692383, "learning_rate": 1.8510204081632655e-06, "loss": 0.9535, "step": 23607 }, { "epoch": 96.35918367346939, "grad_norm": 29.53086280822754, "learning_rate": 1.848979591836735e-06, "loss": 0.5641, "step": 23608 }, { "epoch": 96.36326530612244, "grad_norm": 16.931331634521484, "learning_rate": 1.8469387755102042e-06, "loss": 2.4828, "step": 23609 }, { "epoch": 96.36734693877551, "grad_norm": 21.480247497558594, "learning_rate": 1.8448979591836734e-06, "loss": 0.2941, "step": 23610 }, { "epoch": 96.37142857142857, "grad_norm": 37.75776290893555, "learning_rate": 1.842857142857143e-06, "loss": 1.0004, "step": 23611 }, { "epoch": 96.37551020408164, "grad_norm": 30.53550148010254, "learning_rate": 1.8408163265306123e-06, "loss": 0.7529, "step": 23612 }, { "epoch": 96.37959183673469, "grad_norm": 32.046852111816406, "learning_rate": 1.8387755102040815e-06, "loss": 1.2069, "step": 23613 }, { "epoch": 96.38367346938776, "grad_norm": 33.9695930480957, "learning_rate": 1.8367346938775512e-06, "loss": 1.0556, "step": 23614 }, { "epoch": 96.38775510204081, "grad_norm": 23.22172737121582, "learning_rate": 1.8346938775510207e-06, "loss": 1.37, "step": 23615 }, { "epoch": 96.39183673469388, "grad_norm": 32.85117721557617, "learning_rate": 1.83265306122449e-06, "loss": 0.3796, "step": 23616 }, { "epoch": 96.39591836734694, "grad_norm": 30.223033905029297, "learning_rate": 1.8306122448979592e-06, "loss": 0.6772, "step": 23617 }, { "epoch": 96.4, "grad_norm": 21.29039764404297, "learning_rate": 1.8285714285714288e-06, "loss": 0.2409, "step": 23618 }, { "epoch": 96.40408163265306, "grad_norm": 27.275964736938477, "learning_rate": 1.826530612244898e-06, "loss": 1.328, "step": 23619 }, { "epoch": 96.40816326530613, "grad_norm": 25.872697830200195, "learning_rate": 1.8244897959183673e-06, "loss": 1.7453, "step": 23620 }, { "epoch": 96.41224489795918, "grad_norm": 14.710280418395996, "learning_rate": 1.8224489795918368e-06, "loss": 0.1261, "step": 23621 }, { "epoch": 96.41632653061224, "grad_norm": 31.451580047607422, "learning_rate": 1.8204081632653062e-06, "loss": 0.5753, "step": 23622 }, { "epoch": 96.42040816326531, "grad_norm": 27.690509796142578, "learning_rate": 1.8183673469387757e-06, "loss": 1.0701, "step": 23623 }, { "epoch": 96.42448979591836, "grad_norm": 15.833431243896484, "learning_rate": 1.816326530612245e-06, "loss": 2.1548, "step": 23624 }, { "epoch": 96.42857142857143, "grad_norm": 40.894981384277344, "learning_rate": 1.8142857142857142e-06, "loss": 0.497, "step": 23625 }, { "epoch": 96.43265306122449, "grad_norm": 18.25018310546875, "learning_rate": 1.8122448979591838e-06, "loss": 2.2105, "step": 23626 }, { "epoch": 96.43673469387755, "grad_norm": 30.387422561645508, "learning_rate": 1.810204081632653e-06, "loss": 0.1844, "step": 23627 }, { "epoch": 96.44081632653061, "grad_norm": 25.590547561645508, "learning_rate": 1.8081632653061225e-06, "loss": 1.3129, "step": 23628 }, { "epoch": 96.44489795918368, "grad_norm": 33.73219299316406, "learning_rate": 1.806122448979592e-06, "loss": 0.5437, "step": 23629 }, { "epoch": 96.44897959183673, "grad_norm": 28.212196350097656, "learning_rate": 1.8040816326530614e-06, "loss": 0.3283, "step": 23630 }, { "epoch": 96.4530612244898, "grad_norm": 31.058582305908203, "learning_rate": 1.8020408163265307e-06, "loss": 1.1839, "step": 23631 }, { "epoch": 96.45714285714286, "grad_norm": 29.943992614746094, "learning_rate": 1.8e-06, "loss": 1.5166, "step": 23632 }, { "epoch": 96.46122448979592, "grad_norm": 21.95793914794922, "learning_rate": 1.7979591836734696e-06, "loss": 0.2147, "step": 23633 }, { "epoch": 96.46530612244898, "grad_norm": 27.248018264770508, "learning_rate": 1.7959183673469388e-06, "loss": 0.2142, "step": 23634 }, { "epoch": 96.46938775510205, "grad_norm": 31.728822708129883, "learning_rate": 1.7938775510204083e-06, "loss": 1.1115, "step": 23635 }, { "epoch": 96.4734693877551, "grad_norm": 22.533037185668945, "learning_rate": 1.7918367346938775e-06, "loss": 1.9398, "step": 23636 }, { "epoch": 96.47755102040816, "grad_norm": 31.94440460205078, "learning_rate": 1.7897959183673472e-06, "loss": 1.1145, "step": 23637 }, { "epoch": 96.48163265306123, "grad_norm": 38.112884521484375, "learning_rate": 1.7877551020408164e-06, "loss": 0.9715, "step": 23638 }, { "epoch": 96.48571428571428, "grad_norm": 21.467647552490234, "learning_rate": 1.7857142857142857e-06, "loss": 0.3132, "step": 23639 }, { "epoch": 96.48979591836735, "grad_norm": 25.269031524658203, "learning_rate": 1.7836734693877553e-06, "loss": 0.2552, "step": 23640 }, { "epoch": 96.4938775510204, "grad_norm": 24.99810218811035, "learning_rate": 1.7816326530612246e-06, "loss": 0.2549, "step": 23641 }, { "epoch": 96.49795918367347, "grad_norm": 22.309734344482422, "learning_rate": 1.7795918367346938e-06, "loss": 0.244, "step": 23642 }, { "epoch": 96.50204081632653, "grad_norm": 20.493515014648438, "learning_rate": 1.7775510204081633e-06, "loss": 1.2315, "step": 23643 }, { "epoch": 96.5061224489796, "grad_norm": 25.246814727783203, "learning_rate": 1.775510204081633e-06, "loss": 1.8233, "step": 23644 }, { "epoch": 96.51020408163265, "grad_norm": 33.41436767578125, "learning_rate": 1.7734693877551022e-06, "loss": 0.4305, "step": 23645 }, { "epoch": 96.51428571428572, "grad_norm": 25.491539001464844, "learning_rate": 1.7714285714285714e-06, "loss": 1.308, "step": 23646 }, { "epoch": 96.51836734693877, "grad_norm": 24.006227493286133, "learning_rate": 1.7693877551020407e-06, "loss": 1.5655, "step": 23647 }, { "epoch": 96.52244897959184, "grad_norm": 32.505741119384766, "learning_rate": 1.7673469387755103e-06, "loss": 1.218, "step": 23648 }, { "epoch": 96.5265306122449, "grad_norm": 37.665069580078125, "learning_rate": 1.7653061224489796e-06, "loss": 0.7956, "step": 23649 }, { "epoch": 96.53061224489795, "grad_norm": 47.80582809448242, "learning_rate": 1.763265306122449e-06, "loss": 1.0382, "step": 23650 }, { "epoch": 96.53469387755102, "grad_norm": 37.389312744140625, "learning_rate": 1.7612244897959187e-06, "loss": 0.5232, "step": 23651 }, { "epoch": 96.53877551020408, "grad_norm": 24.265113830566406, "learning_rate": 1.759183673469388e-06, "loss": 0.2288, "step": 23652 }, { "epoch": 96.54285714285714, "grad_norm": 34.41127014160156, "learning_rate": 1.7571428571428572e-06, "loss": 0.8541, "step": 23653 }, { "epoch": 96.5469387755102, "grad_norm": 38.472740173339844, "learning_rate": 1.7551020408163264e-06, "loss": 0.9768, "step": 23654 }, { "epoch": 96.55102040816327, "grad_norm": 31.260923385620117, "learning_rate": 1.7530612244897961e-06, "loss": 1.2786, "step": 23655 }, { "epoch": 96.55510204081632, "grad_norm": 33.113990783691406, "learning_rate": 1.7510204081632654e-06, "loss": 0.9092, "step": 23656 }, { "epoch": 96.55918367346939, "grad_norm": 34.35908126831055, "learning_rate": 1.7489795918367348e-06, "loss": 0.4759, "step": 23657 }, { "epoch": 96.56326530612245, "grad_norm": 41.858455657958984, "learning_rate": 1.746938775510204e-06, "loss": 0.7257, "step": 23658 }, { "epoch": 96.56734693877551, "grad_norm": 29.043214797973633, "learning_rate": 1.7448979591836737e-06, "loss": 1.1832, "step": 23659 }, { "epoch": 96.57142857142857, "grad_norm": 30.409269332885742, "learning_rate": 1.742857142857143e-06, "loss": 1.0099, "step": 23660 }, { "epoch": 96.57551020408164, "grad_norm": 25.787399291992188, "learning_rate": 1.7408163265306122e-06, "loss": 1.2747, "step": 23661 }, { "epoch": 96.57959183673469, "grad_norm": 25.0218563079834, "learning_rate": 1.7387755102040819e-06, "loss": 0.4364, "step": 23662 }, { "epoch": 96.58367346938776, "grad_norm": 20.34433364868164, "learning_rate": 1.7367346938775511e-06, "loss": 2.0031, "step": 23663 }, { "epoch": 96.58775510204082, "grad_norm": 31.155675888061523, "learning_rate": 1.7346938775510206e-06, "loss": 0.3533, "step": 23664 }, { "epoch": 96.59183673469387, "grad_norm": 34.37208938598633, "learning_rate": 1.7326530612244898e-06, "loss": 1.0018, "step": 23665 }, { "epoch": 96.59591836734694, "grad_norm": 18.216474533081055, "learning_rate": 1.7306122448979595e-06, "loss": 0.164, "step": 23666 }, { "epoch": 96.6, "grad_norm": 29.525503158569336, "learning_rate": 1.7285714285714287e-06, "loss": 0.4233, "step": 23667 }, { "epoch": 96.60408163265306, "grad_norm": 21.137319564819336, "learning_rate": 1.726530612244898e-06, "loss": 0.2303, "step": 23668 }, { "epoch": 96.60816326530612, "grad_norm": 21.9006290435791, "learning_rate": 1.7244897959183672e-06, "loss": 0.171, "step": 23669 }, { "epoch": 96.61224489795919, "grad_norm": 36.3465576171875, "learning_rate": 1.7224489795918369e-06, "loss": 1.0366, "step": 23670 }, { "epoch": 96.61632653061224, "grad_norm": 12.345017433166504, "learning_rate": 1.7204081632653063e-06, "loss": 0.1149, "step": 23671 }, { "epoch": 96.62040816326531, "grad_norm": 29.997020721435547, "learning_rate": 1.7183673469387756e-06, "loss": 0.2044, "step": 23672 }, { "epoch": 96.62448979591836, "grad_norm": 16.42423439025879, "learning_rate": 1.7163265306122448e-06, "loss": 0.1808, "step": 23673 }, { "epoch": 96.62857142857143, "grad_norm": 31.66037940979004, "learning_rate": 1.7142857142857145e-06, "loss": 0.4153, "step": 23674 }, { "epoch": 96.63265306122449, "grad_norm": 29.388151168823242, "learning_rate": 1.7122448979591837e-06, "loss": 1.1836, "step": 23675 }, { "epoch": 96.63673469387756, "grad_norm": 30.755956649780273, "learning_rate": 1.710204081632653e-06, "loss": 0.2151, "step": 23676 }, { "epoch": 96.64081632653061, "grad_norm": 25.95779800415039, "learning_rate": 1.7081632653061226e-06, "loss": 1.401, "step": 23677 }, { "epoch": 96.64489795918368, "grad_norm": 21.9549503326416, "learning_rate": 1.7061224489795919e-06, "loss": 1.8232, "step": 23678 }, { "epoch": 96.64897959183673, "grad_norm": 7.382178783416748, "learning_rate": 1.7040816326530613e-06, "loss": 0.0678, "step": 23679 }, { "epoch": 96.65306122448979, "grad_norm": 29.749530792236328, "learning_rate": 1.7020408163265306e-06, "loss": 1.2788, "step": 23680 }, { "epoch": 96.65714285714286, "grad_norm": 30.751911163330078, "learning_rate": 1.7000000000000002e-06, "loss": 1.5299, "step": 23681 }, { "epoch": 96.66122448979591, "grad_norm": 27.162322998046875, "learning_rate": 1.6979591836734695e-06, "loss": 1.0218, "step": 23682 }, { "epoch": 96.66530612244898, "grad_norm": 33.12889862060547, "learning_rate": 1.6959183673469387e-06, "loss": 0.8894, "step": 23683 }, { "epoch": 96.66938775510204, "grad_norm": 25.59161376953125, "learning_rate": 1.6938775510204082e-06, "loss": 1.7882, "step": 23684 }, { "epoch": 96.6734693877551, "grad_norm": 43.36817932128906, "learning_rate": 1.6918367346938776e-06, "loss": 0.9151, "step": 23685 }, { "epoch": 96.67755102040816, "grad_norm": 32.86729049682617, "learning_rate": 1.689795918367347e-06, "loss": 0.6664, "step": 23686 }, { "epoch": 96.68163265306123, "grad_norm": 31.571165084838867, "learning_rate": 1.6877551020408163e-06, "loss": 0.8162, "step": 23687 }, { "epoch": 96.68571428571428, "grad_norm": 11.241033554077148, "learning_rate": 1.685714285714286e-06, "loss": 0.09, "step": 23688 }, { "epoch": 96.68979591836735, "grad_norm": 30.136157989501953, "learning_rate": 1.6836734693877552e-06, "loss": 0.4366, "step": 23689 }, { "epoch": 96.6938775510204, "grad_norm": 29.026456832885742, "learning_rate": 1.6816326530612245e-06, "loss": 1.0956, "step": 23690 }, { "epoch": 96.69795918367348, "grad_norm": 22.29550552368164, "learning_rate": 1.6795918367346937e-06, "loss": 1.4756, "step": 23691 }, { "epoch": 96.70204081632653, "grad_norm": 27.655254364013672, "learning_rate": 1.6775510204081634e-06, "loss": 0.5571, "step": 23692 }, { "epoch": 96.70612244897958, "grad_norm": 27.768569946289062, "learning_rate": 1.6755102040816329e-06, "loss": 0.2667, "step": 23693 }, { "epoch": 96.71020408163265, "grad_norm": 35.68974304199219, "learning_rate": 1.673469387755102e-06, "loss": 0.4929, "step": 23694 }, { "epoch": 96.71428571428571, "grad_norm": 21.68063735961914, "learning_rate": 1.6714285714285713e-06, "loss": 1.1484, "step": 23695 }, { "epoch": 96.71836734693878, "grad_norm": 38.423744201660156, "learning_rate": 1.669387755102041e-06, "loss": 1.2323, "step": 23696 }, { "epoch": 96.72244897959183, "grad_norm": 24.905874252319336, "learning_rate": 1.6673469387755102e-06, "loss": 1.3479, "step": 23697 }, { "epoch": 96.7265306122449, "grad_norm": 30.40387725830078, "learning_rate": 1.6653061224489795e-06, "loss": 0.9876, "step": 23698 }, { "epoch": 96.73061224489796, "grad_norm": 28.27365493774414, "learning_rate": 1.6632653061224492e-06, "loss": 0.3082, "step": 23699 }, { "epoch": 96.73469387755102, "grad_norm": 32.71171188354492, "learning_rate": 1.6612244897959186e-06, "loss": 1.3518, "step": 23700 }, { "epoch": 96.73877551020408, "grad_norm": 34.13048553466797, "learning_rate": 1.6591836734693879e-06, "loss": 0.9333, "step": 23701 }, { "epoch": 96.74285714285715, "grad_norm": 29.353975296020508, "learning_rate": 1.657142857142857e-06, "loss": 0.4625, "step": 23702 }, { "epoch": 96.7469387755102, "grad_norm": 24.91676139831543, "learning_rate": 1.6551020408163268e-06, "loss": 1.0576, "step": 23703 }, { "epoch": 96.75102040816327, "grad_norm": 28.492708206176758, "learning_rate": 1.653061224489796e-06, "loss": 0.5099, "step": 23704 }, { "epoch": 96.75510204081633, "grad_norm": 24.115745544433594, "learning_rate": 1.6510204081632653e-06, "loss": 1.2168, "step": 23705 }, { "epoch": 96.7591836734694, "grad_norm": 29.3499813079834, "learning_rate": 1.6489795918367347e-06, "loss": 0.3241, "step": 23706 }, { "epoch": 96.76326530612245, "grad_norm": 9.894272804260254, "learning_rate": 1.6469387755102044e-06, "loss": 0.1037, "step": 23707 }, { "epoch": 96.7673469387755, "grad_norm": 28.447158813476562, "learning_rate": 1.6448979591836736e-06, "loss": 0.2613, "step": 23708 }, { "epoch": 96.77142857142857, "grad_norm": 22.02050018310547, "learning_rate": 1.6428571428571429e-06, "loss": 1.9395, "step": 23709 }, { "epoch": 96.77551020408163, "grad_norm": 29.800283432006836, "learning_rate": 1.640816326530612e-06, "loss": 1.1745, "step": 23710 }, { "epoch": 96.7795918367347, "grad_norm": 13.41414737701416, "learning_rate": 1.6387755102040818e-06, "loss": 0.0925, "step": 23711 }, { "epoch": 96.78367346938775, "grad_norm": 30.18000602722168, "learning_rate": 1.636734693877551e-06, "loss": 1.2673, "step": 23712 }, { "epoch": 96.78775510204082, "grad_norm": 23.017255783081055, "learning_rate": 1.6346938775510205e-06, "loss": 0.5807, "step": 23713 }, { "epoch": 96.79183673469387, "grad_norm": 28.260360717773438, "learning_rate": 1.63265306122449e-06, "loss": 0.5775, "step": 23714 }, { "epoch": 96.79591836734694, "grad_norm": 33.03264617919922, "learning_rate": 1.6306122448979594e-06, "loss": 0.426, "step": 23715 }, { "epoch": 96.8, "grad_norm": 35.140838623046875, "learning_rate": 1.6285714285714286e-06, "loss": 1.0766, "step": 23716 }, { "epoch": 96.80408163265307, "grad_norm": 28.54412078857422, "learning_rate": 1.6265306122448979e-06, "loss": 0.3814, "step": 23717 }, { "epoch": 96.80816326530612, "grad_norm": 26.470447540283203, "learning_rate": 1.6244897959183675e-06, "loss": 1.6301, "step": 23718 }, { "epoch": 96.81224489795919, "grad_norm": 19.787519454956055, "learning_rate": 1.6224489795918368e-06, "loss": 0.2498, "step": 23719 }, { "epoch": 96.81632653061224, "grad_norm": 30.242019653320312, "learning_rate": 1.6204081632653062e-06, "loss": 0.2333, "step": 23720 }, { "epoch": 96.82040816326531, "grad_norm": 19.361623764038086, "learning_rate": 1.6183673469387755e-06, "loss": 0.2189, "step": 23721 }, { "epoch": 96.82448979591837, "grad_norm": 25.735755920410156, "learning_rate": 1.6163265306122451e-06, "loss": 0.6295, "step": 23722 }, { "epoch": 96.82857142857142, "grad_norm": 32.295833587646484, "learning_rate": 1.6142857142857144e-06, "loss": 0.8603, "step": 23723 }, { "epoch": 96.83265306122449, "grad_norm": 26.765535354614258, "learning_rate": 1.6122448979591836e-06, "loss": 0.4601, "step": 23724 }, { "epoch": 96.83673469387755, "grad_norm": 23.15947151184082, "learning_rate": 1.6102040816326533e-06, "loss": 0.2052, "step": 23725 }, { "epoch": 96.84081632653061, "grad_norm": 10.161070823669434, "learning_rate": 1.6081632653061225e-06, "loss": 0.0649, "step": 23726 }, { "epoch": 96.84489795918367, "grad_norm": 30.623916625976562, "learning_rate": 1.606122448979592e-06, "loss": 0.5433, "step": 23727 }, { "epoch": 96.84897959183674, "grad_norm": 23.901832580566406, "learning_rate": 1.6040816326530612e-06, "loss": 2.1257, "step": 23728 }, { "epoch": 96.85306122448979, "grad_norm": 26.44131088256836, "learning_rate": 1.602040816326531e-06, "loss": 0.304, "step": 23729 }, { "epoch": 96.85714285714286, "grad_norm": 30.952484130859375, "learning_rate": 1.6000000000000001e-06, "loss": 0.8121, "step": 23730 }, { "epoch": 96.86122448979592, "grad_norm": 8.127259254455566, "learning_rate": 1.5979591836734694e-06, "loss": 0.0608, "step": 23731 }, { "epoch": 96.86530612244898, "grad_norm": 42.08803939819336, "learning_rate": 1.5959183673469386e-06, "loss": 0.3149, "step": 23732 }, { "epoch": 96.86938775510204, "grad_norm": 25.875629425048828, "learning_rate": 1.5938775510204083e-06, "loss": 1.3819, "step": 23733 }, { "epoch": 96.87346938775511, "grad_norm": 27.27938461303711, "learning_rate": 1.5918367346938775e-06, "loss": 0.6707, "step": 23734 }, { "epoch": 96.87755102040816, "grad_norm": 27.297658920288086, "learning_rate": 1.589795918367347e-06, "loss": 0.8968, "step": 23735 }, { "epoch": 96.88163265306123, "grad_norm": 28.112703323364258, "learning_rate": 1.5877551020408167e-06, "loss": 0.5417, "step": 23736 }, { "epoch": 96.88571428571429, "grad_norm": 23.210956573486328, "learning_rate": 1.585714285714286e-06, "loss": 1.8115, "step": 23737 }, { "epoch": 96.88979591836734, "grad_norm": 9.660175323486328, "learning_rate": 1.5836734693877551e-06, "loss": 2.9462, "step": 23738 }, { "epoch": 96.89387755102041, "grad_norm": 39.71556854248047, "learning_rate": 1.5816326530612244e-06, "loss": 0.7154, "step": 23739 }, { "epoch": 96.89795918367346, "grad_norm": 35.41867446899414, "learning_rate": 1.579591836734694e-06, "loss": 1.3022, "step": 23740 }, { "epoch": 96.90204081632653, "grad_norm": 26.525793075561523, "learning_rate": 1.5775510204081633e-06, "loss": 1.7192, "step": 23741 }, { "epoch": 96.90612244897959, "grad_norm": 27.714176177978516, "learning_rate": 1.5755102040816328e-06, "loss": 0.886, "step": 23742 }, { "epoch": 96.91020408163266, "grad_norm": 26.44767951965332, "learning_rate": 1.573469387755102e-06, "loss": 1.4323, "step": 23743 }, { "epoch": 96.91428571428571, "grad_norm": 27.506391525268555, "learning_rate": 1.5714285714285717e-06, "loss": 0.6036, "step": 23744 }, { "epoch": 96.91836734693878, "grad_norm": 27.486373901367188, "learning_rate": 1.569387755102041e-06, "loss": 1.0655, "step": 23745 }, { "epoch": 96.92244897959183, "grad_norm": 26.317880630493164, "learning_rate": 1.5673469387755101e-06, "loss": 1.4175, "step": 23746 }, { "epoch": 96.9265306122449, "grad_norm": 20.734399795532227, "learning_rate": 1.5653061224489798e-06, "loss": 0.1764, "step": 23747 }, { "epoch": 96.93061224489796, "grad_norm": 21.571237564086914, "learning_rate": 1.563265306122449e-06, "loss": 0.2582, "step": 23748 }, { "epoch": 96.93469387755103, "grad_norm": 37.775516510009766, "learning_rate": 1.5612244897959185e-06, "loss": 0.554, "step": 23749 }, { "epoch": 96.93877551020408, "grad_norm": 33.291259765625, "learning_rate": 1.559183673469388e-06, "loss": 0.5857, "step": 23750 }, { "epoch": 96.94285714285714, "grad_norm": 27.13443374633789, "learning_rate": 1.5571428571428572e-06, "loss": 0.2761, "step": 23751 }, { "epoch": 96.9469387755102, "grad_norm": 18.260881423950195, "learning_rate": 1.5551020408163267e-06, "loss": 0.2041, "step": 23752 }, { "epoch": 96.95102040816326, "grad_norm": 24.65380096435547, "learning_rate": 1.553061224489796e-06, "loss": 1.2535, "step": 23753 }, { "epoch": 96.95510204081633, "grad_norm": 6.751873016357422, "learning_rate": 1.5510204081632654e-06, "loss": 0.0685, "step": 23754 }, { "epoch": 96.95918367346938, "grad_norm": 37.12360763549805, "learning_rate": 1.5489795918367346e-06, "loss": 0.4948, "step": 23755 }, { "epoch": 96.96326530612245, "grad_norm": 40.98980712890625, "learning_rate": 1.5469387755102043e-06, "loss": 0.5875, "step": 23756 }, { "epoch": 96.9673469387755, "grad_norm": 31.57137107849121, "learning_rate": 1.5448979591836735e-06, "loss": 0.6911, "step": 23757 }, { "epoch": 96.97142857142858, "grad_norm": 24.028255462646484, "learning_rate": 1.542857142857143e-06, "loss": 0.2564, "step": 23758 }, { "epoch": 96.97551020408163, "grad_norm": 23.625009536743164, "learning_rate": 1.5408163265306122e-06, "loss": 1.8073, "step": 23759 }, { "epoch": 96.9795918367347, "grad_norm": 32.848655700683594, "learning_rate": 1.5387755102040817e-06, "loss": 0.9436, "step": 23760 }, { "epoch": 96.98367346938775, "grad_norm": 23.67802619934082, "learning_rate": 1.5367346938775511e-06, "loss": 0.2289, "step": 23761 }, { "epoch": 96.98775510204082, "grad_norm": 32.87844467163086, "learning_rate": 1.5346938775510204e-06, "loss": 1.0706, "step": 23762 }, { "epoch": 96.99183673469388, "grad_norm": 44.67705535888672, "learning_rate": 1.53265306122449e-06, "loss": 0.6294, "step": 23763 }, { "epoch": 96.99591836734695, "grad_norm": 29.718761444091797, "learning_rate": 1.5306122448979593e-06, "loss": 1.4361, "step": 23764 }, { "epoch": 97.0, "grad_norm": 10.566256523132324, "learning_rate": 1.5285714285714287e-06, "loss": 0.0802, "step": 23765 }, { "epoch": 97.00408163265305, "grad_norm": 26.038999557495117, "learning_rate": 1.526530612244898e-06, "loss": 1.8164, "step": 23766 }, { "epoch": 97.00816326530612, "grad_norm": 37.016273498535156, "learning_rate": 1.5244897959183674e-06, "loss": 0.7114, "step": 23767 }, { "epoch": 97.01224489795918, "grad_norm": 20.55361557006836, "learning_rate": 1.5224489795918367e-06, "loss": 0.1721, "step": 23768 }, { "epoch": 97.01632653061225, "grad_norm": 20.039472579956055, "learning_rate": 1.5204081632653061e-06, "loss": 0.2335, "step": 23769 }, { "epoch": 97.0204081632653, "grad_norm": 19.232336044311523, "learning_rate": 1.5183673469387756e-06, "loss": 0.1837, "step": 23770 }, { "epoch": 97.02448979591837, "grad_norm": 33.37104034423828, "learning_rate": 1.516326530612245e-06, "loss": 1.06, "step": 23771 }, { "epoch": 97.02857142857142, "grad_norm": 16.7760066986084, "learning_rate": 1.5142857142857145e-06, "loss": 2.144, "step": 23772 }, { "epoch": 97.0326530612245, "grad_norm": 14.01570987701416, "learning_rate": 1.5122448979591837e-06, "loss": 0.0778, "step": 23773 }, { "epoch": 97.03673469387755, "grad_norm": 20.226654052734375, "learning_rate": 1.5102040816326532e-06, "loss": 1.9815, "step": 23774 }, { "epoch": 97.04081632653062, "grad_norm": 27.73120880126953, "learning_rate": 1.5081632653061224e-06, "loss": 0.3066, "step": 23775 }, { "epoch": 97.04489795918367, "grad_norm": 10.440131187438965, "learning_rate": 1.5061224489795919e-06, "loss": 0.0774, "step": 23776 }, { "epoch": 97.04897959183674, "grad_norm": 29.277925491333008, "learning_rate": 1.5040816326530613e-06, "loss": 0.5669, "step": 23777 }, { "epoch": 97.0530612244898, "grad_norm": 32.22951126098633, "learning_rate": 1.5020408163265308e-06, "loss": 1.1716, "step": 23778 }, { "epoch": 97.05714285714286, "grad_norm": 60.935665130615234, "learning_rate": 1.5e-06, "loss": 1.0856, "step": 23779 }, { "epoch": 97.06122448979592, "grad_norm": 25.95033073425293, "learning_rate": 1.4979591836734695e-06, "loss": 0.5234, "step": 23780 }, { "epoch": 97.06530612244897, "grad_norm": 25.132566452026367, "learning_rate": 1.4959183673469387e-06, "loss": 0.4352, "step": 23781 }, { "epoch": 97.06938775510204, "grad_norm": 33.68144607543945, "learning_rate": 1.4938775510204082e-06, "loss": 1.2216, "step": 23782 }, { "epoch": 97.0734693877551, "grad_norm": 9.258515357971191, "learning_rate": 1.4918367346938774e-06, "loss": 0.082, "step": 23783 }, { "epoch": 97.07755102040817, "grad_norm": 22.422332763671875, "learning_rate": 1.489795918367347e-06, "loss": 1.75, "step": 23784 }, { "epoch": 97.08163265306122, "grad_norm": 6.693460941314697, "learning_rate": 1.4877551020408166e-06, "loss": 0.061, "step": 23785 }, { "epoch": 97.08571428571429, "grad_norm": 24.721818923950195, "learning_rate": 1.4857142857142858e-06, "loss": 1.2259, "step": 23786 }, { "epoch": 97.08979591836734, "grad_norm": 28.102128982543945, "learning_rate": 1.4836734693877553e-06, "loss": 1.1656, "step": 23787 }, { "epoch": 97.09387755102041, "grad_norm": 30.4058895111084, "learning_rate": 1.4816326530612245e-06, "loss": 0.6618, "step": 23788 }, { "epoch": 97.09795918367347, "grad_norm": 25.11957550048828, "learning_rate": 1.479591836734694e-06, "loss": 1.2267, "step": 23789 }, { "epoch": 97.10204081632654, "grad_norm": 28.91683578491211, "learning_rate": 1.4775510204081632e-06, "loss": 0.8077, "step": 23790 }, { "epoch": 97.10612244897959, "grad_norm": 23.489299774169922, "learning_rate": 1.4755102040816329e-06, "loss": 0.2979, "step": 23791 }, { "epoch": 97.11020408163266, "grad_norm": 23.647682189941406, "learning_rate": 1.473469387755102e-06, "loss": 1.7558, "step": 23792 }, { "epoch": 97.11428571428571, "grad_norm": 42.051910400390625, "learning_rate": 1.4714285714285716e-06, "loss": 0.962, "step": 23793 }, { "epoch": 97.11836734693877, "grad_norm": 26.034648895263672, "learning_rate": 1.4693877551020408e-06, "loss": 0.3067, "step": 23794 }, { "epoch": 97.12244897959184, "grad_norm": 27.594070434570312, "learning_rate": 1.4673469387755103e-06, "loss": 0.8608, "step": 23795 }, { "epoch": 97.12653061224489, "grad_norm": 29.14635467529297, "learning_rate": 1.4653061224489797e-06, "loss": 1.0778, "step": 23796 }, { "epoch": 97.13061224489796, "grad_norm": 26.711889266967773, "learning_rate": 1.463265306122449e-06, "loss": 1.526, "step": 23797 }, { "epoch": 97.13469387755102, "grad_norm": 26.127378463745117, "learning_rate": 1.4612244897959184e-06, "loss": 0.3124, "step": 23798 }, { "epoch": 97.13877551020408, "grad_norm": 11.74072551727295, "learning_rate": 1.4591836734693879e-06, "loss": 0.0758, "step": 23799 }, { "epoch": 97.14285714285714, "grad_norm": 30.148775100708008, "learning_rate": 1.4571428571428573e-06, "loss": 1.1059, "step": 23800 }, { "epoch": 97.14693877551021, "grad_norm": 25.963912963867188, "learning_rate": 1.4551020408163266e-06, "loss": 1.2153, "step": 23801 }, { "epoch": 97.15102040816326, "grad_norm": 28.922061920166016, "learning_rate": 1.453061224489796e-06, "loss": 0.495, "step": 23802 }, { "epoch": 97.15510204081633, "grad_norm": 26.084020614624023, "learning_rate": 1.4510204081632653e-06, "loss": 0.1954, "step": 23803 }, { "epoch": 97.15918367346939, "grad_norm": 25.87429428100586, "learning_rate": 1.4489795918367347e-06, "loss": 0.9454, "step": 23804 }, { "epoch": 97.16326530612245, "grad_norm": 31.290578842163086, "learning_rate": 1.4469387755102042e-06, "loss": 0.5989, "step": 23805 }, { "epoch": 97.16734693877551, "grad_norm": 23.450220108032227, "learning_rate": 1.4448979591836736e-06, "loss": 0.3655, "step": 23806 }, { "epoch": 97.17142857142858, "grad_norm": 32.833045959472656, "learning_rate": 1.4428571428571429e-06, "loss": 0.8234, "step": 23807 }, { "epoch": 97.17551020408163, "grad_norm": 25.37539291381836, "learning_rate": 1.4408163265306123e-06, "loss": 1.4124, "step": 23808 }, { "epoch": 97.17959183673469, "grad_norm": 39.37627029418945, "learning_rate": 1.4387755102040818e-06, "loss": 0.6041, "step": 23809 }, { "epoch": 97.18367346938776, "grad_norm": 20.16897964477539, "learning_rate": 1.436734693877551e-06, "loss": 0.2555, "step": 23810 }, { "epoch": 97.18775510204081, "grad_norm": 1.3087164163589478, "learning_rate": 1.4346938775510205e-06, "loss": 0.0226, "step": 23811 }, { "epoch": 97.19183673469388, "grad_norm": 12.890218734741211, "learning_rate": 1.43265306122449e-06, "loss": 0.0927, "step": 23812 }, { "epoch": 97.19591836734693, "grad_norm": 27.781579971313477, "learning_rate": 1.4306122448979594e-06, "loss": 0.4906, "step": 23813 }, { "epoch": 97.2, "grad_norm": 29.28769874572754, "learning_rate": 1.4285714285714286e-06, "loss": 0.5295, "step": 23814 }, { "epoch": 97.20408163265306, "grad_norm": 31.90906524658203, "learning_rate": 1.426530612244898e-06, "loss": 0.2809, "step": 23815 }, { "epoch": 97.20816326530613, "grad_norm": 24.559480667114258, "learning_rate": 1.4244897959183673e-06, "loss": 0.2418, "step": 23816 }, { "epoch": 97.21224489795918, "grad_norm": 37.66596221923828, "learning_rate": 1.4224489795918368e-06, "loss": 0.4796, "step": 23817 }, { "epoch": 97.21632653061225, "grad_norm": 21.673479080200195, "learning_rate": 1.420408163265306e-06, "loss": 0.2199, "step": 23818 }, { "epoch": 97.2204081632653, "grad_norm": 28.978757858276367, "learning_rate": 1.4183673469387757e-06, "loss": 0.854, "step": 23819 }, { "epoch": 97.22448979591837, "grad_norm": 21.730390548706055, "learning_rate": 1.4163265306122451e-06, "loss": 0.3151, "step": 23820 }, { "epoch": 97.22857142857143, "grad_norm": 18.185684204101562, "learning_rate": 1.4142857142857144e-06, "loss": 1.8873, "step": 23821 }, { "epoch": 97.2326530612245, "grad_norm": 32.99609375, "learning_rate": 1.4122448979591838e-06, "loss": 1.347, "step": 23822 }, { "epoch": 97.23673469387755, "grad_norm": 22.283985137939453, "learning_rate": 1.410204081632653e-06, "loss": 1.9316, "step": 23823 }, { "epoch": 97.2408163265306, "grad_norm": 26.494338989257812, "learning_rate": 1.4081632653061225e-06, "loss": 0.4431, "step": 23824 }, { "epoch": 97.24489795918367, "grad_norm": 21.902118682861328, "learning_rate": 1.4061224489795918e-06, "loss": 1.4506, "step": 23825 }, { "epoch": 97.24897959183673, "grad_norm": 37.60405731201172, "learning_rate": 1.4040816326530612e-06, "loss": 0.7371, "step": 23826 }, { "epoch": 97.2530612244898, "grad_norm": 21.740528106689453, "learning_rate": 1.4020408163265307e-06, "loss": 1.9399, "step": 23827 }, { "epoch": 97.25714285714285, "grad_norm": 31.939931869506836, "learning_rate": 1.4000000000000001e-06, "loss": 0.8553, "step": 23828 }, { "epoch": 97.26122448979592, "grad_norm": 25.706645965576172, "learning_rate": 1.3979591836734694e-06, "loss": 0.8992, "step": 23829 }, { "epoch": 97.26530612244898, "grad_norm": 17.970096588134766, "learning_rate": 1.3959183673469388e-06, "loss": 0.2855, "step": 23830 }, { "epoch": 97.26938775510204, "grad_norm": 24.079456329345703, "learning_rate": 1.393877551020408e-06, "loss": 1.7253, "step": 23831 }, { "epoch": 97.2734693877551, "grad_norm": 26.16110610961914, "learning_rate": 1.3918367346938775e-06, "loss": 0.4131, "step": 23832 }, { "epoch": 97.27755102040817, "grad_norm": 25.700183868408203, "learning_rate": 1.389795918367347e-06, "loss": 1.4144, "step": 23833 }, { "epoch": 97.28163265306122, "grad_norm": 9.158849716186523, "learning_rate": 1.3877551020408165e-06, "loss": 0.0753, "step": 23834 }, { "epoch": 97.28571428571429, "grad_norm": 36.80162048339844, "learning_rate": 1.385714285714286e-06, "loss": 0.923, "step": 23835 }, { "epoch": 97.28979591836735, "grad_norm": 22.77711296081543, "learning_rate": 1.3836734693877552e-06, "loss": 0.2334, "step": 23836 }, { "epoch": 97.29387755102042, "grad_norm": 30.410701751708984, "learning_rate": 1.3816326530612246e-06, "loss": 0.2708, "step": 23837 }, { "epoch": 97.29795918367347, "grad_norm": 21.153202056884766, "learning_rate": 1.3795918367346938e-06, "loss": 0.1892, "step": 23838 }, { "epoch": 97.30204081632652, "grad_norm": 24.785737991333008, "learning_rate": 1.3775510204081633e-06, "loss": 1.3371, "step": 23839 }, { "epoch": 97.3061224489796, "grad_norm": 29.495101928710938, "learning_rate": 1.3755102040816328e-06, "loss": 1.1682, "step": 23840 }, { "epoch": 97.31020408163265, "grad_norm": 33.00503921508789, "learning_rate": 1.3734693877551022e-06, "loss": 0.9334, "step": 23841 }, { "epoch": 97.31428571428572, "grad_norm": 41.76909255981445, "learning_rate": 1.3714285714285715e-06, "loss": 1.258, "step": 23842 }, { "epoch": 97.31836734693877, "grad_norm": 25.26911735534668, "learning_rate": 1.369387755102041e-06, "loss": 1.7085, "step": 23843 }, { "epoch": 97.32244897959184, "grad_norm": 31.32213592529297, "learning_rate": 1.3673469387755102e-06, "loss": 0.7772, "step": 23844 }, { "epoch": 97.3265306122449, "grad_norm": 35.443912506103516, "learning_rate": 1.3653061224489796e-06, "loss": 0.4691, "step": 23845 }, { "epoch": 97.33061224489796, "grad_norm": 34.231422424316406, "learning_rate": 1.363265306122449e-06, "loss": 0.5442, "step": 23846 }, { "epoch": 97.33469387755102, "grad_norm": 28.98261833190918, "learning_rate": 1.3612244897959183e-06, "loss": 0.5611, "step": 23847 }, { "epoch": 97.33877551020409, "grad_norm": 32.7809944152832, "learning_rate": 1.359183673469388e-06, "loss": 0.2313, "step": 23848 }, { "epoch": 97.34285714285714, "grad_norm": 26.09972381591797, "learning_rate": 1.3571428571428572e-06, "loss": 0.4972, "step": 23849 }, { "epoch": 97.34693877551021, "grad_norm": 30.819005966186523, "learning_rate": 1.3551020408163267e-06, "loss": 1.4972, "step": 23850 }, { "epoch": 97.35102040816327, "grad_norm": 18.788999557495117, "learning_rate": 1.353061224489796e-06, "loss": 0.1879, "step": 23851 }, { "epoch": 97.35510204081632, "grad_norm": 28.749656677246094, "learning_rate": 1.3510204081632654e-06, "loss": 1.0974, "step": 23852 }, { "epoch": 97.35918367346939, "grad_norm": 34.44350814819336, "learning_rate": 1.3489795918367346e-06, "loss": 0.9944, "step": 23853 }, { "epoch": 97.36326530612244, "grad_norm": 28.673612594604492, "learning_rate": 1.346938775510204e-06, "loss": 1.3078, "step": 23854 }, { "epoch": 97.36734693877551, "grad_norm": 16.28299331665039, "learning_rate": 1.3448979591836735e-06, "loss": 0.2067, "step": 23855 }, { "epoch": 97.37142857142857, "grad_norm": 28.884902954101562, "learning_rate": 1.342857142857143e-06, "loss": 1.0517, "step": 23856 }, { "epoch": 97.37551020408164, "grad_norm": 23.834476470947266, "learning_rate": 1.3408163265306124e-06, "loss": 1.3111, "step": 23857 }, { "epoch": 97.37959183673469, "grad_norm": 35.288665771484375, "learning_rate": 1.3387755102040817e-06, "loss": 0.3034, "step": 23858 }, { "epoch": 97.38367346938776, "grad_norm": 27.92240333557129, "learning_rate": 1.3367346938775511e-06, "loss": 0.588, "step": 23859 }, { "epoch": 97.38775510204081, "grad_norm": 34.791996002197266, "learning_rate": 1.3346938775510204e-06, "loss": 0.8192, "step": 23860 }, { "epoch": 97.39183673469388, "grad_norm": 30.864368438720703, "learning_rate": 1.3326530612244898e-06, "loss": 1.0545, "step": 23861 }, { "epoch": 97.39591836734694, "grad_norm": 36.41449737548828, "learning_rate": 1.3306122448979593e-06, "loss": 0.9505, "step": 23862 }, { "epoch": 97.4, "grad_norm": 34.02644348144531, "learning_rate": 1.3285714285714287e-06, "loss": 0.6955, "step": 23863 }, { "epoch": 97.40408163265306, "grad_norm": 21.001407623291016, "learning_rate": 1.326530612244898e-06, "loss": 0.1994, "step": 23864 }, { "epoch": 97.40816326530613, "grad_norm": 22.417327880859375, "learning_rate": 1.3244897959183674e-06, "loss": 0.2607, "step": 23865 }, { "epoch": 97.41224489795918, "grad_norm": 33.4912223815918, "learning_rate": 1.3224489795918367e-06, "loss": 0.8164, "step": 23866 }, { "epoch": 97.41632653061224, "grad_norm": 29.46295738220215, "learning_rate": 1.3204081632653061e-06, "loss": 1.1626, "step": 23867 }, { "epoch": 97.42040816326531, "grad_norm": 25.319143295288086, "learning_rate": 1.3183673469387756e-06, "loss": 0.6306, "step": 23868 }, { "epoch": 97.42448979591836, "grad_norm": 40.34266662597656, "learning_rate": 1.316326530612245e-06, "loss": 0.3978, "step": 23869 }, { "epoch": 97.42857142857143, "grad_norm": 28.790861129760742, "learning_rate": 1.3142857142857145e-06, "loss": 0.3023, "step": 23870 }, { "epoch": 97.43265306122449, "grad_norm": 27.251808166503906, "learning_rate": 1.3122448979591837e-06, "loss": 1.2788, "step": 23871 }, { "epoch": 97.43673469387755, "grad_norm": 25.412931442260742, "learning_rate": 1.3102040816326532e-06, "loss": 0.6106, "step": 23872 }, { "epoch": 97.44081632653061, "grad_norm": 36.84096145629883, "learning_rate": 1.3081632653061224e-06, "loss": 0.5998, "step": 23873 }, { "epoch": 97.44489795918368, "grad_norm": 24.640165328979492, "learning_rate": 1.306122448979592e-06, "loss": 1.2988, "step": 23874 }, { "epoch": 97.44897959183673, "grad_norm": 33.2862663269043, "learning_rate": 1.3040816326530611e-06, "loss": 0.8968, "step": 23875 }, { "epoch": 97.4530612244898, "grad_norm": 24.687213897705078, "learning_rate": 1.3020408163265308e-06, "loss": 1.053, "step": 23876 }, { "epoch": 97.45714285714286, "grad_norm": 7.58333683013916, "learning_rate": 1.3e-06, "loss": 0.0669, "step": 23877 }, { "epoch": 97.46122448979592, "grad_norm": 21.966625213623047, "learning_rate": 1.2979591836734695e-06, "loss": 1.8284, "step": 23878 }, { "epoch": 97.46530612244898, "grad_norm": 24.389720916748047, "learning_rate": 1.2959183673469387e-06, "loss": 1.8534, "step": 23879 }, { "epoch": 97.46938775510205, "grad_norm": 33.375511169433594, "learning_rate": 1.2938775510204082e-06, "loss": 0.6613, "step": 23880 }, { "epoch": 97.4734693877551, "grad_norm": 38.57282638549805, "learning_rate": 1.2918367346938777e-06, "loss": 0.4141, "step": 23881 }, { "epoch": 97.47755102040816, "grad_norm": 27.252498626708984, "learning_rate": 1.289795918367347e-06, "loss": 1.1969, "step": 23882 }, { "epoch": 97.48163265306123, "grad_norm": 3.9325430393218994, "learning_rate": 1.2877551020408166e-06, "loss": 0.0458, "step": 23883 }, { "epoch": 97.48571428571428, "grad_norm": 27.43307876586914, "learning_rate": 1.2857142857142858e-06, "loss": 1.1614, "step": 23884 }, { "epoch": 97.48979591836735, "grad_norm": 29.524084091186523, "learning_rate": 1.2836734693877553e-06, "loss": 0.4513, "step": 23885 }, { "epoch": 97.4938775510204, "grad_norm": 30.773611068725586, "learning_rate": 1.2816326530612245e-06, "loss": 0.799, "step": 23886 }, { "epoch": 97.49795918367347, "grad_norm": 7.610679626464844, "learning_rate": 1.279591836734694e-06, "loss": 0.0591, "step": 23887 }, { "epoch": 97.50204081632653, "grad_norm": 27.894588470458984, "learning_rate": 1.2775510204081632e-06, "loss": 1.0082, "step": 23888 }, { "epoch": 97.5061224489796, "grad_norm": 25.27878761291504, "learning_rate": 1.2755102040816327e-06, "loss": 0.271, "step": 23889 }, { "epoch": 97.51020408163265, "grad_norm": 24.66636848449707, "learning_rate": 1.2734693877551021e-06, "loss": 1.4476, "step": 23890 }, { "epoch": 97.51428571428572, "grad_norm": 32.48085021972656, "learning_rate": 1.2714285714285716e-06, "loss": 1.1911, "step": 23891 }, { "epoch": 97.51836734693877, "grad_norm": 4.33212423324585, "learning_rate": 1.2693877551020408e-06, "loss": 0.0623, "step": 23892 }, { "epoch": 97.52244897959184, "grad_norm": 27.532211303710938, "learning_rate": 1.2673469387755103e-06, "loss": 1.0048, "step": 23893 }, { "epoch": 97.5265306122449, "grad_norm": 25.003936767578125, "learning_rate": 1.2653061224489797e-06, "loss": 1.478, "step": 23894 }, { "epoch": 97.53061224489795, "grad_norm": 29.30447006225586, "learning_rate": 1.263265306122449e-06, "loss": 0.6826, "step": 23895 }, { "epoch": 97.53469387755102, "grad_norm": 22.288921356201172, "learning_rate": 1.2612244897959184e-06, "loss": 1.3255, "step": 23896 }, { "epoch": 97.53877551020408, "grad_norm": 25.329931259155273, "learning_rate": 1.2591836734693879e-06, "loss": 0.2344, "step": 23897 }, { "epoch": 97.54285714285714, "grad_norm": 27.286020278930664, "learning_rate": 1.2571428571428573e-06, "loss": 0.5157, "step": 23898 }, { "epoch": 97.5469387755102, "grad_norm": 36.399375915527344, "learning_rate": 1.2551020408163266e-06, "loss": 0.8292, "step": 23899 }, { "epoch": 97.55102040816327, "grad_norm": 24.829896926879883, "learning_rate": 1.253061224489796e-06, "loss": 0.3493, "step": 23900 }, { "epoch": 97.55510204081632, "grad_norm": 30.449750900268555, "learning_rate": 1.2510204081632653e-06, "loss": 0.3595, "step": 23901 }, { "epoch": 97.55918367346939, "grad_norm": 8.317889213562012, "learning_rate": 1.2489795918367347e-06, "loss": 0.1075, "step": 23902 }, { "epoch": 97.56326530612245, "grad_norm": 27.379419326782227, "learning_rate": 1.246938775510204e-06, "loss": 1.7954, "step": 23903 }, { "epoch": 97.56734693877551, "grad_norm": 31.671911239624023, "learning_rate": 1.2448979591836736e-06, "loss": 1.2712, "step": 23904 }, { "epoch": 97.57142857142857, "grad_norm": 33.631736755371094, "learning_rate": 1.2428571428571429e-06, "loss": 1.2091, "step": 23905 }, { "epoch": 97.57551020408164, "grad_norm": 29.667009353637695, "learning_rate": 1.2408163265306123e-06, "loss": 0.2084, "step": 23906 }, { "epoch": 97.57959183673469, "grad_norm": 33.88370132446289, "learning_rate": 1.2387755102040818e-06, "loss": 0.2189, "step": 23907 }, { "epoch": 97.58367346938776, "grad_norm": 28.999530792236328, "learning_rate": 1.236734693877551e-06, "loss": 0.3087, "step": 23908 }, { "epoch": 97.58775510204082, "grad_norm": 31.981477737426758, "learning_rate": 1.2346938775510205e-06, "loss": 1.4315, "step": 23909 }, { "epoch": 97.59183673469387, "grad_norm": 24.863874435424805, "learning_rate": 1.2326530612244897e-06, "loss": 0.2209, "step": 23910 }, { "epoch": 97.59591836734694, "grad_norm": 29.171268463134766, "learning_rate": 1.2306122448979592e-06, "loss": 1.0954, "step": 23911 }, { "epoch": 97.6, "grad_norm": 35.571163177490234, "learning_rate": 1.2285714285714286e-06, "loss": 1.0714, "step": 23912 }, { "epoch": 97.60408163265306, "grad_norm": 32.393035888671875, "learning_rate": 1.226530612244898e-06, "loss": 0.9923, "step": 23913 }, { "epoch": 97.60816326530612, "grad_norm": 34.056732177734375, "learning_rate": 1.2244897959183673e-06, "loss": 0.9669, "step": 23914 }, { "epoch": 97.61224489795919, "grad_norm": 25.10953140258789, "learning_rate": 1.2224489795918368e-06, "loss": 1.388, "step": 23915 }, { "epoch": 97.61632653061224, "grad_norm": 23.082786560058594, "learning_rate": 1.220408163265306e-06, "loss": 1.934, "step": 23916 }, { "epoch": 97.62040816326531, "grad_norm": 32.252296447753906, "learning_rate": 1.2183673469387755e-06, "loss": 0.6044, "step": 23917 }, { "epoch": 97.62448979591836, "grad_norm": 20.521203994750977, "learning_rate": 1.216326530612245e-06, "loss": 1.3539, "step": 23918 }, { "epoch": 97.62857142857143, "grad_norm": 29.249313354492188, "learning_rate": 1.2142857142857144e-06, "loss": 1.2487, "step": 23919 }, { "epoch": 97.63265306122449, "grad_norm": 9.91542911529541, "learning_rate": 1.2122448979591839e-06, "loss": 0.1047, "step": 23920 }, { "epoch": 97.63673469387756, "grad_norm": 32.351810455322266, "learning_rate": 1.210204081632653e-06, "loss": 0.8903, "step": 23921 }, { "epoch": 97.64081632653061, "grad_norm": 24.065433502197266, "learning_rate": 1.2081632653061225e-06, "loss": 0.2559, "step": 23922 }, { "epoch": 97.64489795918368, "grad_norm": 14.93220043182373, "learning_rate": 1.2061224489795918e-06, "loss": 0.0938, "step": 23923 }, { "epoch": 97.64897959183673, "grad_norm": 23.549068450927734, "learning_rate": 1.2040816326530612e-06, "loss": 1.2924, "step": 23924 }, { "epoch": 97.65306122448979, "grad_norm": 19.58966064453125, "learning_rate": 1.2020408163265307e-06, "loss": 1.7373, "step": 23925 }, { "epoch": 97.65714285714286, "grad_norm": 24.708232879638672, "learning_rate": 1.2000000000000002e-06, "loss": 0.604, "step": 23926 }, { "epoch": 97.66122448979591, "grad_norm": 34.97562026977539, "learning_rate": 1.1979591836734694e-06, "loss": 1.1022, "step": 23927 }, { "epoch": 97.66530612244898, "grad_norm": 22.653776168823242, "learning_rate": 1.1959183673469389e-06, "loss": 0.181, "step": 23928 }, { "epoch": 97.66938775510204, "grad_norm": 34.74650573730469, "learning_rate": 1.193877551020408e-06, "loss": 0.8274, "step": 23929 }, { "epoch": 97.6734693877551, "grad_norm": 31.35152244567871, "learning_rate": 1.1918367346938776e-06, "loss": 1.0689, "step": 23930 }, { "epoch": 97.67755102040816, "grad_norm": 33.83087158203125, "learning_rate": 1.189795918367347e-06, "loss": 0.5659, "step": 23931 }, { "epoch": 97.68163265306123, "grad_norm": 22.0670223236084, "learning_rate": 1.1877551020408165e-06, "loss": 1.7976, "step": 23932 }, { "epoch": 97.68571428571428, "grad_norm": 25.511722564697266, "learning_rate": 1.185714285714286e-06, "loss": 1.4459, "step": 23933 }, { "epoch": 97.68979591836735, "grad_norm": 24.994611740112305, "learning_rate": 1.1836734693877552e-06, "loss": 0.3004, "step": 23934 }, { "epoch": 97.6938775510204, "grad_norm": 24.075777053833008, "learning_rate": 1.1816326530612246e-06, "loss": 0.243, "step": 23935 }, { "epoch": 97.69795918367348, "grad_norm": 29.378705978393555, "learning_rate": 1.1795918367346939e-06, "loss": 0.359, "step": 23936 }, { "epoch": 97.70204081632653, "grad_norm": 30.967321395874023, "learning_rate": 1.1775510204081633e-06, "loss": 0.6749, "step": 23937 }, { "epoch": 97.70612244897958, "grad_norm": 28.612224578857422, "learning_rate": 1.1755102040816326e-06, "loss": 1.2092, "step": 23938 }, { "epoch": 97.71020408163265, "grad_norm": 20.168537139892578, "learning_rate": 1.173469387755102e-06, "loss": 1.882, "step": 23939 }, { "epoch": 97.71428571428571, "grad_norm": 33.548980712890625, "learning_rate": 1.1714285714285715e-06, "loss": 0.476, "step": 23940 }, { "epoch": 97.71836734693878, "grad_norm": 37.64643096923828, "learning_rate": 1.169387755102041e-06, "loss": 0.9262, "step": 23941 }, { "epoch": 97.72244897959183, "grad_norm": 25.624082565307617, "learning_rate": 1.1673469387755104e-06, "loss": 1.3875, "step": 23942 }, { "epoch": 97.7265306122449, "grad_norm": 34.978912353515625, "learning_rate": 1.1653061224489796e-06, "loss": 0.5851, "step": 23943 }, { "epoch": 97.73061224489796, "grad_norm": 25.230928421020508, "learning_rate": 1.163265306122449e-06, "loss": 1.312, "step": 23944 }, { "epoch": 97.73469387755102, "grad_norm": 31.77243423461914, "learning_rate": 1.1612244897959183e-06, "loss": 0.6797, "step": 23945 }, { "epoch": 97.73877551020408, "grad_norm": 26.467016220092773, "learning_rate": 1.1591836734693878e-06, "loss": 0.5964, "step": 23946 }, { "epoch": 97.74285714285715, "grad_norm": 21.348405838012695, "learning_rate": 1.1571428571428572e-06, "loss": 0.1865, "step": 23947 }, { "epoch": 97.7469387755102, "grad_norm": 25.20167350769043, "learning_rate": 1.1551020408163267e-06, "loss": 0.4018, "step": 23948 }, { "epoch": 97.75102040816327, "grad_norm": 29.425777435302734, "learning_rate": 1.153061224489796e-06, "loss": 0.4354, "step": 23949 }, { "epoch": 97.75510204081633, "grad_norm": 26.749847412109375, "learning_rate": 1.1510204081632654e-06, "loss": 0.2944, "step": 23950 }, { "epoch": 97.7591836734694, "grad_norm": 42.84140396118164, "learning_rate": 1.1489795918367346e-06, "loss": 0.7751, "step": 23951 }, { "epoch": 97.76326530612245, "grad_norm": 35.60497283935547, "learning_rate": 1.146938775510204e-06, "loss": 0.5037, "step": 23952 }, { "epoch": 97.7673469387755, "grad_norm": 25.173303604125977, "learning_rate": 1.1448979591836735e-06, "loss": 0.2585, "step": 23953 }, { "epoch": 97.77142857142857, "grad_norm": 25.871091842651367, "learning_rate": 1.142857142857143e-06, "loss": 1.0569, "step": 23954 }, { "epoch": 97.77551020408163, "grad_norm": 8.477828979492188, "learning_rate": 1.1408163265306124e-06, "loss": 0.105, "step": 23955 }, { "epoch": 97.7795918367347, "grad_norm": 30.438318252563477, "learning_rate": 1.1387755102040817e-06, "loss": 0.4577, "step": 23956 }, { "epoch": 97.78367346938775, "grad_norm": 23.71249771118164, "learning_rate": 1.1367346938775511e-06, "loss": 2.1339, "step": 23957 }, { "epoch": 97.78775510204082, "grad_norm": 31.67525863647461, "learning_rate": 1.1346938775510204e-06, "loss": 0.7207, "step": 23958 }, { "epoch": 97.79183673469387, "grad_norm": 36.02322769165039, "learning_rate": 1.1326530612244898e-06, "loss": 0.8063, "step": 23959 }, { "epoch": 97.79591836734694, "grad_norm": 30.28755760192871, "learning_rate": 1.1306122448979593e-06, "loss": 1.0067, "step": 23960 }, { "epoch": 97.8, "grad_norm": 37.66128158569336, "learning_rate": 1.1285714285714287e-06, "loss": 0.9019, "step": 23961 }, { "epoch": 97.80408163265307, "grad_norm": 24.739965438842773, "learning_rate": 1.126530612244898e-06, "loss": 0.2062, "step": 23962 }, { "epoch": 97.80816326530612, "grad_norm": 38.31757354736328, "learning_rate": 1.1244897959183674e-06, "loss": 0.2968, "step": 23963 }, { "epoch": 97.81224489795919, "grad_norm": 25.404022216796875, "learning_rate": 1.1224489795918367e-06, "loss": 1.3679, "step": 23964 }, { "epoch": 97.81632653061224, "grad_norm": 26.015216827392578, "learning_rate": 1.1204081632653061e-06, "loss": 0.1447, "step": 23965 }, { "epoch": 97.82040816326531, "grad_norm": 35.2050666809082, "learning_rate": 1.1183673469387754e-06, "loss": 0.68, "step": 23966 }, { "epoch": 97.82448979591837, "grad_norm": 32.232269287109375, "learning_rate": 1.1163265306122448e-06, "loss": 0.521, "step": 23967 }, { "epoch": 97.82857142857142, "grad_norm": 39.04723358154297, "learning_rate": 1.1142857142857145e-06, "loss": 0.5395, "step": 23968 }, { "epoch": 97.83265306122449, "grad_norm": 22.546634674072266, "learning_rate": 1.1122448979591838e-06, "loss": 0.1606, "step": 23969 }, { "epoch": 97.83673469387755, "grad_norm": 33.46396255493164, "learning_rate": 1.1102040816326532e-06, "loss": 0.7227, "step": 23970 }, { "epoch": 97.84081632653061, "grad_norm": 24.290180206298828, "learning_rate": 1.1081632653061224e-06, "loss": 1.3369, "step": 23971 }, { "epoch": 97.84489795918367, "grad_norm": 24.455928802490234, "learning_rate": 1.106122448979592e-06, "loss": 1.149, "step": 23972 }, { "epoch": 97.84897959183674, "grad_norm": 36.419517517089844, "learning_rate": 1.1040816326530611e-06, "loss": 1.0424, "step": 23973 }, { "epoch": 97.85306122448979, "grad_norm": 20.947542190551758, "learning_rate": 1.1020408163265306e-06, "loss": 0.2417, "step": 23974 }, { "epoch": 97.85714285714286, "grad_norm": 14.179450988769531, "learning_rate": 1.1e-06, "loss": 0.1023, "step": 23975 }, { "epoch": 97.86122448979592, "grad_norm": 17.430986404418945, "learning_rate": 1.0979591836734695e-06, "loss": 2.4628, "step": 23976 }, { "epoch": 97.86530612244898, "grad_norm": 37.48475646972656, "learning_rate": 1.0959183673469388e-06, "loss": 1.0744, "step": 23977 }, { "epoch": 97.86938775510204, "grad_norm": 30.08986473083496, "learning_rate": 1.0938775510204082e-06, "loss": 1.1685, "step": 23978 }, { "epoch": 97.87346938775511, "grad_norm": 23.230710983276367, "learning_rate": 1.0918367346938777e-06, "loss": 1.5592, "step": 23979 }, { "epoch": 97.87755102040816, "grad_norm": 26.191505432128906, "learning_rate": 1.089795918367347e-06, "loss": 1.444, "step": 23980 }, { "epoch": 97.88163265306123, "grad_norm": 36.00209045410156, "learning_rate": 1.0877551020408164e-06, "loss": 0.892, "step": 23981 }, { "epoch": 97.88571428571429, "grad_norm": 39.633888244628906, "learning_rate": 1.0857142857142858e-06, "loss": 0.4136, "step": 23982 }, { "epoch": 97.88979591836734, "grad_norm": 18.824405670166016, "learning_rate": 1.0836734693877553e-06, "loss": 2.2174, "step": 23983 }, { "epoch": 97.89387755102041, "grad_norm": 29.83477020263672, "learning_rate": 1.0816326530612245e-06, "loss": 0.4412, "step": 23984 }, { "epoch": 97.89795918367346, "grad_norm": 26.715726852416992, "learning_rate": 1.079591836734694e-06, "loss": 1.326, "step": 23985 }, { "epoch": 97.90204081632653, "grad_norm": 32.985321044921875, "learning_rate": 1.0775510204081632e-06, "loss": 0.2155, "step": 23986 }, { "epoch": 97.90612244897959, "grad_norm": 35.06239318847656, "learning_rate": 1.0755102040816327e-06, "loss": 0.6007, "step": 23987 }, { "epoch": 97.91020408163266, "grad_norm": 17.053768157958984, "learning_rate": 1.0734693877551021e-06, "loss": 0.2173, "step": 23988 }, { "epoch": 97.91428571428571, "grad_norm": 28.874858856201172, "learning_rate": 1.0714285714285716e-06, "loss": 1.348, "step": 23989 }, { "epoch": 97.91836734693878, "grad_norm": 27.500478744506836, "learning_rate": 1.0693877551020408e-06, "loss": 0.5763, "step": 23990 }, { "epoch": 97.92244897959183, "grad_norm": 28.673839569091797, "learning_rate": 1.0673469387755103e-06, "loss": 1.0133, "step": 23991 }, { "epoch": 97.9265306122449, "grad_norm": 30.582834243774414, "learning_rate": 1.0653061224489797e-06, "loss": 0.279, "step": 23992 }, { "epoch": 97.93061224489796, "grad_norm": 39.218936920166016, "learning_rate": 1.063265306122449e-06, "loss": 0.6986, "step": 23993 }, { "epoch": 97.93469387755103, "grad_norm": 27.706804275512695, "learning_rate": 1.0612244897959184e-06, "loss": 1.3196, "step": 23994 }, { "epoch": 97.93877551020408, "grad_norm": 50.66884231567383, "learning_rate": 1.0591836734693877e-06, "loss": 0.7304, "step": 23995 }, { "epoch": 97.94285714285714, "grad_norm": 23.98999786376953, "learning_rate": 1.0571428571428573e-06, "loss": 1.2951, "step": 23996 }, { "epoch": 97.9469387755102, "grad_norm": 27.828311920166016, "learning_rate": 1.0551020408163266e-06, "loss": 1.4481, "step": 23997 }, { "epoch": 97.95102040816326, "grad_norm": 27.305917739868164, "learning_rate": 1.053061224489796e-06, "loss": 1.319, "step": 23998 }, { "epoch": 97.95510204081633, "grad_norm": 28.51175880432129, "learning_rate": 1.0510204081632653e-06, "loss": 0.6592, "step": 23999 }, { "epoch": 97.95918367346938, "grad_norm": 28.528295516967773, "learning_rate": 1.0489795918367347e-06, "loss": 1.0855, "step": 24000 }, { "epoch": 97.96326530612245, "grad_norm": 20.84547233581543, "learning_rate": 1.046938775510204e-06, "loss": 1.2285, "step": 24001 }, { "epoch": 97.9673469387755, "grad_norm": 32.8227653503418, "learning_rate": 1.0448979591836734e-06, "loss": 0.9482, "step": 24002 }, { "epoch": 97.97142857142858, "grad_norm": 34.12478256225586, "learning_rate": 1.0428571428571429e-06, "loss": 0.4844, "step": 24003 }, { "epoch": 97.97551020408163, "grad_norm": 22.30643081665039, "learning_rate": 1.0408163265306123e-06, "loss": 1.8205, "step": 24004 }, { "epoch": 97.9795918367347, "grad_norm": 29.16041374206543, "learning_rate": 1.0387755102040818e-06, "loss": 0.4026, "step": 24005 }, { "epoch": 97.98367346938775, "grad_norm": 39.02433776855469, "learning_rate": 1.036734693877551e-06, "loss": 0.7887, "step": 24006 }, { "epoch": 97.98775510204082, "grad_norm": 30.746959686279297, "learning_rate": 1.0346938775510205e-06, "loss": 0.3876, "step": 24007 }, { "epoch": 97.99183673469388, "grad_norm": 29.525428771972656, "learning_rate": 1.0326530612244897e-06, "loss": 1.6362, "step": 24008 }, { "epoch": 97.99591836734695, "grad_norm": 28.652917861938477, "learning_rate": 1.0306122448979592e-06, "loss": 0.2291, "step": 24009 }, { "epoch": 98.0, "grad_norm": 8.317858695983887, "learning_rate": 1.0285714285714286e-06, "loss": 2.9356, "step": 24010 }, { "epoch": 98.00408163265305, "grad_norm": 29.778087615966797, "learning_rate": 1.026530612244898e-06, "loss": 0.7869, "step": 24011 }, { "epoch": 98.00816326530612, "grad_norm": 33.70808029174805, "learning_rate": 1.0244897959183673e-06, "loss": 0.3141, "step": 24012 }, { "epoch": 98.01224489795918, "grad_norm": 23.024234771728516, "learning_rate": 1.0224489795918368e-06, "loss": 1.822, "step": 24013 }, { "epoch": 98.01632653061225, "grad_norm": 31.18609619140625, "learning_rate": 1.020408163265306e-06, "loss": 1.0212, "step": 24014 }, { "epoch": 98.0204081632653, "grad_norm": 20.64497184753418, "learning_rate": 1.0183673469387755e-06, "loss": 1.8325, "step": 24015 }, { "epoch": 98.02448979591837, "grad_norm": 29.68761444091797, "learning_rate": 1.016326530612245e-06, "loss": 1.402, "step": 24016 }, { "epoch": 98.02857142857142, "grad_norm": 38.772579193115234, "learning_rate": 1.0142857142857144e-06, "loss": 1.0663, "step": 24017 }, { "epoch": 98.0326530612245, "grad_norm": 35.12569046020508, "learning_rate": 1.0122448979591839e-06, "loss": 0.2148, "step": 24018 }, { "epoch": 98.03673469387755, "grad_norm": 10.790488243103027, "learning_rate": 1.010204081632653e-06, "loss": 0.0713, "step": 24019 }, { "epoch": 98.04081632653062, "grad_norm": 24.0100154876709, "learning_rate": 1.0081632653061226e-06, "loss": 0.3407, "step": 24020 }, { "epoch": 98.04489795918367, "grad_norm": 23.00543212890625, "learning_rate": 1.0061224489795918e-06, "loss": 1.4506, "step": 24021 }, { "epoch": 98.04897959183674, "grad_norm": 28.80768585205078, "learning_rate": 1.0040816326530613e-06, "loss": 1.4431, "step": 24022 }, { "epoch": 98.0530612244898, "grad_norm": 34.57950210571289, "learning_rate": 1.0020408163265305e-06, "loss": 0.733, "step": 24023 }, { "epoch": 98.05714285714286, "grad_norm": 6.926464557647705, "learning_rate": 1.0000000000000002e-06, "loss": 0.1059, "step": 24024 }, { "epoch": 98.06122448979592, "grad_norm": 29.025537490844727, "learning_rate": 9.979591836734694e-07, "loss": 1.299, "step": 24025 }, { "epoch": 98.06530612244897, "grad_norm": 28.27193260192871, "learning_rate": 9.959183673469389e-07, "loss": 1.0982, "step": 24026 }, { "epoch": 98.06938775510204, "grad_norm": 29.216285705566406, "learning_rate": 9.938775510204083e-07, "loss": 0.8668, "step": 24027 }, { "epoch": 98.0734693877551, "grad_norm": 28.236862182617188, "learning_rate": 9.918367346938776e-07, "loss": 1.2166, "step": 24028 }, { "epoch": 98.07755102040817, "grad_norm": 23.3875675201416, "learning_rate": 9.89795918367347e-07, "loss": 0.2581, "step": 24029 }, { "epoch": 98.08163265306122, "grad_norm": 28.74781036376953, "learning_rate": 9.877551020408163e-07, "loss": 0.2857, "step": 24030 }, { "epoch": 98.08571428571429, "grad_norm": 33.34561538696289, "learning_rate": 9.857142857142857e-07, "loss": 0.4761, "step": 24031 }, { "epoch": 98.08979591836734, "grad_norm": 6.87099027633667, "learning_rate": 9.836734693877552e-07, "loss": 0.071, "step": 24032 }, { "epoch": 98.09387755102041, "grad_norm": 29.73465919494629, "learning_rate": 9.816326530612246e-07, "loss": 1.0704, "step": 24033 }, { "epoch": 98.09795918367347, "grad_norm": 45.40984344482422, "learning_rate": 9.795918367346939e-07, "loss": 0.9875, "step": 24034 }, { "epoch": 98.10204081632654, "grad_norm": 24.42127227783203, "learning_rate": 9.775510204081633e-07, "loss": 1.4569, "step": 24035 }, { "epoch": 98.10612244897959, "grad_norm": 26.088031768798828, "learning_rate": 9.755102040816326e-07, "loss": 1.5084, "step": 24036 }, { "epoch": 98.11020408163266, "grad_norm": 18.050373077392578, "learning_rate": 9.73469387755102e-07, "loss": 0.2929, "step": 24037 }, { "epoch": 98.11428571428571, "grad_norm": 24.742937088012695, "learning_rate": 9.714285714285715e-07, "loss": 0.9722, "step": 24038 }, { "epoch": 98.11836734693877, "grad_norm": 32.35130310058594, "learning_rate": 9.69387755102041e-07, "loss": 1.0337, "step": 24039 }, { "epoch": 98.12244897959184, "grad_norm": 27.094377517700195, "learning_rate": 9.673469387755104e-07, "loss": 0.9227, "step": 24040 }, { "epoch": 98.12653061224489, "grad_norm": 18.87547492980957, "learning_rate": 9.653061224489796e-07, "loss": 2.1928, "step": 24041 }, { "epoch": 98.13061224489796, "grad_norm": 20.123435974121094, "learning_rate": 9.63265306122449e-07, "loss": 1.8972, "step": 24042 }, { "epoch": 98.13469387755102, "grad_norm": 25.676807403564453, "learning_rate": 9.612244897959183e-07, "loss": 1.213, "step": 24043 }, { "epoch": 98.13877551020408, "grad_norm": 38.168949127197266, "learning_rate": 9.591836734693878e-07, "loss": 0.4462, "step": 24044 }, { "epoch": 98.14285714285714, "grad_norm": 28.980152130126953, "learning_rate": 9.571428571428572e-07, "loss": 0.4636, "step": 24045 }, { "epoch": 98.14693877551021, "grad_norm": 29.437454223632812, "learning_rate": 9.551020408163267e-07, "loss": 0.8095, "step": 24046 }, { "epoch": 98.15102040816326, "grad_norm": 18.257598876953125, "learning_rate": 9.530612244897959e-07, "loss": 2.4681, "step": 24047 }, { "epoch": 98.15510204081633, "grad_norm": 25.26431655883789, "learning_rate": 9.510204081632654e-07, "loss": 1.3071, "step": 24048 }, { "epoch": 98.15918367346939, "grad_norm": 27.015825271606445, "learning_rate": 9.489795918367346e-07, "loss": 1.2903, "step": 24049 }, { "epoch": 98.16326530612245, "grad_norm": 27.784530639648438, "learning_rate": 9.469387755102041e-07, "loss": 1.1539, "step": 24050 }, { "epoch": 98.16734693877551, "grad_norm": 29.88630485534668, "learning_rate": 9.448979591836734e-07, "loss": 0.8534, "step": 24051 }, { "epoch": 98.17142857142858, "grad_norm": 29.22362518310547, "learning_rate": 9.428571428571429e-07, "loss": 0.639, "step": 24052 }, { "epoch": 98.17551020408163, "grad_norm": 21.881540298461914, "learning_rate": 9.408163265306123e-07, "loss": 1.5344, "step": 24053 }, { "epoch": 98.17959183673469, "grad_norm": 22.277729034423828, "learning_rate": 9.387755102040817e-07, "loss": 0.1852, "step": 24054 }, { "epoch": 98.18367346938776, "grad_norm": 30.303598403930664, "learning_rate": 9.367346938775511e-07, "loss": 0.4468, "step": 24055 }, { "epoch": 98.18775510204081, "grad_norm": 17.456815719604492, "learning_rate": 9.346938775510204e-07, "loss": 0.1735, "step": 24056 }, { "epoch": 98.19183673469388, "grad_norm": 30.737668991088867, "learning_rate": 9.326530612244898e-07, "loss": 0.6523, "step": 24057 }, { "epoch": 98.19591836734693, "grad_norm": 26.579198837280273, "learning_rate": 9.306122448979592e-07, "loss": 0.1637, "step": 24058 }, { "epoch": 98.2, "grad_norm": 25.925495147705078, "learning_rate": 9.285714285714287e-07, "loss": 0.5277, "step": 24059 }, { "epoch": 98.20408163265306, "grad_norm": 31.953269958496094, "learning_rate": 9.265306122448979e-07, "loss": 0.8998, "step": 24060 }, { "epoch": 98.20816326530613, "grad_norm": 23.858495712280273, "learning_rate": 9.244897959183675e-07, "loss": 0.2392, "step": 24061 }, { "epoch": 98.21224489795918, "grad_norm": 26.544857025146484, "learning_rate": 9.224489795918367e-07, "loss": 1.1674, "step": 24062 }, { "epoch": 98.21632653061225, "grad_norm": 6.146634101867676, "learning_rate": 9.204081632653062e-07, "loss": 0.0626, "step": 24063 }, { "epoch": 98.2204081632653, "grad_norm": 22.373693466186523, "learning_rate": 9.183673469387756e-07, "loss": 1.8347, "step": 24064 }, { "epoch": 98.22448979591837, "grad_norm": 34.514739990234375, "learning_rate": 9.16326530612245e-07, "loss": 0.5884, "step": 24065 }, { "epoch": 98.22857142857143, "grad_norm": 29.89881706237793, "learning_rate": 9.142857142857144e-07, "loss": 0.2709, "step": 24066 }, { "epoch": 98.2326530612245, "grad_norm": 24.4395694732666, "learning_rate": 9.122448979591837e-07, "loss": 0.5621, "step": 24067 }, { "epoch": 98.23673469387755, "grad_norm": 25.438552856445312, "learning_rate": 9.102040816326531e-07, "loss": 1.2944, "step": 24068 }, { "epoch": 98.2408163265306, "grad_norm": 31.432964324951172, "learning_rate": 9.081632653061225e-07, "loss": 0.9932, "step": 24069 }, { "epoch": 98.24489795918367, "grad_norm": 8.523886680603027, "learning_rate": 9.061224489795919e-07, "loss": 2.9335, "step": 24070 }, { "epoch": 98.24897959183673, "grad_norm": 27.551807403564453, "learning_rate": 9.040816326530613e-07, "loss": 1.4593, "step": 24071 }, { "epoch": 98.2530612244898, "grad_norm": 31.04717254638672, "learning_rate": 9.020408163265307e-07, "loss": 0.8924, "step": 24072 }, { "epoch": 98.25714285714285, "grad_norm": 22.265403747558594, "learning_rate": 9e-07, "loss": 0.2366, "step": 24073 }, { "epoch": 98.26122448979592, "grad_norm": 24.959379196166992, "learning_rate": 8.979591836734694e-07, "loss": 1.8024, "step": 24074 }, { "epoch": 98.26530612244898, "grad_norm": 35.73457336425781, "learning_rate": 8.959183673469388e-07, "loss": 0.9826, "step": 24075 }, { "epoch": 98.26938775510204, "grad_norm": 25.939380645751953, "learning_rate": 8.938775510204082e-07, "loss": 0.2314, "step": 24076 }, { "epoch": 98.2734693877551, "grad_norm": 9.400582313537598, "learning_rate": 8.918367346938777e-07, "loss": 0.0752, "step": 24077 }, { "epoch": 98.27755102040817, "grad_norm": 27.451772689819336, "learning_rate": 8.897959183673469e-07, "loss": 0.3213, "step": 24078 }, { "epoch": 98.28163265306122, "grad_norm": 30.521059036254883, "learning_rate": 8.877551020408165e-07, "loss": 1.1628, "step": 24079 }, { "epoch": 98.28571428571429, "grad_norm": 38.95825958251953, "learning_rate": 8.857142857142857e-07, "loss": 1.2549, "step": 24080 }, { "epoch": 98.28979591836735, "grad_norm": 24.11781120300293, "learning_rate": 8.836734693877552e-07, "loss": 1.7567, "step": 24081 }, { "epoch": 98.29387755102042, "grad_norm": 11.588539123535156, "learning_rate": 8.816326530612245e-07, "loss": 0.1136, "step": 24082 }, { "epoch": 98.29795918367347, "grad_norm": 28.022781372070312, "learning_rate": 8.79591836734694e-07, "loss": 0.2885, "step": 24083 }, { "epoch": 98.30204081632652, "grad_norm": 25.127580642700195, "learning_rate": 8.775510204081632e-07, "loss": 0.2868, "step": 24084 }, { "epoch": 98.3061224489796, "grad_norm": 27.409122467041016, "learning_rate": 8.755102040816327e-07, "loss": 0.6565, "step": 24085 }, { "epoch": 98.31020408163265, "grad_norm": 33.37813186645508, "learning_rate": 8.73469387755102e-07, "loss": 0.7966, "step": 24086 }, { "epoch": 98.31428571428572, "grad_norm": 26.082870483398438, "learning_rate": 8.714285714285715e-07, "loss": 0.2373, "step": 24087 }, { "epoch": 98.31836734693877, "grad_norm": 15.202454566955566, "learning_rate": 8.693877551020409e-07, "loss": 2.1343, "step": 24088 }, { "epoch": 98.32244897959184, "grad_norm": 25.795011520385742, "learning_rate": 8.673469387755103e-07, "loss": 1.365, "step": 24089 }, { "epoch": 98.3265306122449, "grad_norm": 27.589597702026367, "learning_rate": 8.653061224489797e-07, "loss": 0.5242, "step": 24090 }, { "epoch": 98.33061224489796, "grad_norm": 27.33929443359375, "learning_rate": 8.63265306122449e-07, "loss": 1.2686, "step": 24091 }, { "epoch": 98.33469387755102, "grad_norm": 43.30397415161133, "learning_rate": 8.612244897959184e-07, "loss": 0.6094, "step": 24092 }, { "epoch": 98.33877551020409, "grad_norm": 23.689027786254883, "learning_rate": 8.591836734693878e-07, "loss": 1.255, "step": 24093 }, { "epoch": 98.34285714285714, "grad_norm": 1.205008625984192, "learning_rate": 8.571428571428572e-07, "loss": 0.0209, "step": 24094 }, { "epoch": 98.34693877551021, "grad_norm": 22.652952194213867, "learning_rate": 8.551020408163265e-07, "loss": 0.2513, "step": 24095 }, { "epoch": 98.35102040816327, "grad_norm": 26.882177352905273, "learning_rate": 8.530612244897959e-07, "loss": 1.4243, "step": 24096 }, { "epoch": 98.35510204081632, "grad_norm": 24.42762565612793, "learning_rate": 8.510204081632653e-07, "loss": 1.2803, "step": 24097 }, { "epoch": 98.35918367346939, "grad_norm": 11.850268363952637, "learning_rate": 8.489795918367347e-07, "loss": 0.0817, "step": 24098 }, { "epoch": 98.36326530612244, "grad_norm": 28.149457931518555, "learning_rate": 8.469387755102041e-07, "loss": 0.8992, "step": 24099 }, { "epoch": 98.36734693877551, "grad_norm": 24.855749130249023, "learning_rate": 8.448979591836735e-07, "loss": 1.3954, "step": 24100 }, { "epoch": 98.37142857142857, "grad_norm": 29.700777053833008, "learning_rate": 8.42857142857143e-07, "loss": 1.0059, "step": 24101 }, { "epoch": 98.37551020408164, "grad_norm": 15.380816459655762, "learning_rate": 8.408163265306122e-07, "loss": 0.2005, "step": 24102 }, { "epoch": 98.37959183673469, "grad_norm": 28.895366668701172, "learning_rate": 8.387755102040817e-07, "loss": 1.4213, "step": 24103 }, { "epoch": 98.38367346938776, "grad_norm": 22.329879760742188, "learning_rate": 8.36734693877551e-07, "loss": 1.1529, "step": 24104 }, { "epoch": 98.38775510204081, "grad_norm": 30.61821746826172, "learning_rate": 8.346938775510205e-07, "loss": 0.6367, "step": 24105 }, { "epoch": 98.39183673469388, "grad_norm": 26.893264770507812, "learning_rate": 8.326530612244897e-07, "loss": 0.2893, "step": 24106 }, { "epoch": 98.39591836734694, "grad_norm": 29.37789535522461, "learning_rate": 8.306122448979593e-07, "loss": 0.2659, "step": 24107 }, { "epoch": 98.4, "grad_norm": 34.49418640136719, "learning_rate": 8.285714285714285e-07, "loss": 1.3329, "step": 24108 }, { "epoch": 98.40408163265306, "grad_norm": 42.25801086425781, "learning_rate": 8.26530612244898e-07, "loss": 1.0799, "step": 24109 }, { "epoch": 98.40816326530613, "grad_norm": 22.752803802490234, "learning_rate": 8.244897959183674e-07, "loss": 1.328, "step": 24110 }, { "epoch": 98.41224489795918, "grad_norm": 23.804847717285156, "learning_rate": 8.224489795918368e-07, "loss": 1.9591, "step": 24111 }, { "epoch": 98.41632653061224, "grad_norm": 30.452146530151367, "learning_rate": 8.20408163265306e-07, "loss": 0.4284, "step": 24112 }, { "epoch": 98.42040816326531, "grad_norm": 26.930171966552734, "learning_rate": 8.183673469387755e-07, "loss": 1.3437, "step": 24113 }, { "epoch": 98.42448979591836, "grad_norm": 19.18902015686035, "learning_rate": 8.16326530612245e-07, "loss": 1.9678, "step": 24114 }, { "epoch": 98.42857142857143, "grad_norm": 33.126922607421875, "learning_rate": 8.142857142857143e-07, "loss": 1.2035, "step": 24115 }, { "epoch": 98.43265306122449, "grad_norm": 28.46459197998047, "learning_rate": 8.122448979591838e-07, "loss": 1.2366, "step": 24116 }, { "epoch": 98.43673469387755, "grad_norm": 33.05431365966797, "learning_rate": 8.102040816326531e-07, "loss": 0.8035, "step": 24117 }, { "epoch": 98.44081632653061, "grad_norm": 32.0777702331543, "learning_rate": 8.081632653061226e-07, "loss": 0.4618, "step": 24118 }, { "epoch": 98.44489795918368, "grad_norm": 14.898821830749512, "learning_rate": 8.061224489795918e-07, "loss": 0.0873, "step": 24119 }, { "epoch": 98.44897959183673, "grad_norm": 26.96802520751953, "learning_rate": 8.040816326530613e-07, "loss": 1.3414, "step": 24120 }, { "epoch": 98.4530612244898, "grad_norm": 30.159631729125977, "learning_rate": 8.020408163265306e-07, "loss": 1.1822, "step": 24121 }, { "epoch": 98.45714285714286, "grad_norm": 28.674421310424805, "learning_rate": 8.000000000000001e-07, "loss": 1.0685, "step": 24122 }, { "epoch": 98.46122448979592, "grad_norm": 35.08232498168945, "learning_rate": 7.979591836734693e-07, "loss": 0.7025, "step": 24123 }, { "epoch": 98.46530612244898, "grad_norm": 19.008989334106445, "learning_rate": 7.959183673469388e-07, "loss": 1.8651, "step": 24124 }, { "epoch": 98.46938775510205, "grad_norm": 28.271163940429688, "learning_rate": 7.938775510204083e-07, "loss": 1.1749, "step": 24125 }, { "epoch": 98.4734693877551, "grad_norm": 30.02206802368164, "learning_rate": 7.918367346938776e-07, "loss": 1.1273, "step": 24126 }, { "epoch": 98.47755102040816, "grad_norm": 34.90517807006836, "learning_rate": 7.89795918367347e-07, "loss": 1.0844, "step": 24127 }, { "epoch": 98.48163265306123, "grad_norm": 20.47736167907715, "learning_rate": 7.877551020408164e-07, "loss": 0.1607, "step": 24128 }, { "epoch": 98.48571428571428, "grad_norm": 19.397260665893555, "learning_rate": 7.857142857142858e-07, "loss": 0.2191, "step": 24129 }, { "epoch": 98.48979591836735, "grad_norm": 29.431285858154297, "learning_rate": 7.836734693877551e-07, "loss": 0.5764, "step": 24130 }, { "epoch": 98.4938775510204, "grad_norm": 23.39327049255371, "learning_rate": 7.816326530612245e-07, "loss": 1.7408, "step": 24131 }, { "epoch": 98.49795918367347, "grad_norm": 24.57107162475586, "learning_rate": 7.79591836734694e-07, "loss": 0.3082, "step": 24132 }, { "epoch": 98.50204081632653, "grad_norm": 35.15668869018555, "learning_rate": 7.775510204081633e-07, "loss": 0.78, "step": 24133 }, { "epoch": 98.5061224489796, "grad_norm": 22.428071975708008, "learning_rate": 7.755102040816327e-07, "loss": 1.7427, "step": 24134 }, { "epoch": 98.51020408163265, "grad_norm": 34.0852165222168, "learning_rate": 7.734693877551021e-07, "loss": 1.0809, "step": 24135 }, { "epoch": 98.51428571428572, "grad_norm": 22.856292724609375, "learning_rate": 7.714285714285715e-07, "loss": 0.1938, "step": 24136 }, { "epoch": 98.51836734693877, "grad_norm": 26.153432846069336, "learning_rate": 7.693877551020408e-07, "loss": 1.2909, "step": 24137 }, { "epoch": 98.52244897959184, "grad_norm": 25.567956924438477, "learning_rate": 7.673469387755102e-07, "loss": 1.2519, "step": 24138 }, { "epoch": 98.5265306122449, "grad_norm": 22.80805206298828, "learning_rate": 7.653061224489796e-07, "loss": 1.6798, "step": 24139 }, { "epoch": 98.53061224489795, "grad_norm": 30.01741600036621, "learning_rate": 7.63265306122449e-07, "loss": 0.41, "step": 24140 }, { "epoch": 98.53469387755102, "grad_norm": 34.67870330810547, "learning_rate": 7.612244897959183e-07, "loss": 0.5504, "step": 24141 }, { "epoch": 98.53877551020408, "grad_norm": 39.27817916870117, "learning_rate": 7.591836734693878e-07, "loss": 0.8965, "step": 24142 }, { "epoch": 98.54285714285714, "grad_norm": 24.693784713745117, "learning_rate": 7.571428571428572e-07, "loss": 2.1436, "step": 24143 }, { "epoch": 98.5469387755102, "grad_norm": 30.472652435302734, "learning_rate": 7.551020408163266e-07, "loss": 0.5232, "step": 24144 }, { "epoch": 98.55102040816327, "grad_norm": 32.54728317260742, "learning_rate": 7.530612244897959e-07, "loss": 0.5945, "step": 24145 }, { "epoch": 98.55510204081632, "grad_norm": 20.14241600036621, "learning_rate": 7.510204081632654e-07, "loss": 1.8665, "step": 24146 }, { "epoch": 98.55918367346939, "grad_norm": 25.097524642944336, "learning_rate": 7.489795918367347e-07, "loss": 0.2511, "step": 24147 }, { "epoch": 98.56326530612245, "grad_norm": 25.663415908813477, "learning_rate": 7.469387755102041e-07, "loss": 1.1394, "step": 24148 }, { "epoch": 98.56734693877551, "grad_norm": 25.47911834716797, "learning_rate": 7.448979591836736e-07, "loss": 0.6201, "step": 24149 }, { "epoch": 98.57142857142857, "grad_norm": 29.665103912353516, "learning_rate": 7.428571428571429e-07, "loss": 0.7606, "step": 24150 }, { "epoch": 98.57551020408164, "grad_norm": 10.797529220581055, "learning_rate": 7.408163265306122e-07, "loss": 0.1214, "step": 24151 }, { "epoch": 98.57959183673469, "grad_norm": 29.101947784423828, "learning_rate": 7.387755102040816e-07, "loss": 0.4615, "step": 24152 }, { "epoch": 98.58367346938776, "grad_norm": 34.59245300292969, "learning_rate": 7.36734693877551e-07, "loss": 1.0974, "step": 24153 }, { "epoch": 98.58775510204082, "grad_norm": 20.230348587036133, "learning_rate": 7.346938775510204e-07, "loss": 0.2518, "step": 24154 }, { "epoch": 98.59183673469387, "grad_norm": 27.31275177001953, "learning_rate": 7.326530612244899e-07, "loss": 1.642, "step": 24155 }, { "epoch": 98.59591836734694, "grad_norm": 22.871742248535156, "learning_rate": 7.306122448979592e-07, "loss": 0.2408, "step": 24156 }, { "epoch": 98.6, "grad_norm": 25.568532943725586, "learning_rate": 7.285714285714287e-07, "loss": 1.046, "step": 24157 }, { "epoch": 98.60408163265306, "grad_norm": 29.005916595458984, "learning_rate": 7.26530612244898e-07, "loss": 0.7252, "step": 24158 }, { "epoch": 98.60816326530612, "grad_norm": 32.94391632080078, "learning_rate": 7.244897959183674e-07, "loss": 0.7017, "step": 24159 }, { "epoch": 98.61224489795919, "grad_norm": 34.07007598876953, "learning_rate": 7.224489795918368e-07, "loss": 0.5132, "step": 24160 }, { "epoch": 98.61632653061224, "grad_norm": 27.581262588500977, "learning_rate": 7.204081632653062e-07, "loss": 0.3002, "step": 24161 }, { "epoch": 98.62040816326531, "grad_norm": 29.697216033935547, "learning_rate": 7.183673469387755e-07, "loss": 0.22, "step": 24162 }, { "epoch": 98.62448979591836, "grad_norm": 20.609453201293945, "learning_rate": 7.16326530612245e-07, "loss": 0.1682, "step": 24163 }, { "epoch": 98.62857142857143, "grad_norm": 20.249574661254883, "learning_rate": 7.142857142857143e-07, "loss": 1.3525, "step": 24164 }, { "epoch": 98.63265306122449, "grad_norm": 15.043668746948242, "learning_rate": 7.122448979591837e-07, "loss": 0.1675, "step": 24165 }, { "epoch": 98.63673469387756, "grad_norm": 30.005807876586914, "learning_rate": 7.10204081632653e-07, "loss": 1.1655, "step": 24166 }, { "epoch": 98.64081632653061, "grad_norm": 36.50516128540039, "learning_rate": 7.081632653061226e-07, "loss": 0.7837, "step": 24167 }, { "epoch": 98.64489795918368, "grad_norm": 39.74156188964844, "learning_rate": 7.061224489795919e-07, "loss": 0.5, "step": 24168 }, { "epoch": 98.64897959183673, "grad_norm": 27.372272491455078, "learning_rate": 7.040816326530613e-07, "loss": 1.2313, "step": 24169 }, { "epoch": 98.65306122448979, "grad_norm": 36.253578186035156, "learning_rate": 7.020408163265306e-07, "loss": 0.7186, "step": 24170 }, { "epoch": 98.65714285714286, "grad_norm": 30.54524803161621, "learning_rate": 7.000000000000001e-07, "loss": 0.4375, "step": 24171 }, { "epoch": 98.66122448979591, "grad_norm": 28.278989791870117, "learning_rate": 6.979591836734694e-07, "loss": 1.3043, "step": 24172 }, { "epoch": 98.66530612244898, "grad_norm": 47.931182861328125, "learning_rate": 6.959183673469388e-07, "loss": 0.7443, "step": 24173 }, { "epoch": 98.66938775510204, "grad_norm": 27.180116653442383, "learning_rate": 6.938775510204082e-07, "loss": 1.217, "step": 24174 }, { "epoch": 98.6734693877551, "grad_norm": 7.217944145202637, "learning_rate": 6.918367346938776e-07, "loss": 0.067, "step": 24175 }, { "epoch": 98.67755102040816, "grad_norm": 39.16801452636719, "learning_rate": 6.897959183673469e-07, "loss": 0.7249, "step": 24176 }, { "epoch": 98.68163265306123, "grad_norm": 29.595600128173828, "learning_rate": 6.877551020408164e-07, "loss": 0.5961, "step": 24177 }, { "epoch": 98.68571428571428, "grad_norm": 32.772884368896484, "learning_rate": 6.857142857142857e-07, "loss": 0.902, "step": 24178 }, { "epoch": 98.68979591836735, "grad_norm": 25.28544807434082, "learning_rate": 6.836734693877551e-07, "loss": 0.3243, "step": 24179 }, { "epoch": 98.6938775510204, "grad_norm": 10.947213172912598, "learning_rate": 6.816326530612245e-07, "loss": 0.1001, "step": 24180 }, { "epoch": 98.69795918367348, "grad_norm": 21.44681167602539, "learning_rate": 6.79591836734694e-07, "loss": 0.2354, "step": 24181 }, { "epoch": 98.70204081632653, "grad_norm": 21.420024871826172, "learning_rate": 6.775510204081633e-07, "loss": 0.1778, "step": 24182 }, { "epoch": 98.70612244897958, "grad_norm": 22.540363311767578, "learning_rate": 6.755102040816327e-07, "loss": 0.5759, "step": 24183 }, { "epoch": 98.71020408163265, "grad_norm": 31.965063095092773, "learning_rate": 6.73469387755102e-07, "loss": 0.8921, "step": 24184 }, { "epoch": 98.71428571428571, "grad_norm": 30.79743766784668, "learning_rate": 6.714285714285715e-07, "loss": 1.0953, "step": 24185 }, { "epoch": 98.71836734693878, "grad_norm": 24.818904876708984, "learning_rate": 6.693877551020408e-07, "loss": 0.5892, "step": 24186 }, { "epoch": 98.72244897959183, "grad_norm": 6.1442766189575195, "learning_rate": 6.673469387755102e-07, "loss": 0.0593, "step": 24187 }, { "epoch": 98.7265306122449, "grad_norm": 26.493745803833008, "learning_rate": 6.653061224489796e-07, "loss": 0.2269, "step": 24188 }, { "epoch": 98.73061224489796, "grad_norm": 26.77227210998535, "learning_rate": 6.63265306122449e-07, "loss": 0.8987, "step": 24189 }, { "epoch": 98.73469387755102, "grad_norm": 24.554590225219727, "learning_rate": 6.612244897959183e-07, "loss": 1.3124, "step": 24190 }, { "epoch": 98.73877551020408, "grad_norm": 36.45619201660156, "learning_rate": 6.591836734693878e-07, "loss": 0.7097, "step": 24191 }, { "epoch": 98.74285714285715, "grad_norm": 23.813552856445312, "learning_rate": 6.571428571428572e-07, "loss": 1.4055, "step": 24192 }, { "epoch": 98.7469387755102, "grad_norm": 28.304227828979492, "learning_rate": 6.551020408163266e-07, "loss": 0.9959, "step": 24193 }, { "epoch": 98.75102040816327, "grad_norm": 19.61402130126953, "learning_rate": 6.53061224489796e-07, "loss": 0.2306, "step": 24194 }, { "epoch": 98.75510204081633, "grad_norm": 28.009899139404297, "learning_rate": 6.510204081632654e-07, "loss": 1.1163, "step": 24195 }, { "epoch": 98.7591836734694, "grad_norm": 32.95979690551758, "learning_rate": 6.489795918367348e-07, "loss": 0.5602, "step": 24196 }, { "epoch": 98.76326530612245, "grad_norm": 4.9425249099731445, "learning_rate": 6.469387755102041e-07, "loss": 0.0547, "step": 24197 }, { "epoch": 98.7673469387755, "grad_norm": 40.616573333740234, "learning_rate": 6.448979591836734e-07, "loss": 0.9285, "step": 24198 }, { "epoch": 98.77142857142857, "grad_norm": 26.466106414794922, "learning_rate": 6.428571428571429e-07, "loss": 0.5971, "step": 24199 }, { "epoch": 98.77551020408163, "grad_norm": 23.355308532714844, "learning_rate": 6.408163265306123e-07, "loss": 0.2911, "step": 24200 }, { "epoch": 98.7795918367347, "grad_norm": 35.124549865722656, "learning_rate": 6.387755102040816e-07, "loss": 0.5967, "step": 24201 }, { "epoch": 98.78367346938775, "grad_norm": 22.211109161376953, "learning_rate": 6.367346938775511e-07, "loss": 1.7926, "step": 24202 }, { "epoch": 98.78775510204082, "grad_norm": 9.053930282592773, "learning_rate": 6.346938775510204e-07, "loss": 0.0838, "step": 24203 }, { "epoch": 98.79183673469387, "grad_norm": 20.716510772705078, "learning_rate": 6.326530612244899e-07, "loss": 0.174, "step": 24204 }, { "epoch": 98.79591836734694, "grad_norm": 3.19317889213562, "learning_rate": 6.306122448979592e-07, "loss": 0.0409, "step": 24205 }, { "epoch": 98.8, "grad_norm": 26.483989715576172, "learning_rate": 6.285714285714287e-07, "loss": 0.2426, "step": 24206 }, { "epoch": 98.80408163265307, "grad_norm": 34.22465515136719, "learning_rate": 6.26530612244898e-07, "loss": 0.4859, "step": 24207 }, { "epoch": 98.80816326530612, "grad_norm": 29.441198348999023, "learning_rate": 6.244897959183674e-07, "loss": 0.5532, "step": 24208 }, { "epoch": 98.81224489795919, "grad_norm": 8.624212265014648, "learning_rate": 6.224489795918368e-07, "loss": 0.0718, "step": 24209 }, { "epoch": 98.81632653061224, "grad_norm": 25.73749351501465, "learning_rate": 6.204081632653062e-07, "loss": 0.2032, "step": 24210 }, { "epoch": 98.82040816326531, "grad_norm": 31.803272247314453, "learning_rate": 6.183673469387755e-07, "loss": 0.4496, "step": 24211 }, { "epoch": 98.82448979591837, "grad_norm": 12.928574562072754, "learning_rate": 6.163265306122449e-07, "loss": 0.1123, "step": 24212 }, { "epoch": 98.82857142857142, "grad_norm": 29.388633728027344, "learning_rate": 6.142857142857143e-07, "loss": 0.3958, "step": 24213 }, { "epoch": 98.83265306122449, "grad_norm": 32.60211181640625, "learning_rate": 6.122448979591837e-07, "loss": 1.1884, "step": 24214 }, { "epoch": 98.83673469387755, "grad_norm": 34.8279914855957, "learning_rate": 6.10204081632653e-07, "loss": 0.8569, "step": 24215 }, { "epoch": 98.84081632653061, "grad_norm": 30.681049346923828, "learning_rate": 6.081632653061225e-07, "loss": 0.3696, "step": 24216 }, { "epoch": 98.84489795918367, "grad_norm": 22.405406951904297, "learning_rate": 6.061224489795919e-07, "loss": 0.3352, "step": 24217 }, { "epoch": 98.84897959183674, "grad_norm": 22.727325439453125, "learning_rate": 6.040816326530613e-07, "loss": 1.9154, "step": 24218 }, { "epoch": 98.85306122448979, "grad_norm": 25.880456924438477, "learning_rate": 6.020408163265306e-07, "loss": 0.3704, "step": 24219 }, { "epoch": 98.85714285714286, "grad_norm": 27.308734893798828, "learning_rate": 6.000000000000001e-07, "loss": 0.8357, "step": 24220 }, { "epoch": 98.86122448979592, "grad_norm": 36.94781494140625, "learning_rate": 5.979591836734694e-07, "loss": 0.3608, "step": 24221 }, { "epoch": 98.86530612244898, "grad_norm": 30.801027297973633, "learning_rate": 5.959183673469388e-07, "loss": 0.7888, "step": 24222 }, { "epoch": 98.86938775510204, "grad_norm": 36.15007400512695, "learning_rate": 5.938775510204082e-07, "loss": 1.0874, "step": 24223 }, { "epoch": 98.87346938775511, "grad_norm": 30.92455291748047, "learning_rate": 5.918367346938776e-07, "loss": 0.675, "step": 24224 }, { "epoch": 98.87755102040816, "grad_norm": 33.07475662231445, "learning_rate": 5.897959183673469e-07, "loss": 0.9719, "step": 24225 }, { "epoch": 98.88163265306123, "grad_norm": 34.35525131225586, "learning_rate": 5.877551020408163e-07, "loss": 0.4779, "step": 24226 }, { "epoch": 98.88571428571429, "grad_norm": 29.738292694091797, "learning_rate": 5.857142857142857e-07, "loss": 0.6383, "step": 24227 }, { "epoch": 98.88979591836734, "grad_norm": 25.899484634399414, "learning_rate": 5.836734693877552e-07, "loss": 1.6992, "step": 24228 }, { "epoch": 98.89387755102041, "grad_norm": 25.11919403076172, "learning_rate": 5.816326530612245e-07, "loss": 1.2734, "step": 24229 }, { "epoch": 98.89795918367346, "grad_norm": 28.0784969329834, "learning_rate": 5.795918367346939e-07, "loss": 0.4976, "step": 24230 }, { "epoch": 98.90204081632653, "grad_norm": 22.70296859741211, "learning_rate": 5.775510204081633e-07, "loss": 0.2605, "step": 24231 }, { "epoch": 98.90612244897959, "grad_norm": 31.422643661499023, "learning_rate": 5.755102040816327e-07, "loss": 1.0755, "step": 24232 }, { "epoch": 98.91020408163266, "grad_norm": 27.53005027770996, "learning_rate": 5.73469387755102e-07, "loss": 0.2294, "step": 24233 }, { "epoch": 98.91428571428571, "grad_norm": 31.493450164794922, "learning_rate": 5.714285714285715e-07, "loss": 1.5297, "step": 24234 }, { "epoch": 98.91836734693878, "grad_norm": 41.80940628051758, "learning_rate": 5.693877551020408e-07, "loss": 0.9407, "step": 24235 }, { "epoch": 98.92244897959183, "grad_norm": 27.553401947021484, "learning_rate": 5.673469387755102e-07, "loss": 1.4302, "step": 24236 }, { "epoch": 98.9265306122449, "grad_norm": 30.74406623840332, "learning_rate": 5.653061224489796e-07, "loss": 0.3002, "step": 24237 }, { "epoch": 98.93061224489796, "grad_norm": 30.951501846313477, "learning_rate": 5.63265306122449e-07, "loss": 0.4167, "step": 24238 }, { "epoch": 98.93469387755103, "grad_norm": 29.523801803588867, "learning_rate": 5.612244897959183e-07, "loss": 0.2003, "step": 24239 }, { "epoch": 98.93877551020408, "grad_norm": 43.61115646362305, "learning_rate": 5.591836734693877e-07, "loss": 0.5699, "step": 24240 }, { "epoch": 98.94285714285714, "grad_norm": 37.421791076660156, "learning_rate": 5.571428571428573e-07, "loss": 0.8225, "step": 24241 }, { "epoch": 98.9469387755102, "grad_norm": 34.24059295654297, "learning_rate": 5.551020408163266e-07, "loss": 0.4508, "step": 24242 }, { "epoch": 98.95102040816326, "grad_norm": 27.333959579467773, "learning_rate": 5.53061224489796e-07, "loss": 0.5251, "step": 24243 }, { "epoch": 98.95510204081633, "grad_norm": 24.45035743713379, "learning_rate": 5.510204081632653e-07, "loss": 1.3584, "step": 24244 }, { "epoch": 98.95918367346938, "grad_norm": 39.62820816040039, "learning_rate": 5.489795918367348e-07, "loss": 0.3039, "step": 24245 }, { "epoch": 98.96326530612245, "grad_norm": 33.78081512451172, "learning_rate": 5.469387755102041e-07, "loss": 0.6224, "step": 24246 }, { "epoch": 98.9673469387755, "grad_norm": 31.259349822998047, "learning_rate": 5.448979591836735e-07, "loss": 1.5288, "step": 24247 }, { "epoch": 98.97142857142858, "grad_norm": 28.07815170288086, "learning_rate": 5.428571428571429e-07, "loss": 0.3672, "step": 24248 }, { "epoch": 98.97551020408163, "grad_norm": 31.954635620117188, "learning_rate": 5.408163265306123e-07, "loss": 0.9389, "step": 24249 }, { "epoch": 98.9795918367347, "grad_norm": 26.683500289916992, "learning_rate": 5.387755102040816e-07, "loss": 0.4797, "step": 24250 }, { "epoch": 98.98367346938775, "grad_norm": 27.92270278930664, "learning_rate": 5.367346938775511e-07, "loss": 1.0852, "step": 24251 }, { "epoch": 98.98775510204082, "grad_norm": 31.99496841430664, "learning_rate": 5.346938775510204e-07, "loss": 0.6723, "step": 24252 }, { "epoch": 98.99183673469388, "grad_norm": 32.33107376098633, "learning_rate": 5.326530612244899e-07, "loss": 0.8535, "step": 24253 }, { "epoch": 98.99591836734695, "grad_norm": 30.1488094329834, "learning_rate": 5.306122448979592e-07, "loss": 0.5348, "step": 24254 }, { "epoch": 99.0, "grad_norm": 27.13990592956543, "learning_rate": 5.285714285714287e-07, "loss": 1.7998, "step": 24255 }, { "epoch": 99.00408163265305, "grad_norm": 29.953651428222656, "learning_rate": 5.26530612244898e-07, "loss": 0.99, "step": 24256 }, { "epoch": 99.00816326530612, "grad_norm": 24.460214614868164, "learning_rate": 5.244897959183674e-07, "loss": 1.828, "step": 24257 }, { "epoch": 99.01224489795918, "grad_norm": 22.860897064208984, "learning_rate": 5.224489795918367e-07, "loss": 1.2453, "step": 24258 }, { "epoch": 99.01632653061225, "grad_norm": 8.12833309173584, "learning_rate": 5.204081632653062e-07, "loss": 0.064, "step": 24259 }, { "epoch": 99.0204081632653, "grad_norm": 28.417524337768555, "learning_rate": 5.183673469387755e-07, "loss": 1.4414, "step": 24260 }, { "epoch": 99.02448979591837, "grad_norm": 12.110686302185059, "learning_rate": 5.163265306122449e-07, "loss": 0.1037, "step": 24261 }, { "epoch": 99.02857142857142, "grad_norm": 32.9696159362793, "learning_rate": 5.142857142857143e-07, "loss": 1.5306, "step": 24262 }, { "epoch": 99.0326530612245, "grad_norm": 36.50502395629883, "learning_rate": 5.122448979591837e-07, "loss": 0.6245, "step": 24263 }, { "epoch": 99.03673469387755, "grad_norm": 27.66378402709961, "learning_rate": 5.10204081632653e-07, "loss": 0.5668, "step": 24264 }, { "epoch": 99.04081632653062, "grad_norm": 26.343578338623047, "learning_rate": 5.081632653061225e-07, "loss": 0.649, "step": 24265 }, { "epoch": 99.04489795918367, "grad_norm": 33.38346862792969, "learning_rate": 5.061224489795919e-07, "loss": 0.3757, "step": 24266 }, { "epoch": 99.04897959183674, "grad_norm": 27.815778732299805, "learning_rate": 5.040816326530613e-07, "loss": 0.6255, "step": 24267 }, { "epoch": 99.0530612244898, "grad_norm": 20.44352149963379, "learning_rate": 5.020408163265306e-07, "loss": 0.1607, "step": 24268 }, { "epoch": 99.05714285714286, "grad_norm": 30.049449920654297, "learning_rate": 5.000000000000001e-07, "loss": 0.8935, "step": 24269 }, { "epoch": 99.06122448979592, "grad_norm": 42.049034118652344, "learning_rate": 4.979591836734694e-07, "loss": 0.7165, "step": 24270 }, { "epoch": 99.06530612244897, "grad_norm": 25.294204711914062, "learning_rate": 4.959183673469388e-07, "loss": 0.2646, "step": 24271 }, { "epoch": 99.06938775510204, "grad_norm": 28.81964683532715, "learning_rate": 4.938775510204081e-07, "loss": 1.3895, "step": 24272 }, { "epoch": 99.0734693877551, "grad_norm": 27.737564086914062, "learning_rate": 4.918367346938776e-07, "loss": 0.9825, "step": 24273 }, { "epoch": 99.07755102040817, "grad_norm": 40.5572624206543, "learning_rate": 4.897959183673469e-07, "loss": 0.7502, "step": 24274 }, { "epoch": 99.08163265306122, "grad_norm": 43.51612854003906, "learning_rate": 4.877551020408163e-07, "loss": 1.0103, "step": 24275 }, { "epoch": 99.08571428571429, "grad_norm": 30.808746337890625, "learning_rate": 4.857142857142857e-07, "loss": 0.7391, "step": 24276 }, { "epoch": 99.08979591836734, "grad_norm": 14.536710739135742, "learning_rate": 4.836734693877552e-07, "loss": 0.131, "step": 24277 }, { "epoch": 99.09387755102041, "grad_norm": 26.30107879638672, "learning_rate": 4.816326530612245e-07, "loss": 1.0435, "step": 24278 }, { "epoch": 99.09795918367347, "grad_norm": 17.82100486755371, "learning_rate": 4.795918367346939e-07, "loss": 0.1663, "step": 24279 }, { "epoch": 99.10204081632654, "grad_norm": 28.2614803314209, "learning_rate": 4.775510204081633e-07, "loss": 0.3066, "step": 24280 }, { "epoch": 99.10612244897959, "grad_norm": 31.9572696685791, "learning_rate": 4.755102040816327e-07, "loss": 1.1938, "step": 24281 }, { "epoch": 99.11020408163266, "grad_norm": 25.593576431274414, "learning_rate": 4.7346938775510204e-07, "loss": 1.393, "step": 24282 }, { "epoch": 99.11428571428571, "grad_norm": 24.279033660888672, "learning_rate": 4.7142857142857145e-07, "loss": 0.2407, "step": 24283 }, { "epoch": 99.11836734693877, "grad_norm": 33.76512145996094, "learning_rate": 4.6938775510204085e-07, "loss": 0.5139, "step": 24284 }, { "epoch": 99.12244897959184, "grad_norm": 32.15656280517578, "learning_rate": 4.673469387755102e-07, "loss": 0.4025, "step": 24285 }, { "epoch": 99.12653061224489, "grad_norm": 32.9510498046875, "learning_rate": 4.653061224489796e-07, "loss": 0.6895, "step": 24286 }, { "epoch": 99.13061224489796, "grad_norm": 29.968626022338867, "learning_rate": 4.6326530612244895e-07, "loss": 1.1511, "step": 24287 }, { "epoch": 99.13469387755102, "grad_norm": 37.74798583984375, "learning_rate": 4.6122448979591835e-07, "loss": 1.0756, "step": 24288 }, { "epoch": 99.13877551020408, "grad_norm": 30.49663734436035, "learning_rate": 4.591836734693878e-07, "loss": 1.261, "step": 24289 }, { "epoch": 99.14285714285714, "grad_norm": 31.645729064941406, "learning_rate": 4.571428571428572e-07, "loss": 0.5957, "step": 24290 }, { "epoch": 99.14693877551021, "grad_norm": 36.671024322509766, "learning_rate": 4.5510204081632655e-07, "loss": 0.8234, "step": 24291 }, { "epoch": 99.15102040816326, "grad_norm": 19.15718650817871, "learning_rate": 4.5306122448979596e-07, "loss": 1.2996, "step": 24292 }, { "epoch": 99.15510204081633, "grad_norm": 26.291427612304688, "learning_rate": 4.5102040816326536e-07, "loss": 0.2205, "step": 24293 }, { "epoch": 99.15918367346939, "grad_norm": 22.261295318603516, "learning_rate": 4.489795918367347e-07, "loss": 0.3159, "step": 24294 }, { "epoch": 99.16326530612245, "grad_norm": 25.916702270507812, "learning_rate": 4.469387755102041e-07, "loss": 1.4658, "step": 24295 }, { "epoch": 99.16734693877551, "grad_norm": 28.058876037597656, "learning_rate": 4.4489795918367346e-07, "loss": 1.0541, "step": 24296 }, { "epoch": 99.17142857142858, "grad_norm": 32.083274841308594, "learning_rate": 4.4285714285714286e-07, "loss": 0.6902, "step": 24297 }, { "epoch": 99.17551020408163, "grad_norm": 24.845943450927734, "learning_rate": 4.4081632653061226e-07, "loss": 1.2156, "step": 24298 }, { "epoch": 99.17959183673469, "grad_norm": 26.71269416809082, "learning_rate": 4.387755102040816e-07, "loss": 1.1489, "step": 24299 }, { "epoch": 99.18367346938776, "grad_norm": 40.48277282714844, "learning_rate": 4.36734693877551e-07, "loss": 0.5723, "step": 24300 }, { "epoch": 99.18775510204081, "grad_norm": 27.384769439697266, "learning_rate": 4.3469387755102047e-07, "loss": 0.793, "step": 24301 }, { "epoch": 99.19183673469388, "grad_norm": 25.03949546813965, "learning_rate": 4.3265306122448987e-07, "loss": 0.5678, "step": 24302 }, { "epoch": 99.19591836734693, "grad_norm": 9.558844566345215, "learning_rate": 4.306122448979592e-07, "loss": 0.1098, "step": 24303 }, { "epoch": 99.2, "grad_norm": 27.546430587768555, "learning_rate": 4.285714285714286e-07, "loss": 0.5549, "step": 24304 }, { "epoch": 99.20408163265306, "grad_norm": 29.895809173583984, "learning_rate": 4.2653061224489797e-07, "loss": 0.7684, "step": 24305 }, { "epoch": 99.20816326530613, "grad_norm": 5.190649509429932, "learning_rate": 4.2448979591836737e-07, "loss": 0.0548, "step": 24306 }, { "epoch": 99.21224489795918, "grad_norm": 23.823301315307617, "learning_rate": 4.2244897959183677e-07, "loss": 0.2144, "step": 24307 }, { "epoch": 99.21632653061225, "grad_norm": 31.2686824798584, "learning_rate": 4.204081632653061e-07, "loss": 0.5876, "step": 24308 }, { "epoch": 99.2204081632653, "grad_norm": 19.055496215820312, "learning_rate": 4.183673469387755e-07, "loss": 0.1735, "step": 24309 }, { "epoch": 99.22448979591837, "grad_norm": 32.27088928222656, "learning_rate": 4.1632653061224487e-07, "loss": 1.3811, "step": 24310 }, { "epoch": 99.22857142857143, "grad_norm": 31.476593017578125, "learning_rate": 4.142857142857143e-07, "loss": 1.0268, "step": 24311 }, { "epoch": 99.2326530612245, "grad_norm": 29.848857879638672, "learning_rate": 4.122448979591837e-07, "loss": 1.2001, "step": 24312 }, { "epoch": 99.23673469387755, "grad_norm": 7.255160331726074, "learning_rate": 4.10204081632653e-07, "loss": 0.0642, "step": 24313 }, { "epoch": 99.2408163265306, "grad_norm": 28.70242691040039, "learning_rate": 4.081632653061225e-07, "loss": 0.2812, "step": 24314 }, { "epoch": 99.24489795918367, "grad_norm": 30.845645904541016, "learning_rate": 4.061224489795919e-07, "loss": 0.7897, "step": 24315 }, { "epoch": 99.24897959183673, "grad_norm": 23.893840789794922, "learning_rate": 4.040816326530613e-07, "loss": 0.5973, "step": 24316 }, { "epoch": 99.2530612244898, "grad_norm": 24.725452423095703, "learning_rate": 4.0204081632653063e-07, "loss": 1.2892, "step": 24317 }, { "epoch": 99.25714285714285, "grad_norm": 28.504444122314453, "learning_rate": 4.0000000000000003e-07, "loss": 1.5127, "step": 24318 }, { "epoch": 99.26122448979592, "grad_norm": 18.75404167175293, "learning_rate": 3.979591836734694e-07, "loss": 0.2325, "step": 24319 }, { "epoch": 99.26530612244898, "grad_norm": 36.24754333496094, "learning_rate": 3.959183673469388e-07, "loss": 1.0431, "step": 24320 }, { "epoch": 99.26938775510204, "grad_norm": 24.712156295776367, "learning_rate": 3.938775510204082e-07, "loss": 0.8468, "step": 24321 }, { "epoch": 99.2734693877551, "grad_norm": 27.706844329833984, "learning_rate": 3.9183673469387754e-07, "loss": 0.7444, "step": 24322 }, { "epoch": 99.27755102040817, "grad_norm": 32.044044494628906, "learning_rate": 3.89795918367347e-07, "loss": 1.0562, "step": 24323 }, { "epoch": 99.28163265306122, "grad_norm": 28.9628849029541, "learning_rate": 3.8775510204081634e-07, "loss": 0.4427, "step": 24324 }, { "epoch": 99.28571428571429, "grad_norm": 29.674402236938477, "learning_rate": 3.8571428571428574e-07, "loss": 1.4134, "step": 24325 }, { "epoch": 99.28979591836735, "grad_norm": 24.914573669433594, "learning_rate": 3.836734693877551e-07, "loss": 1.3196, "step": 24326 }, { "epoch": 99.29387755102042, "grad_norm": 10.45948314666748, "learning_rate": 3.816326530612245e-07, "loss": 0.0819, "step": 24327 }, { "epoch": 99.29795918367347, "grad_norm": 25.91757583618164, "learning_rate": 3.795918367346939e-07, "loss": 1.7227, "step": 24328 }, { "epoch": 99.30204081632652, "grad_norm": 27.09489631652832, "learning_rate": 3.775510204081633e-07, "loss": 1.2709, "step": 24329 }, { "epoch": 99.3061224489796, "grad_norm": 22.87249755859375, "learning_rate": 3.755102040816327e-07, "loss": 1.752, "step": 24330 }, { "epoch": 99.31020408163265, "grad_norm": 24.786571502685547, "learning_rate": 3.7346938775510205e-07, "loss": 1.2416, "step": 24331 }, { "epoch": 99.31428571428572, "grad_norm": 24.4174861907959, "learning_rate": 3.7142857142857145e-07, "loss": 0.461, "step": 24332 }, { "epoch": 99.31836734693877, "grad_norm": 23.98815155029297, "learning_rate": 3.693877551020408e-07, "loss": 2.1123, "step": 24333 }, { "epoch": 99.32244897959184, "grad_norm": 15.09586238861084, "learning_rate": 3.673469387755102e-07, "loss": 0.1865, "step": 24334 }, { "epoch": 99.3265306122449, "grad_norm": 29.024015426635742, "learning_rate": 3.653061224489796e-07, "loss": 1.0758, "step": 24335 }, { "epoch": 99.33061224489796, "grad_norm": 38.82337951660156, "learning_rate": 3.63265306122449e-07, "loss": 0.844, "step": 24336 }, { "epoch": 99.33469387755102, "grad_norm": 33.428306579589844, "learning_rate": 3.612244897959184e-07, "loss": 1.189, "step": 24337 }, { "epoch": 99.33877551020409, "grad_norm": 37.039329528808594, "learning_rate": 3.5918367346938776e-07, "loss": 0.9363, "step": 24338 }, { "epoch": 99.34285714285714, "grad_norm": 30.714263916015625, "learning_rate": 3.5714285714285716e-07, "loss": 1.0491, "step": 24339 }, { "epoch": 99.34693877551021, "grad_norm": 23.261314392089844, "learning_rate": 3.551020408163265e-07, "loss": 0.577, "step": 24340 }, { "epoch": 99.35102040816327, "grad_norm": 22.8292179107666, "learning_rate": 3.5306122448979596e-07, "loss": 1.4711, "step": 24341 }, { "epoch": 99.35510204081632, "grad_norm": 31.565582275390625, "learning_rate": 3.510204081632653e-07, "loss": 1.0478, "step": 24342 }, { "epoch": 99.35918367346939, "grad_norm": 23.811260223388672, "learning_rate": 3.489795918367347e-07, "loss": 1.7677, "step": 24343 }, { "epoch": 99.36326530612244, "grad_norm": 28.2938232421875, "learning_rate": 3.469387755102041e-07, "loss": 1.3385, "step": 24344 }, { "epoch": 99.36734693877551, "grad_norm": 31.424850463867188, "learning_rate": 3.4489795918367346e-07, "loss": 0.6422, "step": 24345 }, { "epoch": 99.37142857142857, "grad_norm": 12.98487663269043, "learning_rate": 3.4285714285714286e-07, "loss": 0.1035, "step": 24346 }, { "epoch": 99.37551020408164, "grad_norm": 21.3670597076416, "learning_rate": 3.4081632653061227e-07, "loss": 0.2204, "step": 24347 }, { "epoch": 99.37959183673469, "grad_norm": 18.654390335083008, "learning_rate": 3.3877551020408167e-07, "loss": 0.2094, "step": 24348 }, { "epoch": 99.38367346938776, "grad_norm": 24.289512634277344, "learning_rate": 3.36734693877551e-07, "loss": 1.3377, "step": 24349 }, { "epoch": 99.38775510204081, "grad_norm": 25.17058563232422, "learning_rate": 3.346938775510204e-07, "loss": 1.2689, "step": 24350 }, { "epoch": 99.39183673469388, "grad_norm": 26.457923889160156, "learning_rate": 3.326530612244898e-07, "loss": 1.1638, "step": 24351 }, { "epoch": 99.39591836734694, "grad_norm": 26.031005859375, "learning_rate": 3.3061224489795917e-07, "loss": 1.0165, "step": 24352 }, { "epoch": 99.4, "grad_norm": 24.129924774169922, "learning_rate": 3.285714285714286e-07, "loss": 0.2164, "step": 24353 }, { "epoch": 99.40408163265306, "grad_norm": 23.908138275146484, "learning_rate": 3.26530612244898e-07, "loss": 0.1862, "step": 24354 }, { "epoch": 99.40816326530613, "grad_norm": 23.789588928222656, "learning_rate": 3.244897959183674e-07, "loss": 0.1886, "step": 24355 }, { "epoch": 99.41224489795918, "grad_norm": 21.871313095092773, "learning_rate": 3.224489795918367e-07, "loss": 1.9177, "step": 24356 }, { "epoch": 99.41632653061224, "grad_norm": 25.273780822753906, "learning_rate": 3.204081632653061e-07, "loss": 0.5267, "step": 24357 }, { "epoch": 99.42040816326531, "grad_norm": 29.10909652709961, "learning_rate": 3.1836734693877553e-07, "loss": 1.6454, "step": 24358 }, { "epoch": 99.42448979591836, "grad_norm": 26.193395614624023, "learning_rate": 3.1632653061224493e-07, "loss": 0.2654, "step": 24359 }, { "epoch": 99.42857142857143, "grad_norm": 29.322444915771484, "learning_rate": 3.1428571428571433e-07, "loss": 0.3719, "step": 24360 }, { "epoch": 99.43265306122449, "grad_norm": 18.925962448120117, "learning_rate": 3.122448979591837e-07, "loss": 0.2383, "step": 24361 }, { "epoch": 99.43673469387755, "grad_norm": 38.06391906738281, "learning_rate": 3.102040816326531e-07, "loss": 0.9122, "step": 24362 }, { "epoch": 99.44081632653061, "grad_norm": 26.25502586364746, "learning_rate": 3.0816326530612243e-07, "loss": 0.3029, "step": 24363 }, { "epoch": 99.44489795918368, "grad_norm": 22.51128387451172, "learning_rate": 3.0612244897959183e-07, "loss": 1.8086, "step": 24364 }, { "epoch": 99.44897959183673, "grad_norm": 19.0904483795166, "learning_rate": 3.0408163265306124e-07, "loss": 1.9695, "step": 24365 }, { "epoch": 99.4530612244898, "grad_norm": 19.191287994384766, "learning_rate": 3.0204081632653064e-07, "loss": 1.856, "step": 24366 }, { "epoch": 99.45714285714286, "grad_norm": 25.059904098510742, "learning_rate": 3.0000000000000004e-07, "loss": 0.4114, "step": 24367 }, { "epoch": 99.46122448979592, "grad_norm": 25.070018768310547, "learning_rate": 2.979591836734694e-07, "loss": 1.556, "step": 24368 }, { "epoch": 99.46530612244898, "grad_norm": 27.5717830657959, "learning_rate": 2.959183673469388e-07, "loss": 0.1952, "step": 24369 }, { "epoch": 99.46938775510205, "grad_norm": 46.20458221435547, "learning_rate": 2.9387755102040814e-07, "loss": 1.0158, "step": 24370 }, { "epoch": 99.4734693877551, "grad_norm": 24.972850799560547, "learning_rate": 2.918367346938776e-07, "loss": 1.3014, "step": 24371 }, { "epoch": 99.47755102040816, "grad_norm": 12.761693000793457, "learning_rate": 2.8979591836734694e-07, "loss": 0.1179, "step": 24372 }, { "epoch": 99.48163265306123, "grad_norm": 21.216297149658203, "learning_rate": 2.8775510204081634e-07, "loss": 1.4498, "step": 24373 }, { "epoch": 99.48571428571428, "grad_norm": 29.317983627319336, "learning_rate": 2.8571428571428575e-07, "loss": 1.0714, "step": 24374 }, { "epoch": 99.48979591836735, "grad_norm": 26.29987907409668, "learning_rate": 2.836734693877551e-07, "loss": 0.2452, "step": 24375 }, { "epoch": 99.4938775510204, "grad_norm": 31.894344329833984, "learning_rate": 2.816326530612245e-07, "loss": 1.4272, "step": 24376 }, { "epoch": 99.49795918367347, "grad_norm": 23.23419189453125, "learning_rate": 2.7959183673469385e-07, "loss": 1.3059, "step": 24377 }, { "epoch": 99.50204081632653, "grad_norm": 34.09791564941406, "learning_rate": 2.775510204081633e-07, "loss": 0.9943, "step": 24378 }, { "epoch": 99.5061224489796, "grad_norm": 31.03120231628418, "learning_rate": 2.7551020408163265e-07, "loss": 0.4265, "step": 24379 }, { "epoch": 99.51020408163265, "grad_norm": 21.164766311645508, "learning_rate": 2.7346938775510205e-07, "loss": 0.3279, "step": 24380 }, { "epoch": 99.51428571428572, "grad_norm": 20.67793846130371, "learning_rate": 2.7142857142857145e-07, "loss": 0.2794, "step": 24381 }, { "epoch": 99.51836734693877, "grad_norm": 31.45487403869629, "learning_rate": 2.693877551020408e-07, "loss": 0.6408, "step": 24382 }, { "epoch": 99.52244897959184, "grad_norm": 30.0175724029541, "learning_rate": 2.673469387755102e-07, "loss": 0.4539, "step": 24383 }, { "epoch": 99.5265306122449, "grad_norm": 43.84593963623047, "learning_rate": 2.653061224489796e-07, "loss": 0.6968, "step": 24384 }, { "epoch": 99.53061224489795, "grad_norm": 29.833921432495117, "learning_rate": 2.63265306122449e-07, "loss": 0.8525, "step": 24385 }, { "epoch": 99.53469387755102, "grad_norm": 26.613399505615234, "learning_rate": 2.6122448979591836e-07, "loss": 1.2594, "step": 24386 }, { "epoch": 99.53877551020408, "grad_norm": 35.943729400634766, "learning_rate": 2.5918367346938776e-07, "loss": 0.5423, "step": 24387 }, { "epoch": 99.54285714285714, "grad_norm": 33.23590850830078, "learning_rate": 2.5714285714285716e-07, "loss": 0.4868, "step": 24388 }, { "epoch": 99.5469387755102, "grad_norm": 27.289508819580078, "learning_rate": 2.551020408163265e-07, "loss": 0.8432, "step": 24389 }, { "epoch": 99.55102040816327, "grad_norm": 30.93259620666504, "learning_rate": 2.5306122448979597e-07, "loss": 0.3132, "step": 24390 }, { "epoch": 99.55510204081632, "grad_norm": 13.49602222442627, "learning_rate": 2.510204081632653e-07, "loss": 0.0778, "step": 24391 }, { "epoch": 99.55918367346939, "grad_norm": 17.759227752685547, "learning_rate": 2.489795918367347e-07, "loss": 0.1715, "step": 24392 }, { "epoch": 99.56326530612245, "grad_norm": 10.82715892791748, "learning_rate": 2.4693877551020407e-07, "loss": 0.0645, "step": 24393 }, { "epoch": 99.56734693877551, "grad_norm": 23.547935485839844, "learning_rate": 2.4489795918367347e-07, "loss": 0.2229, "step": 24394 }, { "epoch": 99.57142857142857, "grad_norm": 35.91304016113281, "learning_rate": 2.4285714285714287e-07, "loss": 1.0805, "step": 24395 }, { "epoch": 99.57551020408164, "grad_norm": 36.062232971191406, "learning_rate": 2.4081632653061227e-07, "loss": 1.2445, "step": 24396 }, { "epoch": 99.57959183673469, "grad_norm": 16.347074508666992, "learning_rate": 2.3877551020408167e-07, "loss": 0.1795, "step": 24397 }, { "epoch": 99.58367346938776, "grad_norm": 28.83228302001953, "learning_rate": 2.3673469387755102e-07, "loss": 0.4952, "step": 24398 }, { "epoch": 99.58775510204082, "grad_norm": 31.873943328857422, "learning_rate": 2.3469387755102042e-07, "loss": 0.9998, "step": 24399 }, { "epoch": 99.59183673469387, "grad_norm": 33.88199234008789, "learning_rate": 2.326530612244898e-07, "loss": 0.9137, "step": 24400 }, { "epoch": 99.59591836734694, "grad_norm": 29.691932678222656, "learning_rate": 2.3061224489795917e-07, "loss": 0.2682, "step": 24401 }, { "epoch": 99.6, "grad_norm": 29.732666015625, "learning_rate": 2.285714285714286e-07, "loss": 1.2453, "step": 24402 }, { "epoch": 99.60408163265306, "grad_norm": 33.880210876464844, "learning_rate": 2.2653061224489798e-07, "loss": 0.8593, "step": 24403 }, { "epoch": 99.60816326530612, "grad_norm": 29.862037658691406, "learning_rate": 2.2448979591836735e-07, "loss": 0.6272, "step": 24404 }, { "epoch": 99.61224489795919, "grad_norm": 23.48302459716797, "learning_rate": 2.2244897959183673e-07, "loss": 1.4041, "step": 24405 }, { "epoch": 99.61632653061224, "grad_norm": 25.704660415649414, "learning_rate": 2.2040816326530613e-07, "loss": 1.2866, "step": 24406 }, { "epoch": 99.62040816326531, "grad_norm": 22.317333221435547, "learning_rate": 2.183673469387755e-07, "loss": 1.8038, "step": 24407 }, { "epoch": 99.62448979591836, "grad_norm": 10.958857536315918, "learning_rate": 2.1632653061224493e-07, "loss": 2.9423, "step": 24408 }, { "epoch": 99.62857142857143, "grad_norm": 26.36326026916504, "learning_rate": 2.142857142857143e-07, "loss": 1.0639, "step": 24409 }, { "epoch": 99.63265306122449, "grad_norm": 33.161373138427734, "learning_rate": 2.1224489795918369e-07, "loss": 1.1829, "step": 24410 }, { "epoch": 99.63673469387756, "grad_norm": 33.53339385986328, "learning_rate": 2.1020408163265306e-07, "loss": 0.9338, "step": 24411 }, { "epoch": 99.64081632653061, "grad_norm": 29.001073837280273, "learning_rate": 2.0816326530612244e-07, "loss": 1.0357, "step": 24412 }, { "epoch": 99.64489795918368, "grad_norm": 30.329626083374023, "learning_rate": 2.0612244897959184e-07, "loss": 1.1897, "step": 24413 }, { "epoch": 99.64897959183673, "grad_norm": 36.11658477783203, "learning_rate": 2.0408163265306124e-07, "loss": 0.4773, "step": 24414 }, { "epoch": 99.65306122448979, "grad_norm": 21.898258209228516, "learning_rate": 2.0204081632653064e-07, "loss": 1.9168, "step": 24415 }, { "epoch": 99.65714285714286, "grad_norm": 31.968036651611328, "learning_rate": 2.0000000000000002e-07, "loss": 1.2826, "step": 24416 }, { "epoch": 99.66122448979591, "grad_norm": 32.249481201171875, "learning_rate": 1.979591836734694e-07, "loss": 0.6771, "step": 24417 }, { "epoch": 99.66530612244898, "grad_norm": 34.1009521484375, "learning_rate": 1.9591836734693877e-07, "loss": 0.8599, "step": 24418 }, { "epoch": 99.66938775510204, "grad_norm": 33.662811279296875, "learning_rate": 1.9387755102040817e-07, "loss": 0.8077, "step": 24419 }, { "epoch": 99.6734693877551, "grad_norm": 14.933006286621094, "learning_rate": 1.9183673469387755e-07, "loss": 2.1439, "step": 24420 }, { "epoch": 99.67755102040816, "grad_norm": 29.916439056396484, "learning_rate": 1.8979591836734695e-07, "loss": 0.7919, "step": 24421 }, { "epoch": 99.68163265306123, "grad_norm": 30.236547470092773, "learning_rate": 1.8775510204081635e-07, "loss": 0.5, "step": 24422 }, { "epoch": 99.68571428571428, "grad_norm": 30.422252655029297, "learning_rate": 1.8571428571428572e-07, "loss": 0.5377, "step": 24423 }, { "epoch": 99.68979591836735, "grad_norm": 18.050504684448242, "learning_rate": 1.836734693877551e-07, "loss": 2.1834, "step": 24424 }, { "epoch": 99.6938775510204, "grad_norm": 31.40635108947754, "learning_rate": 1.816326530612245e-07, "loss": 0.3015, "step": 24425 }, { "epoch": 99.69795918367348, "grad_norm": 25.285837173461914, "learning_rate": 1.7959183673469388e-07, "loss": 1.4257, "step": 24426 }, { "epoch": 99.70204081632653, "grad_norm": 34.40610122680664, "learning_rate": 1.7755102040816325e-07, "loss": 0.5883, "step": 24427 }, { "epoch": 99.70612244897958, "grad_norm": 28.786684036254883, "learning_rate": 1.7551020408163265e-07, "loss": 0.5475, "step": 24428 }, { "epoch": 99.71020408163265, "grad_norm": 33.35331726074219, "learning_rate": 1.7346938775510206e-07, "loss": 0.7285, "step": 24429 }, { "epoch": 99.71428571428571, "grad_norm": 22.637117385864258, "learning_rate": 1.7142857142857143e-07, "loss": 1.7684, "step": 24430 }, { "epoch": 99.71836734693878, "grad_norm": 26.6032657623291, "learning_rate": 1.6938775510204083e-07, "loss": 1.3965, "step": 24431 }, { "epoch": 99.72244897959183, "grad_norm": 22.36990737915039, "learning_rate": 1.673469387755102e-07, "loss": 1.1101, "step": 24432 }, { "epoch": 99.7265306122449, "grad_norm": 6.689327716827393, "learning_rate": 1.6530612244897958e-07, "loss": 0.0594, "step": 24433 }, { "epoch": 99.73061224489796, "grad_norm": 31.164735794067383, "learning_rate": 1.63265306122449e-07, "loss": 0.4861, "step": 24434 }, { "epoch": 99.73469387755102, "grad_norm": 17.10797691345215, "learning_rate": 1.6122448979591836e-07, "loss": 0.2024, "step": 24435 }, { "epoch": 99.73877551020408, "grad_norm": 27.084728240966797, "learning_rate": 1.5918367346938776e-07, "loss": 0.3692, "step": 24436 }, { "epoch": 99.74285714285715, "grad_norm": 23.329126358032227, "learning_rate": 1.5714285714285717e-07, "loss": 0.1615, "step": 24437 }, { "epoch": 99.7469387755102, "grad_norm": 24.462005615234375, "learning_rate": 1.5510204081632654e-07, "loss": 1.0106, "step": 24438 }, { "epoch": 99.75102040816327, "grad_norm": 1.3825674057006836, "learning_rate": 1.5306122448979592e-07, "loss": 0.022, "step": 24439 }, { "epoch": 99.75510204081633, "grad_norm": 27.467323303222656, "learning_rate": 1.5102040816326532e-07, "loss": 1.1271, "step": 24440 }, { "epoch": 99.7591836734694, "grad_norm": 30.035879135131836, "learning_rate": 1.489795918367347e-07, "loss": 0.4272, "step": 24441 }, { "epoch": 99.76326530612245, "grad_norm": 29.385974884033203, "learning_rate": 1.4693877551020407e-07, "loss": 0.2647, "step": 24442 }, { "epoch": 99.7673469387755, "grad_norm": 33.50077819824219, "learning_rate": 1.4489795918367347e-07, "loss": 0.3804, "step": 24443 }, { "epoch": 99.77142857142857, "grad_norm": 23.975879669189453, "learning_rate": 1.4285714285714287e-07, "loss": 0.2586, "step": 24444 }, { "epoch": 99.77551020408163, "grad_norm": 28.705860137939453, "learning_rate": 1.4081632653061225e-07, "loss": 1.1668, "step": 24445 }, { "epoch": 99.7795918367347, "grad_norm": 29.15391731262207, "learning_rate": 1.3877551020408165e-07, "loss": 0.4643, "step": 24446 }, { "epoch": 99.78367346938775, "grad_norm": 30.463808059692383, "learning_rate": 1.3673469387755103e-07, "loss": 1.3094, "step": 24447 }, { "epoch": 99.78775510204082, "grad_norm": 22.45585060119629, "learning_rate": 1.346938775510204e-07, "loss": 0.2175, "step": 24448 }, { "epoch": 99.79183673469387, "grad_norm": 27.880321502685547, "learning_rate": 1.326530612244898e-07, "loss": 1.3284, "step": 24449 }, { "epoch": 99.79591836734694, "grad_norm": 17.672143936157227, "learning_rate": 1.3061224489795918e-07, "loss": 2.4742, "step": 24450 }, { "epoch": 99.8, "grad_norm": 43.16410827636719, "learning_rate": 1.2857142857142858e-07, "loss": 0.3909, "step": 24451 }, { "epoch": 99.80408163265307, "grad_norm": 24.70939826965332, "learning_rate": 1.2653061224489798e-07, "loss": 0.2046, "step": 24452 }, { "epoch": 99.80816326530612, "grad_norm": 30.12680435180664, "learning_rate": 1.2448979591836736e-07, "loss": 0.611, "step": 24453 }, { "epoch": 99.81224489795919, "grad_norm": 33.89920425415039, "learning_rate": 1.2244897959183673e-07, "loss": 0.7335, "step": 24454 }, { "epoch": 99.81632653061224, "grad_norm": 26.68091583251953, "learning_rate": 1.2040816326530614e-07, "loss": 1.2294, "step": 24455 }, { "epoch": 99.82040816326531, "grad_norm": 22.30754280090332, "learning_rate": 1.1836734693877551e-07, "loss": 1.9215, "step": 24456 }, { "epoch": 99.82448979591837, "grad_norm": 32.06108474731445, "learning_rate": 1.163265306122449e-07, "loss": 0.586, "step": 24457 }, { "epoch": 99.82857142857142, "grad_norm": 28.91009521484375, "learning_rate": 1.142857142857143e-07, "loss": 0.4317, "step": 24458 }, { "epoch": 99.83265306122449, "grad_norm": 26.184070587158203, "learning_rate": 1.1224489795918368e-07, "loss": 1.8788, "step": 24459 }, { "epoch": 99.83673469387755, "grad_norm": 35.16274642944336, "learning_rate": 1.1020408163265307e-07, "loss": 0.916, "step": 24460 }, { "epoch": 99.84081632653061, "grad_norm": 25.227519989013672, "learning_rate": 1.0816326530612247e-07, "loss": 0.9166, "step": 24461 }, { "epoch": 99.84489795918367, "grad_norm": 25.47072982788086, "learning_rate": 1.0612244897959184e-07, "loss": 1.3172, "step": 24462 }, { "epoch": 99.84897959183674, "grad_norm": 27.6292724609375, "learning_rate": 1.0408163265306122e-07, "loss": 0.4102, "step": 24463 }, { "epoch": 99.85306122448979, "grad_norm": 42.45000457763672, "learning_rate": 1.0204081632653062e-07, "loss": 0.4376, "step": 24464 }, { "epoch": 99.85714285714286, "grad_norm": 41.21529769897461, "learning_rate": 1.0000000000000001e-07, "loss": 0.3121, "step": 24465 }, { "epoch": 99.86122448979592, "grad_norm": 37.607513427734375, "learning_rate": 9.795918367346938e-08, "loss": 0.972, "step": 24466 }, { "epoch": 99.86530612244898, "grad_norm": 40.39865493774414, "learning_rate": 9.591836734693877e-08, "loss": 0.7368, "step": 24467 }, { "epoch": 99.86938775510204, "grad_norm": 26.904199600219727, "learning_rate": 9.387755102040817e-08, "loss": 0.3111, "step": 24468 }, { "epoch": 99.87346938775511, "grad_norm": 18.923179626464844, "learning_rate": 9.183673469387755e-08, "loss": 0.29, "step": 24469 }, { "epoch": 99.87755102040816, "grad_norm": 26.0899600982666, "learning_rate": 8.979591836734694e-08, "loss": 0.5556, "step": 24470 }, { "epoch": 99.88163265306123, "grad_norm": 26.662294387817383, "learning_rate": 8.775510204081633e-08, "loss": 0.3058, "step": 24471 }, { "epoch": 99.88571428571429, "grad_norm": 30.594621658325195, "learning_rate": 8.571428571428572e-08, "loss": 0.3828, "step": 24472 }, { "epoch": 99.88979591836734, "grad_norm": 5.860211372375488, "learning_rate": 8.36734693877551e-08, "loss": 0.0598, "step": 24473 }, { "epoch": 99.89387755102041, "grad_norm": 18.169334411621094, "learning_rate": 8.16326530612245e-08, "loss": 1.874, "step": 24474 }, { "epoch": 99.89795918367346, "grad_norm": 32.259395599365234, "learning_rate": 7.959183673469388e-08, "loss": 0.6723, "step": 24475 }, { "epoch": 99.90204081632653, "grad_norm": 19.530851364135742, "learning_rate": 7.755102040816327e-08, "loss": 0.1774, "step": 24476 }, { "epoch": 99.90612244897959, "grad_norm": 30.346988677978516, "learning_rate": 7.551020408163266e-08, "loss": 0.9, "step": 24477 }, { "epoch": 99.91020408163266, "grad_norm": 29.622339248657227, "learning_rate": 7.346938775510203e-08, "loss": 0.529, "step": 24478 }, { "epoch": 99.91428571428571, "grad_norm": 32.221763610839844, "learning_rate": 7.142857142857144e-08, "loss": 0.7896, "step": 24479 }, { "epoch": 99.91836734693878, "grad_norm": 22.547889709472656, "learning_rate": 6.938775510204083e-08, "loss": 0.2856, "step": 24480 }, { "epoch": 99.92244897959183, "grad_norm": 25.858795166015625, "learning_rate": 6.73469387755102e-08, "loss": 1.8043, "step": 24481 }, { "epoch": 99.9265306122449, "grad_norm": 29.623369216918945, "learning_rate": 6.530612244897959e-08, "loss": 1.2216, "step": 24482 }, { "epoch": 99.93061224489796, "grad_norm": 19.930374145507812, "learning_rate": 6.326530612244899e-08, "loss": 1.702, "step": 24483 }, { "epoch": 99.93469387755103, "grad_norm": 20.370819091796875, "learning_rate": 6.122448979591837e-08, "loss": 0.2382, "step": 24484 }, { "epoch": 99.93877551020408, "grad_norm": 14.198051452636719, "learning_rate": 5.9183673469387755e-08, "loss": 0.0935, "step": 24485 }, { "epoch": 99.94285714285714, "grad_norm": 25.986909866333008, "learning_rate": 5.714285714285715e-08, "loss": 0.1878, "step": 24486 }, { "epoch": 99.9469387755102, "grad_norm": 25.61292266845703, "learning_rate": 5.510204081632653e-08, "loss": 1.4376, "step": 24487 }, { "epoch": 99.95102040816326, "grad_norm": 32.29615783691406, "learning_rate": 5.306122448979592e-08, "loss": 0.9362, "step": 24488 }, { "epoch": 99.95510204081633, "grad_norm": 35.276451110839844, "learning_rate": 5.102040816326531e-08, "loss": 0.5674, "step": 24489 }, { "epoch": 99.95918367346938, "grad_norm": 23.12346076965332, "learning_rate": 4.897959183673469e-08, "loss": 1.6818, "step": 24490 }, { "epoch": 99.96326530612245, "grad_norm": 26.590002059936523, "learning_rate": 4.693877551020409e-08, "loss": 1.0516, "step": 24491 }, { "epoch": 99.9673469387755, "grad_norm": 35.0167121887207, "learning_rate": 4.489795918367347e-08, "loss": 0.4711, "step": 24492 }, { "epoch": 99.97142857142858, "grad_norm": 4.940554618835449, "learning_rate": 4.285714285714286e-08, "loss": 0.0419, "step": 24493 }, { "epoch": 99.97551020408163, "grad_norm": 28.95920181274414, "learning_rate": 4.081632653061225e-08, "loss": 0.2761, "step": 24494 }, { "epoch": 99.9795918367347, "grad_norm": 28.105064392089844, "learning_rate": 3.8775510204081635e-08, "loss": 0.2507, "step": 24495 }, { "epoch": 99.98367346938775, "grad_norm": 11.4292631149292, "learning_rate": 3.673469387755102e-08, "loss": 0.086, "step": 24496 }, { "epoch": 99.98775510204082, "grad_norm": 28.318988800048828, "learning_rate": 3.469387755102041e-08, "loss": 0.5227, "step": 24497 }, { "epoch": 99.99183673469388, "grad_norm": 5.8413825035095215, "learning_rate": 3.2653061224489795e-08, "loss": 0.0732, "step": 24498 }, { "epoch": 99.99591836734695, "grad_norm": 32.414222717285156, "learning_rate": 3.0612244897959183e-08, "loss": 0.4516, "step": 24499 }, { "epoch": 100.0, "grad_norm": 28.39579200744629, "learning_rate": 2.8571428571428575e-08, "loss": 0.8745, "step": 24500 } ], "logging_steps": 1, "max_steps": 24500, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.16866001260544e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }