{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 48870,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003069367710251688,
      "grad_norm": 6.029524326324463,
      "learning_rate": 2.9969920196439535e-05,
      "loss": 2.626,
      "step": 50
    },
    {
      "epoch": 0.006138735420503376,
      "grad_norm": 4.651051044464111,
      "learning_rate": 2.993922651933702e-05,
      "loss": 2.5542,
      "step": 100
    },
    {
      "epoch": 0.009208103130755065,
      "grad_norm": 4.611798286437988,
      "learning_rate": 2.99085328422345e-05,
      "loss": 2.4749,
      "step": 150
    },
    {
      "epoch": 0.012277470841006752,
      "grad_norm": 5.144523620605469,
      "learning_rate": 2.9877839165131984e-05,
      "loss": 2.4706,
      "step": 200
    },
    {
      "epoch": 0.015346838551258441,
      "grad_norm": 5.4743242263793945,
      "learning_rate": 2.9847145488029468e-05,
      "loss": 2.5232,
      "step": 250
    },
    {
      "epoch": 0.01841620626151013,
      "grad_norm": 4.009519100189209,
      "learning_rate": 2.981645181092695e-05,
      "loss": 2.4524,
      "step": 300
    },
    {
      "epoch": 0.021485573971761818,
      "grad_norm": 4.539732456207275,
      "learning_rate": 2.9785758133824433e-05,
      "loss": 2.4725,
      "step": 350
    },
    {
      "epoch": 0.024554941682013505,
      "grad_norm": 3.896458148956299,
      "learning_rate": 2.9755064456721917e-05,
      "loss": 2.4763,
      "step": 400
    },
    {
      "epoch": 0.027624309392265192,
      "grad_norm": 4.5362982749938965,
      "learning_rate": 2.9724370779619398e-05,
      "loss": 2.4475,
      "step": 450
    },
    {
      "epoch": 0.030693677102516883,
      "grad_norm": 3.903672456741333,
      "learning_rate": 2.9693677102516883e-05,
      "loss": 2.4884,
      "step": 500
    },
    {
      "epoch": 0.03376304481276857,
      "grad_norm": 3.7690269947052,
      "learning_rate": 2.9662983425414363e-05,
      "loss": 2.4525,
      "step": 550
    },
    {
      "epoch": 0.03683241252302026,
      "grad_norm": 3.7247776985168457,
      "learning_rate": 2.9632289748311848e-05,
      "loss": 2.4778,
      "step": 600
    },
    {
      "epoch": 0.03990178023327195,
      "grad_norm": 5.1837310791015625,
      "learning_rate": 2.9601596071209332e-05,
      "loss": 2.4917,
      "step": 650
    },
    {
      "epoch": 0.042971147943523635,
      "grad_norm": 4.13453483581543,
      "learning_rate": 2.9570902394106813e-05,
      "loss": 2.4869,
      "step": 700
    },
    {
      "epoch": 0.04604051565377532,
      "grad_norm": 3.7461624145507812,
      "learning_rate": 2.9540208717004297e-05,
      "loss": 2.4408,
      "step": 750
    },
    {
      "epoch": 0.04910988336402701,
      "grad_norm": 3.429506778717041,
      "learning_rate": 2.950951503990178e-05,
      "loss": 2.442,
      "step": 800
    },
    {
      "epoch": 0.0521792510742787,
      "grad_norm": 4.001476287841797,
      "learning_rate": 2.9478821362799262e-05,
      "loss": 2.3645,
      "step": 850
    },
    {
      "epoch": 0.055248618784530384,
      "grad_norm": 4.0728960037231445,
      "learning_rate": 2.9448127685696746e-05,
      "loss": 2.5016,
      "step": 900
    },
    {
      "epoch": 0.05831798649478207,
      "grad_norm": 4.214133262634277,
      "learning_rate": 2.941743400859423e-05,
      "loss": 2.4142,
      "step": 950
    },
    {
      "epoch": 0.061387354205033766,
      "grad_norm": 4.556704521179199,
      "learning_rate": 2.938674033149171e-05,
      "loss": 2.5025,
      "step": 1000
    },
    {
      "epoch": 0.06445672191528545,
      "grad_norm": 4.376175403594971,
      "learning_rate": 2.9356046654389195e-05,
      "loss": 2.3906,
      "step": 1050
    },
    {
      "epoch": 0.06752608962553713,
      "grad_norm": 4.843928813934326,
      "learning_rate": 2.932535297728668e-05,
      "loss": 2.4598,
      "step": 1100
    },
    {
      "epoch": 0.07059545733578883,
      "grad_norm": 4.809575080871582,
      "learning_rate": 2.9294659300184164e-05,
      "loss": 2.4699,
      "step": 1150
    },
    {
      "epoch": 0.07366482504604052,
      "grad_norm": 4.783190727233887,
      "learning_rate": 2.9263965623081648e-05,
      "loss": 2.4256,
      "step": 1200
    },
    {
      "epoch": 0.0767341927562922,
      "grad_norm": 3.410017728805542,
      "learning_rate": 2.923327194597913e-05,
      "loss": 2.469,
      "step": 1250
    },
    {
      "epoch": 0.0798035604665439,
      "grad_norm": 4.03816556930542,
      "learning_rate": 2.9202578268876613e-05,
      "loss": 2.3954,
      "step": 1300
    },
    {
      "epoch": 0.08287292817679558,
      "grad_norm": 4.535765171051025,
      "learning_rate": 2.9171884591774097e-05,
      "loss": 2.3857,
      "step": 1350
    },
    {
      "epoch": 0.08594229588704727,
      "grad_norm": 3.941899061203003,
      "learning_rate": 2.9141190914671578e-05,
      "loss": 2.4259,
      "step": 1400
    },
    {
      "epoch": 0.08901166359729895,
      "grad_norm": 3.957204818725586,
      "learning_rate": 2.9110497237569062e-05,
      "loss": 2.4487,
      "step": 1450
    },
    {
      "epoch": 0.09208103130755065,
      "grad_norm": 4.369974136352539,
      "learning_rate": 2.9079803560466547e-05,
      "loss": 2.4166,
      "step": 1500
    },
    {
      "epoch": 0.09515039901780234,
      "grad_norm": 3.611785650253296,
      "learning_rate": 2.9049109883364027e-05,
      "loss": 2.4958,
      "step": 1550
    },
    {
      "epoch": 0.09821976672805402,
      "grad_norm": 5.152332305908203,
      "learning_rate": 2.901841620626151e-05,
      "loss": 2.4116,
      "step": 1600
    },
    {
      "epoch": 0.10128913443830571,
      "grad_norm": 3.69728684425354,
      "learning_rate": 2.8987722529158996e-05,
      "loss": 2.3373,
      "step": 1650
    },
    {
      "epoch": 0.1043585021485574,
      "grad_norm": 4.104907512664795,
      "learning_rate": 2.8957028852056477e-05,
      "loss": 2.3961,
      "step": 1700
    },
    {
      "epoch": 0.10742786985880909,
      "grad_norm": 4.160801887512207,
      "learning_rate": 2.892633517495396e-05,
      "loss": 2.3749,
      "step": 1750
    },
    {
      "epoch": 0.11049723756906077,
      "grad_norm": 4.3206329345703125,
      "learning_rate": 2.8895641497851445e-05,
      "loss": 2.3582,
      "step": 1800
    },
    {
      "epoch": 0.11356660527931246,
      "grad_norm": 5.133695125579834,
      "learning_rate": 2.8864947820748926e-05,
      "loss": 2.4267,
      "step": 1850
    },
    {
      "epoch": 0.11663597298956414,
      "grad_norm": 3.392789125442505,
      "learning_rate": 2.883425414364641e-05,
      "loss": 2.4299,
      "step": 1900
    },
    {
      "epoch": 0.11970534069981584,
      "grad_norm": 4.9408135414123535,
      "learning_rate": 2.880356046654389e-05,
      "loss": 2.3965,
      "step": 1950
    },
    {
      "epoch": 0.12277470841006753,
      "grad_norm": 4.085551738739014,
      "learning_rate": 2.8772866789441375e-05,
      "loss": 2.4148,
      "step": 2000
    },
    {
      "epoch": 0.12584407612031923,
      "grad_norm": 4.733358860015869,
      "learning_rate": 2.874217311233886e-05,
      "loss": 2.4431,
      "step": 2050
    },
    {
      "epoch": 0.1289134438305709,
      "grad_norm": 3.3507819175720215,
      "learning_rate": 2.871147943523634e-05,
      "loss": 2.3127,
      "step": 2100
    },
    {
      "epoch": 0.1319828115408226,
      "grad_norm": 5.074892520904541,
      "learning_rate": 2.8680785758133825e-05,
      "loss": 2.2961,
      "step": 2150
    },
    {
      "epoch": 0.13505217925107427,
      "grad_norm": 4.0248332023620605,
      "learning_rate": 2.865009208103131e-05,
      "loss": 2.4545,
      "step": 2200
    },
    {
      "epoch": 0.13812154696132597,
      "grad_norm": 3.959451675415039,
      "learning_rate": 2.861939840392879e-05,
      "loss": 2.4582,
      "step": 2250
    },
    {
      "epoch": 0.14119091467157766,
      "grad_norm": 5.6295294761657715,
      "learning_rate": 2.8588704726826274e-05,
      "loss": 2.4184,
      "step": 2300
    },
    {
      "epoch": 0.14426028238182934,
      "grad_norm": 4.008995056152344,
      "learning_rate": 2.8558011049723758e-05,
      "loss": 2.436,
      "step": 2350
    },
    {
      "epoch": 0.14732965009208104,
      "grad_norm": 4.01780891418457,
      "learning_rate": 2.852731737262124e-05,
      "loss": 2.4243,
      "step": 2400
    },
    {
      "epoch": 0.15039901780233272,
      "grad_norm": 3.839801549911499,
      "learning_rate": 2.8496623695518723e-05,
      "loss": 2.4003,
      "step": 2450
    },
    {
      "epoch": 0.1534683855125844,
      "grad_norm": 3.6963717937469482,
      "learning_rate": 2.8465930018416207e-05,
      "loss": 2.4496,
      "step": 2500
    },
    {
      "epoch": 0.15653775322283608,
      "grad_norm": 4.555826187133789,
      "learning_rate": 2.8435236341313688e-05,
      "loss": 2.4101,
      "step": 2550
    },
    {
      "epoch": 0.1596071209330878,
      "grad_norm": 3.507671356201172,
      "learning_rate": 2.8404542664211172e-05,
      "loss": 2.4258,
      "step": 2600
    },
    {
      "epoch": 0.16267648864333947,
      "grad_norm": 4.644598007202148,
      "learning_rate": 2.8373848987108653e-05,
      "loss": 2.3937,
      "step": 2650
    },
    {
      "epoch": 0.16574585635359115,
      "grad_norm": 3.737030506134033,
      "learning_rate": 2.834315531000614e-05,
      "loss": 2.3961,
      "step": 2700
    },
    {
      "epoch": 0.16881522406384286,
      "grad_norm": 3.4786527156829834,
      "learning_rate": 2.8312461632903625e-05,
      "loss": 2.4414,
      "step": 2750
    },
    {
      "epoch": 0.17188459177409454,
      "grad_norm": 3.9619481563568115,
      "learning_rate": 2.8281767955801106e-05,
      "loss": 2.4952,
      "step": 2800
    },
    {
      "epoch": 0.17495395948434622,
      "grad_norm": 4.628708362579346,
      "learning_rate": 2.825107427869859e-05,
      "loss": 2.4284,
      "step": 2850
    },
    {
      "epoch": 0.1780233271945979,
      "grad_norm": 3.561638593673706,
      "learning_rate": 2.8220380601596074e-05,
      "loss": 2.3983,
      "step": 2900
    },
    {
      "epoch": 0.1810926949048496,
      "grad_norm": 4.126139163970947,
      "learning_rate": 2.8189686924493555e-05,
      "loss": 2.3642,
      "step": 2950
    },
    {
      "epoch": 0.1841620626151013,
      "grad_norm": 3.888535737991333,
      "learning_rate": 2.815899324739104e-05,
      "loss": 2.2939,
      "step": 3000
    },
    {
      "epoch": 0.18723143032535297,
      "grad_norm": 4.639522552490234,
      "learning_rate": 2.8128299570288524e-05,
      "loss": 2.5008,
      "step": 3050
    },
    {
      "epoch": 0.19030079803560468,
      "grad_norm": 4.336818695068359,
      "learning_rate": 2.8097605893186004e-05,
      "loss": 2.3622,
      "step": 3100
    },
    {
      "epoch": 0.19337016574585636,
      "grad_norm": 3.351541519165039,
      "learning_rate": 2.806691221608349e-05,
      "loss": 2.3638,
      "step": 3150
    },
    {
      "epoch": 0.19643953345610804,
      "grad_norm": 7.246761798858643,
      "learning_rate": 2.8036218538980973e-05,
      "loss": 2.4054,
      "step": 3200
    },
    {
      "epoch": 0.19950890116635972,
      "grad_norm": 3.76177978515625,
      "learning_rate": 2.8005524861878454e-05,
      "loss": 2.4991,
      "step": 3250
    },
    {
      "epoch": 0.20257826887661143,
      "grad_norm": 3.489014148712158,
      "learning_rate": 2.7974831184775938e-05,
      "loss": 2.3624,
      "step": 3300
    },
    {
      "epoch": 0.2056476365868631,
      "grad_norm": 3.3915023803710938,
      "learning_rate": 2.794413750767342e-05,
      "loss": 2.3862,
      "step": 3350
    },
    {
      "epoch": 0.2087170042971148,
      "grad_norm": 3.8997626304626465,
      "learning_rate": 2.7913443830570903e-05,
      "loss": 2.2968,
      "step": 3400
    },
    {
      "epoch": 0.21178637200736647,
      "grad_norm": 3.1932547092437744,
      "learning_rate": 2.7882750153468387e-05,
      "loss": 2.3592,
      "step": 3450
    },
    {
      "epoch": 0.21485573971761818,
      "grad_norm": 4.888554096221924,
      "learning_rate": 2.7852056476365868e-05,
      "loss": 2.4128,
      "step": 3500
    },
    {
      "epoch": 0.21792510742786986,
      "grad_norm": 4.414622783660889,
      "learning_rate": 2.7821362799263352e-05,
      "loss": 2.4198,
      "step": 3550
    },
    {
      "epoch": 0.22099447513812154,
      "grad_norm": 3.756305456161499,
      "learning_rate": 2.7790669122160836e-05,
      "loss": 2.3664,
      "step": 3600
    },
    {
      "epoch": 0.22406384284837325,
      "grad_norm": 4.073141098022461,
      "learning_rate": 2.7759975445058317e-05,
      "loss": 2.3876,
      "step": 3650
    },
    {
      "epoch": 0.22713321055862493,
      "grad_norm": 2.9590511322021484,
      "learning_rate": 2.77292817679558e-05,
      "loss": 2.3702,
      "step": 3700
    },
    {
      "epoch": 0.2302025782688766,
      "grad_norm": 3.9760727882385254,
      "learning_rate": 2.7698588090853286e-05,
      "loss": 2.3944,
      "step": 3750
    },
    {
      "epoch": 0.2332719459791283,
      "grad_norm": 3.8550660610198975,
      "learning_rate": 2.7667894413750767e-05,
      "loss": 2.3543,
      "step": 3800
    },
    {
      "epoch": 0.23634131368938,
      "grad_norm": 5.260376930236816,
      "learning_rate": 2.763720073664825e-05,
      "loss": 2.3058,
      "step": 3850
    },
    {
      "epoch": 0.23941068139963168,
      "grad_norm": 3.408364772796631,
      "learning_rate": 2.7606507059545735e-05,
      "loss": 2.4173,
      "step": 3900
    },
    {
      "epoch": 0.24248004910988336,
      "grad_norm": 3.482743263244629,
      "learning_rate": 2.7575813382443216e-05,
      "loss": 2.3997,
      "step": 3950
    },
    {
      "epoch": 0.24554941682013506,
      "grad_norm": 3.7070858478546143,
      "learning_rate": 2.75451197053407e-05,
      "loss": 2.4266,
      "step": 4000
    },
    {
      "epoch": 0.24861878453038674,
      "grad_norm": 4.024835109710693,
      "learning_rate": 2.751442602823818e-05,
      "loss": 2.3201,
      "step": 4050
    },
    {
      "epoch": 0.25168815224063845,
      "grad_norm": 4.50393533706665,
      "learning_rate": 2.7483732351135665e-05,
      "loss": 2.3541,
      "step": 4100
    },
    {
      "epoch": 0.25475751995089013,
      "grad_norm": 4.806800365447998,
      "learning_rate": 2.745303867403315e-05,
      "loss": 2.4102,
      "step": 4150
    },
    {
      "epoch": 0.2578268876611418,
      "grad_norm": 3.5676867961883545,
      "learning_rate": 2.742234499693063e-05,
      "loss": 2.2263,
      "step": 4200
    },
    {
      "epoch": 0.2608962553713935,
      "grad_norm": 3.5269808769226074,
      "learning_rate": 2.7391651319828114e-05,
      "loss": 2.2747,
      "step": 4250
    },
    {
      "epoch": 0.2639656230816452,
      "grad_norm": 4.45748233795166,
      "learning_rate": 2.73609576427256e-05,
      "loss": 2.3761,
      "step": 4300
    },
    {
      "epoch": 0.26703499079189685,
      "grad_norm": 4.212084770202637,
      "learning_rate": 2.7330263965623083e-05,
      "loss": 2.4948,
      "step": 4350
    },
    {
      "epoch": 0.27010435850214853,
      "grad_norm": 4.344183444976807,
      "learning_rate": 2.7299570288520567e-05,
      "loss": 2.4308,
      "step": 4400
    },
    {
      "epoch": 0.27317372621240027,
      "grad_norm": 3.9603850841522217,
      "learning_rate": 2.726887661141805e-05,
      "loss": 2.4254,
      "step": 4450
    },
    {
      "epoch": 0.27624309392265195,
      "grad_norm": 4.453806400299072,
      "learning_rate": 2.7238182934315532e-05,
      "loss": 2.4717,
      "step": 4500
    },
    {
      "epoch": 0.27931246163290363,
      "grad_norm": 3.924269437789917,
      "learning_rate": 2.7207489257213016e-05,
      "loss": 2.3845,
      "step": 4550
    },
    {
      "epoch": 0.2823818293431553,
      "grad_norm": 4.238902568817139,
      "learning_rate": 2.71767955801105e-05,
      "loss": 2.4194,
      "step": 4600
    },
    {
      "epoch": 0.285451197053407,
      "grad_norm": 3.7095208168029785,
      "learning_rate": 2.714610190300798e-05,
      "loss": 2.3036,
      "step": 4650
    },
    {
      "epoch": 0.28852056476365867,
      "grad_norm": 3.765899419784546,
      "learning_rate": 2.7115408225905466e-05,
      "loss": 2.4129,
      "step": 4700
    },
    {
      "epoch": 0.29158993247391035,
      "grad_norm": 4.793846130371094,
      "learning_rate": 2.7084714548802946e-05,
      "loss": 2.485,
      "step": 4750
    },
    {
      "epoch": 0.2946593001841621,
      "grad_norm": 3.294372797012329,
      "learning_rate": 2.705402087170043e-05,
      "loss": 2.3896,
      "step": 4800
    },
    {
      "epoch": 0.29772866789441377,
      "grad_norm": 4.122358798980713,
      "learning_rate": 2.7023327194597915e-05,
      "loss": 2.3746,
      "step": 4850
    },
    {
      "epoch": 0.30079803560466545,
      "grad_norm": 3.1560487747192383,
      "learning_rate": 2.6992633517495396e-05,
      "loss": 2.346,
      "step": 4900
    },
    {
      "epoch": 0.30386740331491713,
      "grad_norm": 3.7786178588867188,
      "learning_rate": 2.696193984039288e-05,
      "loss": 2.3669,
      "step": 4950
    },
    {
      "epoch": 0.3069367710251688,
      "grad_norm": 3.4450085163116455,
      "learning_rate": 2.6931246163290364e-05,
      "loss": 2.3475,
      "step": 5000
    },
    {
      "epoch": 0.3100061387354205,
      "grad_norm": 4.380395889282227,
      "learning_rate": 2.6900552486187845e-05,
      "loss": 2.3755,
      "step": 5050
    },
    {
      "epoch": 0.31307550644567217,
      "grad_norm": 4.272715091705322,
      "learning_rate": 2.686985880908533e-05,
      "loss": 2.3517,
      "step": 5100
    },
    {
      "epoch": 0.3161448741559239,
      "grad_norm": 5.976320266723633,
      "learning_rate": 2.6839165131982813e-05,
      "loss": 2.3665,
      "step": 5150
    },
    {
      "epoch": 0.3192142418661756,
      "grad_norm": 4.1683759689331055,
      "learning_rate": 2.6808471454880294e-05,
      "loss": 2.2517,
      "step": 5200
    },
    {
      "epoch": 0.32228360957642727,
      "grad_norm": 3.623004674911499,
      "learning_rate": 2.677777777777778e-05,
      "loss": 2.4741,
      "step": 5250
    },
    {
      "epoch": 0.32535297728667895,
      "grad_norm": 4.282279968261719,
      "learning_rate": 2.6747084100675263e-05,
      "loss": 2.3191,
      "step": 5300
    },
    {
      "epoch": 0.3284223449969306,
      "grad_norm": 3.761108636856079,
      "learning_rate": 2.6716390423572744e-05,
      "loss": 2.4249,
      "step": 5350
    },
    {
      "epoch": 0.3314917127071823,
      "grad_norm": 3.7316195964813232,
      "learning_rate": 2.6685696746470228e-05,
      "loss": 2.3679,
      "step": 5400
    },
    {
      "epoch": 0.334561080417434,
      "grad_norm": 3.8297908306121826,
      "learning_rate": 2.665500306936771e-05,
      "loss": 2.4157,
      "step": 5450
    },
    {
      "epoch": 0.3376304481276857,
      "grad_norm": 4.17257833480835,
      "learning_rate": 2.6624309392265193e-05,
      "loss": 2.2362,
      "step": 5500
    },
    {
      "epoch": 0.3406998158379374,
      "grad_norm": 4.0827484130859375,
      "learning_rate": 2.6593615715162677e-05,
      "loss": 2.3369,
      "step": 5550
    },
    {
      "epoch": 0.3437691835481891,
      "grad_norm": 3.3072879314422607,
      "learning_rate": 2.6562922038060158e-05,
      "loss": 2.3797,
      "step": 5600
    },
    {
      "epoch": 0.34683855125844076,
      "grad_norm": 3.316751480102539,
      "learning_rate": 2.6532228360957642e-05,
      "loss": 2.4562,
      "step": 5650
    },
    {
      "epoch": 0.34990791896869244,
      "grad_norm": 4.482247829437256,
      "learning_rate": 2.6501534683855126e-05,
      "loss": 2.373,
      "step": 5700
    },
    {
      "epoch": 0.3529772866789441,
      "grad_norm": 4.056297779083252,
      "learning_rate": 2.6470841006752607e-05,
      "loss": 2.4294,
      "step": 5750
    },
    {
      "epoch": 0.3560466543891958,
      "grad_norm": 3.596730947494507,
      "learning_rate": 2.644014732965009e-05,
      "loss": 2.3219,
      "step": 5800
    },
    {
      "epoch": 0.35911602209944754,
      "grad_norm": 4.089284896850586,
      "learning_rate": 2.6409453652547576e-05,
      "loss": 2.4088,
      "step": 5850
    },
    {
      "epoch": 0.3621853898096992,
      "grad_norm": 4.409155368804932,
      "learning_rate": 2.6378759975445056e-05,
      "loss": 2.3561,
      "step": 5900
    },
    {
      "epoch": 0.3652547575199509,
      "grad_norm": 5.397696018218994,
      "learning_rate": 2.6348066298342544e-05,
      "loss": 2.3602,
      "step": 5950
    },
    {
      "epoch": 0.3683241252302026,
      "grad_norm": 3.62880277633667,
      "learning_rate": 2.6317372621240028e-05,
      "loss": 2.4074,
      "step": 6000
    },
    {
      "epoch": 0.37139349294045426,
      "grad_norm": 3.9512007236480713,
      "learning_rate": 2.628667894413751e-05,
      "loss": 2.3754,
      "step": 6050
    },
    {
      "epoch": 0.37446286065070594,
      "grad_norm": 4.806766033172607,
      "learning_rate": 2.6255985267034993e-05,
      "loss": 2.362,
      "step": 6100
    },
    {
      "epoch": 0.3775322283609576,
      "grad_norm": 4.055029392242432,
      "learning_rate": 2.6225291589932474e-05,
      "loss": 2.3769,
      "step": 6150
    },
    {
      "epoch": 0.38060159607120936,
      "grad_norm": 4.231038570404053,
      "learning_rate": 2.619459791282996e-05,
      "loss": 2.4561,
      "step": 6200
    },
    {
      "epoch": 0.38367096378146104,
      "grad_norm": 3.5260846614837646,
      "learning_rate": 2.6163904235727443e-05,
      "loss": 2.4241,
      "step": 6250
    },
    {
      "epoch": 0.3867403314917127,
      "grad_norm": 4.557247161865234,
      "learning_rate": 2.6133210558624923e-05,
      "loss": 2.3357,
      "step": 6300
    },
    {
      "epoch": 0.3898096992019644,
      "grad_norm": 3.3867931365966797,
      "learning_rate": 2.6102516881522408e-05,
      "loss": 2.2559,
      "step": 6350
    },
    {
      "epoch": 0.3928790669122161,
      "grad_norm": 4.224850177764893,
      "learning_rate": 2.6071823204419892e-05,
      "loss": 2.3338,
      "step": 6400
    },
    {
      "epoch": 0.39594843462246776,
      "grad_norm": 4.095287322998047,
      "learning_rate": 2.6041129527317373e-05,
      "loss": 2.3384,
      "step": 6450
    },
    {
      "epoch": 0.39901780233271944,
      "grad_norm": 2.982825517654419,
      "learning_rate": 2.6010435850214857e-05,
      "loss": 2.2956,
      "step": 6500
    },
    {
      "epoch": 0.4020871700429711,
      "grad_norm": 4.3587164878845215,
      "learning_rate": 2.597974217311234e-05,
      "loss": 2.313,
      "step": 6550
    },
    {
      "epoch": 0.40515653775322286,
      "grad_norm": 4.225688457489014,
      "learning_rate": 2.5949048496009822e-05,
      "loss": 2.3302,
      "step": 6600
    },
    {
      "epoch": 0.40822590546347454,
      "grad_norm": 3.7242987155914307,
      "learning_rate": 2.5918354818907306e-05,
      "loss": 2.3699,
      "step": 6650
    },
    {
      "epoch": 0.4112952731737262,
      "grad_norm": 4.286329746246338,
      "learning_rate": 2.588766114180479e-05,
      "loss": 2.3835,
      "step": 6700
    },
    {
      "epoch": 0.4143646408839779,
      "grad_norm": 3.054641008377075,
      "learning_rate": 2.585696746470227e-05,
      "loss": 2.3717,
      "step": 6750
    },
    {
      "epoch": 0.4174340085942296,
      "grad_norm": 3.183530330657959,
      "learning_rate": 2.5826273787599756e-05,
      "loss": 2.4187,
      "step": 6800
    },
    {
      "epoch": 0.42050337630448126,
      "grad_norm": 3.300554037094116,
      "learning_rate": 2.5795580110497236e-05,
      "loss": 2.381,
      "step": 6850
    },
    {
      "epoch": 0.42357274401473294,
      "grad_norm": 3.742980718612671,
      "learning_rate": 2.576488643339472e-05,
      "loss": 2.3156,
      "step": 6900
    },
    {
      "epoch": 0.4266421117249847,
      "grad_norm": 4.5531182289123535,
      "learning_rate": 2.5734192756292205e-05,
      "loss": 2.2977,
      "step": 6950
    },
    {
      "epoch": 0.42971147943523635,
      "grad_norm": 4.084106922149658,
      "learning_rate": 2.5703499079189686e-05,
      "loss": 2.3399,
      "step": 7000
    },
    {
      "epoch": 0.43278084714548803,
      "grad_norm": 3.3674798011779785,
      "learning_rate": 2.567280540208717e-05,
      "loss": 2.4205,
      "step": 7050
    },
    {
      "epoch": 0.4358502148557397,
      "grad_norm": 3.401578664779663,
      "learning_rate": 2.5642111724984654e-05,
      "loss": 2.3318,
      "step": 7100
    },
    {
      "epoch": 0.4389195825659914,
      "grad_norm": 3.7515037059783936,
      "learning_rate": 2.5611418047882135e-05,
      "loss": 2.3514,
      "step": 7150
    },
    {
      "epoch": 0.4419889502762431,
      "grad_norm": 3.1685400009155273,
      "learning_rate": 2.558072437077962e-05,
      "loss": 2.3682,
      "step": 7200
    },
    {
      "epoch": 0.44505831798649476,
      "grad_norm": 3.990283489227295,
      "learning_rate": 2.5550030693677103e-05,
      "loss": 2.2508,
      "step": 7250
    },
    {
      "epoch": 0.4481276856967465,
      "grad_norm": 4.544641971588135,
      "learning_rate": 2.5519337016574584e-05,
      "loss": 2.3624,
      "step": 7300
    },
    {
      "epoch": 0.45119705340699817,
      "grad_norm": 3.986804962158203,
      "learning_rate": 2.548864333947207e-05,
      "loss": 2.384,
      "step": 7350
    },
    {
      "epoch": 0.45426642111724985,
      "grad_norm": 4.338369846343994,
      "learning_rate": 2.5457949662369553e-05,
      "loss": 2.3311,
      "step": 7400
    },
    {
      "epoch": 0.45733578882750153,
      "grad_norm": 3.8103485107421875,
      "learning_rate": 2.5427255985267033e-05,
      "loss": 2.2405,
      "step": 7450
    },
    {
      "epoch": 0.4604051565377532,
      "grad_norm": 3.461311101913452,
      "learning_rate": 2.5396562308164518e-05,
      "loss": 2.3843,
      "step": 7500
    },
    {
      "epoch": 0.4634745242480049,
      "grad_norm": 3.6048476696014404,
      "learning_rate": 2.5365868631062002e-05,
      "loss": 2.395,
      "step": 7550
    },
    {
      "epoch": 0.4665438919582566,
      "grad_norm": 3.4888088703155518,
      "learning_rate": 2.5335174953959486e-05,
      "loss": 2.3449,
      "step": 7600
    },
    {
      "epoch": 0.4696132596685083,
      "grad_norm": 3.9866726398468018,
      "learning_rate": 2.530448127685697e-05,
      "loss": 2.3806,
      "step": 7650
    },
    {
      "epoch": 0.47268262737876,
      "grad_norm": 3.5875093936920166,
      "learning_rate": 2.527378759975445e-05,
      "loss": 2.32,
      "step": 7700
    },
    {
      "epoch": 0.47575199508901167,
      "grad_norm": 3.5421364307403564,
      "learning_rate": 2.5243093922651935e-05,
      "loss": 2.4109,
      "step": 7750
    },
    {
      "epoch": 0.47882136279926335,
      "grad_norm": 3.4700534343719482,
      "learning_rate": 2.521240024554942e-05,
      "loss": 2.38,
      "step": 7800
    },
    {
      "epoch": 0.48189073050951503,
      "grad_norm": 3.879786729812622,
      "learning_rate": 2.51817065684469e-05,
      "loss": 2.389,
      "step": 7850
    },
    {
      "epoch": 0.4849600982197667,
      "grad_norm": 4.261646747589111,
      "learning_rate": 2.5151012891344385e-05,
      "loss": 2.3983,
      "step": 7900
    },
    {
      "epoch": 0.4880294659300184,
      "grad_norm": 3.6179709434509277,
      "learning_rate": 2.512031921424187e-05,
      "loss": 2.3239,
      "step": 7950
    },
    {
      "epoch": 0.4910988336402701,
      "grad_norm": 3.9921975135803223,
      "learning_rate": 2.508962553713935e-05,
      "loss": 2.3513,
      "step": 8000
    },
    {
      "epoch": 0.4941682013505218,
      "grad_norm": 3.7387430667877197,
      "learning_rate": 2.5058931860036834e-05,
      "loss": 2.3613,
      "step": 8050
    },
    {
      "epoch": 0.4972375690607735,
      "grad_norm": 5.033226013183594,
      "learning_rate": 2.5028238182934318e-05,
      "loss": 2.3512,
      "step": 8100
    },
    {
      "epoch": 0.5003069367710252,
      "grad_norm": 3.8006532192230225,
      "learning_rate": 2.49975445058318e-05,
      "loss": 2.4073,
      "step": 8150
    },
    {
      "epoch": 0.5033763044812769,
      "grad_norm": 3.866370916366577,
      "learning_rate": 2.4966850828729283e-05,
      "loss": 2.2788,
      "step": 8200
    },
    {
      "epoch": 0.5064456721915286,
      "grad_norm": 3.6110663414001465,
      "learning_rate": 2.4936157151626764e-05,
      "loss": 2.3755,
      "step": 8250
    },
    {
      "epoch": 0.5095150399017803,
      "grad_norm": 3.55604887008667,
      "learning_rate": 2.4905463474524248e-05,
      "loss": 2.3332,
      "step": 8300
    },
    {
      "epoch": 0.512584407612032,
      "grad_norm": 3.8975794315338135,
      "learning_rate": 2.4874769797421732e-05,
      "loss": 2.3691,
      "step": 8350
    },
    {
      "epoch": 0.5156537753222836,
      "grad_norm": 3.8415000438690186,
      "learning_rate": 2.4844076120319213e-05,
      "loss": 2.3481,
      "step": 8400
    },
    {
      "epoch": 0.5187231430325353,
      "grad_norm": 2.9400956630706787,
      "learning_rate": 2.4813382443216698e-05,
      "loss": 2.3491,
      "step": 8450
    },
    {
      "epoch": 0.521792510742787,
      "grad_norm": 3.419921875,
      "learning_rate": 2.4782688766114182e-05,
      "loss": 2.2612,
      "step": 8500
    },
    {
      "epoch": 0.5248618784530387,
      "grad_norm": 3.066887617111206,
      "learning_rate": 2.4751995089011663e-05,
      "loss": 2.3013,
      "step": 8550
    },
    {
      "epoch": 0.5279312461632903,
      "grad_norm": 3.7565064430236816,
      "learning_rate": 2.4721301411909147e-05,
      "loss": 2.3407,
      "step": 8600
    },
    {
      "epoch": 0.531000613873542,
      "grad_norm": 3.6040759086608887,
      "learning_rate": 2.469060773480663e-05,
      "loss": 2.3678,
      "step": 8650
    },
    {
      "epoch": 0.5340699815837937,
      "grad_norm": 3.4393301010131836,
      "learning_rate": 2.4659914057704112e-05,
      "loss": 2.321,
      "step": 8700
    },
    {
      "epoch": 0.5371393492940454,
      "grad_norm": 4.549371242523193,
      "learning_rate": 2.4629220380601596e-05,
      "loss": 2.235,
      "step": 8750
    },
    {
      "epoch": 0.5402087170042971,
      "grad_norm": 3.2161850929260254,
      "learning_rate": 2.459852670349908e-05,
      "loss": 2.4156,
      "step": 8800
    },
    {
      "epoch": 0.5432780847145487,
      "grad_norm": 4.914154052734375,
      "learning_rate": 2.456783302639656e-05,
      "loss": 2.3127,
      "step": 8850
    },
    {
      "epoch": 0.5463474524248005,
      "grad_norm": 2.948514699935913,
      "learning_rate": 2.4537139349294045e-05,
      "loss": 2.3924,
      "step": 8900
    },
    {
      "epoch": 0.5494168201350522,
      "grad_norm": 3.669295072555542,
      "learning_rate": 2.4506445672191526e-05,
      "loss": 2.3639,
      "step": 8950
    },
    {
      "epoch": 0.5524861878453039,
      "grad_norm": 4.5282793045043945,
      "learning_rate": 2.447575199508901e-05,
      "loss": 2.2916,
      "step": 9000
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 3.6461524963378906,
      "learning_rate": 2.4445058317986495e-05,
      "loss": 2.4551,
      "step": 9050
    },
    {
      "epoch": 0.5586249232658073,
      "grad_norm": 3.558283567428589,
      "learning_rate": 2.4414364640883975e-05,
      "loss": 2.3189,
      "step": 9100
    },
    {
      "epoch": 0.5616942909760589,
      "grad_norm": 3.31160569190979,
      "learning_rate": 2.4383670963781463e-05,
      "loss": 2.2667,
      "step": 9150
    },
    {
      "epoch": 0.5647636586863106,
      "grad_norm": 4.318973541259766,
      "learning_rate": 2.4352977286678947e-05,
      "loss": 2.3747,
      "step": 9200
    },
    {
      "epoch": 0.5678330263965623,
      "grad_norm": 3.0922465324401855,
      "learning_rate": 2.4322283609576428e-05,
      "loss": 2.2472,
      "step": 9250
    },
    {
      "epoch": 0.570902394106814,
      "grad_norm": 3.615382432937622,
      "learning_rate": 2.4291589932473912e-05,
      "loss": 2.3784,
      "step": 9300
    },
    {
      "epoch": 0.5739717618170657,
      "grad_norm": 4.567912578582764,
      "learning_rate": 2.4260896255371397e-05,
      "loss": 2.3389,
      "step": 9350
    },
    {
      "epoch": 0.5770411295273173,
      "grad_norm": 6.014679431915283,
      "learning_rate": 2.4230202578268877e-05,
      "loss": 2.2728,
      "step": 9400
    },
    {
      "epoch": 0.580110497237569,
      "grad_norm": 3.3985538482666016,
      "learning_rate": 2.419950890116636e-05,
      "loss": 2.3619,
      "step": 9450
    },
    {
      "epoch": 0.5831798649478207,
      "grad_norm": 3.6719815731048584,
      "learning_rate": 2.4168815224063846e-05,
      "loss": 2.3639,
      "step": 9500
    },
    {
      "epoch": 0.5862492326580724,
      "grad_norm": 4.365039348602295,
      "learning_rate": 2.4138121546961327e-05,
      "loss": 2.3359,
      "step": 9550
    },
    {
      "epoch": 0.5893186003683242,
      "grad_norm": 4.179307460784912,
      "learning_rate": 2.410742786985881e-05,
      "loss": 2.3482,
      "step": 9600
    },
    {
      "epoch": 0.5923879680785759,
      "grad_norm": 3.4102518558502197,
      "learning_rate": 2.4076734192756292e-05,
      "loss": 2.3535,
      "step": 9650
    },
    {
      "epoch": 0.5954573357888275,
      "grad_norm": 4.294723033905029,
      "learning_rate": 2.4046040515653776e-05,
      "loss": 2.3686,
      "step": 9700
    },
    {
      "epoch": 0.5985267034990792,
      "grad_norm": 4.105884552001953,
      "learning_rate": 2.401534683855126e-05,
      "loss": 2.3453,
      "step": 9750
    },
    {
      "epoch": 0.6015960712093309,
      "grad_norm": 3.321399450302124,
      "learning_rate": 2.398465316144874e-05,
      "loss": 2.4262,
      "step": 9800
    },
    {
      "epoch": 0.6046654389195826,
      "grad_norm": 4.505974292755127,
      "learning_rate": 2.3953959484346225e-05,
      "loss": 2.3449,
      "step": 9850
    },
    {
      "epoch": 0.6077348066298343,
      "grad_norm": 3.7572021484375,
      "learning_rate": 2.392326580724371e-05,
      "loss": 2.3861,
      "step": 9900
    },
    {
      "epoch": 0.6108041743400859,
      "grad_norm": 3.6873481273651123,
      "learning_rate": 2.389257213014119e-05,
      "loss": 2.3733,
      "step": 9950
    },
    {
      "epoch": 0.6138735420503376,
      "grad_norm": 4.152860641479492,
      "learning_rate": 2.3861878453038675e-05,
      "loss": 2.3398,
      "step": 10000
    },
    {
      "epoch": 0.6169429097605893,
      "grad_norm": 3.437129497528076,
      "learning_rate": 2.383118477593616e-05,
      "loss": 2.3327,
      "step": 10050
    },
    {
      "epoch": 0.620012277470841,
      "grad_norm": 4.148296356201172,
      "learning_rate": 2.380049109883364e-05,
      "loss": 2.278,
      "step": 10100
    },
    {
      "epoch": 0.6230816451810927,
      "grad_norm": 3.596064805984497,
      "learning_rate": 2.3769797421731124e-05,
      "loss": 2.38,
      "step": 10150
    },
    {
      "epoch": 0.6261510128913443,
      "grad_norm": 3.819524049758911,
      "learning_rate": 2.3739103744628608e-05,
      "loss": 2.2876,
      "step": 10200
    },
    {
      "epoch": 0.629220380601596,
      "grad_norm": 4.420265197753906,
      "learning_rate": 2.370841006752609e-05,
      "loss": 2.3627,
      "step": 10250
    },
    {
      "epoch": 0.6322897483118478,
      "grad_norm": 4.018989562988281,
      "learning_rate": 2.3677716390423573e-05,
      "loss": 2.3528,
      "step": 10300
    },
    {
      "epoch": 0.6353591160220995,
      "grad_norm": 3.5963191986083984,
      "learning_rate": 2.3647022713321054e-05,
      "loss": 2.3546,
      "step": 10350
    },
    {
      "epoch": 0.6384284837323512,
      "grad_norm": 4.6964874267578125,
      "learning_rate": 2.3616329036218538e-05,
      "loss": 2.4427,
      "step": 10400
    },
    {
      "epoch": 0.6414978514426029,
      "grad_norm": 3.6673765182495117,
      "learning_rate": 2.3585635359116022e-05,
      "loss": 2.3476,
      "step": 10450
    },
    {
      "epoch": 0.6445672191528545,
      "grad_norm": 4.190234184265137,
      "learning_rate": 2.3554941682013503e-05,
      "loss": 2.3326,
      "step": 10500
    },
    {
      "epoch": 0.6476365868631062,
      "grad_norm": 3.8142518997192383,
      "learning_rate": 2.3524248004910987e-05,
      "loss": 2.2891,
      "step": 10550
    },
    {
      "epoch": 0.6507059545733579,
      "grad_norm": 3.8022055625915527,
      "learning_rate": 2.349355432780847e-05,
      "loss": 2.3652,
      "step": 10600
    },
    {
      "epoch": 0.6537753222836096,
      "grad_norm": 3.8915176391601562,
      "learning_rate": 2.3462860650705952e-05,
      "loss": 2.3169,
      "step": 10650
    },
    {
      "epoch": 0.6568446899938613,
      "grad_norm": 3.6545536518096924,
      "learning_rate": 2.3432166973603437e-05,
      "loss": 2.3371,
      "step": 10700
    },
    {
      "epoch": 0.6599140577041129,
      "grad_norm": 3.763852596282959,
      "learning_rate": 2.3401473296500924e-05,
      "loss": 2.3069,
      "step": 10750
    },
    {
      "epoch": 0.6629834254143646,
      "grad_norm": 3.8284034729003906,
      "learning_rate": 2.3370779619398405e-05,
      "loss": 2.3334,
      "step": 10800
    },
    {
      "epoch": 0.6660527931246163,
      "grad_norm": 7.501564979553223,
      "learning_rate": 2.334008594229589e-05,
      "loss": 2.4061,
      "step": 10850
    },
    {
      "epoch": 0.669122160834868,
      "grad_norm": 3.7189035415649414,
      "learning_rate": 2.3309392265193374e-05,
      "loss": 2.3226,
      "step": 10900
    },
    {
      "epoch": 0.6721915285451197,
      "grad_norm": 3.5018155574798584,
      "learning_rate": 2.3278698588090854e-05,
      "loss": 2.2974,
      "step": 10950
    },
    {
      "epoch": 0.6752608962553714,
      "grad_norm": 4.443968772888184,
      "learning_rate": 2.324800491098834e-05,
      "loss": 2.3936,
      "step": 11000
    },
    {
      "epoch": 0.6783302639656231,
      "grad_norm": 3.740535020828247,
      "learning_rate": 2.321731123388582e-05,
      "loss": 2.2829,
      "step": 11050
    },
    {
      "epoch": 0.6813996316758748,
      "grad_norm": 3.538335084915161,
      "learning_rate": 2.3186617556783304e-05,
      "loss": 2.2824,
      "step": 11100
    },
    {
      "epoch": 0.6844689993861265,
      "grad_norm": 3.7425730228424072,
      "learning_rate": 2.3155923879680788e-05,
      "loss": 2.2808,
      "step": 11150
    },
    {
      "epoch": 0.6875383670963782,
      "grad_norm": 3.8225579261779785,
      "learning_rate": 2.312523020257827e-05,
      "loss": 2.3258,
      "step": 11200
    },
    {
      "epoch": 0.6906077348066298,
      "grad_norm": 4.689228057861328,
      "learning_rate": 2.3094536525475753e-05,
      "loss": 2.2818,
      "step": 11250
    },
    {
      "epoch": 0.6936771025168815,
      "grad_norm": 3.968703031539917,
      "learning_rate": 2.3063842848373237e-05,
      "loss": 2.3767,
      "step": 11300
    },
    {
      "epoch": 0.6967464702271332,
      "grad_norm": 4.036931037902832,
      "learning_rate": 2.3033149171270718e-05,
      "loss": 2.3459,
      "step": 11350
    },
    {
      "epoch": 0.6998158379373849,
      "grad_norm": 4.426519870758057,
      "learning_rate": 2.3002455494168202e-05,
      "loss": 2.327,
      "step": 11400
    },
    {
      "epoch": 0.7028852056476366,
      "grad_norm": 3.6122524738311768,
      "learning_rate": 2.2971761817065686e-05,
      "loss": 2.3813,
      "step": 11450
    },
    {
      "epoch": 0.7059545733578882,
      "grad_norm": 5.523836612701416,
      "learning_rate": 2.2941068139963167e-05,
      "loss": 2.3577,
      "step": 11500
    },
    {
      "epoch": 0.7090239410681399,
      "grad_norm": 3.1946020126342773,
      "learning_rate": 2.291037446286065e-05,
      "loss": 2.3005,
      "step": 11550
    },
    {
      "epoch": 0.7120933087783916,
      "grad_norm": 4.517838001251221,
      "learning_rate": 2.2879680785758136e-05,
      "loss": 2.3537,
      "step": 11600
    },
    {
      "epoch": 0.7151626764886433,
      "grad_norm": 3.4100501537323,
      "learning_rate": 2.2848987108655617e-05,
      "loss": 2.3526,
      "step": 11650
    },
    {
      "epoch": 0.7182320441988951,
      "grad_norm": 4.370871067047119,
      "learning_rate": 2.28182934315531e-05,
      "loss": 2.2843,
      "step": 11700
    },
    {
      "epoch": 0.7213014119091468,
      "grad_norm": 3.3597848415374756,
      "learning_rate": 2.2787599754450585e-05,
      "loss": 2.4289,
      "step": 11750
    },
    {
      "epoch": 0.7243707796193984,
      "grad_norm": 4.361307144165039,
      "learning_rate": 2.2756906077348066e-05,
      "loss": 2.3334,
      "step": 11800
    },
    {
      "epoch": 0.7274401473296501,
      "grad_norm": 3.5192790031433105,
      "learning_rate": 2.272621240024555e-05,
      "loss": 2.3935,
      "step": 11850
    },
    {
      "epoch": 0.7305095150399018,
      "grad_norm": 3.058115005493164,
      "learning_rate": 2.269551872314303e-05,
      "loss": 2.2948,
      "step": 11900
    },
    {
      "epoch": 0.7335788827501535,
      "grad_norm": 3.7125728130340576,
      "learning_rate": 2.2664825046040515e-05,
      "loss": 2.2702,
      "step": 11950
    },
    {
      "epoch": 0.7366482504604052,
      "grad_norm": 3.5723328590393066,
      "learning_rate": 2.2634131368938e-05,
      "loss": 2.3361,
      "step": 12000
    },
    {
      "epoch": 0.7397176181706568,
      "grad_norm": 4.428549766540527,
      "learning_rate": 2.260343769183548e-05,
      "loss": 2.2938,
      "step": 12050
    },
    {
      "epoch": 0.7427869858809085,
      "grad_norm": 3.8374624252319336,
      "learning_rate": 2.2572744014732964e-05,
      "loss": 2.2387,
      "step": 12100
    },
    {
      "epoch": 0.7458563535911602,
      "grad_norm": 4.0965657234191895,
      "learning_rate": 2.254205033763045e-05,
      "loss": 2.2988,
      "step": 12150
    },
    {
      "epoch": 0.7489257213014119,
      "grad_norm": 3.138101816177368,
      "learning_rate": 2.251135666052793e-05,
      "loss": 2.3083,
      "step": 12200
    },
    {
      "epoch": 0.7519950890116636,
      "grad_norm": 3.7243382930755615,
      "learning_rate": 2.2480662983425414e-05,
      "loss": 2.3094,
      "step": 12250
    },
    {
      "epoch": 0.7550644567219152,
      "grad_norm": 4.791036605834961,
      "learning_rate": 2.2449969306322898e-05,
      "loss": 2.317,
      "step": 12300
    },
    {
      "epoch": 0.7581338244321669,
      "grad_norm": 4.1747236251831055,
      "learning_rate": 2.2419275629220382e-05,
      "loss": 2.462,
      "step": 12350
    },
    {
      "epoch": 0.7612031921424187,
      "grad_norm": 4.427381992340088,
      "learning_rate": 2.2388581952117866e-05,
      "loss": 2.3935,
      "step": 12400
    },
    {
      "epoch": 0.7642725598526704,
      "grad_norm": 4.517187118530273,
      "learning_rate": 2.235788827501535e-05,
      "loss": 2.3619,
      "step": 12450
    },
    {
      "epoch": 0.7673419275629221,
      "grad_norm": 3.2976391315460205,
      "learning_rate": 2.232719459791283e-05,
      "loss": 2.3363,
      "step": 12500
    },
    {
      "epoch": 0.7704112952731738,
      "grad_norm": 3.017157793045044,
      "learning_rate": 2.2296500920810316e-05,
      "loss": 2.254,
      "step": 12550
    },
    {
      "epoch": 0.7734806629834254,
      "grad_norm": 4.820321083068848,
      "learning_rate": 2.2265807243707796e-05,
      "loss": 2.2243,
      "step": 12600
    },
    {
      "epoch": 0.7765500306936771,
      "grad_norm": 4.536325454711914,
      "learning_rate": 2.223511356660528e-05,
      "loss": 2.3242,
      "step": 12650
    },
    {
      "epoch": 0.7796193984039288,
      "grad_norm": 4.465803623199463,
      "learning_rate": 2.2204419889502765e-05,
      "loss": 2.2615,
      "step": 12700
    },
    {
      "epoch": 0.7826887661141805,
      "grad_norm": 4.061604022979736,
      "learning_rate": 2.2173726212400246e-05,
      "loss": 2.2753,
      "step": 12750
    },
    {
      "epoch": 0.7857581338244322,
      "grad_norm": 3.7470462322235107,
      "learning_rate": 2.214303253529773e-05,
      "loss": 2.3521,
      "step": 12800
    },
    {
      "epoch": 0.7888275015346838,
      "grad_norm": 3.7258481979370117,
      "learning_rate": 2.2112338858195214e-05,
      "loss": 2.2881,
      "step": 12850
    },
    {
      "epoch": 0.7918968692449355,
      "grad_norm": 4.148687839508057,
      "learning_rate": 2.2081645181092695e-05,
      "loss": 2.2603,
      "step": 12900
    },
    {
      "epoch": 0.7949662369551872,
      "grad_norm": 3.804433584213257,
      "learning_rate": 2.205095150399018e-05,
      "loss": 2.2985,
      "step": 12950
    },
    {
      "epoch": 0.7980356046654389,
      "grad_norm": 4.394881248474121,
      "learning_rate": 2.2020257826887663e-05,
      "loss": 2.353,
      "step": 13000
    },
    {
      "epoch": 0.8011049723756906,
      "grad_norm": 5.619194030761719,
      "learning_rate": 2.1989564149785144e-05,
      "loss": 2.3212,
      "step": 13050
    },
    {
      "epoch": 0.8041743400859422,
      "grad_norm": 3.7602977752685547,
      "learning_rate": 2.195887047268263e-05,
      "loss": 2.2732,
      "step": 13100
    },
    {
      "epoch": 0.807243707796194,
      "grad_norm": 3.5865325927734375,
      "learning_rate": 2.1928176795580113e-05,
      "loss": 2.2786,
      "step": 13150
    },
    {
      "epoch": 0.8103130755064457,
      "grad_norm": 4.248644828796387,
      "learning_rate": 2.1897483118477594e-05,
      "loss": 2.2986,
      "step": 13200
    },
    {
      "epoch": 0.8133824432166974,
      "grad_norm": 3.960653781890869,
      "learning_rate": 2.1866789441375078e-05,
      "loss": 2.3929,
      "step": 13250
    },
    {
      "epoch": 0.8164518109269491,
      "grad_norm": 4.417232990264893,
      "learning_rate": 2.183609576427256e-05,
      "loss": 2.331,
      "step": 13300
    },
    {
      "epoch": 0.8195211786372008,
      "grad_norm": 4.520796298980713,
      "learning_rate": 2.1805402087170043e-05,
      "loss": 2.3343,
      "step": 13350
    },
    {
      "epoch": 0.8225905463474524,
      "grad_norm": 3.5469796657562256,
      "learning_rate": 2.1774708410067527e-05,
      "loss": 2.3623,
      "step": 13400
    },
    {
      "epoch": 0.8256599140577041,
      "grad_norm": 3.0526225566864014,
      "learning_rate": 2.1744014732965008e-05,
      "loss": 2.2649,
      "step": 13450
    },
    {
      "epoch": 0.8287292817679558,
      "grad_norm": 3.904680013656616,
      "learning_rate": 2.1713321055862492e-05,
      "loss": 2.3419,
      "step": 13500
    },
    {
      "epoch": 0.8317986494782075,
      "grad_norm": 3.709381580352783,
      "learning_rate": 2.1682627378759976e-05,
      "loss": 2.37,
      "step": 13550
    },
    {
      "epoch": 0.8348680171884592,
      "grad_norm": 3.5717175006866455,
      "learning_rate": 2.1651933701657457e-05,
      "loss": 2.3169,
      "step": 13600
    },
    {
      "epoch": 0.8379373848987108,
      "grad_norm": 4.073272228240967,
      "learning_rate": 2.162124002455494e-05,
      "loss": 2.3328,
      "step": 13650
    },
    {
      "epoch": 0.8410067526089625,
      "grad_norm": 3.8410749435424805,
      "learning_rate": 2.1590546347452426e-05,
      "loss": 2.3072,
      "step": 13700
    },
    {
      "epoch": 0.8440761203192142,
      "grad_norm": 4.8291144371032715,
      "learning_rate": 2.1559852670349906e-05,
      "loss": 2.3592,
      "step": 13750
    },
    {
      "epoch": 0.8471454880294659,
      "grad_norm": 4.293553352355957,
      "learning_rate": 2.152915899324739e-05,
      "loss": 2.3657,
      "step": 13800
    },
    {
      "epoch": 0.8502148557397177,
      "grad_norm": 4.011140823364258,
      "learning_rate": 2.1498465316144875e-05,
      "loss": 2.304,
      "step": 13850
    },
    {
      "epoch": 0.8532842234499693,
      "grad_norm": 3.8303871154785156,
      "learning_rate": 2.1467771639042356e-05,
      "loss": 2.2724,
      "step": 13900
    },
    {
      "epoch": 0.856353591160221,
      "grad_norm": 4.488582611083984,
      "learning_rate": 2.143707796193984e-05,
      "loss": 2.2965,
      "step": 13950
    },
    {
      "epoch": 0.8594229588704727,
      "grad_norm": 3.4766058921813965,
      "learning_rate": 2.1406384284837324e-05,
      "loss": 2.3577,
      "step": 14000
    },
    {
      "epoch": 0.8624923265807244,
      "grad_norm": 4.232321262359619,
      "learning_rate": 2.137569060773481e-05,
      "loss": 2.3609,
      "step": 14050
    },
    {
      "epoch": 0.8655616942909761,
      "grad_norm": 4.51991605758667,
      "learning_rate": 2.1344996930632293e-05,
      "loss": 2.3213,
      "step": 14100
    },
    {
      "epoch": 0.8686310620012277,
      "grad_norm": 3.356311321258545,
      "learning_rate": 2.1314303253529773e-05,
      "loss": 2.3987,
      "step": 14150
    },
    {
      "epoch": 0.8717004297114794,
      "grad_norm": 3.596140146255493,
      "learning_rate": 2.1283609576427258e-05,
      "loss": 2.362,
      "step": 14200
    },
    {
      "epoch": 0.8747697974217311,
      "grad_norm": 5.02532958984375,
      "learning_rate": 2.1252915899324742e-05,
      "loss": 2.4156,
      "step": 14250
    },
    {
      "epoch": 0.8778391651319828,
      "grad_norm": 3.531360626220703,
      "learning_rate": 2.1222222222222223e-05,
      "loss": 2.312,
      "step": 14300
    },
    {
      "epoch": 0.8809085328422345,
      "grad_norm": 4.262710094451904,
      "learning_rate": 2.1191528545119707e-05,
      "loss": 2.3432,
      "step": 14350
    },
    {
      "epoch": 0.8839779005524862,
      "grad_norm": 4.449579238891602,
      "learning_rate": 2.116083486801719e-05,
      "loss": 2.3759,
      "step": 14400
    },
    {
      "epoch": 0.8870472682627378,
      "grad_norm": 4.485136032104492,
      "learning_rate": 2.1130141190914672e-05,
      "loss": 2.2131,
      "step": 14450
    },
    {
      "epoch": 0.8901166359729895,
      "grad_norm": 3.402162551879883,
      "learning_rate": 2.1099447513812156e-05,
      "loss": 2.3088,
      "step": 14500
    },
    {
      "epoch": 0.8931860036832413,
      "grad_norm": 3.7395241260528564,
      "learning_rate": 2.106875383670964e-05,
      "loss": 2.3447,
      "step": 14550
    },
    {
      "epoch": 0.896255371393493,
      "grad_norm": 3.98075008392334,
      "learning_rate": 2.103806015960712e-05,
      "loss": 2.3214,
      "step": 14600
    },
    {
      "epoch": 0.8993247391037447,
      "grad_norm": 3.871152400970459,
      "learning_rate": 2.1007366482504605e-05,
      "loss": 2.3396,
      "step": 14650
    },
    {
      "epoch": 0.9023941068139963,
      "grad_norm": 3.5539169311523438,
      "learning_rate": 2.0976672805402086e-05,
      "loss": 2.269,
      "step": 14700
    },
    {
      "epoch": 0.905463474524248,
      "grad_norm": 4.932919502258301,
      "learning_rate": 2.094597912829957e-05,
      "loss": 2.3098,
      "step": 14750
    },
    {
      "epoch": 0.9085328422344997,
      "grad_norm": 3.794808864593506,
      "learning_rate": 2.0915285451197055e-05,
      "loss": 2.277,
      "step": 14800
    },
    {
      "epoch": 0.9116022099447514,
      "grad_norm": 3.729210138320923,
      "learning_rate": 2.0884591774094536e-05,
      "loss": 2.4038,
      "step": 14850
    },
    {
      "epoch": 0.9146715776550031,
      "grad_norm": 4.467114448547363,
      "learning_rate": 2.085389809699202e-05,
      "loss": 2.2834,
      "step": 14900
    },
    {
      "epoch": 0.9177409453652547,
      "grad_norm": 4.059779644012451,
      "learning_rate": 2.0823204419889504e-05,
      "loss": 2.3535,
      "step": 14950
    },
    {
      "epoch": 0.9208103130755064,
      "grad_norm": 4.626834392547607,
      "learning_rate": 2.0792510742786985e-05,
      "loss": 2.2687,
      "step": 15000
    },
    {
      "epoch": 0.9238796807857581,
      "grad_norm": 4.254091739654541,
      "learning_rate": 2.076181706568447e-05,
      "loss": 2.3406,
      "step": 15050
    },
    {
      "epoch": 0.9269490484960098,
      "grad_norm": 4.225036144256592,
      "learning_rate": 2.0731123388581953e-05,
      "loss": 2.2917,
      "step": 15100
    },
    {
      "epoch": 0.9300184162062615,
      "grad_norm": 3.2555954456329346,
      "learning_rate": 2.0700429711479434e-05,
      "loss": 2.2775,
      "step": 15150
    },
    {
      "epoch": 0.9330877839165131,
      "grad_norm": 4.839592933654785,
      "learning_rate": 2.066973603437692e-05,
      "loss": 2.2714,
      "step": 15200
    },
    {
      "epoch": 0.9361571516267649,
      "grad_norm": 4.091184616088867,
      "learning_rate": 2.0639042357274403e-05,
      "loss": 2.2871,
      "step": 15250
    },
    {
      "epoch": 0.9392265193370166,
      "grad_norm": 3.666154384613037,
      "learning_rate": 2.0608348680171883e-05,
      "loss": 2.3108,
      "step": 15300
    },
    {
      "epoch": 0.9422958870472683,
      "grad_norm": 4.287258625030518,
      "learning_rate": 2.0577655003069368e-05,
      "loss": 2.2785,
      "step": 15350
    },
    {
      "epoch": 0.94536525475752,
      "grad_norm": 3.8487017154693604,
      "learning_rate": 2.054696132596685e-05,
      "loss": 2.3108,
      "step": 15400
    },
    {
      "epoch": 0.9484346224677717,
      "grad_norm": 3.3819682598114014,
      "learning_rate": 2.0516267648864333e-05,
      "loss": 2.3336,
      "step": 15450
    },
    {
      "epoch": 0.9515039901780233,
      "grad_norm": 4.332981109619141,
      "learning_rate": 2.0485573971761817e-05,
      "loss": 2.2581,
      "step": 15500
    },
    {
      "epoch": 0.954573357888275,
      "grad_norm": 4.729110240936279,
      "learning_rate": 2.0454880294659298e-05,
      "loss": 2.3418,
      "step": 15550
    },
    {
      "epoch": 0.9576427255985267,
      "grad_norm": 3.3030595779418945,
      "learning_rate": 2.0424186617556785e-05,
      "loss": 2.2136,
      "step": 15600
    },
    {
      "epoch": 0.9607120933087784,
      "grad_norm": 4.455896377563477,
      "learning_rate": 2.039349294045427e-05,
      "loss": 2.2574,
      "step": 15650
    },
    {
      "epoch": 0.9637814610190301,
      "grad_norm": 4.112648010253906,
      "learning_rate": 2.036279926335175e-05,
      "loss": 2.3621,
      "step": 15700
    },
    {
      "epoch": 0.9668508287292817,
      "grad_norm": 3.577320098876953,
      "learning_rate": 2.0332105586249235e-05,
      "loss": 2.2561,
      "step": 15750
    },
    {
      "epoch": 0.9699201964395334,
      "grad_norm": 4.021890163421631,
      "learning_rate": 2.030141190914672e-05,
      "loss": 2.3077,
      "step": 15800
    },
    {
      "epoch": 0.9729895641497851,
      "grad_norm": 3.277940273284912,
      "learning_rate": 2.02707182320442e-05,
      "loss": 2.2639,
      "step": 15850
    },
    {
      "epoch": 0.9760589318600368,
      "grad_norm": 4.129881858825684,
      "learning_rate": 2.0240024554941684e-05,
      "loss": 2.3038,
      "step": 15900
    },
    {
      "epoch": 0.9791282995702886,
      "grad_norm": 3.3244733810424805,
      "learning_rate": 2.0209330877839168e-05,
      "loss": 2.346,
      "step": 15950
    },
    {
      "epoch": 0.9821976672805403,
      "grad_norm": 3.341198444366455,
      "learning_rate": 2.017863720073665e-05,
      "loss": 2.321,
      "step": 16000
    },
    {
      "epoch": 0.9852670349907919,
      "grad_norm": 3.724945306777954,
      "learning_rate": 2.0147943523634133e-05,
      "loss": 2.2743,
      "step": 16050
    },
    {
      "epoch": 0.9883364027010436,
      "grad_norm": 3.501654624938965,
      "learning_rate": 2.0117249846531614e-05,
      "loss": 2.2676,
      "step": 16100
    },
    {
      "epoch": 0.9914057704112953,
      "grad_norm": 4.758657932281494,
      "learning_rate": 2.0086556169429098e-05,
      "loss": 2.3508,
      "step": 16150
    },
    {
      "epoch": 0.994475138121547,
      "grad_norm": 3.5216405391693115,
      "learning_rate": 2.0055862492326582e-05,
      "loss": 2.2924,
      "step": 16200
    },
    {
      "epoch": 0.9975445058317987,
      "grad_norm": 12.397980690002441,
      "learning_rate": 2.0025168815224063e-05,
      "loss": 2.2766,
      "step": 16250
    },
    {
      "epoch": 1.0006138735420504,
      "grad_norm": 3.59272837638855,
      "learning_rate": 1.9994475138121548e-05,
      "loss": 2.271,
      "step": 16300
    },
    {
      "epoch": 1.003683241252302,
      "grad_norm": 2.9649710655212402,
      "learning_rate": 1.9963781461019032e-05,
      "loss": 2.2595,
      "step": 16350
    },
    {
      "epoch": 1.0067526089625538,
      "grad_norm": 3.3721020221710205,
      "learning_rate": 1.9933087783916513e-05,
      "loss": 2.2275,
      "step": 16400
    },
    {
      "epoch": 1.0098219766728054,
      "grad_norm": 3.2784862518310547,
      "learning_rate": 1.9902394106813997e-05,
      "loss": 2.3262,
      "step": 16450
    },
    {
      "epoch": 1.0128913443830572,
      "grad_norm": 3.301400661468506,
      "learning_rate": 1.987170042971148e-05,
      "loss": 2.2383,
      "step": 16500
    },
    {
      "epoch": 1.0159607120933087,
      "grad_norm": 2.7174506187438965,
      "learning_rate": 1.9841006752608962e-05,
      "loss": 2.266,
      "step": 16550
    },
    {
      "epoch": 1.0190300798035605,
      "grad_norm": 4.05548095703125,
      "learning_rate": 1.9810313075506446e-05,
      "loss": 2.2317,
      "step": 16600
    },
    {
      "epoch": 1.022099447513812,
      "grad_norm": 3.362386703491211,
      "learning_rate": 1.977961939840393e-05,
      "loss": 2.2317,
      "step": 16650
    },
    {
      "epoch": 1.025168815224064,
      "grad_norm": 3.4570345878601074,
      "learning_rate": 1.974892572130141e-05,
      "loss": 2.3333,
      "step": 16700
    },
    {
      "epoch": 1.0282381829343155,
      "grad_norm": 3.4020121097564697,
      "learning_rate": 1.9718232044198895e-05,
      "loss": 2.2965,
      "step": 16750
    },
    {
      "epoch": 1.0313075506445673,
      "grad_norm": 3.3160858154296875,
      "learning_rate": 1.9687538367096376e-05,
      "loss": 2.2659,
      "step": 16800
    },
    {
      "epoch": 1.0343769183548188,
      "grad_norm": 3.5787899494171143,
      "learning_rate": 1.965684468999386e-05,
      "loss": 2.3484,
      "step": 16850
    },
    {
      "epoch": 1.0374462860650706,
| "grad_norm": 4.029461860656738, | |
| "learning_rate": 1.9626151012891345e-05, | |
| "loss": 2.3333, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 1.0405156537753222, | |
| "grad_norm": 3.743760824203491, | |
| "learning_rate": 1.9595457335788825e-05, | |
| "loss": 2.2458, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 1.043585021485574, | |
| "grad_norm": 3.3272945880889893, | |
| "learning_rate": 1.956476365868631e-05, | |
| "loss": 2.3081, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.0466543891958258, | |
| "grad_norm": 3.3701705932617188, | |
| "learning_rate": 1.9534069981583794e-05, | |
| "loss": 2.19, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 1.0497237569060773, | |
| "grad_norm": 4.10990571975708, | |
| "learning_rate": 1.9503376304481275e-05, | |
| "loss": 2.2962, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 1.0527931246163291, | |
| "grad_norm": 3.226930856704712, | |
| "learning_rate": 1.947268262737876e-05, | |
| "loss": 2.3523, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 1.0558624923265807, | |
| "grad_norm": 3.901716947555542, | |
| "learning_rate": 1.9441988950276247e-05, | |
| "loss": 2.244, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.0589318600368325, | |
| "grad_norm": 3.943704128265381, | |
| "learning_rate": 1.9411295273173727e-05, | |
| "loss": 2.3179, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 1.062001227747084, | |
| "grad_norm": 3.7991671562194824, | |
| "learning_rate": 1.938060159607121e-05, | |
| "loss": 2.1783, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 1.0650705954573358, | |
| "grad_norm": 3.63051438331604, | |
| "learning_rate": 1.9349907918968696e-05, | |
| "loss": 2.2307, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 1.0681399631675874, | |
| "grad_norm": 3.9437952041625977, | |
| "learning_rate": 1.9319214241866177e-05, | |
| "loss": 2.2864, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.0712093308778392, | |
| "grad_norm": 3.544645309448242, | |
| "learning_rate": 1.928852056476366e-05, | |
| "loss": 2.3161, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 1.0742786985880908, | |
| "grad_norm": 3.8155930042266846, | |
| "learning_rate": 1.9257826887661142e-05, | |
| "loss": 2.2036, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.0773480662983426, | |
| "grad_norm": 3.956928014755249, | |
| "learning_rate": 1.9227133210558626e-05, | |
| "loss": 2.3254, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 1.0804174340085941, | |
| "grad_norm": 2.997299909591675, | |
| "learning_rate": 1.919643953345611e-05, | |
| "loss": 2.2741, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.083486801718846, | |
| "grad_norm": 3.1786820888519287, | |
| "learning_rate": 1.916574585635359e-05, | |
| "loss": 2.2991, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 1.0865561694290977, | |
| "grad_norm": 3.475252151489258, | |
| "learning_rate": 1.9135052179251075e-05, | |
| "loss": 2.2742, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 1.0896255371393493, | |
| "grad_norm": 3.8195457458496094, | |
| "learning_rate": 1.910435850214856e-05, | |
| "loss": 2.2437, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 1.092694904849601, | |
| "grad_norm": 3.205800771713257, | |
| "learning_rate": 1.907366482504604e-05, | |
| "loss": 2.3202, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.0957642725598526, | |
| "grad_norm": 4.425097465515137, | |
| "learning_rate": 1.9042971147943524e-05, | |
| "loss": 2.2275, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 1.0988336402701044, | |
| "grad_norm": 3.6546781063079834, | |
| "learning_rate": 1.901227747084101e-05, | |
| "loss": 2.2484, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 1.101903007980356, | |
| "grad_norm": 3.9607748985290527, | |
| "learning_rate": 1.898158379373849e-05, | |
| "loss": 2.3012, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 1.1049723756906078, | |
| "grad_norm": 3.728654623031616, | |
| "learning_rate": 1.8950890116635974e-05, | |
| "loss": 2.2324, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.1080417434008594, | |
| "grad_norm": 4.3351149559021, | |
| "learning_rate": 1.8920196439533458e-05, | |
| "loss": 2.2714, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 3.8495333194732666, | |
| "learning_rate": 1.888950276243094e-05, | |
| "loss": 2.2416, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 1.1141804788213627, | |
| "grad_norm": 3.4237616062164307, | |
| "learning_rate": 1.8858809085328423e-05, | |
| "loss": 2.2531, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 1.1172498465316145, | |
| "grad_norm": 4.699343681335449, | |
| "learning_rate": 1.8828115408225904e-05, | |
| "loss": 2.299, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 1.120319214241866, | |
| "grad_norm": 3.130164861679077, | |
| "learning_rate": 1.8797421731123388e-05, | |
| "loss": 2.2208, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 1.1233885819521179, | |
| "grad_norm": 3.84944224357605, | |
| "learning_rate": 1.8766728054020872e-05, | |
| "loss": 2.2946, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 1.1264579496623695, | |
| "grad_norm": 3.48579478263855, | |
| "learning_rate": 1.8736034376918353e-05, | |
| "loss": 2.2766, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 1.1295273173726212, | |
| "grad_norm": 3.44059157371521, | |
| "learning_rate": 1.8705340699815837e-05, | |
| "loss": 2.3324, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 1.132596685082873, | |
| "grad_norm": 4.056128978729248, | |
| "learning_rate": 1.867464702271332e-05, | |
| "loss": 2.2425, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 1.1356660527931246, | |
| "grad_norm": 3.4333908557891846, | |
| "learning_rate": 1.8643953345610802e-05, | |
| "loss": 2.2956, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.1387354205033764, | |
| "grad_norm": 3.3986668586730957, | |
| "learning_rate": 1.8613259668508287e-05, | |
| "loss": 2.2745, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 1.141804788213628, | |
| "grad_norm": 3.854893684387207, | |
| "learning_rate": 1.858256599140577e-05, | |
| "loss": 2.2604, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 1.1448741559238798, | |
| "grad_norm": 3.557697296142578, | |
| "learning_rate": 1.8551872314303252e-05, | |
| "loss": 2.2547, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 1.1479435236341313, | |
| "grad_norm": 3.741943597793579, | |
| "learning_rate": 1.8521178637200736e-05, | |
| "loss": 2.3682, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 1.1510128913443831, | |
| "grad_norm": 4.78795862197876, | |
| "learning_rate": 1.849048496009822e-05, | |
| "loss": 2.2635, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 1.1540822590546347, | |
| "grad_norm": 2.904060125350952, | |
| "learning_rate": 1.8459791282995704e-05, | |
| "loss": 2.2896, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 1.1571516267648865, | |
| "grad_norm": 4.221961498260498, | |
| "learning_rate": 1.842909760589319e-05, | |
| "loss": 2.2288, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 1.160220994475138, | |
| "grad_norm": 3.9817323684692383, | |
| "learning_rate": 1.839840392879067e-05, | |
| "loss": 2.314, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 1.1632903621853898, | |
| "grad_norm": 4.283735275268555, | |
| "learning_rate": 1.8367710251688154e-05, | |
| "loss": 2.3065, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 1.1663597298956414, | |
| "grad_norm": 4.681687831878662, | |
| "learning_rate": 1.8337016574585638e-05, | |
| "loss": 2.2481, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.1694290976058932, | |
| "grad_norm": 5.139055252075195, | |
| "learning_rate": 1.830632289748312e-05, | |
| "loss": 2.2553, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 1.1724984653161448, | |
| "grad_norm": 2.987617015838623, | |
| "learning_rate": 1.8275629220380603e-05, | |
| "loss": 2.2837, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 1.1755678330263966, | |
| "grad_norm": 3.6754627227783203, | |
| "learning_rate": 1.8244935543278087e-05, | |
| "loss": 2.3089, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 1.1786372007366483, | |
| "grad_norm": 4.376922607421875, | |
| "learning_rate": 1.8214241866175568e-05, | |
| "loss": 2.3423, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 1.1817065684469, | |
| "grad_norm": 3.4154927730560303, | |
| "learning_rate": 1.8183548189073052e-05, | |
| "loss": 2.2326, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 1.1847759361571517, | |
| "grad_norm": 3.935561418533325, | |
| "learning_rate": 1.8152854511970536e-05, | |
| "loss": 2.2588, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 1.1878453038674033, | |
| "grad_norm": 3.814129114151001, | |
| "learning_rate": 1.8122160834868017e-05, | |
| "loss": 2.2798, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 1.190914671577655, | |
| "grad_norm": 4.349081516265869, | |
| "learning_rate": 1.80914671577655e-05, | |
| "loss": 2.2618, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 1.1939840392879066, | |
| "grad_norm": 4.567361354827881, | |
| "learning_rate": 1.8060773480662986e-05, | |
| "loss": 2.326, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 1.1970534069981584, | |
| "grad_norm": 4.0694427490234375, | |
| "learning_rate": 1.8030079803560467e-05, | |
| "loss": 2.3532, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.20012277470841, | |
| "grad_norm": 4.104779243469238, | |
| "learning_rate": 1.799938612645795e-05, | |
| "loss": 2.2995, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 1.2031921424186618, | |
| "grad_norm": 3.412951707839966, | |
| "learning_rate": 1.796869244935543e-05, | |
| "loss": 2.3195, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 1.2062615101289134, | |
| "grad_norm": 3.1561272144317627, | |
| "learning_rate": 1.7937998772252916e-05, | |
| "loss": 2.2425, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 1.2093308778391652, | |
| "grad_norm": 3.319150924682617, | |
| "learning_rate": 1.79073050951504e-05, | |
| "loss": 2.3061, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 1.212400245549417, | |
| "grad_norm": 3.917623281478882, | |
| "learning_rate": 1.787661141804788e-05, | |
| "loss": 2.2989, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 1.2154696132596685, | |
| "grad_norm": 3.800072193145752, | |
| "learning_rate": 1.7845917740945365e-05, | |
| "loss": 2.2609, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 1.21853898096992, | |
| "grad_norm": 3.723968505859375, | |
| "learning_rate": 1.781522406384285e-05, | |
| "loss": 2.3172, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 1.2216083486801719, | |
| "grad_norm": 4.040971755981445, | |
| "learning_rate": 1.778453038674033e-05, | |
| "loss": 2.224, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 1.2246777163904237, | |
| "grad_norm": 3.918321132659912, | |
| "learning_rate": 1.7753836709637814e-05, | |
| "loss": 2.3556, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 1.2277470841006752, | |
| "grad_norm": 4.419713973999023, | |
| "learning_rate": 1.77231430325353e-05, | |
| "loss": 2.3278, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.230816451810927, | |
| "grad_norm": 4.213504791259766, | |
| "learning_rate": 1.769244935543278e-05, | |
| "loss": 2.2026, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 1.2338858195211786, | |
| "grad_norm": 3.972687005996704, | |
| "learning_rate": 1.7661755678330264e-05, | |
| "loss": 2.2754, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 1.2369551872314304, | |
| "grad_norm": 4.094639301300049, | |
| "learning_rate": 1.7631062001227748e-05, | |
| "loss": 2.2452, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 1.240024554941682, | |
| "grad_norm": 2.83046817779541, | |
| "learning_rate": 1.760036832412523e-05, | |
| "loss": 2.3677, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 1.2430939226519337, | |
| "grad_norm": 3.1770524978637695, | |
| "learning_rate": 1.7569674647022713e-05, | |
| "loss": 2.2744, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 1.2461632903621853, | |
| "grad_norm": 3.803001880645752, | |
| "learning_rate": 1.7538980969920194e-05, | |
| "loss": 2.1896, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 1.249232658072437, | |
| "grad_norm": 2.9435923099517822, | |
| "learning_rate": 1.7508287292817678e-05, | |
| "loss": 2.2679, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 1.2523020257826887, | |
| "grad_norm": 3.0736653804779053, | |
| "learning_rate": 1.7477593615715166e-05, | |
| "loss": 2.2378, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 1.2553713934929405, | |
| "grad_norm": 4.1547627449035645, | |
| "learning_rate": 1.7446899938612646e-05, | |
| "loss": 2.2726, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 1.2584407612031923, | |
| "grad_norm": 4.235386848449707, | |
| "learning_rate": 1.741620626151013e-05, | |
| "loss": 2.291, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.2615101289134438, | |
| "grad_norm": 3.412493944168091, | |
| "learning_rate": 1.7385512584407615e-05, | |
| "loss": 2.3055, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 1.2645794966236954, | |
| "grad_norm": 3.837425947189331, | |
| "learning_rate": 1.7354818907305096e-05, | |
| "loss": 2.3072, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 1.2676488643339472, | |
| "grad_norm": 3.7470505237579346, | |
| "learning_rate": 1.732412523020258e-05, | |
| "loss": 2.2997, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 1.270718232044199, | |
| "grad_norm": 4.290903568267822, | |
| "learning_rate": 1.7293431553100064e-05, | |
| "loss": 2.3537, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 1.2737875997544506, | |
| "grad_norm": 4.011292457580566, | |
| "learning_rate": 1.7262737875997545e-05, | |
| "loss": 2.1966, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 1.2768569674647023, | |
| "grad_norm": 3.7366220951080322, | |
| "learning_rate": 1.723204419889503e-05, | |
| "loss": 2.2752, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 1.279926335174954, | |
| "grad_norm": 3.9714245796203613, | |
| "learning_rate": 1.7201350521792513e-05, | |
| "loss": 2.2664, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 1.2829957028852057, | |
| "grad_norm": 3.8838491439819336, | |
| "learning_rate": 1.7170656844689994e-05, | |
| "loss": 2.2699, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 1.2860650705954573, | |
| "grad_norm": 4.699042320251465, | |
| "learning_rate": 1.713996316758748e-05, | |
| "loss": 2.314, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 1.289134438305709, | |
| "grad_norm": 3.9477968215942383, | |
| "learning_rate": 1.710926949048496e-05, | |
| "loss": 2.1777, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.2922038060159606, | |
| "grad_norm": 3.690079927444458, | |
| "learning_rate": 1.7078575813382444e-05, | |
| "loss": 2.2705, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 1.2952731737262124, | |
| "grad_norm": 3.5377986431121826, | |
| "learning_rate": 1.7047882136279928e-05, | |
| "loss": 2.2185, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 1.298342541436464, | |
| "grad_norm": 4.657019138336182, | |
| "learning_rate": 1.701718845917741e-05, | |
| "loss": 2.3273, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 1.3014119091467158, | |
| "grad_norm": 4.9236040115356445, | |
| "learning_rate": 1.6986494782074893e-05, | |
| "loss": 2.2613, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 1.3044812768569676, | |
| "grad_norm": 3.1163363456726074, | |
| "learning_rate": 1.6955801104972377e-05, | |
| "loss": 2.2433, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 1.3075506445672191, | |
| "grad_norm": 3.7134690284729004, | |
| "learning_rate": 1.6925107427869858e-05, | |
| "loss": 2.3434, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 1.3106200122774707, | |
| "grad_norm": 3.383561372756958, | |
| "learning_rate": 1.6894413750767342e-05, | |
| "loss": 2.1809, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 1.3136893799877225, | |
| "grad_norm": 4.129547595977783, | |
| "learning_rate": 1.6863720073664826e-05, | |
| "loss": 2.3241, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 1.3167587476979743, | |
| "grad_norm": 3.206624984741211, | |
| "learning_rate": 1.6833026396562307e-05, | |
| "loss": 2.2917, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 1.3198281154082259, | |
| "grad_norm": 3.993472099304199, | |
| "learning_rate": 1.680233271945979e-05, | |
| "loss": 2.2888, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.3228974831184777, | |
| "grad_norm": 3.8573262691497803, | |
| "learning_rate": 1.6771639042357276e-05, | |
| "loss": 2.3034, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 1.3259668508287292, | |
| "grad_norm": 4.161017417907715, | |
| "learning_rate": 1.6740945365254756e-05, | |
| "loss": 2.3173, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.329036218538981, | |
| "grad_norm": 3.6462788581848145, | |
| "learning_rate": 1.671025168815224e-05, | |
| "loss": 2.2411, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 1.3321055862492326, | |
| "grad_norm": 5.160103797912598, | |
| "learning_rate": 1.667955801104972e-05, | |
| "loss": 2.3113, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 1.3351749539594844, | |
| "grad_norm": 8.680712699890137, | |
| "learning_rate": 1.6648864333947206e-05, | |
| "loss": 2.2769, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 1.3382443216697362, | |
| "grad_norm": 4.962557315826416, | |
| "learning_rate": 1.661817065684469e-05, | |
| "loss": 2.2755, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 1.3413136893799877, | |
| "grad_norm": 4.264275074005127, | |
| "learning_rate": 1.658747697974217e-05, | |
| "loss": 2.2863, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 1.3443830570902393, | |
| "grad_norm": 4.581940650939941, | |
| "learning_rate": 1.6556783302639655e-05, | |
| "loss": 2.4296, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 1.347452424800491, | |
| "grad_norm": 3.814467430114746, | |
| "learning_rate": 1.652608962553714e-05, | |
| "loss": 2.258, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 1.350521792510743, | |
| "grad_norm": 3.2274892330169678, | |
| "learning_rate": 1.6495395948434623e-05, | |
| "loss": 2.2739, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.3535911602209945, | |
| "grad_norm": 3.3576676845550537, | |
| "learning_rate": 1.6464702271332108e-05, | |
| "loss": 2.3536, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 1.356660527931246, | |
| "grad_norm": 3.561453104019165, | |
| "learning_rate": 1.6434008594229592e-05, | |
| "loss": 2.338, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 1.3597298956414978, | |
| "grad_norm": 3.8528378009796143, | |
| "learning_rate": 1.6403314917127073e-05, | |
| "loss": 2.2749, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 1.3627992633517496, | |
| "grad_norm": 3.7933218479156494, | |
| "learning_rate": 1.6372621240024557e-05, | |
| "loss": 2.1698, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 1.3658686310620012, | |
| "grad_norm": 4.1472578048706055, | |
| "learning_rate": 1.634192756292204e-05, | |
| "loss": 2.2975, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 1.368937998772253, | |
| "grad_norm": 4.129203796386719, | |
| "learning_rate": 1.6311233885819522e-05, | |
| "loss": 2.2525, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 1.3720073664825045, | |
| "grad_norm": 4.041978359222412, | |
| "learning_rate": 1.6280540208717006e-05, | |
| "loss": 2.3245, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 1.3750767341927563, | |
| "grad_norm": 3.7860097885131836, | |
| "learning_rate": 1.6249846531614487e-05, | |
| "loss": 2.2678, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 1.378146101903008, | |
| "grad_norm": 3.5321691036224365, | |
| "learning_rate": 1.621915285451197e-05, | |
| "loss": 2.2928, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 1.3812154696132597, | |
| "grad_norm": 3.8494341373443604, | |
| "learning_rate": 1.6188459177409455e-05, | |
| "loss": 2.3158, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.3842848373235115, | |
| "grad_norm": 3.3036093711853027, | |
| "learning_rate": 1.6157765500306936e-05, | |
| "loss": 2.1744, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 1.387354205033763, | |
| "grad_norm": 3.412515163421631, | |
| "learning_rate": 1.612707182320442e-05, | |
| "loss": 2.2236, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 1.3904235727440146, | |
| "grad_norm": 3.514040470123291, | |
| "learning_rate": 1.6096378146101905e-05, | |
| "loss": 2.2406, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 1.3934929404542664, | |
| "grad_norm": 3.6580166816711426, | |
| "learning_rate": 1.6065684468999386e-05, | |
| "loss": 2.2914, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 1.3965623081645182, | |
| "grad_norm": 4.870865821838379, | |
| "learning_rate": 1.603499079189687e-05, | |
| "loss": 2.2971, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 1.3996316758747698, | |
| "grad_norm": 4.047878742218018, | |
| "learning_rate": 1.6004297114794354e-05, | |
| "loss": 2.3312, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 1.4027010435850216, | |
| "grad_norm": 3.317620038986206, | |
| "learning_rate": 1.5973603437691835e-05, | |
| "loss": 2.2121, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 1.4057704112952731, | |
| "grad_norm": 3.2293405532836914, | |
| "learning_rate": 1.594290976058932e-05, | |
| "loss": 2.268, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 1.408839779005525, | |
| "grad_norm": 3.658886194229126, | |
| "learning_rate": 1.5912216083486803e-05, | |
| "loss": 2.2022, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 1.4119091467157765, | |
| "grad_norm": 4.797260761260986, | |
| "learning_rate": 1.5881522406384284e-05, | |
| "loss": 2.2916, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.4149785144260283, | |
| "grad_norm": 4.262215614318848, | |
| "learning_rate": 1.585082872928177e-05, | |
| "loss": 2.2257, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 1.4180478821362799, | |
| "grad_norm": 3.0167343616485596, | |
| "learning_rate": 1.582013505217925e-05, | |
| "loss": 2.2285, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 1.4211172498465316, | |
| "grad_norm": 3.6330764293670654, | |
| "learning_rate": 1.5789441375076733e-05, | |
| "loss": 2.3057, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 1.4241866175567832, | |
| "grad_norm": 4.605088233947754, | |
| "learning_rate": 1.5758747697974218e-05, | |
| "loss": 2.2406, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 1.427255985267035, | |
| "grad_norm": 3.957474708557129, | |
| "learning_rate": 1.57280540208717e-05, | |
| "loss": 2.2519, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 1.4303253529772868, | |
| "grad_norm": 3.5939078330993652, | |
| "learning_rate": 1.5697360343769183e-05, | |
| "loss": 2.2892, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 1.4333947206875384, | |
| "grad_norm": 3.805011034011841, | |
| "learning_rate": 1.5666666666666667e-05, | |
| "loss": 2.2179, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 1.43646408839779, | |
| "grad_norm": 3.5911526679992676, | |
| "learning_rate": 1.5635972989564148e-05, | |
| "loss": 2.313, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 1.4395334561080417, | |
| "grad_norm": 3.6143059730529785, | |
| "learning_rate": 1.5605279312461632e-05, | |
| "loss": 2.2352, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 1.4426028238182935, | |
| "grad_norm": 4.9773077964782715, | |
| "learning_rate": 1.5574585635359116e-05, | |
| "loss": 2.266, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.445672191528545, | |
| "grad_norm": 3.49001407623291, | |
| "learning_rate": 1.5543891958256597e-05, | |
| "loss": 2.4199, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 1.4487415592387969, | |
| "grad_norm": 4.041284084320068, | |
| "learning_rate": 1.551319828115408e-05, | |
| "loss": 2.2682, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 1.4518109269490485, | |
| "grad_norm": 4.0507121086120605, | |
| "learning_rate": 1.548250460405157e-05, | |
| "loss": 2.3086, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 1.4548802946593002, | |
| "grad_norm": 4.48442268371582, | |
| "learning_rate": 1.545181092694905e-05, | |
| "loss": 2.2863, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 1.4579496623695518, | |
| "grad_norm": 4.268632888793945, | |
| "learning_rate": 1.5421117249846534e-05, | |
| "loss": 2.2778, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 1.4610190300798036, | |
| "grad_norm": 3.334290027618408, | |
| "learning_rate": 1.5390423572744015e-05, | |
| "loss": 2.2268, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 1.4640883977900552, | |
| "grad_norm": 4.395374774932861, | |
| "learning_rate": 1.53597298956415e-05, | |
| "loss": 2.3163, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 1.467157765500307, | |
| "grad_norm": 4.427293300628662, | |
| "learning_rate": 1.5329036218538983e-05, | |
| "loss": 2.3159, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 1.4702271332105585, | |
| "grad_norm": 3.552321195602417, | |
| "learning_rate": 1.5298342541436464e-05, | |
| "loss": 2.3377, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 1.4732965009208103, | |
| "grad_norm": 3.2035748958587646, | |
| "learning_rate": 1.5267648864333948e-05, | |
| "loss": 2.2654, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.4763658686310621, | |
| "grad_norm": 3.877993106842041, | |
| "learning_rate": 1.5236955187231432e-05, | |
| "loss": 2.3279, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 1.4794352363413137, | |
| "grad_norm": 4.105770111083984, | |
| "learning_rate": 1.5206261510128913e-05, | |
| "loss": 2.2001, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 1.4825046040515653, | |
| "grad_norm": 5.055785655975342, | |
| "learning_rate": 1.5175567833026397e-05, | |
| "loss": 2.2383, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 1.485573971761817, | |
| "grad_norm": 3.5279541015625, | |
| "learning_rate": 1.5144874155923882e-05, | |
| "loss": 2.2956, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 1.4886433394720688, | |
| "grad_norm": 3.2130086421966553, | |
| "learning_rate": 1.5114180478821363e-05, | |
| "loss": 2.2689, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 1.4917127071823204, | |
| "grad_norm": 3.5005886554718018, | |
| "learning_rate": 1.5083486801718847e-05, | |
| "loss": 2.226, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 1.4947820748925722, | |
| "grad_norm": 3.9424734115600586, | |
| "learning_rate": 1.5052793124616331e-05, | |
| "loss": 2.207, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 1.4978514426028238, | |
| "grad_norm": 3.7467117309570312, | |
| "learning_rate": 1.5022099447513812e-05, | |
| "loss": 2.2265, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 1.5009208103130756, | |
| "grad_norm": 3.600050926208496, | |
| "learning_rate": 1.4991405770411296e-05, | |
| "loss": 2.3054, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 1.5039901780233271, | |
| "grad_norm": 3.9778027534484863, | |
| "learning_rate": 1.4960712093308779e-05, | |
| "loss": 2.2486, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.507059545733579, | |
| "grad_norm": 5.874206066131592, | |
| "learning_rate": 1.4930018416206261e-05, | |
| "loss": 2.2713, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 1.5101289134438307, | |
| "grad_norm": 3.219372034072876, | |
| "learning_rate": 1.4899324739103745e-05, | |
| "loss": 2.2801, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 1.5131982811540823, | |
| "grad_norm": 4.86896276473999, | |
| "learning_rate": 1.4868631062001228e-05, | |
| "loss": 2.3152, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 1.5162676488643339, | |
| "grad_norm": 3.7367022037506104, | |
| "learning_rate": 1.483793738489871e-05, | |
| "loss": 2.2584, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 1.5193370165745856, | |
| "grad_norm": 6.774600028991699, | |
| "learning_rate": 1.4807243707796193e-05, | |
| "loss": 2.2155, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 1.5224063842848374, | |
| "grad_norm": 3.1714091300964355, | |
| "learning_rate": 1.4776550030693677e-05, | |
| "loss": 2.317, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 1.525475751995089, | |
| "grad_norm": 3.4561657905578613, | |
| "learning_rate": 1.4745856353591161e-05, | |
| "loss": 2.2814, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 1.5285451197053406, | |
| "grad_norm": 3.321249485015869, | |
| "learning_rate": 1.4715162676488644e-05, | |
| "loss": 2.2776, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 1.5316144874155924, | |
| "grad_norm": 2.9775593280792236, | |
| "learning_rate": 1.4684468999386128e-05, | |
| "loss": 2.2629, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 1.5346838551258442, | |
| "grad_norm": 2.6327016353607178, | |
| "learning_rate": 1.465377532228361e-05, | |
| "loss": 2.2041, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.5377532228360957, | |
| "grad_norm": 4.254408359527588, | |
| "learning_rate": 1.4623081645181093e-05, | |
| "loss": 2.2525, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 1.5408225905463473, | |
| "grad_norm": 4.363503932952881, | |
| "learning_rate": 1.4592387968078576e-05, | |
| "loss": 2.211, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 1.5438919582565993, | |
| "grad_norm": 4.6273579597473145, | |
| "learning_rate": 1.456169429097606e-05, | |
| "loss": 2.308, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 1.5469613259668509, | |
| "grad_norm": 3.543792247772217, | |
| "learning_rate": 1.4531000613873542e-05, | |
| "loss": 2.2355, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.5500306936771024, | |
| "grad_norm": 3.429605007171631, | |
| "learning_rate": 1.4500306936771025e-05, | |
| "loss": 2.2436, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 1.5531000613873542, | |
| "grad_norm": 4.589274883270264, | |
| "learning_rate": 1.446961325966851e-05, | |
| "loss": 2.3328, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 1.556169429097606, | |
| "grad_norm": 3.7569265365600586, | |
| "learning_rate": 1.4438919582565992e-05, | |
| "loss": 2.318, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 1.5592387968078576, | |
| "grad_norm": 4.732515811920166, | |
| "learning_rate": 1.4408225905463474e-05, | |
| "loss": 2.2883, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 1.5623081645181092, | |
| "grad_norm": 3.1093533039093018, | |
| "learning_rate": 1.4377532228360957e-05, | |
| "loss": 2.3087, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 1.565377532228361, | |
| "grad_norm": 5.486563682556152, | |
| "learning_rate": 1.4346838551258441e-05, | |
| "loss": 2.3276, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.5684468999386127, | |
| "grad_norm": 4.043442249298096, | |
| "learning_rate": 1.4316144874155923e-05, | |
| "loss": 2.1952, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 1.5715162676488643, | |
| "grad_norm": 3.298995018005371, | |
| "learning_rate": 1.4285451197053406e-05, | |
| "loss": 2.2533, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 1.5745856353591159, | |
| "grad_norm": 3.928128719329834, | |
| "learning_rate": 1.4254757519950892e-05, | |
| "loss": 2.2624, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 1.5776550030693677, | |
| "grad_norm": 4.050337791442871, | |
| "learning_rate": 1.4224063842848374e-05, | |
| "loss": 2.1662, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 1.5807243707796195, | |
| "grad_norm": 4.037144660949707, | |
| "learning_rate": 1.4193370165745857e-05, | |
| "loss": 2.3193, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 1.583793738489871, | |
| "grad_norm": 5.330986976623535, | |
| "learning_rate": 1.416267648864334e-05, | |
| "loss": 2.2778, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 1.5868631062001226, | |
| "grad_norm": 4.488786697387695, | |
| "learning_rate": 1.4131982811540824e-05, | |
| "loss": 2.2893, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 1.5899324739103746, | |
| "grad_norm": 3.4088134765625, | |
| "learning_rate": 1.4101289134438306e-05, | |
| "loss": 2.3159, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 1.5930018416206262, | |
| "grad_norm": 4.404228687286377, | |
| "learning_rate": 1.4070595457335789e-05, | |
| "loss": 2.2411, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 1.5960712093308778, | |
| "grad_norm": 3.9491429328918457, | |
| "learning_rate": 1.4039901780233273e-05, | |
| "loss": 2.2032, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.5991405770411296, | |
| "grad_norm": 3.8530337810516357, | |
| "learning_rate": 1.4009208103130756e-05, | |
| "loss": 2.1635, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 1.6022099447513813, | |
| "grad_norm": 6.127511978149414, | |
| "learning_rate": 1.3978514426028238e-05, | |
| "loss": 2.287, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 1.605279312461633, | |
| "grad_norm": 3.831045389175415, | |
| "learning_rate": 1.394782074892572e-05, | |
| "loss": 2.2275, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 1.6083486801718845, | |
| "grad_norm": 3.487755060195923, | |
| "learning_rate": 1.3917127071823205e-05, | |
| "loss": 2.302, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 1.6114180478821363, | |
| "grad_norm": 3.54748272895813, | |
| "learning_rate": 1.3886433394720687e-05, | |
| "loss": 2.2559, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 1.614487415592388, | |
| "grad_norm": 3.1733102798461914, | |
| "learning_rate": 1.385573971761817e-05, | |
| "loss": 2.2246, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 1.6175567833026396, | |
| "grad_norm": 3.280029773712158, | |
| "learning_rate": 1.3825046040515654e-05, | |
| "loss": 2.223, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 1.6206261510128912, | |
| "grad_norm": 4.188273906707764, | |
| "learning_rate": 1.3794352363413137e-05, | |
| "loss": 2.2478, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 1.623695518723143, | |
| "grad_norm": 4.134437084197998, | |
| "learning_rate": 1.376365868631062e-05, | |
| "loss": 2.1995, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 1.6267648864333948, | |
| "grad_norm": 3.6614558696746826, | |
| "learning_rate": 1.3732965009208103e-05, | |
| "loss": 2.3007, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.6298342541436464, | |
| "grad_norm": 3.6023659706115723, | |
| "learning_rate": 1.3702271332105588e-05, | |
| "loss": 2.2745, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 1.6329036218538981, | |
| "grad_norm": 4.1788201332092285, | |
| "learning_rate": 1.367157765500307e-05, | |
| "loss": 2.2729, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 1.63597298956415, | |
| "grad_norm": 3.9169983863830566, | |
| "learning_rate": 1.3640883977900553e-05, | |
| "loss": 2.2836, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 1.6390423572744015, | |
| "grad_norm": 3.853062152862549, | |
| "learning_rate": 1.3610190300798037e-05, | |
| "loss": 2.271, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 1.642111724984653, | |
| "grad_norm": 4.5239667892456055, | |
| "learning_rate": 1.357949662369552e-05, | |
| "loss": 2.2514, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 1.6451810926949049, | |
| "grad_norm": 4.2847065925598145, | |
| "learning_rate": 1.3548802946593002e-05, | |
| "loss": 2.2787, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 1.6482504604051567, | |
| "grad_norm": 4.32819128036499, | |
| "learning_rate": 1.3518109269490484e-05, | |
| "loss": 2.1954, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 1.6513198281154082, | |
| "grad_norm": 4.206119537353516, | |
| "learning_rate": 1.3487415592387969e-05, | |
| "loss": 2.2867, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 1.6543891958256598, | |
| "grad_norm": 3.985600709915161, | |
| "learning_rate": 1.3456721915285451e-05, | |
| "loss": 2.2126, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 1.6574585635359116, | |
| "grad_norm": 3.822664976119995, | |
| "learning_rate": 1.3426028238182934e-05, | |
| "loss": 2.2767, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.6605279312461634, | |
| "grad_norm": 4.091802597045898, | |
| "learning_rate": 1.3395334561080418e-05, | |
| "loss": 2.2247, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 1.663597298956415, | |
| "grad_norm": 4.74222993850708, | |
| "learning_rate": 1.33646408839779e-05, | |
| "loss": 2.2001, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 3.1740357875823975, | |
| "learning_rate": 1.3333947206875383e-05, | |
| "loss": 2.2442, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 1.6697360343769183, | |
| "grad_norm": 5.706885814666748, | |
| "learning_rate": 1.3303253529772866e-05, | |
| "loss": 2.2529, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 1.67280540208717, | |
| "grad_norm": 4.168138027191162, | |
| "learning_rate": 1.3272559852670351e-05, | |
| "loss": 2.1694, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 1.6758747697974217, | |
| "grad_norm": 3.907432794570923, | |
| "learning_rate": 1.3241866175567834e-05, | |
| "loss": 2.2338, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 1.6789441375076735, | |
| "grad_norm": 3.9594688415527344, | |
| "learning_rate": 1.3211172498465316e-05, | |
| "loss": 2.2013, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 1.6820135052179253, | |
| "grad_norm": 3.2740478515625, | |
| "learning_rate": 1.31804788213628e-05, | |
| "loss": 2.2376, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 1.6850828729281768, | |
| "grad_norm": 5.300954341888428, | |
| "learning_rate": 1.3149785144260283e-05, | |
| "loss": 2.2501, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 1.6881522406384284, | |
| "grad_norm": 3.6815123558044434, | |
| "learning_rate": 1.3119091467157766e-05, | |
| "loss": 2.3304, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.6912216083486802, | |
| "grad_norm": 4.4728684425354, | |
| "learning_rate": 1.3088397790055248e-05, | |
| "loss": 2.2966, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 1.694290976058932, | |
| "grad_norm": 3.806849241256714, | |
| "learning_rate": 1.3057704112952733e-05, | |
| "loss": 2.1784, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 1.6973603437691835, | |
| "grad_norm": 3.8693387508392334, | |
| "learning_rate": 1.3027010435850215e-05, | |
| "loss": 2.1768, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 1.7004297114794351, | |
| "grad_norm": 3.4431064128875732, | |
| "learning_rate": 1.2996316758747698e-05, | |
| "loss": 2.2657, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 1.703499079189687, | |
| "grad_norm": 4.247345924377441, | |
| "learning_rate": 1.2965623081645182e-05, | |
| "loss": 2.2104, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 1.7065684468999387, | |
| "grad_norm": 4.055105209350586, | |
| "learning_rate": 1.2934929404542664e-05, | |
| "loss": 2.274, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 1.7096378146101903, | |
| "grad_norm": 3.7587838172912598, | |
| "learning_rate": 1.2904235727440147e-05, | |
| "loss": 2.278, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 1.7127071823204418, | |
| "grad_norm": 3.716425657272339, | |
| "learning_rate": 1.287354205033763e-05, | |
| "loss": 2.3438, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 1.7157765500306936, | |
| "grad_norm": 3.8528246879577637, | |
| "learning_rate": 1.2842848373235114e-05, | |
| "loss": 2.3489, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 1.7188459177409454, | |
| "grad_norm": 3.5920658111572266, | |
| "learning_rate": 1.2812154696132596e-05, | |
| "loss": 2.3107, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.721915285451197, | |
| "grad_norm": 3.0533790588378906, | |
| "learning_rate": 1.278146101903008e-05, | |
| "loss": 2.3603, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 1.7249846531614488, | |
| "grad_norm": 4.115893363952637, | |
| "learning_rate": 1.2750767341927565e-05, | |
| "loss": 2.2572, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 1.7280540208717006, | |
| "grad_norm": 3.350722074508667, | |
| "learning_rate": 1.2720073664825047e-05, | |
| "loss": 2.2774, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 1.7311233885819521, | |
| "grad_norm": 4.05141544342041, | |
| "learning_rate": 1.268937998772253e-05, | |
| "loss": 2.1578, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 1.7341927562922037, | |
| "grad_norm": 3.764138698577881, | |
| "learning_rate": 1.2658686310620012e-05, | |
| "loss": 2.2309, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 1.7372621240024555, | |
| "grad_norm": 3.2544310092926025, | |
| "learning_rate": 1.2627992633517496e-05, | |
| "loss": 2.2146, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 1.7403314917127073, | |
| "grad_norm": 4.030269622802734, | |
| "learning_rate": 1.2597298956414979e-05, | |
| "loss": 2.2981, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 1.7434008594229589, | |
| "grad_norm": 3.6446919441223145, | |
| "learning_rate": 1.2566605279312461e-05, | |
| "loss": 2.1891, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 1.7464702271332104, | |
| "grad_norm": 3.7096481323242188, | |
| "learning_rate": 1.2535911602209946e-05, | |
| "loss": 2.272, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 1.7495395948434622, | |
| "grad_norm": 3.4253058433532715, | |
| "learning_rate": 1.2505217925107428e-05, | |
| "loss": 2.2502, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.752608962553714, | |
| "grad_norm": 3.299448013305664, | |
| "learning_rate": 1.247452424800491e-05, | |
| "loss": 2.2742, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 1.7556783302639656, | |
| "grad_norm": 4.302381992340088, | |
| "learning_rate": 1.2443830570902393e-05, | |
| "loss": 2.2315, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 1.7587476979742172, | |
| "grad_norm": 3.4078803062438965, | |
| "learning_rate": 1.2413136893799877e-05, | |
| "loss": 2.3013, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 1.7618170656844692, | |
| "grad_norm": 2.8105528354644775, | |
| "learning_rate": 1.238244321669736e-05, | |
| "loss": 2.2035, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 1.7648864333947207, | |
| "grad_norm": 4.302020072937012, | |
| "learning_rate": 1.2351749539594843e-05, | |
| "loss": 2.306, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 1.7679558011049723, | |
| "grad_norm": 5.1633219718933105, | |
| "learning_rate": 1.2321055862492327e-05, | |
| "loss": 2.2469, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 1.771025168815224, | |
| "grad_norm": 3.7127487659454346, | |
| "learning_rate": 1.2290362185389811e-05, | |
| "loss": 2.2803, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 1.7740945365254759, | |
| "grad_norm": 3.1988329887390137, | |
| "learning_rate": 1.2259668508287293e-05, | |
| "loss": 2.2693, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 1.7771639042357275, | |
| "grad_norm": 4.184259414672852, | |
| "learning_rate": 1.2228974831184776e-05, | |
| "loss": 2.1331, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 1.780233271945979, | |
| "grad_norm": 4.31723690032959, | |
| "learning_rate": 1.219828115408226e-05, | |
| "loss": 2.3265, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.7833026396562308, | |
| "grad_norm": 3.367295742034912, | |
| "learning_rate": 1.2167587476979743e-05, | |
| "loss": 2.2231, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 1.7863720073664826, | |
| "grad_norm": 3.7550508975982666, | |
| "learning_rate": 1.2136893799877225e-05, | |
| "loss": 2.1928, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 1.7894413750767342, | |
| "grad_norm": 3.3911259174346924, | |
| "learning_rate": 1.210620012277471e-05, | |
| "loss": 2.2118, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 1.7925107427869857, | |
| "grad_norm": 3.8555543422698975, | |
| "learning_rate": 1.2075506445672192e-05, | |
| "loss": 2.2831, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 1.7955801104972375, | |
| "grad_norm": 3.8747925758361816, | |
| "learning_rate": 1.2044812768569675e-05, | |
| "loss": 2.1612, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 1.7986494782074893, | |
| "grad_norm": 4.418224334716797, | |
| "learning_rate": 1.2014119091467157e-05, | |
| "loss": 2.1782, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 1.801718845917741, | |
| "grad_norm": 3.63905668258667, | |
| "learning_rate": 1.1983425414364641e-05, | |
| "loss": 2.2919, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 1.8047882136279927, | |
| "grad_norm": 3.302374839782715, | |
| "learning_rate": 1.1952731737262124e-05, | |
| "loss": 2.2046, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 1.8078575813382445, | |
| "grad_norm": 4.592925548553467, | |
| "learning_rate": 1.1922038060159606e-05, | |
| "loss": 2.238, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 1.810926949048496, | |
| "grad_norm": 3.654604434967041, | |
| "learning_rate": 1.189134438305709e-05, | |
| "loss": 2.1764, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.8139963167587476, | |
| "grad_norm": 3.7106800079345703, | |
| "learning_rate": 1.1860650705954573e-05, | |
| "loss": 2.2601, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 1.8170656844689994, | |
| "grad_norm": 3.459660291671753, | |
| "learning_rate": 1.1829957028852056e-05, | |
| "loss": 2.2503, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.8201350521792512, | |
| "grad_norm": 3.504185676574707, | |
| "learning_rate": 1.179926335174954e-05, | |
| "loss": 2.258, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 1.8232044198895028, | |
| "grad_norm": 4.167102336883545, | |
| "learning_rate": 1.1768569674647024e-05, | |
| "loss": 2.1789, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 1.8262737875997543, | |
| "grad_norm": 4.083024978637695, | |
| "learning_rate": 1.1737875997544507e-05, | |
| "loss": 2.2965, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 1.8293431553100061, | |
| "grad_norm": 3.6207692623138428, | |
| "learning_rate": 1.1707182320441989e-05, | |
| "loss": 2.3554, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.832412523020258, | |
| "grad_norm": 3.8433992862701416, | |
| "learning_rate": 1.1676488643339473e-05, | |
| "loss": 2.2148, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 1.8354818907305095, | |
| "grad_norm": 4.200483798980713, | |
| "learning_rate": 1.1645794966236956e-05, | |
| "loss": 2.2467, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 1.838551258440761, | |
| "grad_norm": 4.590367794036865, | |
| "learning_rate": 1.1615101289134438e-05, | |
| "loss": 2.313, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 1.8416206261510129, | |
| "grad_norm": 4.230051040649414, | |
| "learning_rate": 1.1584407612031921e-05, | |
| "loss": 2.2457, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.8446899938612646, | |
| "grad_norm": 3.817789077758789, | |
| "learning_rate": 1.1553713934929405e-05, | |
| "loss": 2.2483, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 1.8477593615715162, | |
| "grad_norm": 3.726513147354126, | |
| "learning_rate": 1.1523020257826888e-05, | |
| "loss": 2.2662, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 1.850828729281768, | |
| "grad_norm": 3.6397483348846436, | |
| "learning_rate": 1.149232658072437e-05, | |
| "loss": 2.1713, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 1.8538980969920198, | |
| "grad_norm": 3.6421852111816406, | |
| "learning_rate": 1.1461632903621854e-05, | |
| "loss": 2.1855, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.8569674647022714, | |
| "grad_norm": 3.6123268604278564, | |
| "learning_rate": 1.1430939226519337e-05, | |
| "loss": 2.3794, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 1.860036832412523, | |
| "grad_norm": 3.842371940612793, | |
| "learning_rate": 1.140024554941682e-05, | |
| "loss": 2.3057, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 1.8631062001227747, | |
| "grad_norm": 5.15551233291626, | |
| "learning_rate": 1.1369551872314302e-05, | |
| "loss": 2.1755, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 1.8661755678330265, | |
| "grad_norm": 3.2684996128082275, | |
| "learning_rate": 1.1338858195211786e-05, | |
| "loss": 2.2181, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.869244935543278, | |
| "grad_norm": 3.720906972885132, | |
| "learning_rate": 1.130816451810927e-05, | |
| "loss": 2.2057, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 1.8723143032535297, | |
| "grad_norm": 3.2957749366760254, | |
| "learning_rate": 1.1277470841006753e-05, | |
| "loss": 2.2404, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.8753836709637814, | |
| "grad_norm": 3.4108922481536865, | |
| "learning_rate": 1.1246777163904237e-05, | |
| "loss": 2.3055, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 1.8784530386740332, | |
| "grad_norm": 2.9891228675842285, | |
| "learning_rate": 1.121608348680172e-05, | |
| "loss": 2.2714, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.8815224063842848, | |
| "grad_norm": 5.469006538391113, | |
| "learning_rate": 1.1185389809699202e-05, | |
| "loss": 2.2108, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 1.8845917740945364, | |
| "grad_norm": 3.9105262756347656, | |
| "learning_rate": 1.1154696132596686e-05, | |
| "loss": 2.2818, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 1.8876611418047882, | |
| "grad_norm": 3.2086987495422363, | |
| "learning_rate": 1.1124002455494169e-05, | |
| "loss": 2.2281, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 1.89073050951504, | |
| "grad_norm": 4.461240291595459, | |
| "learning_rate": 1.1093308778391652e-05, | |
| "loss": 2.2431, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.8937998772252915, | |
| "grad_norm": 4.049542427062988, | |
| "learning_rate": 1.1062615101289134e-05, | |
| "loss": 2.2089, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 1.8968692449355433, | |
| "grad_norm": 3.2396111488342285, | |
| "learning_rate": 1.1031921424186618e-05, | |
| "loss": 2.3097, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 1.899938612645795, | |
| "grad_norm": 3.4000086784362793, | |
| "learning_rate": 1.10012277470841e-05, | |
| "loss": 2.2472, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 1.9030079803560467, | |
| "grad_norm": 3.818934917449951, | |
| "learning_rate": 1.0970534069981583e-05, | |
| "loss": 2.2014, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.9060773480662982, | |
| "grad_norm": 3.150446891784668, | |
| "learning_rate": 1.0939840392879068e-05, | |
| "loss": 2.2697, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 1.90914671577655, | |
| "grad_norm": 3.2145376205444336, | |
| "learning_rate": 1.090914671577655e-05, | |
| "loss": 2.2698, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.9122160834868018, | |
| "grad_norm": 3.603330135345459, | |
| "learning_rate": 1.0878453038674033e-05, | |
| "loss": 2.2337, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 1.9152854511970534, | |
| "grad_norm": 3.6672143936157227, | |
| "learning_rate": 1.0847759361571515e-05, | |
| "loss": 2.2064, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.918354818907305, | |
| "grad_norm": 3.2886476516723633, | |
| "learning_rate": 1.0817065684469001e-05, | |
| "loss": 2.2159, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 1.9214241866175568, | |
| "grad_norm": 3.3169350624084473, | |
| "learning_rate": 1.0786372007366484e-05, | |
| "loss": 2.2242, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.9244935543278086, | |
| "grad_norm": 3.8866281509399414, | |
| "learning_rate": 1.0755678330263966e-05, | |
| "loss": 2.212, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 1.9275629220380601, | |
| "grad_norm": 3.3577752113342285, | |
| "learning_rate": 1.072498465316145e-05, | |
| "loss": 2.3055, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.9306322897483117, | |
| "grad_norm": 3.503736972808838, | |
| "learning_rate": 1.0694290976058933e-05, | |
| "loss": 2.1698, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 1.9337016574585635, | |
| "grad_norm": 5.08292818069458, | |
| "learning_rate": 1.0663597298956415e-05, | |
| "loss": 2.2281, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.9367710251688153, | |
| "grad_norm": 6.739192485809326, | |
| "learning_rate": 1.0632903621853898e-05, | |
| "loss": 2.2375, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 1.9398403928790668, | |
| "grad_norm": 5.141798496246338, | |
| "learning_rate": 1.0602209944751382e-05, | |
| "loss": 2.2684, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.9429097605893186, | |
| "grad_norm": 3.4031152725219727, | |
| "learning_rate": 1.0571516267648865e-05, | |
| "loss": 2.2104, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 1.9459791282995704, | |
| "grad_norm": 3.678633451461792, | |
| "learning_rate": 1.0540822590546347e-05, | |
| "loss": 2.2351, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.949048496009822, | |
| "grad_norm": 4.1313700675964355, | |
| "learning_rate": 1.0510128913443831e-05, | |
| "loss": 2.2951, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 1.9521178637200736, | |
| "grad_norm": 3.0364913940429688, | |
| "learning_rate": 1.0479435236341314e-05, | |
| "loss": 2.2499, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.9551872314303254, | |
| "grad_norm": 3.7849690914154053, | |
| "learning_rate": 1.0448741559238796e-05, | |
| "loss": 2.2005, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 1.9582565991405771, | |
| "grad_norm": 4.416446208953857, | |
| "learning_rate": 1.0418047882136279e-05, | |
| "loss": 2.3114, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.9613259668508287, | |
| "grad_norm": 3.4799766540527344, | |
| "learning_rate": 1.0387354205033763e-05, | |
| "loss": 2.2727, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 1.9643953345610803, | |
| "grad_norm": 5.180732727050781, | |
| "learning_rate": 1.0356660527931246e-05, | |
| "loss": 2.2864, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.967464702271332, | |
| "grad_norm": 3.589080810546875, | |
| "learning_rate": 1.032596685082873e-05, | |
| "loss": 2.2939, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 1.9705340699815839, | |
| "grad_norm": 4.802340984344482, | |
| "learning_rate": 1.0295273173726214e-05, | |
| "loss": 2.2003, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 1.9736034376918354, | |
| "grad_norm": 3.132723331451416, | |
| "learning_rate": 1.0264579496623697e-05, | |
| "loss": 2.2338, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 1.976672805402087, | |
| "grad_norm": 5.015474796295166, | |
| "learning_rate": 1.023388581952118e-05, | |
| "loss": 2.2431, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.979742173112339, | |
| "grad_norm": 3.432023286819458, | |
| "learning_rate": 1.0203192142418662e-05, | |
| "loss": 2.2862, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 1.9828115408225906, | |
| "grad_norm": 3.8772900104522705, | |
| "learning_rate": 1.0172498465316146e-05, | |
| "loss": 2.2758, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 1.9858809085328422, | |
| "grad_norm": 3.640902042388916, | |
| "learning_rate": 1.0141804788213629e-05, | |
| "loss": 2.2174, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 1.988950276243094, | |
| "grad_norm": 3.8185462951660156, | |
| "learning_rate": 1.0111111111111111e-05, | |
| "loss": 2.2102, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.9920196439533457, | |
| "grad_norm": 4.0993499755859375, | |
| "learning_rate": 1.0080417434008595e-05, | |
| "loss": 2.2474, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 1.9950890116635973, | |
| "grad_norm": 3.5613911151885986, | |
| "learning_rate": 1.0049723756906078e-05, | |
| "loss": 2.2452, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.9981583793738489, | |
| "grad_norm": 3.9277961254119873, | |
| "learning_rate": 1.001903007980356e-05, | |
| "loss": 2.2498, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 2.001227747084101, | |
| "grad_norm": 3.8902101516723633, | |
| "learning_rate": 9.988336402701043e-06, | |
| "loss": 2.3092, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 2.0042971147943525, | |
| "grad_norm": 3.29555606842041, | |
| "learning_rate": 9.957642725598527e-06, | |
| "loss": 2.2777, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 2.007366482504604, | |
| "grad_norm": 3.297602653503418, | |
| "learning_rate": 9.92694904849601e-06, | |
| "loss": 2.2509, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 2.0104358502148556, | |
| "grad_norm": 3.616257905960083, | |
| "learning_rate": 9.896255371393492e-06, | |
| "loss": 2.2828, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 2.0135052179251076, | |
| "grad_norm": 3.872678518295288, | |
| "learning_rate": 9.865561694290976e-06, | |
| "loss": 2.2431, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 2.016574585635359, | |
| "grad_norm": 4.2430338859558105, | |
| "learning_rate": 9.83486801718846e-06, | |
| "loss": 2.143, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 2.0196439533456108, | |
| "grad_norm": 4.328212738037109, | |
| "learning_rate": 9.804174340085943e-06, | |
| "loss": 2.3134, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 2.0227133210558623, | |
| "grad_norm": 3.488384246826172, | |
| "learning_rate": 9.773480662983426e-06, | |
| "loss": 2.2116, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 2.0257826887661143, | |
| "grad_norm": 4.3153910636901855, | |
| "learning_rate": 9.74278698588091e-06, | |
| "loss": 2.1732, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.028852056476366, | |
| "grad_norm": 3.968754768371582, | |
| "learning_rate": 9.712093308778392e-06, | |
| "loss": 2.2158, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 2.0319214241866175, | |
| "grad_norm": 3.1098225116729736, | |
| "learning_rate": 9.681399631675875e-06, | |
| "loss": 2.2657, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 2.034990791896869, | |
| "grad_norm": 3.7003393173217773, | |
| "learning_rate": 9.650705954573359e-06, | |
| "loss": 2.2534, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 2.038060159607121, | |
| "grad_norm": 3.1514766216278076, | |
| "learning_rate": 9.620012277470842e-06, | |
| "loss": 2.1827, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 2.0411295273173726, | |
| "grad_norm": 3.5508854389190674, | |
| "learning_rate": 9.589318600368324e-06, | |
| "loss": 2.2009, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 2.044198895027624, | |
| "grad_norm": 4.060067653656006, | |
| "learning_rate": 9.558624923265807e-06, | |
| "loss": 2.2533, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 2.047268262737876, | |
| "grad_norm": 3.6063380241394043, | |
| "learning_rate": 9.527931246163291e-06, | |
| "loss": 2.254, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 2.050337630448128, | |
| "grad_norm": 5.065506458282471, | |
| "learning_rate": 9.497237569060773e-06, | |
| "loss": 2.2285, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 2.0534069981583793, | |
| "grad_norm": 3.942070245742798, | |
| "learning_rate": 9.466543891958256e-06, | |
| "loss": 2.2693, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 2.056476365868631, | |
| "grad_norm": 4.165147304534912, | |
| "learning_rate": 9.43585021485574e-06, | |
| "loss": 2.1426, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.059545733578883, | |
| "grad_norm": 9.669456481933594, | |
| "learning_rate": 9.405156537753223e-06, | |
| "loss": 2.2249, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 2.0626151012891345, | |
| "grad_norm": 3.426900625228882, | |
| "learning_rate": 9.374462860650705e-06, | |
| "loss": 2.2908, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 2.065684468999386, | |
| "grad_norm": 4.799295902252197, | |
| "learning_rate": 9.34376918354819e-06, | |
| "loss": 2.226, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 2.0687538367096376, | |
| "grad_norm": 3.066361427307129, | |
| "learning_rate": 9.313075506445674e-06, | |
| "loss": 2.2653, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 2.0718232044198897, | |
| "grad_norm": 4.229564666748047, | |
| "learning_rate": 9.282381829343156e-06, | |
| "loss": 2.2778, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 2.074892572130141, | |
| "grad_norm": 3.7543585300445557, | |
| "learning_rate": 9.251688152240639e-06, | |
| "loss": 2.1851, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 2.077961939840393, | |
| "grad_norm": 4.075713634490967, | |
| "learning_rate": 9.220994475138123e-06, | |
| "loss": 2.189, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 2.0810313075506444, | |
| "grad_norm": 4.204864978790283, | |
| "learning_rate": 9.190300798035606e-06, | |
| "loss": 2.2357, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 2.0841006752608964, | |
| "grad_norm": 4.006982326507568, | |
| "learning_rate": 9.159607120933088e-06, | |
| "loss": 2.231, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 2.087170042971148, | |
| "grad_norm": 3.0241997241973877, | |
| "learning_rate": 9.12891344383057e-06, | |
| "loss": 2.1866, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.0902394106813995, | |
| "grad_norm": 3.5990588665008545, | |
| "learning_rate": 9.098219766728055e-06, | |
| "loss": 2.145, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 2.0933087783916515, | |
| "grad_norm": 3.6155498027801514, | |
| "learning_rate": 9.067526089625537e-06, | |
| "loss": 2.1855, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 2.096378146101903, | |
| "grad_norm": 3.9599666595458984, | |
| "learning_rate": 9.03683241252302e-06, | |
| "loss": 2.2783, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 2.0994475138121547, | |
| "grad_norm": 3.8327977657318115, | |
| "learning_rate": 9.006138735420504e-06, | |
| "loss": 2.1919, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 2.1025168815224062, | |
| "grad_norm": 3.3617892265319824, | |
| "learning_rate": 8.975445058317987e-06, | |
| "loss": 2.1101, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 2.1055862492326582, | |
| "grad_norm": 3.5898163318634033, | |
| "learning_rate": 8.944751381215469e-06, | |
| "loss": 2.1866, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 2.10865561694291, | |
| "grad_norm": 4.3782525062561035, | |
| "learning_rate": 8.914057704112952e-06, | |
| "loss": 2.233, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 2.1117249846531614, | |
| "grad_norm": 3.649711847305298, | |
| "learning_rate": 8.883364027010436e-06, | |
| "loss": 2.2422, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 2.114794352363413, | |
| "grad_norm": 3.99489164352417, | |
| "learning_rate": 8.852670349907918e-06, | |
| "loss": 2.1979, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 2.117863720073665, | |
| "grad_norm": 4.443358421325684, | |
| "learning_rate": 8.821976672805403e-06, | |
| "loss": 2.1984, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.1209330877839165, | |
| "grad_norm": 2.918077230453491, | |
| "learning_rate": 8.791282995702887e-06, | |
| "loss": 2.1995, | |
| "step": 34550 | |
| }, | |
| { | |
| "epoch": 2.124002455494168, | |
| "grad_norm": 3.6200385093688965, | |
| "learning_rate": 8.76058931860037e-06, | |
| "loss": 2.2225, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 2.12707182320442, | |
| "grad_norm": 3.616900682449341, | |
| "learning_rate": 8.729895641497852e-06, | |
| "loss": 2.1904, | |
| "step": 34650 | |
| }, | |
| { | |
| "epoch": 2.1301411909146717, | |
| "grad_norm": 3.1443259716033936, | |
| "learning_rate": 8.699201964395334e-06, | |
| "loss": 2.1765, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 2.1332105586249233, | |
| "grad_norm": 3.3852028846740723, | |
| "learning_rate": 8.668508287292819e-06, | |
| "loss": 2.2505, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 2.136279926335175, | |
| "grad_norm": 3.266024112701416, | |
| "learning_rate": 8.637814610190301e-06, | |
| "loss": 2.1815, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 2.139349294045427, | |
| "grad_norm": 4.952578067779541, | |
| "learning_rate": 8.607120933087784e-06, | |
| "loss": 2.2401, | |
| "step": 34850 | |
| }, | |
| { | |
| "epoch": 2.1424186617556784, | |
| "grad_norm": 4.235185623168945, | |
| "learning_rate": 8.576427255985268e-06, | |
| "loss": 2.2374, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 2.14548802946593, | |
| "grad_norm": 3.80965256690979, | |
| "learning_rate": 8.54573357888275e-06, | |
| "loss": 2.2094, | |
| "step": 34950 | |
| }, | |
| { | |
| "epoch": 2.1485573971761815, | |
| "grad_norm": 5.098249435424805, | |
| "learning_rate": 8.515039901780233e-06, | |
| "loss": 2.1882, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.1516267648864336, | |
| "grad_norm": 3.5961649417877197, | |
| "learning_rate": 8.484346224677715e-06, | |
| "loss": 2.2372, | |
| "step": 35050 | |
| }, | |
| { | |
| "epoch": 2.154696132596685, | |
| "grad_norm": 3.425548791885376, | |
| "learning_rate": 8.4536525475752e-06, | |
| "loss": 2.2271, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 2.1577655003069367, | |
| "grad_norm": 3.765516996383667, | |
| "learning_rate": 8.422958870472682e-06, | |
| "loss": 2.2176, | |
| "step": 35150 | |
| }, | |
| { | |
| "epoch": 2.1608348680171883, | |
| "grad_norm": 4.038573265075684, | |
| "learning_rate": 8.392265193370165e-06, | |
| "loss": 2.214, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 2.1639042357274403, | |
| "grad_norm": 4.2313385009765625, | |
| "learning_rate": 8.361571516267649e-06, | |
| "loss": 2.2596, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 2.166973603437692, | |
| "grad_norm": 3.4800400733947754, | |
| "learning_rate": 8.330877839165133e-06, | |
| "loss": 2.2576, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 2.1700429711479434, | |
| "grad_norm": 3.5811681747436523, | |
| "learning_rate": 8.300184162062616e-06, | |
| "loss": 2.1885, | |
| "step": 35350 | |
| }, | |
| { | |
| "epoch": 2.1731123388581954, | |
| "grad_norm": 3.505411148071289, | |
| "learning_rate": 8.269490484960098e-06, | |
| "loss": 2.1773, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 2.176181706568447, | |
| "grad_norm": 3.3958797454833984, | |
| "learning_rate": 8.238796807857582e-06, | |
| "loss": 2.2335, | |
| "step": 35450 | |
| }, | |
| { | |
| "epoch": 2.1792510742786986, | |
| "grad_norm": 4.680118083953857, | |
| "learning_rate": 8.208103130755065e-06, | |
| "loss": 2.2351, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.18232044198895, | |
| "grad_norm": 3.314845323562622, | |
| "learning_rate": 8.177409453652548e-06, | |
| "loss": 2.2563, | |
| "step": 35550 | |
| }, | |
| { | |
| "epoch": 2.185389809699202, | |
| "grad_norm": 3.014174461364746, | |
| "learning_rate": 8.146715776550032e-06, | |
| "loss": 2.2946, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 2.1884591774094537, | |
| "grad_norm": 6.031067848205566, | |
| "learning_rate": 8.116022099447514e-06, | |
| "loss": 2.2273, | |
| "step": 35650 | |
| }, | |
| { | |
| "epoch": 2.1915285451197053, | |
| "grad_norm": 4.454038143157959, | |
| "learning_rate": 8.085328422344997e-06, | |
| "loss": 2.226, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 2.194597912829957, | |
| "grad_norm": 4.110731601715088, | |
| "learning_rate": 8.05463474524248e-06, | |
| "loss": 2.3176, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 2.197667280540209, | |
| "grad_norm": 3.230386972427368, | |
| "learning_rate": 8.023941068139964e-06, | |
| "loss": 2.2589, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 2.2007366482504604, | |
| "grad_norm": 4.837220668792725, | |
| "learning_rate": 7.993247391037446e-06, | |
| "loss": 2.2341, | |
| "step": 35850 | |
| }, | |
| { | |
| "epoch": 2.203806015960712, | |
| "grad_norm": 4.532881259918213, | |
| "learning_rate": 7.962553713934929e-06, | |
| "loss": 2.2125, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 2.2068753836709636, | |
| "grad_norm": 3.323784828186035, | |
| "learning_rate": 7.931860036832413e-06, | |
| "loss": 2.19, | |
| "step": 35950 | |
| }, | |
| { | |
| "epoch": 2.2099447513812156, | |
| "grad_norm": 4.003852844238281, | |
| "learning_rate": 7.901166359729895e-06, | |
| "loss": 2.2215, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.213014119091467, | |
| "grad_norm": 3.0279271602630615, | |
| "learning_rate": 7.870472682627378e-06, | |
| "loss": 2.2177, | |
| "step": 36050 | |
| }, | |
| { | |
| "epoch": 2.2160834868017187, | |
| "grad_norm": 4.593332290649414, | |
| "learning_rate": 7.839779005524862e-06, | |
| "loss": 2.1596, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 2.2191528545119708, | |
| "grad_norm": 3.9358561038970947, | |
| "learning_rate": 7.809085328422346e-06, | |
| "loss": 2.3147, | |
| "step": 36150 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 4.021229267120361, | |
| "learning_rate": 7.778391651319829e-06, | |
| "loss": 2.1922, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 2.225291589932474, | |
| "grad_norm": 3.740377426147461, | |
| "learning_rate": 7.747697974217311e-06, | |
| "loss": 2.2208, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 2.2283609576427255, | |
| "grad_norm": 3.133218765258789, | |
| "learning_rate": 7.717004297114796e-06, | |
| "loss": 2.181, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 2.2314303253529775, | |
| "grad_norm": 4.224998950958252, | |
| "learning_rate": 7.686310620012278e-06, | |
| "loss": 2.1571, | |
| "step": 36350 | |
| }, | |
| { | |
| "epoch": 2.234499693063229, | |
| "grad_norm": 3.908095359802246, | |
| "learning_rate": 7.65561694290976e-06, | |
| "loss": 2.2142, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 2.2375690607734806, | |
| "grad_norm": 4.671231746673584, | |
| "learning_rate": 7.624923265807243e-06, | |
| "loss": 2.2114, | |
| "step": 36450 | |
| }, | |
| { | |
| "epoch": 2.240638428483732, | |
| "grad_norm": 3.268892765045166, | |
| "learning_rate": 7.5942295887047274e-06, | |
| "loss": 2.2469, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.243707796193984, | |
| "grad_norm": 2.9523401260375977, | |
| "learning_rate": 7.56353591160221e-06, | |
| "loss": 2.171, | |
| "step": 36550 | |
| }, | |
| { | |
| "epoch": 2.2467771639042358, | |
| "grad_norm": 3.850844383239746, | |
| "learning_rate": 7.5328422344996925e-06, | |
| "loss": 2.1936, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 2.2498465316144873, | |
| "grad_norm": 3.288367509841919, | |
| "learning_rate": 7.502148557397177e-06, | |
| "loss": 2.1882, | |
| "step": 36650 | |
| }, | |
| { | |
| "epoch": 2.252915899324739, | |
| "grad_norm": 3.225170612335205, | |
| "learning_rate": 7.47145488029466e-06, | |
| "loss": 2.2253, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 2.255985267034991, | |
| "grad_norm": 3.7475740909576416, | |
| "learning_rate": 7.440761203192143e-06, | |
| "loss": 2.2224, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 2.2590546347452425, | |
| "grad_norm": 4.108501434326172, | |
| "learning_rate": 7.410067526089626e-06, | |
| "loss": 2.2538, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 2.262124002455494, | |
| "grad_norm": 4.399234771728516, | |
| "learning_rate": 7.3793738489871085e-06, | |
| "loss": 2.2093, | |
| "step": 36850 | |
| }, | |
| { | |
| "epoch": 2.265193370165746, | |
| "grad_norm": 4.0335235595703125, | |
| "learning_rate": 7.348680171884592e-06, | |
| "loss": 2.2265, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 2.2682627378759976, | |
| "grad_norm": 3.3310387134552, | |
| "learning_rate": 7.317986494782075e-06, | |
| "loss": 2.2195, | |
| "step": 36950 | |
| }, | |
| { | |
| "epoch": 2.271332105586249, | |
| "grad_norm": 14.169954299926758, | |
| "learning_rate": 7.287292817679558e-06, | |
| "loss": 2.3107, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 2.2744014732965008, | |
| "grad_norm": 4.349920272827148, | |
| "learning_rate": 7.256599140577041e-06, | |
| "loss": 2.2233, | |
| "step": 37050 | |
| }, | |
| { | |
| "epoch": 2.277470841006753, | |
| "grad_norm": 3.546018362045288, | |
| "learning_rate": 7.2259054634745245e-06, | |
| "loss": 2.1851, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 2.2805402087170044, | |
| "grad_norm": 3.578289270401001, | |
| "learning_rate": 7.195211786372008e-06, | |
| "loss": 2.1984, | |
| "step": 37150 | |
| }, | |
| { | |
| "epoch": 2.283609576427256, | |
| "grad_norm": 3.5594937801361084, | |
| "learning_rate": 7.1645181092694904e-06, | |
| "loss": 2.2544, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 2.2866789441375075, | |
| "grad_norm": 3.502493143081665, | |
| "learning_rate": 7.133824432166974e-06, | |
| "loss": 2.2898, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 2.2897483118477595, | |
| "grad_norm": 3.839489459991455, | |
| "learning_rate": 7.103130755064457e-06, | |
| "loss": 2.2333, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 2.292817679558011, | |
| "grad_norm": 3.7720537185668945, | |
| "learning_rate": 7.07243707796194e-06, | |
| "loss": 2.2484, | |
| "step": 37350 | |
| }, | |
| { | |
| "epoch": 2.2958870472682626, | |
| "grad_norm": 3.5186944007873535, | |
| "learning_rate": 7.041743400859423e-06, | |
| "loss": 2.2974, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 2.298956414978514, | |
| "grad_norm": 3.9113717079162598, | |
| "learning_rate": 7.011049723756906e-06, | |
| "loss": 2.2212, | |
| "step": 37450 | |
| }, | |
| { | |
| "epoch": 2.3020257826887662, | |
| "grad_norm": 3.704716920852661, | |
| "learning_rate": 6.98035604665439e-06, | |
| "loss": 2.2158, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 2.305095150399018, | |
| "grad_norm": 3.8221049308776855, | |
| "learning_rate": 6.949662369551872e-06, | |
| "loss": 2.1804, | |
| "step": 37550 | |
| }, | |
| { | |
| "epoch": 2.3081645181092694, | |
| "grad_norm": 3.8908891677856445, | |
| "learning_rate": 6.918968692449356e-06, | |
| "loss": 2.171, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 2.3112338858195214, | |
| "grad_norm": 3.604534149169922, | |
| "learning_rate": 6.888275015346839e-06, | |
| "loss": 2.2704, | |
| "step": 37650 | |
| }, | |
| { | |
| "epoch": 2.314303253529773, | |
| "grad_norm": 3.2667436599731445, | |
| "learning_rate": 6.857581338244322e-06, | |
| "loss": 2.2706, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 2.3173726212400245, | |
| "grad_norm": 3.7572014331817627, | |
| "learning_rate": 6.826887661141805e-06, | |
| "loss": 2.3158, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 2.320441988950276, | |
| "grad_norm": 3.4231903553009033, | |
| "learning_rate": 6.7961939840392875e-06, | |
| "loss": 2.2766, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 2.323511356660528, | |
| "grad_norm": 3.4527835845947266, | |
| "learning_rate": 6.765500306936771e-06, | |
| "loss": 2.1798, | |
| "step": 37850 | |
| }, | |
| { | |
| "epoch": 2.3265807243707797, | |
| "grad_norm": 4.387216091156006, | |
| "learning_rate": 6.734806629834254e-06, | |
| "loss": 2.2615, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 2.3296500920810312, | |
| "grad_norm": 3.5280401706695557, | |
| "learning_rate": 6.704112952731738e-06, | |
| "loss": 2.2263, | |
| "step": 37950 | |
| }, | |
| { | |
| "epoch": 2.332719459791283, | |
| "grad_norm": 3.647169351577759, | |
| "learning_rate": 6.673419275629221e-06, | |
| "loss": 2.2117, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 2.335788827501535, | |
| "grad_norm": 3.3504931926727295, | |
| "learning_rate": 6.6427255985267036e-06, | |
| "loss": 2.2202, | |
| "step": 38050 | |
| }, | |
| { | |
| "epoch": 2.3388581952117864, | |
| "grad_norm": 3.1713030338287354, | |
| "learning_rate": 6.612031921424187e-06, | |
| "loss": 2.1531, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 2.341927562922038, | |
| "grad_norm": 4.14404821395874, | |
| "learning_rate": 6.5813382443216695e-06, | |
| "loss": 2.2549, | |
| "step": 38150 | |
| }, | |
| { | |
| "epoch": 2.3449969306322895, | |
| "grad_norm": 4.7959065437316895, | |
| "learning_rate": 6.550644567219153e-06, | |
| "loss": 2.2079, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 2.3480662983425415, | |
| "grad_norm": 3.699985980987549, | |
| "learning_rate": 6.519950890116635e-06, | |
| "loss": 2.2061, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 2.351135666052793, | |
| "grad_norm": 3.93282151222229, | |
| "learning_rate": 6.48925721301412e-06, | |
| "loss": 2.1875, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 2.3542050337630447, | |
| "grad_norm": 3.5464470386505127, | |
| "learning_rate": 6.458563535911603e-06, | |
| "loss": 2.3065, | |
| "step": 38350 | |
| }, | |
| { | |
| "epoch": 2.3572744014732967, | |
| "grad_norm": 4.367957592010498, | |
| "learning_rate": 6.4278698588090855e-06, | |
| "loss": 2.2946, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 2.3603437691835483, | |
| "grad_norm": 4.520755767822266, | |
| "learning_rate": 6.397176181706569e-06, | |
| "loss": 2.2403, | |
| "step": 38450 | |
| }, | |
| { | |
| "epoch": 2.3634131368938, | |
| "grad_norm": 3.270214557647705, | |
| "learning_rate": 6.366482504604051e-06, | |
| "loss": 2.276, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 2.3664825046040514, | |
| "grad_norm": 4.663724422454834, | |
| "learning_rate": 6.335788827501535e-06, | |
| "loss": 2.2394, | |
| "step": 38550 | |
| }, | |
| { | |
| "epoch": 2.3695518723143034, | |
| "grad_norm": 3.735618829727173, | |
| "learning_rate": 6.305095150399017e-06, | |
| "loss": 2.1831, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 2.372621240024555, | |
| "grad_norm": 4.269412040710449, | |
| "learning_rate": 6.274401473296501e-06, | |
| "loss": 2.2155, | |
| "step": 38650 | |
| }, | |
| { | |
| "epoch": 2.3756906077348066, | |
| "grad_norm": 4.040123462677002, | |
| "learning_rate": 6.243707796193985e-06, | |
| "loss": 2.2135, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 2.378759975445058, | |
| "grad_norm": 3.0279011726379395, | |
| "learning_rate": 6.213014119091467e-06, | |
| "loss": 2.2062, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 2.38182934315531, | |
| "grad_norm": 4.656242370605469, | |
| "learning_rate": 6.182320441988951e-06, | |
| "loss": 2.2526, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 2.3848987108655617, | |
| "grad_norm": 4.1057233810424805, | |
| "learning_rate": 6.151626764886433e-06, | |
| "loss": 2.1878, | |
| "step": 38850 | |
| }, | |
| { | |
| "epoch": 2.3879680785758133, | |
| "grad_norm": 4.058590888977051, | |
| "learning_rate": 6.120933087783917e-06, | |
| "loss": 2.2473, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 2.391037446286065, | |
| "grad_norm": 3.7655320167541504, | |
| "learning_rate": 6.090239410681399e-06, | |
| "loss": 2.1313, | |
| "step": 38950 | |
| }, | |
| { | |
| "epoch": 2.394106813996317, | |
| "grad_norm": 3.7537214756011963, | |
| "learning_rate": 6.059545733578883e-06, | |
| "loss": 2.2779, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 2.3971761817065684, | |
| "grad_norm": 3.464635133743286, | |
| "learning_rate": 6.028852056476366e-06, | |
| "loss": 2.1035, | |
| "step": 39050 | |
| }, | |
| { | |
| "epoch": 2.40024554941682, | |
| "grad_norm": 3.9705522060394287, | |
| "learning_rate": 5.998158379373849e-06, | |
| "loss": 2.2249, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 2.403314917127072, | |
| "grad_norm": 2.9240760803222656, | |
| "learning_rate": 5.967464702271333e-06, | |
| "loss": 2.1655, | |
| "step": 39150 | |
| }, | |
| { | |
| "epoch": 2.4063842848373236, | |
| "grad_norm": 4.480701923370361, | |
| "learning_rate": 5.936771025168815e-06, | |
| "loss": 2.1998, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 2.409453652547575, | |
| "grad_norm": 3.32859468460083, | |
| "learning_rate": 5.906077348066299e-06, | |
| "loss": 2.2133, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 2.4125230202578267, | |
| "grad_norm": 2.9386136531829834, | |
| "learning_rate": 5.875383670963781e-06, | |
| "loss": 2.1524, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 2.4155923879680787, | |
| "grad_norm": 3.8305766582489014, | |
| "learning_rate": 5.8446899938612645e-06, | |
| "loss": 2.2915, | |
| "step": 39350 | |
| }, | |
| { | |
| "epoch": 2.4186617556783303, | |
| "grad_norm": 3.4347639083862305, | |
| "learning_rate": 5.813996316758748e-06, | |
| "loss": 2.1909, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 2.421731123388582, | |
| "grad_norm": 4.805240631103516, | |
| "learning_rate": 5.78330263965623e-06, | |
| "loss": 2.2795, | |
| "step": 39450 | |
| }, | |
| { | |
| "epoch": 2.424800491098834, | |
| "grad_norm": 3.2844135761260986, | |
| "learning_rate": 5.752608962553715e-06, | |
| "loss": 2.1176, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 2.4278698588090855, | |
| "grad_norm": 3.550025701522827, | |
| "learning_rate": 5.721915285451197e-06, | |
| "loss": 2.2356, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 2.430939226519337, | |
| "grad_norm": 3.8909902572631836, | |
| "learning_rate": 5.6912216083486805e-06, | |
| "loss": 2.2591, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 2.4340085942295886, | |
| "grad_norm": 3.584829330444336, | |
| "learning_rate": 5.660527931246163e-06, | |
| "loss": 2.2678, | |
| "step": 39650 | |
| }, | |
| { | |
| "epoch": 2.43707796193984, | |
| "grad_norm": 3.7134439945220947, | |
| "learning_rate": 5.6298342541436464e-06, | |
| "loss": 2.1648, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 2.440147329650092, | |
| "grad_norm": 4.022806167602539, | |
| "learning_rate": 5.59914057704113e-06, | |
| "loss": 2.1818, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 2.4432166973603437, | |
| "grad_norm": 3.5967869758605957, | |
| "learning_rate": 5.568446899938612e-06, | |
| "loss": 2.2368, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 2.4462860650705953, | |
| "grad_norm": 4.099997520446777, | |
| "learning_rate": 5.537753222836096e-06, | |
| "loss": 2.2196, | |
| "step": 39850 | |
| }, | |
| { | |
| "epoch": 2.4493554327808473, | |
| "grad_norm": 4.131256103515625, | |
| "learning_rate": 5.507059545733579e-06, | |
| "loss": 2.3592, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 2.452424800491099, | |
| "grad_norm": 3.403428077697754, | |
| "learning_rate": 5.4763658686310625e-06, | |
| "loss": 2.2484, | |
| "step": 39950 | |
| }, | |
| { | |
| "epoch": 2.4554941682013505, | |
| "grad_norm": 3.4898879528045654, | |
| "learning_rate": 5.445672191528546e-06, | |
| "loss": 2.2361, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.458563535911602, | |
| "grad_norm": 4.398887634277344, | |
| "learning_rate": 5.414978514426028e-06, | |
| "loss": 2.2462, | |
| "step": 40050 | |
| }, | |
| { | |
| "epoch": 2.461632903621854, | |
| "grad_norm": 4.28602409362793, | |
| "learning_rate": 5.384284837323512e-06, | |
| "loss": 2.3204, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 2.4647022713321056, | |
| "grad_norm": 4.812078475952148, | |
| "learning_rate": 5.353591160220994e-06, | |
| "loss": 2.2206, | |
| "step": 40150 | |
| }, | |
| { | |
| "epoch": 2.467771639042357, | |
| "grad_norm": 5.229348659515381, | |
| "learning_rate": 5.322897483118478e-06, | |
| "loss": 2.299, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 2.470841006752609, | |
| "grad_norm": 5.011894226074219, | |
| "learning_rate": 5.29220380601596e-06, | |
| "loss": 2.2308, | |
| "step": 40250 | |
| }, | |
| { | |
| "epoch": 2.4739103744628608, | |
| "grad_norm": 3.229832410812378, | |
| "learning_rate": 5.261510128913444e-06, | |
| "loss": 2.1293, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 2.4769797421731123, | |
| "grad_norm": 4.192412376403809, | |
| "learning_rate": 5.230816451810928e-06, | |
| "loss": 2.2252, | |
| "step": 40350 | |
| }, | |
| { | |
| "epoch": 2.480049109883364, | |
| "grad_norm": 4.124536037445068, | |
| "learning_rate": 5.20012277470841e-06, | |
| "loss": 2.1729, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 2.4831184775936155, | |
| "grad_norm": 3.670736789703369, | |
| "learning_rate": 5.169429097605894e-06, | |
| "loss": 2.2571, | |
| "step": 40450 | |
| }, | |
| { | |
| "epoch": 2.4861878453038675, | |
| "grad_norm": 5.001986026763916, | |
| "learning_rate": 5.138735420503376e-06, | |
| "loss": 2.2075, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 2.489257213014119, | |
| "grad_norm": 3.8158419132232666, | |
| "learning_rate": 5.1080417434008596e-06, | |
| "loss": 2.2931, | |
| "step": 40550 | |
| }, | |
| { | |
| "epoch": 2.4923265807243706, | |
| "grad_norm": 3.6598846912384033, | |
| "learning_rate": 5.077348066298342e-06, | |
| "loss": 2.2037, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 2.4953959484346226, | |
| "grad_norm": 4.0994110107421875, | |
| "learning_rate": 5.0466543891958255e-06, | |
| "loss": 2.2622, | |
| "step": 40650 | |
| }, | |
| { | |
| "epoch": 2.498465316144874, | |
| "grad_norm": 3.9565281867980957, | |
| "learning_rate": 5.01596071209331e-06, | |
| "loss": 2.1952, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 2.501534683855126, | |
| "grad_norm": 3.9254519939422607, | |
| "learning_rate": 4.985267034990792e-06, | |
| "loss": 2.2506, | |
| "step": 40750 | |
| }, | |
| { | |
| "epoch": 2.5046040515653774, | |
| "grad_norm": 4.242046356201172, | |
| "learning_rate": 4.954573357888276e-06, | |
| "loss": 2.2135, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 2.5076734192756294, | |
| "grad_norm": 3.1262447834014893, | |
| "learning_rate": 4.923879680785758e-06, | |
| "loss": 2.2056, | |
| "step": 40850 | |
| }, | |
| { | |
| "epoch": 2.510742786985881, | |
| "grad_norm": 4.857666015625, | |
| "learning_rate": 4.8931860036832415e-06, | |
| "loss": 2.2238, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 2.5138121546961325, | |
| "grad_norm": 4.507630348205566, | |
| "learning_rate": 4.862492326580724e-06, | |
| "loss": 2.2332, | |
| "step": 40950 | |
| }, | |
| { | |
| "epoch": 2.5168815224063845, | |
| "grad_norm": 4.321670055389404, | |
| "learning_rate": 4.831798649478207e-06, | |
| "loss": 2.2581, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 2.519950890116636, | |
| "grad_norm": 3.4853837490081787, | |
| "learning_rate": 4.801104972375691e-06, | |
| "loss": 2.3517, | |
| "step": 41050 | |
| }, | |
| { | |
| "epoch": 2.5230202578268877, | |
| "grad_norm": 4.295222759246826, | |
| "learning_rate": 4.770411295273174e-06, | |
| "loss": 2.1357, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 2.5260896255371392, | |
| "grad_norm": 3.4203784465789795, | |
| "learning_rate": 4.7397176181706575e-06, | |
| "loss": 2.1721, | |
| "step": 41150 | |
| }, | |
| { | |
| "epoch": 2.529158993247391, | |
| "grad_norm": 4.489879608154297, | |
| "learning_rate": 4.70902394106814e-06, | |
| "loss": 2.196, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 2.532228360957643, | |
| "grad_norm": 3.238175392150879, | |
| "learning_rate": 4.6783302639656234e-06, | |
| "loss": 2.2373, | |
| "step": 41250 | |
| }, | |
| { | |
| "epoch": 2.5352977286678944, | |
| "grad_norm": 4.743640422821045, | |
| "learning_rate": 4.647636586863106e-06, | |
| "loss": 2.309, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 2.538367096378146, | |
| "grad_norm": 2.7739622592926025, | |
| "learning_rate": 4.616942909760589e-06, | |
| "loss": 2.1396, | |
| "step": 41350 | |
| }, | |
| { | |
| "epoch": 2.541436464088398, | |
| "grad_norm": 3.4076218605041504, | |
| "learning_rate": 4.586249232658073e-06, | |
| "loss": 2.2281, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 2.5445058317986495, | |
| "grad_norm": 4.367641448974609, | |
| "learning_rate": 4.555555555555555e-06, | |
| "loss": 2.2136, | |
| "step": 41450 | |
| }, | |
| { | |
| "epoch": 2.547575199508901, | |
| "grad_norm": 3.8523755073547363, | |
| "learning_rate": 4.5248618784530395e-06, | |
| "loss": 2.265, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 2.5506445672191527, | |
| "grad_norm": 3.5632312297821045, | |
| "learning_rate": 4.494168201350522e-06, | |
| "loss": 2.2266, | |
| "step": 41550 | |
| }, | |
| { | |
| "epoch": 2.5537139349294047, | |
| "grad_norm": 4.128525733947754, | |
| "learning_rate": 4.463474524248005e-06, | |
| "loss": 2.209, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 2.5567833026396563, | |
| "grad_norm": 3.2727203369140625, | |
| "learning_rate": 4.432780847145488e-06, | |
| "loss": 2.167, | |
| "step": 41650 | |
| }, | |
| { | |
| "epoch": 2.559852670349908, | |
| "grad_norm": 4.561786651611328, | |
| "learning_rate": 4.402087170042971e-06, | |
| "loss": 2.2009, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 2.56292203806016, | |
| "grad_norm": 3.624037742614746, | |
| "learning_rate": 4.371393492940455e-06, | |
| "loss": 2.2852, | |
| "step": 41750 | |
| }, | |
| { | |
| "epoch": 2.5659914057704114, | |
| "grad_norm": 4.098727226257324, | |
| "learning_rate": 4.340699815837937e-06, | |
| "loss": 2.1527, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 2.569060773480663, | |
| "grad_norm": 3.3079962730407715, | |
| "learning_rate": 4.3100061387354205e-06, | |
| "loss": 2.2701, | |
| "step": 41850 | |
| }, | |
| { | |
| "epoch": 2.5721301411909145, | |
| "grad_norm": 3.725670099258423, | |
| "learning_rate": 4.279312461632904e-06, | |
| "loss": 2.2145, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 2.575199508901166, | |
| "grad_norm": 4.11065673828125, | |
| "learning_rate": 4.248618784530387e-06, | |
| "loss": 2.2223, | |
| "step": 41950 | |
| }, | |
| { | |
| "epoch": 2.578268876611418, | |
| "grad_norm": 3.768911123275757, | |
| "learning_rate": 4.21792510742787e-06, | |
| "loss": 2.2406, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.5813382443216697, | |
| "grad_norm": 3.27990984916687, | |
| "learning_rate": 4.187231430325353e-06, | |
| "loss": 2.2224, | |
| "step": 42050 | |
| }, | |
| { | |
| "epoch": 2.5844076120319213, | |
| "grad_norm": 3.7315287590026855, | |
| "learning_rate": 4.1565377532228366e-06, | |
| "loss": 2.3168, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 2.5874769797421733, | |
| "grad_norm": 3.7368297576904297, | |
| "learning_rate": 4.125844076120319e-06, | |
| "loss": 2.3103, | |
| "step": 42150 | |
| }, | |
| { | |
| "epoch": 2.590546347452425, | |
| "grad_norm": 3.973989725112915, | |
| "learning_rate": 4.0951503990178025e-06, | |
| "loss": 2.2719, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 2.5936157151626764, | |
| "grad_norm": 3.1219234466552734, | |
| "learning_rate": 4.064456721915285e-06, | |
| "loss": 2.2401, | |
| "step": 42250 | |
| }, | |
| { | |
| "epoch": 2.596685082872928, | |
| "grad_norm": 3.8633742332458496, | |
| "learning_rate": 4.033763044812769e-06, | |
| "loss": 2.1432, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 2.59975445058318, | |
| "grad_norm": 3.6198391914367676, | |
| "learning_rate": 4.003069367710252e-06, | |
| "loss": 2.2469, | |
| "step": 42350 | |
| }, | |
| { | |
| "epoch": 2.6028238182934316, | |
| "grad_norm": 4.8632707595825195, | |
| "learning_rate": 3.972375690607735e-06, | |
| "loss": 2.3461, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 2.605893186003683, | |
| "grad_norm": 3.7397594451904297, | |
| "learning_rate": 3.9416820135052185e-06, | |
| "loss": 2.2278, | |
| "step": 42450 | |
| }, | |
| { | |
| "epoch": 2.608962553713935, | |
| "grad_norm": 3.7671289443969727, | |
| "learning_rate": 3.910988336402701e-06, | |
| "loss": 2.2099, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.6120319214241867, | |
| "grad_norm": 3.9413743019104004, | |
| "learning_rate": 3.880294659300184e-06, | |
| "loss": 2.2534, | |
| "step": 42550 | |
| }, | |
| { | |
| "epoch": 2.6151012891344383, | |
| "grad_norm": 3.448629856109619, | |
| "learning_rate": 3.849600982197667e-06, | |
| "loss": 2.2862, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 2.61817065684469, | |
| "grad_norm": 5.043887138366699, | |
| "learning_rate": 3.81890730509515e-06, | |
| "loss": 2.1931, | |
| "step": 42650 | |
| }, | |
| { | |
| "epoch": 2.6212400245549414, | |
| "grad_norm": 3.661371946334839, | |
| "learning_rate": 3.7882136279926332e-06, | |
| "loss": 2.2653, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 2.6243093922651934, | |
| "grad_norm": 4.375932216644287, | |
| "learning_rate": 3.7575199508901166e-06, | |
| "loss": 2.1996, | |
| "step": 42750 | |
| }, | |
| { | |
| "epoch": 2.627378759975445, | |
| "grad_norm": 4.304765701293945, | |
| "learning_rate": 3.7268262737876e-06, | |
| "loss": 2.1938, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 2.630448127685697, | |
| "grad_norm": 3.3659396171569824, | |
| "learning_rate": 3.696132596685083e-06, | |
| "loss": 2.2412, | |
| "step": 42850 | |
| }, | |
| { | |
| "epoch": 2.6335174953959486, | |
| "grad_norm": 3.610954999923706, | |
| "learning_rate": 3.665438919582566e-06, | |
| "loss": 2.238, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 2.6365868631062, | |
| "grad_norm": 3.3917031288146973, | |
| "learning_rate": 3.6347452424800493e-06, | |
| "loss": 2.2998, | |
| "step": 42950 | |
| }, | |
| { | |
| "epoch": 2.6396562308164517, | |
| "grad_norm": 3.4687845706939697, | |
| "learning_rate": 3.6040515653775326e-06, | |
| "loss": 2.2232, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.6427255985267033, | |
| "grad_norm": 4.230668544769287, | |
| "learning_rate": 3.5733578882750156e-06, | |
| "loss": 2.1685, | |
| "step": 43050 | |
| }, | |
| { | |
| "epoch": 2.6457949662369553, | |
| "grad_norm": 3.617204189300537, | |
| "learning_rate": 3.5426642111724985e-06, | |
| "loss": 2.218, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 2.648864333947207, | |
| "grad_norm": 3.763354778289795, | |
| "learning_rate": 3.5119705340699815e-06, | |
| "loss": 2.321, | |
| "step": 43150 | |
| }, | |
| { | |
| "epoch": 2.6519337016574585, | |
| "grad_norm": 3.923051357269287, | |
| "learning_rate": 3.481276856967465e-06, | |
| "loss": 2.2174, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 2.6550030693677105, | |
| "grad_norm": 4.259540557861328, | |
| "learning_rate": 3.450583179864948e-06, | |
| "loss": 2.2633, | |
| "step": 43250 | |
| }, | |
| { | |
| "epoch": 2.658072437077962, | |
| "grad_norm": 4.246336936950684, | |
| "learning_rate": 3.419889502762431e-06, | |
| "loss": 2.2235, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 2.6611418047882136, | |
| "grad_norm": 3.1326816082000732, | |
| "learning_rate": 3.389195825659914e-06, | |
| "loss": 2.1769, | |
| "step": 43350 | |
| }, | |
| { | |
| "epoch": 2.664211172498465, | |
| "grad_norm": 5.116452217102051, | |
| "learning_rate": 3.3585021485573975e-06, | |
| "loss": 2.1907, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 2.6672805402087167, | |
| "grad_norm": 3.177436113357544, | |
| "learning_rate": 3.3278084714548805e-06, | |
| "loss": 2.2524, | |
| "step": 43450 | |
| }, | |
| { | |
| "epoch": 2.6703499079189688, | |
| "grad_norm": 2.992366313934326, | |
| "learning_rate": 3.2971147943523634e-06, | |
| "loss": 2.1998, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 2.6734192756292203, | |
| "grad_norm": 3.997972249984741, | |
| "learning_rate": 3.2664211172498464e-06, | |
| "loss": 2.234, | |
| "step": 43550 | |
| }, | |
| { | |
| "epoch": 2.6764886433394723, | |
| "grad_norm": 4.2181267738342285, | |
| "learning_rate": 3.2357274401473297e-06, | |
| "loss": 2.1645, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 2.679558011049724, | |
| "grad_norm": 3.3036773204803467, | |
| "learning_rate": 3.205033763044813e-06, | |
| "loss": 2.2462, | |
| "step": 43650 | |
| }, | |
| { | |
| "epoch": 2.6826273787599755, | |
| "grad_norm": 4.222419738769531, | |
| "learning_rate": 3.174340085942296e-06, | |
| "loss": 2.283, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 2.685696746470227, | |
| "grad_norm": 3.980220079421997, | |
| "learning_rate": 3.143646408839779e-06, | |
| "loss": 2.2752, | |
| "step": 43750 | |
| }, | |
| { | |
| "epoch": 2.6887661141804786, | |
| "grad_norm": 3.438683271408081, | |
| "learning_rate": 3.1129527317372624e-06, | |
| "loss": 2.1897, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 2.6918354818907306, | |
| "grad_norm": 3.9108569622039795, | |
| "learning_rate": 3.0822590546347453e-06, | |
| "loss": 2.2084, | |
| "step": 43850 | |
| }, | |
| { | |
| "epoch": 2.694904849600982, | |
| "grad_norm": 3.4712257385253906, | |
| "learning_rate": 3.0515653775322283e-06, | |
| "loss": 2.0835, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 2.6979742173112338, | |
| "grad_norm": 3.4415714740753174, | |
| "learning_rate": 3.0208717004297112e-06, | |
| "loss": 2.3018, | |
| "step": 43950 | |
| }, | |
| { | |
| "epoch": 2.701043585021486, | |
| "grad_norm": 4.478912353515625, | |
| "learning_rate": 2.990178023327195e-06, | |
| "loss": 2.1982, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.7041129527317374, | |
| "grad_norm": 4.173290729522705, | |
| "learning_rate": 2.959484346224678e-06, | |
| "loss": 2.1964, | |
| "step": 44050 | |
| }, | |
| { | |
| "epoch": 2.707182320441989, | |
| "grad_norm": 3.7616212368011475, | |
| "learning_rate": 2.928790669122161e-06, | |
| "loss": 2.2183, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 2.7102516881522405, | |
| "grad_norm": 5.122647285461426, | |
| "learning_rate": 2.898096992019644e-06, | |
| "loss": 2.2466, | |
| "step": 44150 | |
| }, | |
| { | |
| "epoch": 2.713321055862492, | |
| "grad_norm": 3.6268253326416016, | |
| "learning_rate": 2.8674033149171273e-06, | |
| "loss": 2.2719, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 2.716390423572744, | |
| "grad_norm": 4.107768535614014, | |
| "learning_rate": 2.8367096378146102e-06, | |
| "loss": 2.148, | |
| "step": 44250 | |
| }, | |
| { | |
| "epoch": 2.7194597912829956, | |
| "grad_norm": 3.9949638843536377, | |
| "learning_rate": 2.806015960712093e-06, | |
| "loss": 2.2333, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 2.7225291589932477, | |
| "grad_norm": 3.9412174224853516, | |
| "learning_rate": 2.7753222836095765e-06, | |
| "loss": 2.1901, | |
| "step": 44350 | |
| }, | |
| { | |
| "epoch": 2.7255985267034992, | |
| "grad_norm": 3.243807792663574, | |
| "learning_rate": 2.74462860650706e-06, | |
| "loss": 2.163, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 2.728667894413751, | |
| "grad_norm": 4.045169353485107, | |
| "learning_rate": 2.713934929404543e-06, | |
| "loss": 2.1712, | |
| "step": 44450 | |
| }, | |
| { | |
| "epoch": 2.7317372621240024, | |
| "grad_norm": 3.781874418258667, | |
| "learning_rate": 2.683241252302026e-06, | |
| "loss": 2.2034, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.734806629834254, | |
| "grad_norm": 3.88508677482605, | |
| "learning_rate": 2.6525475751995088e-06, | |
| "loss": 2.1833, | |
| "step": 44550 | |
| }, | |
| { | |
| "epoch": 2.737875997544506, | |
| "grad_norm": 4.135626792907715, | |
| "learning_rate": 2.621853898096992e-06, | |
| "loss": 2.2339, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 2.7409453652547575, | |
| "grad_norm": 3.489367723464966, | |
| "learning_rate": 2.591160220994475e-06, | |
| "loss": 2.2991, | |
| "step": 44650 | |
| }, | |
| { | |
| "epoch": 2.744014732965009, | |
| "grad_norm": 3.8391823768615723, | |
| "learning_rate": 2.5604665438919585e-06, | |
| "loss": 2.1783, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 2.747084100675261, | |
| "grad_norm": 3.0692577362060547, | |
| "learning_rate": 2.5297728667894414e-06, | |
| "loss": 2.195, | |
| "step": 44750 | |
| }, | |
| { | |
| "epoch": 2.7501534683855127, | |
| "grad_norm": 3.084923267364502, | |
| "learning_rate": 2.4990791896869244e-06, | |
| "loss": 2.2347, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 2.7532228360957642, | |
| "grad_norm": 3.383420705795288, | |
| "learning_rate": 2.4683855125844077e-06, | |
| "loss": 2.168, | |
| "step": 44850 | |
| }, | |
| { | |
| "epoch": 2.756292203806016, | |
| "grad_norm": 3.1771624088287354, | |
| "learning_rate": 2.4376918354818907e-06, | |
| "loss": 2.217, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 2.7593615715162674, | |
| "grad_norm": 3.8641133308410645, | |
| "learning_rate": 2.4069981583793737e-06, | |
| "loss": 2.2976, | |
| "step": 44950 | |
| }, | |
| { | |
| "epoch": 2.7624309392265194, | |
| "grad_norm": 3.6356940269470215, | |
| "learning_rate": 2.376304481276857e-06, | |
| "loss": 2.2688, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.765500306936771, | |
| "grad_norm": 3.3960859775543213, | |
| "learning_rate": 2.3456108041743404e-06, | |
| "loss": 2.2591, | |
| "step": 45050 | |
| }, | |
| { | |
| "epoch": 2.768569674647023, | |
| "grad_norm": 4.219804286956787, | |
| "learning_rate": 2.3149171270718234e-06, | |
| "loss": 2.2724, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 2.7716390423572745, | |
| "grad_norm": 4.273275852203369, | |
| "learning_rate": 2.2842234499693063e-06, | |
| "loss": 2.2016, | |
| "step": 45150 | |
| }, | |
| { | |
| "epoch": 2.774708410067526, | |
| "grad_norm": 4.720740795135498, | |
| "learning_rate": 2.2535297728667893e-06, | |
| "loss": 2.2711, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 3.9274086952209473, | |
| "learning_rate": 2.2228360957642726e-06, | |
| "loss": 2.2219, | |
| "step": 45250 | |
| }, | |
| { | |
| "epoch": 2.7808471454880292, | |
| "grad_norm": 3.7379603385925293, | |
| "learning_rate": 2.1921424186617556e-06, | |
| "loss": 2.2034, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 2.7839165131982813, | |
| "grad_norm": 3.719149112701416, | |
| "learning_rate": 2.161448741559239e-06, | |
| "loss": 2.2461, | |
| "step": 45350 | |
| }, | |
| { | |
| "epoch": 2.786985880908533, | |
| "grad_norm": 3.402672529220581, | |
| "learning_rate": 2.130755064456722e-06, | |
| "loss": 2.2349, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 2.7900552486187844, | |
| "grad_norm": 3.5159754753112793, | |
| "learning_rate": 2.1000613873542053e-06, | |
| "loss": 2.1584, | |
| "step": 45450 | |
| }, | |
| { | |
| "epoch": 2.7931246163290364, | |
| "grad_norm": 3.4366443157196045, | |
| "learning_rate": 2.0693677102516882e-06, | |
| "loss": 2.1867, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 2.796193984039288, | |
| "grad_norm": 3.5394604206085205, | |
| "learning_rate": 2.038674033149171e-06, | |
| "loss": 2.301, | |
| "step": 45550 | |
| }, | |
| { | |
| "epoch": 2.7992633517495396, | |
| "grad_norm": 5.54389762878418, | |
| "learning_rate": 2.007980356046654e-06, | |
| "loss": 2.2846, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 2.802332719459791, | |
| "grad_norm": 12.670145988464355, | |
| "learning_rate": 1.9772866789441375e-06, | |
| "loss": 2.2808, | |
| "step": 45650 | |
| }, | |
| { | |
| "epoch": 2.805402087170043, | |
| "grad_norm": 4.009146690368652, | |
| "learning_rate": 1.946593001841621e-06, | |
| "loss": 2.1663, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 2.8084714548802947, | |
| "grad_norm": 4.112977504730225, | |
| "learning_rate": 1.915899324739104e-06, | |
| "loss": 2.2551, | |
| "step": 45750 | |
| }, | |
| { | |
| "epoch": 2.8115408225905463, | |
| "grad_norm": 5.213067054748535, | |
| "learning_rate": 1.885205647636587e-06, | |
| "loss": 2.2628, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 2.8146101903007983, | |
| "grad_norm": 3.289320230484009, | |
| "learning_rate": 1.85451197053407e-06, | |
| "loss": 2.2012, | |
| "step": 45850 | |
| }, | |
| { | |
| "epoch": 2.81767955801105, | |
| "grad_norm": 3.8698418140411377, | |
| "learning_rate": 1.8238182934315531e-06, | |
| "loss": 2.1918, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 2.8207489257213014, | |
| "grad_norm": 3.230456829071045, | |
| "learning_rate": 1.7931246163290363e-06, | |
| "loss": 2.1566, | |
| "step": 45950 | |
| }, | |
| { | |
| "epoch": 2.823818293431553, | |
| "grad_norm": 3.878119945526123, | |
| "learning_rate": 1.7624309392265194e-06, | |
| "loss": 2.1233, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.8268876611418046, | |
| "grad_norm": 3.892206907272339, | |
| "learning_rate": 1.7317372621240024e-06, | |
| "loss": 2.2653, | |
| "step": 46050 | |
| }, | |
| { | |
| "epoch": 2.8299570288520566, | |
| "grad_norm": 5.064377784729004, | |
| "learning_rate": 1.7010435850214855e-06, | |
| "loss": 2.1615, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 2.833026396562308, | |
| "grad_norm": 3.4874629974365234, | |
| "learning_rate": 1.6703499079189687e-06, | |
| "loss": 2.1768, | |
| "step": 46150 | |
| }, | |
| { | |
| "epoch": 2.8360957642725597, | |
| "grad_norm": 4.075310230255127, | |
| "learning_rate": 1.6396562308164519e-06, | |
| "loss": 2.1583, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 2.8391651319828117, | |
| "grad_norm": 6.029613018035889, | |
| "learning_rate": 1.6089625537139348e-06, | |
| "loss": 2.2742, | |
| "step": 46250 | |
| }, | |
| { | |
| "epoch": 2.8422344996930633, | |
| "grad_norm": 3.3309133052825928, | |
| "learning_rate": 1.5782688766114182e-06, | |
| "loss": 2.2977, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 2.845303867403315, | |
| "grad_norm": 3.7084951400756836, | |
| "learning_rate": 1.5475751995089011e-06, | |
| "loss": 2.2776, | |
| "step": 46350 | |
| }, | |
| { | |
| "epoch": 2.8483732351135664, | |
| "grad_norm": 3.8084752559661865, | |
| "learning_rate": 1.5168815224063843e-06, | |
| "loss": 2.2517, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 2.8514426028238185, | |
| "grad_norm": 3.2854843139648438, | |
| "learning_rate": 1.4861878453038673e-06, | |
| "loss": 2.2082, | |
| "step": 46450 | |
| }, | |
| { | |
| "epoch": 2.85451197053407, | |
| "grad_norm": 3.1363027095794678, | |
| "learning_rate": 1.4554941682013506e-06, | |
| "loss": 2.1851, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 2.8575813382443216, | |
| "grad_norm": 2.982666492462158, | |
| "learning_rate": 1.4248004910988336e-06, | |
| "loss": 2.2236, | |
| "step": 46550 | |
| }, | |
| { | |
| "epoch": 2.8606507059545736, | |
| "grad_norm": 3.61039662361145, | |
| "learning_rate": 1.3941068139963167e-06, | |
| "loss": 2.2663, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 2.863720073664825, | |
| "grad_norm": 3.5564205646514893, | |
| "learning_rate": 1.3634131368938e-06, | |
| "loss": 2.2026, | |
| "step": 46650 | |
| }, | |
| { | |
| "epoch": 2.8667894413750767, | |
| "grad_norm": 3.3528811931610107, | |
| "learning_rate": 1.332719459791283e-06, | |
| "loss": 2.2235, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 2.8698588090853283, | |
| "grad_norm": 3.672039270401001, | |
| "learning_rate": 1.302025782688766e-06, | |
| "loss": 2.2037, | |
| "step": 46750 | |
| }, | |
| { | |
| "epoch": 2.87292817679558, | |
| "grad_norm": 3.8955376148223877, | |
| "learning_rate": 1.2713321055862492e-06, | |
| "loss": 2.2178, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 2.875997544505832, | |
| "grad_norm": 4.099259376525879, | |
| "learning_rate": 1.2406384284837323e-06, | |
| "loss": 2.1855, | |
| "step": 46850 | |
| }, | |
| { | |
| "epoch": 2.8790669122160835, | |
| "grad_norm": 3.968477964401245, | |
| "learning_rate": 1.2099447513812155e-06, | |
| "loss": 2.3346, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 2.882136279926335, | |
| "grad_norm": 4.449561595916748, | |
| "learning_rate": 1.1792510742786985e-06, | |
| "loss": 2.1736, | |
| "step": 46950 | |
| }, | |
| { | |
| "epoch": 2.885205647636587, | |
| "grad_norm": 3.945478916168213, | |
| "learning_rate": 1.1485573971761818e-06, | |
| "loss": 2.2412, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.8882750153468386, | |
| "grad_norm": 3.1608164310455322, | |
| "learning_rate": 1.1178637200736648e-06, | |
| "loss": 2.2238, | |
| "step": 47050 | |
| }, | |
| { | |
| "epoch": 2.89134438305709, | |
| "grad_norm": 4.12243127822876, | |
| "learning_rate": 1.087170042971148e-06, | |
| "loss": 2.1938, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 2.8944137507673418, | |
| "grad_norm": 3.392117977142334, | |
| "learning_rate": 1.056476365868631e-06, | |
| "loss": 2.1843, | |
| "step": 47150 | |
| }, | |
| { | |
| "epoch": 2.8974831184775938, | |
| "grad_norm": 3.5791280269622803, | |
| "learning_rate": 1.0257826887661143e-06, | |
| "loss": 2.192, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 2.9005524861878453, | |
| "grad_norm": 3.195387363433838, | |
| "learning_rate": 9.950890116635972e-07, | |
| "loss": 2.221, | |
| "step": 47250 | |
| }, | |
| { | |
| "epoch": 2.903621853898097, | |
| "grad_norm": 4.41968297958374, | |
| "learning_rate": 9.643953345610804e-07, | |
| "loss": 2.2192, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 2.906691221608349, | |
| "grad_norm": 3.8508644104003906, | |
| "learning_rate": 9.337016574585636e-07, | |
| "loss": 2.1182, | |
| "step": 47350 | |
| }, | |
| { | |
| "epoch": 2.9097605893186005, | |
| "grad_norm": 3.493018865585327, | |
| "learning_rate": 9.030079803560467e-07, | |
| "loss": 2.1742, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 2.912829957028852, | |
| "grad_norm": 3.8892369270324707, | |
| "learning_rate": 8.723143032535298e-07, | |
| "loss": 2.1974, | |
| "step": 47450 | |
| }, | |
| { | |
| "epoch": 2.9158993247391036, | |
| "grad_norm": 3.238802671432495, | |
| "learning_rate": 8.416206261510129e-07, | |
| "loss": 2.3495, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.918968692449355, | |
| "grad_norm": 3.5974628925323486, | |
| "learning_rate": 8.109269490484961e-07, | |
| "loss": 2.2413, | |
| "step": 47550 | |
| }, | |
| { | |
| "epoch": 2.922038060159607, | |
| "grad_norm": 3.806520938873291, | |
| "learning_rate": 7.802332719459792e-07, | |
| "loss": 2.2885, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 2.925107427869859, | |
| "grad_norm": 4.2564496994018555, | |
| "learning_rate": 7.495395948434623e-07, | |
| "loss": 2.2423, | |
| "step": 47650 | |
| }, | |
| { | |
| "epoch": 2.9281767955801103, | |
| "grad_norm": 3.9945321083068848, | |
| "learning_rate": 7.188459177409454e-07, | |
| "loss": 2.2334, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 2.9312461632903624, | |
| "grad_norm": 3.3918559551239014, | |
| "learning_rate": 6.881522406384285e-07, | |
| "loss": 2.1502, | |
| "step": 47750 | |
| }, | |
| { | |
| "epoch": 2.934315531000614, | |
| "grad_norm": 4.716714859008789, | |
| "learning_rate": 6.574585635359117e-07, | |
| "loss": 2.2168, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 2.9373848987108655, | |
| "grad_norm": 4.033308506011963, | |
| "learning_rate": 6.267648864333948e-07, | |
| "loss": 2.2624, | |
| "step": 47850 | |
| }, | |
| { | |
| "epoch": 2.940454266421117, | |
| "grad_norm": 5.724266052246094, | |
| "learning_rate": 5.960712093308779e-07, | |
| "loss": 2.1917, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 2.943523634131369, | |
| "grad_norm": 3.851032257080078, | |
| "learning_rate": 5.65377532228361e-07, | |
| "loss": 2.2129, | |
| "step": 47950 | |
| }, | |
| { | |
| "epoch": 2.9465930018416207, | |
| "grad_norm": 3.436573028564453, | |
| "learning_rate": 5.346838551258441e-07, | |
| "loss": 2.3044, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.949662369551872, | |
| "grad_norm": 3.4812095165252686, | |
| "learning_rate": 5.039901780233272e-07, | |
| "loss": 2.2165, | |
| "step": 48050 | |
| }, | |
| { | |
| "epoch": 2.9527317372621242, | |
| "grad_norm": 3.4163248538970947, | |
| "learning_rate": 4.732965009208103e-07, | |
| "loss": 2.1441, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 2.955801104972376, | |
| "grad_norm": 4.04727840423584, | |
| "learning_rate": 4.426028238182934e-07, | |
| "loss": 2.2139, | |
| "step": 48150 | |
| }, | |
| { | |
| "epoch": 2.9588704726826274, | |
| "grad_norm": 3.314655065536499, | |
| "learning_rate": 4.119091467157765e-07, | |
| "loss": 2.2098, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 2.961939840392879, | |
| "grad_norm": 4.228841304779053, | |
| "learning_rate": 3.812154696132597e-07, | |
| "loss": 2.1626, | |
| "step": 48250 | |
| }, | |
| { | |
| "epoch": 2.9650092081031305, | |
| "grad_norm": 4.127499580383301, | |
| "learning_rate": 3.505217925107428e-07, | |
| "loss": 2.3014, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 2.9680785758133825, | |
| "grad_norm": 3.7830405235290527, | |
| "learning_rate": 3.198281154082259e-07, | |
| "loss": 2.2795, | |
| "step": 48350 | |
| }, | |
| { | |
| "epoch": 2.971147943523634, | |
| "grad_norm": 5.398400783538818, | |
| "learning_rate": 2.89134438305709e-07, | |
| "loss": 2.2314, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 2.9742173112338857, | |
| "grad_norm": 4.308445453643799, | |
| "learning_rate": 2.584407612031921e-07, | |
| "loss": 2.3097, | |
| "step": 48450 | |
| }, | |
| { | |
| "epoch": 2.9772866789441377, | |
| "grad_norm": 3.5713133811950684, | |
| "learning_rate": 2.277470841006753e-07, | |
| "loss": 2.1968, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 2.9803560466543892, | |
| "grad_norm": 3.5215930938720703, | |
| "learning_rate": 1.970534069981584e-07, | |
| "loss": 2.2354, | |
| "step": 48550 | |
| }, | |
| { | |
| "epoch": 2.983425414364641, | |
| "grad_norm": 3.120506763458252, | |
| "learning_rate": 1.663597298956415e-07, | |
| "loss": 2.2237, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 2.9864947820748924, | |
| "grad_norm": 6.137388706207275, | |
| "learning_rate": 1.3566605279312464e-07, | |
| "loss": 2.235, | |
| "step": 48650 | |
| }, | |
| { | |
| "epoch": 2.9895641497851444, | |
| "grad_norm": 3.3079631328582764, | |
| "learning_rate": 1.0497237569060774e-07, | |
| "loss": 2.1549, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 2.992633517495396, | |
| "grad_norm": 4.21074914932251, | |
| "learning_rate": 7.427869858809085e-08, | |
| "loss": 2.1818, | |
| "step": 48750 | |
| }, | |
| { | |
| "epoch": 2.9957028852056475, | |
| "grad_norm": 3.8667783737182617, | |
| "learning_rate": 4.358502148557397e-08, | |
| "loss": 2.3276, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 2.9987722529158995, | |
| "grad_norm": 4.376747131347656, | |
| "learning_rate": 1.2891344383057091e-08, | |
| "loss": 2.2735, | |
| "step": 48850 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 48870, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.816879851189043e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |