TermiGen-32B / trainer_state.json
March07's picture
Upload folder using huggingface_hub
d2f27bd verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 515,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0970873786407767,
"grad_norm": 6.964529721458028,
"learning_rate": 8.653846153846154e-07,
"loss": 0.9101,
"step": 10
},
{
"epoch": 0.1941747572815534,
"grad_norm": 3.566929368993238,
"learning_rate": 1.826923076923077e-06,
"loss": 0.785,
"step": 20
},
{
"epoch": 0.2912621359223301,
"grad_norm": 2.572419911943158,
"learning_rate": 2.7884615384615386e-06,
"loss": 0.6303,
"step": 30
},
{
"epoch": 0.3883495145631068,
"grad_norm": 0.6433805154127846,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.5371,
"step": 40
},
{
"epoch": 0.4854368932038835,
"grad_norm": 0.5152897860452502,
"learning_rate": 4.711538461538462e-06,
"loss": 0.4788,
"step": 50
},
{
"epoch": 0.5825242718446602,
"grad_norm": 0.7787775149779967,
"learning_rate": 4.997180564209414e-06,
"loss": 0.4451,
"step": 60
},
{
"epoch": 0.6796116504854369,
"grad_norm": 0.8091799603982648,
"learning_rate": 4.9833863897161715e-06,
"loss": 0.4145,
"step": 70
},
{
"epoch": 0.7766990291262136,
"grad_norm": 0.8426511044153906,
"learning_rate": 4.95816302585984e-06,
"loss": 0.3935,
"step": 80
},
{
"epoch": 0.8737864077669902,
"grad_norm": 0.8115389627762832,
"learning_rate": 4.9216265571140565e-06,
"loss": 0.3753,
"step": 90
},
{
"epoch": 0.970873786407767,
"grad_norm": 0.5597270195195797,
"learning_rate": 4.8739451338003675e-06,
"loss": 0.3679,
"step": 100
},
{
"epoch": 1.0679611650485437,
"grad_norm": 0.41701001823219247,
"learning_rate": 4.815338198216762e-06,
"loss": 0.3288,
"step": 110
},
{
"epoch": 1.1650485436893203,
"grad_norm": 22.448277335293643,
"learning_rate": 4.746075474707204e-06,
"loss": 0.3192,
"step": 120
},
{
"epoch": 1.262135922330097,
"grad_norm": 0.4010902102419476,
"learning_rate": 4.666475728320124e-06,
"loss": 0.3189,
"step": 130
},
{
"epoch": 1.3592233009708738,
"grad_norm": 0.48930786140814836,
"learning_rate": 4.576905297768856e-06,
"loss": 0.3132,
"step": 140
},
{
"epoch": 1.4563106796116505,
"grad_norm": 0.3541487037094227,
"learning_rate": 4.477776409445692e-06,
"loss": 0.3038,
"step": 150
},
{
"epoch": 1.5533980582524272,
"grad_norm": 31.33298474339703,
"learning_rate": 4.369545280248932e-06,
"loss": 0.2957,
"step": 160
},
{
"epoch": 1.650485436893204,
"grad_norm": 0.3578518918951451,
"learning_rate": 4.252710017954191e-06,
"loss": 0.2966,
"step": 170
},
{
"epoch": 1.7475728155339807,
"grad_norm": 0.33938843089336523,
"learning_rate": 4.127808328793e-06,
"loss": 0.3004,
"step": 180
},
{
"epoch": 1.8446601941747574,
"grad_norm": 0.32000682056080126,
"learning_rate": 3.995415042789034e-06,
"loss": 0.3086,
"step": 190
},
{
"epoch": 1.941747572815534,
"grad_norm": 0.3829047834599722,
"learning_rate": 3.856139468240996e-06,
"loss": 0.2868,
"step": 200
},
{
"epoch": 2.0388349514563107,
"grad_norm": 0.4031366118857969,
"learning_rate": 3.7106225875275257e-06,
"loss": 0.2779,
"step": 210
},
{
"epoch": 2.1359223300970873,
"grad_norm": 0.37263381399379847,
"learning_rate": 3.5595341071397627e-06,
"loss": 0.2504,
"step": 220
},
{
"epoch": 2.233009708737864,
"grad_norm": 0.3332345121427193,
"learning_rate": 3.4035693755180817e-06,
"loss": 0.2466,
"step": 230
},
{
"epoch": 2.3300970873786406,
"grad_norm": 0.2989895988940855,
"learning_rate": 3.2434461828779096e-06,
"loss": 0.2387,
"step": 240
},
{
"epoch": 2.4271844660194173,
"grad_norm": 0.3583335796728048,
"learning_rate": 3.0799014577526735e-06,
"loss": 0.2418,
"step": 250
},
{
"epoch": 2.524271844660194,
"grad_norm": 0.38853396086729947,
"learning_rate": 2.9136878754572317e-06,
"loss": 0.2542,
"step": 260
},
{
"epoch": 2.6213592233009706,
"grad_norm": 0.3547407166001253,
"learning_rate": 2.7455703940805228e-06,
"loss": 0.2419,
"step": 270
},
{
"epoch": 2.7184466019417477,
"grad_norm": 0.33729197409950573,
"learning_rate": 2.5763227339496984e-06,
"loss": 0.24,
"step": 280
},
{
"epoch": 2.8155339805825244,
"grad_norm": 0.33325471675524143,
"learning_rate": 2.4067238167681655e-06,
"loss": 0.2414,
"step": 290
},
{
"epoch": 2.912621359223301,
"grad_norm": 0.3516852631876031,
"learning_rate": 2.237554180815538e-06,
"loss": 0.2413,
"step": 300
},
{
"epoch": 3.0097087378640777,
"grad_norm": 0.3860131260501091,
"learning_rate": 2.0695923887076824e-06,
"loss": 0.241,
"step": 310
},
{
"epoch": 3.1067961165048543,
"grad_norm": 0.3824808831772379,
"learning_rate": 1.9036114442492901e-06,
"loss": 0.2095,
"step": 320
},
{
"epoch": 3.203883495145631,
"grad_norm": 0.39411524156521255,
"learning_rate": 1.7403752348695296e-06,
"loss": 0.211,
"step": 330
},
{
"epoch": 3.3009708737864076,
"grad_norm": 0.3829443147282107,
"learning_rate": 1.5806350160136446e-06,
"loss": 0.2072,
"step": 340
},
{
"epoch": 3.3980582524271843,
"grad_norm": 0.36458747976200806,
"learning_rate": 1.4251259536702078e-06,
"loss": 0.1964,
"step": 350
},
{
"epoch": 3.4951456310679614,
"grad_norm": 0.37575548194010133,
"learning_rate": 1.2745637409462447e-06,
"loss": 0.2088,
"step": 360
},
{
"epoch": 3.592233009708738,
"grad_norm": 0.3652962786931115,
"learning_rate": 1.1296413042616115e-06,
"loss": 0.1994,
"step": 370
},
{
"epoch": 3.6893203883495147,
"grad_norm": 0.3712933316004299,
"learning_rate": 9.910256143215882e-07,
"loss": 0.2045,
"step": 380
},
{
"epoch": 3.7864077669902914,
"grad_norm": 0.3461802470195759,
"learning_rate": 8.593546165444078e-07,
"loss": 0.2045,
"step": 390
},
{
"epoch": 3.883495145631068,
"grad_norm": 0.35407836747337357,
"learning_rate": 7.352342950706964e-07,
"loss": 0.2067,
"step": 400
},
{
"epoch": 3.9805825242718447,
"grad_norm": 0.35720973588507643,
"learning_rate": 6.192358838670293e-07,
"loss": 0.1983,
"step": 410
},
{
"epoch": 4.077669902912621,
"grad_norm": 0.49136621176532774,
"learning_rate": 5.118932377587984e-07,
"loss": 0.186,
"step": 420
},
{
"epoch": 4.174757281553398,
"grad_norm": 0.3964877004137412,
"learning_rate": 4.137003754916105e-07,
"loss": 0.1931,
"step": 430
},
{
"epoch": 4.271844660194175,
"grad_norm": 0.34705190574049327,
"learning_rate": 3.2510920612867284e-07,
"loss": 0.1775,
"step": 440
},
{
"epoch": 4.368932038834951,
"grad_norm": 0.37657649802404225,
"learning_rate": 2.4652744924787253e-07,
"loss": 0.1913,
"step": 450
},
{
"epoch": 4.466019417475728,
"grad_norm": 0.3797812117921156,
"learning_rate": 1.7831675851035264e-07,
"loss": 0.193,
"step": 460
},
{
"epoch": 4.563106796116505,
"grad_norm": 0.35942347600355457,
"learning_rate": 1.207910572364046e-07,
"loss": 0.1828,
"step": 470
},
{
"epoch": 4.660194174757281,
"grad_norm": 0.34568138491238304,
"learning_rate": 7.421509364878927e-08,
"loss": 0.1833,
"step": 480
},
{
"epoch": 4.757281553398058,
"grad_norm": 0.6159835033744901,
"learning_rate": 3.8803222432630685e-08,
"loss": 0.1832,
"step": 490
},
{
"epoch": 4.854368932038835,
"grad_norm": 0.38886425151937265,
"learning_rate": 1.4718418219468178e-08,
"loss": 0.1827,
"step": 500
},
{
"epoch": 4.951456310679612,
"grad_norm": 0.4089928939727998,
"learning_rate": 2.0715255356559826e-09,
"loss": 0.1829,
"step": 510
},
{
"epoch": 5.0,
"step": 515,
"total_flos": 944499472728064.0,
"train_loss": 0.29363987642584494,
"train_runtime": 38623.7413,
"train_samples_per_second": 0.426,
"train_steps_per_second": 0.013
}
],
"logging_steps": 10,
"max_steps": 515,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 944499472728064.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}