{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9997034576834114,
"eval_steps": 500,
"global_step": 25290,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.059308463317715436,
"grad_norm": 36.79362869262695,
"learning_rate": 1.9609331751680507e-05,
"loss": 2.1261,
"step": 500
},
{
"epoch": 0.11861692663543087,
"grad_norm": 28.276079177856445,
"learning_rate": 1.92139185448794e-05,
"loss": 1.4346,
"step": 1000
},
{
"epoch": 0.1779253899531463,
"grad_norm": 27.88380241394043,
"learning_rate": 1.8818505338078293e-05,
"loss": 1.2696,
"step": 1500
},
{
"epoch": 0.23723385327086174,
"grad_norm": 21.309072494506836,
"learning_rate": 1.8423092131277186e-05,
"loss": 1.1835,
"step": 2000
},
{
"epoch": 0.2965423165885772,
"grad_norm": 34.47601318359375,
"learning_rate": 1.802767892447608e-05,
"loss": 1.1527,
"step": 2500
},
{
"epoch": 0.3558507799062926,
"grad_norm": 30.186681747436523,
"learning_rate": 1.7633056544088573e-05,
"loss": 1.0756,
"step": 3000
},
{
"epoch": 0.41515924322400805,
"grad_norm": 19.00593376159668,
"learning_rate": 1.7237643337287466e-05,
"loss": 1.0807,
"step": 3500
},
{
"epoch": 0.4744677065417235,
"grad_norm": 27.36587142944336,
"learning_rate": 1.684223013048636e-05,
"loss": 1.0658,
"step": 4000
},
{
"epoch": 0.533776169859439,
"grad_norm": 28.576007843017578,
"learning_rate": 1.644681692368525e-05,
"loss": 1.0234,
"step": 4500
},
{
"epoch": 0.5930846331771544,
"grad_norm": 24.95502471923828,
"learning_rate": 1.6052194543297746e-05,
"loss": 1.0081,
"step": 5000
},
{
"epoch": 0.6523930964948699,
"grad_norm": 26.936307907104492,
"learning_rate": 1.565678133649664e-05,
"loss": 0.9975,
"step": 5500
},
{
"epoch": 0.7117015598125852,
"grad_norm": 20.501012802124023,
"learning_rate": 1.5261368129695532e-05,
"loss": 0.975,
"step": 6000
},
{
"epoch": 0.7710100231303006,
"grad_norm": 18.4880428314209,
"learning_rate": 1.4865954922894426e-05,
"loss": 0.968,
"step": 6500
},
{
"epoch": 0.8303184864480161,
"grad_norm": 27.225852966308594,
"learning_rate": 1.4470541716093318e-05,
"loss": 0.9638,
"step": 7000
},
{
"epoch": 0.8896269497657315,
"grad_norm": 19.646434783935547,
"learning_rate": 1.407512850929221e-05,
"loss": 0.9294,
"step": 7500
},
{
"epoch": 0.948935413083447,
"grad_norm": 19.823829650878906,
"learning_rate": 1.3679715302491103e-05,
"loss": 0.9074,
"step": 8000
},
{
"epoch": 1.0081845679378447,
"grad_norm": 30.65908432006836,
"learning_rate": 1.3284302095689998e-05,
"loss": 0.8917,
"step": 8500
},
{
"epoch": 1.0674930312555602,
"grad_norm": 15.392027854919434,
"learning_rate": 1.288888888888889e-05,
"loss": 0.7416,
"step": 9000
},
{
"epoch": 1.1268014945732756,
"grad_norm": 11.21274185180664,
"learning_rate": 1.2493475682087784e-05,
"loss": 0.7167,
"step": 9500
},
{
"epoch": 1.186109957890991,
"grad_norm": 13.336421012878418,
"learning_rate": 1.2098062475286676e-05,
"loss": 0.7167,
"step": 10000
},
{
"epoch": 1.2454184212087065,
"grad_norm": 11.942096710205078,
"learning_rate": 1.170264926848557e-05,
"loss": 0.7162,
"step": 10500
},
{
"epoch": 1.304726884526422,
"grad_norm": 11.245469093322754,
"learning_rate": 1.1308026888098062e-05,
"loss": 0.7256,
"step": 11000
},
{
"epoch": 1.3640353478441374,
"grad_norm": 59.7520751953125,
"learning_rate": 1.0913404507710558e-05,
"loss": 0.7336,
"step": 11500
},
{
"epoch": 1.4233438111618528,
"grad_norm": 22.972858428955078,
"learning_rate": 1.0517991300909451e-05,
"loss": 0.6973,
"step": 12000
},
{
"epoch": 1.4826522744795683,
"grad_norm": 19.813735961914062,
"learning_rate": 1.0122578094108344e-05,
"loss": 0.6968,
"step": 12500
},
{
"epoch": 1.5419607377972837,
"grad_norm": 20.160003662109375,
"learning_rate": 9.727164887307237e-06,
"loss": 0.6998,
"step": 13000
},
{
"epoch": 1.6012692011149992,
"grad_norm": 14.531295776367188,
"learning_rate": 9.33175168050613e-06,
"loss": 0.6966,
"step": 13500
},
{
"epoch": 1.6605776644327146,
"grad_norm": 23.200489044189453,
"learning_rate": 8.937129300118624e-06,
"loss": 0.6846,
"step": 14000
},
{
"epoch": 1.71988612775043,
"grad_norm": 29.29988670349121,
"learning_rate": 8.541716093317518e-06,
"loss": 0.7078,
"step": 14500
},
{
"epoch": 1.7791945910681455,
"grad_norm": 15.779882431030273,
"learning_rate": 8.147093712930011e-06,
"loss": 0.7016,
"step": 15000
},
{
"epoch": 1.838503054385861,
"grad_norm": 14.096702575683594,
"learning_rate": 7.751680506128906e-06,
"loss": 0.686,
"step": 15500
},
{
"epoch": 1.8978115177035764,
"grad_norm": 22.493389129638672,
"learning_rate": 7.356267299327799e-06,
"loss": 0.7019,
"step": 16000
},
{
"epoch": 1.9571199810212918,
"grad_norm": 33.955657958984375,
"learning_rate": 6.960854092526691e-06,
"loss": 0.6723,
"step": 16500
},
{
"epoch": 2.0163691358756894,
"grad_norm": 19.76311683654785,
"learning_rate": 6.5654408857255835e-06,
"loss": 0.6539,
"step": 17000
},
{
"epoch": 2.075677599193405,
"grad_norm": 10.439183235168457,
"learning_rate": 6.170818505338079e-06,
"loss": 0.5251,
"step": 17500
},
{
"epoch": 2.1349860625111203,
"grad_norm": 9.628398895263672,
"learning_rate": 5.775405298536972e-06,
"loss": 0.521,
"step": 18000
},
{
"epoch": 2.1942945258288358,
"grad_norm": 23.81959342956543,
"learning_rate": 5.379992091735864e-06,
"loss": 0.5243,
"step": 18500
},
{
"epoch": 2.253602989146551,
"grad_norm": 88.61954498291016,
"learning_rate": 4.984578884934757e-06,
"loss": 0.5298,
"step": 19000
},
{
"epoch": 2.3129114524642667,
"grad_norm": 34.159732818603516,
"learning_rate": 4.58916567813365e-06,
"loss": 0.5132,
"step": 19500
},
{
"epoch": 2.372219915781982,
"grad_norm": 13.070930480957031,
"learning_rate": 4.193752471332543e-06,
"loss": 0.5221,
"step": 20000
},
{
"epoch": 2.4315283790996975,
"grad_norm": 15.99543571472168,
"learning_rate": 3.7983392645314355e-06,
"loss": 0.53,
"step": 20500
},
{
"epoch": 2.490836842417413,
"grad_norm": 35.39397430419922,
"learning_rate": 3.4029260577303288e-06,
"loss": 0.516,
"step": 21000
},
{
"epoch": 2.5501453057351284,
"grad_norm": 20.889413833618164,
"learning_rate": 3.0083036773428236e-06,
"loss": 0.5134,
"step": 21500
},
{
"epoch": 2.609453769052844,
"grad_norm": 14.608019828796387,
"learning_rate": 2.6128904705417165e-06,
"loss": 0.508,
"step": 22000
},
{
"epoch": 2.6687622323705593,
"grad_norm": 44.29560852050781,
"learning_rate": 2.2174772637406093e-06,
"loss": 0.5055,
"step": 22500
},
{
"epoch": 2.7280706956882748,
"grad_norm": 22.466794967651367,
"learning_rate": 1.822064056939502e-06,
"loss": 0.4965,
"step": 23000
},
{
"epoch": 2.78737915900599,
"grad_norm": 61.90739822387695,
"learning_rate": 1.4274416765519968e-06,
"loss": 0.519,
"step": 23500
},
{
"epoch": 2.8466876223237056,
"grad_norm": 27.325544357299805,
"learning_rate": 1.032819296164492e-06,
"loss": 0.5097,
"step": 24000
},
{
"epoch": 2.905996085641421,
"grad_norm": 45.56763458251953,
"learning_rate": 6.374060893633847e-07,
"loss": 0.4979,
"step": 24500
},
{
"epoch": 2.9653045489591365,
"grad_norm": 29.870397567749023,
"learning_rate": 2.419928825622776e-07,
"loss": 0.51,
"step": 25000
},
{
"epoch": 2.9997034576834114,
"step": 25290,
"total_flos": 8.259596752704e+16,
"train_loss": 0.7820950991931099,
"train_runtime": 45983.6556,
"train_samples_per_second": 8.8,
"train_steps_per_second": 0.55
}
],
"logging_steps": 500,
"max_steps": 25290,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.259596752704e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}