| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9767441860465116, | |
| "eval_steps": 500, | |
| "global_step": 192, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.15503875968992248, | |
| "grad_norm": 0.7852384448051453, | |
| "learning_rate": 1.75e-05, | |
| "loss": 2.506, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.31007751937984496, | |
| "grad_norm": 0.5620294809341431, | |
| "learning_rate": 4.25e-05, | |
| "loss": 1.9499, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.46511627906976744, | |
| "grad_norm": 0.818453848361969, | |
| "learning_rate": 4.9795940299380575e-05, | |
| "loss": 1.8121, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.6201550387596899, | |
| "grad_norm": 0.5289535522460938, | |
| "learning_rate": 4.880447529310118e-05, | |
| "loss": 1.789, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.7751937984496124, | |
| "grad_norm": 0.8725740313529968, | |
| "learning_rate": 4.723355372206297e-05, | |
| "loss": 1.7512, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.9302325581395349, | |
| "grad_norm": 0.7308263182640076, | |
| "learning_rate": 4.478741221073136e-05, | |
| "loss": 1.6186, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.0852713178294573, | |
| "grad_norm": 0.7901429533958435, | |
| "learning_rate": 4.1682970280555986e-05, | |
| "loss": 1.6679, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.2403100775193798, | |
| "grad_norm": 0.4563349783420563, | |
| "learning_rate": 3.8023508512198256e-05, | |
| "loss": 1.266, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.3953488372093024, | |
| "grad_norm": 0.5261668562889099, | |
| "learning_rate": 3.393077224502832e-05, | |
| "loss": 1.3869, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.550387596899225, | |
| "grad_norm": 0.8815342783927917, | |
| "learning_rate": 2.954092127448591e-05, | |
| "loss": 1.2376, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.7054263565891472, | |
| "grad_norm": 0.727622926235199, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.291, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.8604651162790697, | |
| "grad_norm": 0.6443896293640137, | |
| "learning_rate": 2.0459078725514092e-05, | |
| "loss": 1.3153, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.0155038759689923, | |
| "grad_norm": 0.7863492965698242, | |
| "learning_rate": 1.6069227754971683e-05, | |
| "loss": 1.3498, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.1705426356589146, | |
| "grad_norm": 0.9250425696372986, | |
| "learning_rate": 1.1976491487801748e-05, | |
| "loss": 0.9415, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.3255813953488373, | |
| "grad_norm": 0.9809682965278625, | |
| "learning_rate": 8.317029719444016e-06, | |
| "loss": 0.7475, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.4806201550387597, | |
| "grad_norm": 0.8305571675300598, | |
| "learning_rate": 5.2125877892686496e-06, | |
| "loss": 0.6603, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.6356589147286824, | |
| "grad_norm": 0.6964967846870422, | |
| "learning_rate": 2.7664462779370293e-06, | |
| "loss": 0.6403, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.7906976744186047, | |
| "grad_norm": 1.0790668725967407, | |
| "learning_rate": 1.0599850022898539e-06, | |
| "loss": 0.8335, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.945736434108527, | |
| "grad_norm": 0.8265672326087952, | |
| "learning_rate": 1.4997561900135238e-07, | |
| "loss": 0.6972, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.9767441860465116, | |
| "step": 192, | |
| "total_flos": 1.1519499230222746e+17, | |
| "train_loss": 1.3342496789991856, | |
| "train_runtime": 2508.9029, | |
| "train_samples_per_second": 0.617, | |
| "train_steps_per_second": 0.077 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 192, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1519499230222746e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |