{ "best_metric": 3.675755739212036, "best_model_checkpoint": "models/GPT2_natural_function_67/checkpoint-64390", "epoch": 10.0, "global_step": 64390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 1e-05, "loss": 7.5832, "step": 1000 }, { "epoch": 0.31, "learning_rate": 2e-05, "loss": 6.0927, "step": 2000 }, { "epoch": 0.47, "learning_rate": 3e-05, "loss": 5.6579, "step": 3000 }, { "epoch": 0.62, "learning_rate": 4e-05, "loss": 5.3482, "step": 4000 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 5.1066, "step": 5000 }, { "epoch": 0.93, "learning_rate": 6e-05, "loss": 4.9145, "step": 6000 }, { "epoch": 1.0, "eval_accuracy": 0.29016047550269236, "eval_loss": 4.688190937042236, "eval_runtime": 1.9998, "eval_samples_per_second": 591.551, "eval_steps_per_second": 5.0, "step": 6439 }, { "epoch": 1.09, "learning_rate": 7e-05, "loss": 4.7467, "step": 7000 }, { "epoch": 1.24, "learning_rate": 8e-05, "loss": 4.6193, "step": 8000 }, { "epoch": 1.4, "learning_rate": 9e-05, "loss": 4.5146, "step": 9000 }, { "epoch": 1.55, "learning_rate": 0.0001, "loss": 4.4312, "step": 10000 }, { "epoch": 1.71, "learning_rate": 9.816326530612245e-05, "loss": 4.3532, "step": 11000 }, { "epoch": 1.86, "learning_rate": 9.632469203897775e-05, "loss": 4.2877, "step": 12000 }, { "epoch": 2.0, "eval_accuracy": 0.32783993716761733, "eval_loss": 4.14790153503418, "eval_runtime": 2.0061, "eval_samples_per_second": 589.709, "eval_steps_per_second": 4.985, "step": 12878 }, { "epoch": 2.02, "learning_rate": 9.448795734510021e-05, "loss": 4.2314, "step": 13000 }, { "epoch": 2.17, "learning_rate": 9.264938407795552e-05, "loss": 4.1519, "step": 14000 }, { "epoch": 2.33, "learning_rate": 9.081264938407796e-05, "loss": 4.1216, "step": 15000 }, { "epoch": 2.48, "learning_rate": 8.897407611693326e-05, "loss": 4.0984, "step": 16000 }, { "epoch": 2.64, "learning_rate": 8.713734142305572e-05, "loss": 4.0698, "step": 17000 }, { "epoch": 2.8, "learning_rate": 8.529876815591101e-05, "loss": 4.049, "step": 18000 }, { "epoch": 2.95, "learning_rate": 8.346203346203346e-05, "loss": 4.0279, "step": 19000 }, { "epoch": 3.0, "eval_accuracy": 0.3453651133844956, "eval_loss": 3.954080820083618, "eval_runtime": 2.0088, "eval_samples_per_second": 588.916, "eval_steps_per_second": 4.978, "step": 19317 }, { "epoch": 3.11, "learning_rate": 8.162346019488876e-05, "loss": 3.9655, "step": 20000 }, { "epoch": 3.26, "learning_rate": 7.978672550101122e-05, "loss": 3.9369, "step": 21000 }, { "epoch": 3.42, "learning_rate": 7.794815223386652e-05, "loss": 3.9293, "step": 22000 }, { "epoch": 3.57, "learning_rate": 7.611325611325611e-05, "loss": 3.9213, "step": 23000 }, { "epoch": 3.73, "learning_rate": 7.427468284611142e-05, "loss": 3.9107, "step": 24000 }, { "epoch": 3.88, "learning_rate": 7.243610957896673e-05, "loss": 3.9003, "step": 25000 }, { "epoch": 4.0, "eval_accuracy": 0.3553157926265134, "eval_loss": 3.856142044067383, "eval_runtime": 2.0123, "eval_samples_per_second": 587.87, "eval_steps_per_second": 4.969, "step": 25756 }, { "epoch": 4.04, "learning_rate": 7.059937488508918e-05, "loss": 3.8711, "step": 26000 }, { "epoch": 4.19, "learning_rate": 6.876080161794448e-05, "loss": 3.8186, "step": 27000 }, { "epoch": 4.35, "learning_rate": 6.692406692406693e-05, "loss": 3.8207, "step": 28000 }, { "epoch": 4.5, "learning_rate": 6.508549365692223e-05, "loss": 3.8165, "step": 29000 }, { "epoch": 4.66, "learning_rate": 6.324692038977753e-05, "loss": 3.8114, "step": 30000 }, { "epoch": 4.81, "learning_rate": 6.141018569589998e-05, "loss": 3.8074, "step": 31000 }, { "epoch": 4.97, "learning_rate": 5.957161242875529e-05, "loss": 3.8034, "step": 32000 }, { "epoch": 5.0, "eval_accuracy": 0.36155243908120954, "eval_loss": 3.7953288555145264, "eval_runtime": 1.9992, "eval_samples_per_second": 591.726, "eval_steps_per_second": 5.002, "step": 32195 }, { "epoch": 5.13, "learning_rate": 5.7734877734877734e-05, "loss": 3.7432, "step": 33000 }, { "epoch": 5.28, "learning_rate": 5.589630446773304e-05, "loss": 3.7348, "step": 34000 }, { "epoch": 5.44, "learning_rate": 5.4057731200588346e-05, "loss": 3.7356, "step": 35000 }, { "epoch": 5.59, "learning_rate": 5.222099650671079e-05, "loss": 3.7344, "step": 36000 }, { "epoch": 5.75, "learning_rate": 5.03824232395661e-05, "loss": 3.735, "step": 37000 }, { "epoch": 5.9, "learning_rate": 4.854568854568855e-05, "loss": 3.731, "step": 38000 }, { "epoch": 6.0, "eval_accuracy": 0.36634473945194723, "eval_loss": 3.753897190093994, "eval_runtime": 2.0037, "eval_samples_per_second": 590.407, "eval_steps_per_second": 4.991, "step": 38634 }, { "epoch": 6.06, "learning_rate": 4.670711527854385e-05, "loss": 3.7019, "step": 39000 }, { "epoch": 6.21, "learning_rate": 4.486854201139915e-05, "loss": 3.6644, "step": 40000 }, { "epoch": 6.37, "learning_rate": 4.3031807317521606e-05, "loss": 3.6718, "step": 41000 }, { "epoch": 6.52, "learning_rate": 4.119323405037691e-05, "loss": 3.6717, "step": 42000 }, { "epoch": 6.68, "learning_rate": 3.935466078323221e-05, "loss": 3.6718, "step": 43000 }, { "epoch": 6.83, "learning_rate": 3.751792608935466e-05, "loss": 3.6713, "step": 44000 }, { "epoch": 6.99, "learning_rate": 3.567935282220997e-05, "loss": 3.6709, "step": 45000 }, { "epoch": 7.0, "eval_accuracy": 0.3699988684846347, "eval_loss": 3.72359037399292, "eval_runtime": 2.0134, "eval_samples_per_second": 587.554, "eval_steps_per_second": 4.967, "step": 45073 }, { "epoch": 7.14, "learning_rate": 3.384261812833241e-05, "loss": 3.6101, "step": 46000 }, { "epoch": 7.3, "learning_rate": 3.200404486118772e-05, "loss": 3.6119, "step": 47000 }, { "epoch": 7.45, "learning_rate": 3.0165471594043025e-05, "loss": 3.6167, "step": 48000 }, { "epoch": 7.61, "learning_rate": 2.8326898326898328e-05, "loss": 3.618, "step": 49000 }, { "epoch": 7.77, "learning_rate": 2.6490163633020777e-05, "loss": 3.6183, "step": 50000 }, { "epoch": 7.92, "learning_rate": 2.465159036587608e-05, "loss": 3.6162, "step": 51000 }, { "epoch": 8.0, "eval_accuracy": 0.3734133824987853, "eval_loss": 3.7009902000427246, "eval_runtime": 2.0145, "eval_samples_per_second": 587.246, "eval_steps_per_second": 4.964, "step": 51512 }, { "epoch": 8.08, "learning_rate": 2.281485567199853e-05, "loss": 3.5874, "step": 52000 }, { "epoch": 8.23, "learning_rate": 2.0976282404853832e-05, "loss": 3.5646, "step": 53000 }, { "epoch": 8.39, "learning_rate": 1.913954771097628e-05, "loss": 3.5686, "step": 54000 }, { "epoch": 8.54, "learning_rate": 1.7300974443831588e-05, "loss": 3.5686, "step": 55000 }, { "epoch": 8.7, "learning_rate": 1.546240117668689e-05, "loss": 3.5705, "step": 56000 }, { "epoch": 8.85, "learning_rate": 1.3623827909542197e-05, "loss": 3.5698, "step": 57000 }, { "epoch": 9.0, "eval_accuracy": 0.3750973436012806, "eval_loss": 3.684567928314209, "eval_runtime": 2.0058, "eval_samples_per_second": 589.776, "eval_steps_per_second": 4.985, "step": 57951 }, { "epoch": 9.01, "learning_rate": 1.1787093215664645e-05, "loss": 3.566, "step": 58000 }, { "epoch": 9.16, "learning_rate": 9.948519948519949e-06, "loss": 3.5272, "step": 59000 }, { "epoch": 9.32, "learning_rate": 8.111785254642398e-06, "loss": 3.5296, "step": 60000 }, { "epoch": 9.47, "learning_rate": 6.273211987497703e-06, "loss": 3.5308, "step": 61000 }, { "epoch": 9.63, "learning_rate": 4.436477293620151e-06, "loss": 3.5305, "step": 62000 }, { "epoch": 9.78, "learning_rate": 2.597904026475455e-06, "loss": 3.527, "step": 63000 }, { "epoch": 9.94, "learning_rate": 7.61169332597904e-07, "loss": 3.5258, "step": 64000 }, { "epoch": 10.0, "eval_accuracy": 0.3762155470211194, "eval_loss": 3.675755739212036, "eval_runtime": 2.0114, "eval_samples_per_second": 588.151, "eval_steps_per_second": 4.972, "step": 64390 }, { "epoch": 10.0, "step": 64390, "total_flos": 5.3836218335232e+17, "train_loss": 4.008237893900936, "train_runtime": 29754.0363, "train_samples_per_second": 276.989, "train_steps_per_second": 2.164 } ], "max_steps": 64390, "num_train_epochs": 10, "total_flos": 5.3836218335232e+17, "trial_name": null, "trial_params": null }