blackbeard334 committed on
Commit
b458432
·
verified ·
1 Parent(s): ac6bdf6

Training in progress, step 9930, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b545f2ac256a1ec934f08c21773adea843eb241aa5f58a3a75826b63ddc2bc0a
3
  size 1657
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cf2b2692bd437523e84d15e36f9ab617d11e127d6c59b0dcec8c128f0a0287b
3
  size 1657
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:695e8cb5033fd9aa348d65602b2210e61a59f5fc94ac74efdd3602e03d26a7c2
3
  size 14709
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:165be38fd18fbee14a94b0ef0b0f4ca61e34e0cdf4d4eea2d670a80d97dc37e9
3
  size 14709
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7a68bf6a1ee407daacf6969fc31e8f14203c55ddbc395e807ed319072927cc8
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:256bd28d09808220fc486c0b3f1471b8492dcd6c2c8c440dc19925088713e0a1
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 76.3076923076923,
6
  "eval_steps": 500,
7
- "global_step": 9920,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -17864,6 +17864,24 @@
17864
  "mean_token_accuracy": 0.6330039739608765,
17865
  "num_tokens": 36177566.0,
17866
  "step": 9920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17867
  }
17868
  ],
17869
  "logging_steps": 5,
@@ -17883,7 +17901,7 @@
17883
  "attributes": {}
17884
  }
17885
  },
17886
- "total_flos": 1.834522347220992e+18,
17887
  "train_batch_size": 4,
17888
  "trial_name": null,
17889
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 76.38461538461539,
6
  "eval_steps": 500,
7
+ "global_step": 9930,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
17864
  "mean_token_accuracy": 0.6330039739608765,
17865
  "num_tokens": 36177566.0,
17866
  "step": 9920
17867
+ },
17868
+ {
17869
+ "epoch": 76.34615384615384,
17870
+ "grad_norm": 0.0,
17871
+ "learning_rate": 3.8e-07,
17872
+ "loss": 1.7486,
17873
+ "mean_token_accuracy": 0.6493139892816544,
17874
+ "num_tokens": 36195783.0,
17875
+ "step": 9925
17876
+ },
17877
+ {
17878
+ "epoch": 76.38461538461539,
17879
+ "grad_norm": 0.0,
17880
+ "learning_rate": 3.5500000000000004e-07,
17881
+ "loss": 1.7019,
17882
+ "mean_token_accuracy": 0.6548255890607834,
17883
+ "num_tokens": 36214515.0,
17884
+ "step": 9930
17885
  }
17886
  ],
17887
  "logging_steps": 5,
 
17901
  "attributes": {}
17902
  }
17903
  },
17904
+ "total_flos": 1.8363737120661504e+18,
17905
  "train_batch_size": 4,
17906
  "trial_name": null,
17907
  "trial_params": null