Training in progress, step 700, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a91b27e568f3b3dc70a1d8c75412159ad33bd5aba0e7b22f6619b8f582318aa8
 size 47235968

 version https://git-lfs.github.com/spec/v1
+oid sha256:abcb05ca92ec06fa9d499ee86e392b068c0b4a16b306c8b0b8ea0aa1596b0e2d
 size 47235968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10ffc27ba58cde7e8c21991475e2219e2eb942aa9d560b90a5a52bc8e4ed68be
 size 24411220

 version https://git-lfs.github.com/spec/v1
+oid sha256:7cbe2fca5d55fd4900aac8ab5212444f1c730addc7623246ee262e01f7108055
 size 24411220

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbbe9bba5389ba64ff06ef6b3cdf88b7c65f4059eaa91f597c1c9ca6154ecf7b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bca3766410a42bb4ce032456de09de4a9260806e2a23e53e8723474208864451
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fea967412bd83a775b4d93f7216aebd2bc7764ecddeb461a0505a72239f0d159
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbad99671fe9e8fad7c9e5d1bd2f0b78776dc12ff7758703eb68552f8e997bf1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5766245287203371,
   "eval_steps": 50,
-  "global_step": 650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4669,6 +4669,364 @@
       "eval_samples_per_second": 27.077,
       "eval_steps_per_second": 13.567,
       "step": 650
     }
   ],
   "logging_steps": 1,
@@ -4688,7 +5046,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.2498426667008e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6209802616988246,
   "eval_steps": 50,
+  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 27.077,
       "eval_steps_per_second": 13.567,
       "step": 650
+    },
+    {
+      "epoch": 0.5775116433799069,
+      "grad_norm": 10.354761123657227,
+      "learning_rate": 0.00019999984039399245,
+      "loss": 0.3899,
+      "step": 651
+    },
+    {
+      "epoch": 0.5783987580394766,
+      "grad_norm": 8.909992218017578,
+      "learning_rate": 0.00019999983989561347,
+      "loss": 0.398,
+      "step": 652
+    },
+    {
+      "epoch": 0.5792858726990463,
+      "grad_norm": 7.4923906326293945,
+      "learning_rate": 0.00019999983939645758,
+      "loss": 0.3956,
+      "step": 653
+    },
+    {
+      "epoch": 0.5801729873586161,
+      "grad_norm": 5.873891353607178,
+      "learning_rate": 0.00019999983889652483,
+      "loss": 0.368,
+      "step": 654
+    },
+    {
+      "epoch": 0.5810601020181858,
+      "grad_norm": 7.360598087310791,
+      "learning_rate": 0.00019999983839581517,
+      "loss": 0.4163,
+      "step": 655
+    },
+    {
+      "epoch": 0.5819472166777556,
+      "grad_norm": 10.175065994262695,
+      "learning_rate": 0.0001999998378943286,
+      "loss": 0.4937,
+      "step": 656
+    },
+    {
+      "epoch": 0.5828343313373253,
+      "grad_norm": 10.308756828308105,
+      "learning_rate": 0.00019999983739206516,
+      "loss": 0.4576,
+      "step": 657
+    },
+    {
+      "epoch": 0.5837214459968951,
+      "grad_norm": 6.791981220245361,
+      "learning_rate": 0.0001999998368890248,
+      "loss": 0.455,
+      "step": 658
+    },
+    {
+      "epoch": 0.5846085606564648,
+      "grad_norm": 9.735336303710938,
+      "learning_rate": 0.00019999983638520753,
+      "loss": 0.5393,
+      "step": 659
+    },
+    {
+      "epoch": 0.5854956753160346,
+      "grad_norm": 7.667912006378174,
+      "learning_rate": 0.0001999998358806134,
+      "loss": 0.5499,
+      "step": 660
+    },
+    {
+      "epoch": 0.5863827899756043,
+      "grad_norm": 9.633892059326172,
+      "learning_rate": 0.00019999983537524236,
+      "loss": 0.4529,
+      "step": 661
+    },
+    {
+      "epoch": 0.5872699046351741,
+      "grad_norm": 6.23357629776001,
+      "learning_rate": 0.00019999983486909445,
+      "loss": 0.414,
+      "step": 662
+    },
+    {
+      "epoch": 0.5881570192947438,
+      "grad_norm": 11.319755554199219,
+      "learning_rate": 0.0001999998343621696,
+      "loss": 0.5764,
+      "step": 663
+    },
+    {
+      "epoch": 0.5890441339543135,
+      "grad_norm": 13.453693389892578,
+      "learning_rate": 0.0001999998338544679,
+      "loss": 0.6609,
+      "step": 664
+    },
+    {
+      "epoch": 0.5899312486138834,
+      "grad_norm": 18.999950408935547,
+      "learning_rate": 0.00019999983334598927,
+      "loss": 0.5154,
+      "step": 665
+    },
+    {
+      "epoch": 0.590818363273453,
+      "grad_norm": 10.19577693939209,
+      "learning_rate": 0.00019999983283673376,
+      "loss": 0.5638,
+      "step": 666
+    },
+    {
+      "epoch": 0.5917054779330229,
+      "grad_norm": 15.789012908935547,
+      "learning_rate": 0.00019999983232670134,
+      "loss": 0.5514,
+      "step": 667
+    },
+    {
+      "epoch": 0.5925925925925926,
+      "grad_norm": 7.380650997161865,
+      "learning_rate": 0.00019999983181589204,
+      "loss": 0.5238,
+      "step": 668
+    },
+    {
+      "epoch": 0.5934797072521624,
+      "grad_norm": 12.285819053649902,
+      "learning_rate": 0.00019999983130430585,
+      "loss": 0.6751,
+      "step": 669
+    },
+    {
+      "epoch": 0.5943668219117321,
+      "grad_norm": 10.537954330444336,
+      "learning_rate": 0.00019999983079194272,
+      "loss": 0.5944,
+      "step": 670
+    },
+    {
+      "epoch": 0.5952539365713019,
+      "grad_norm": 7.963987827301025,
+      "learning_rate": 0.00019999983027880274,
+      "loss": 0.5648,
+      "step": 671
+    },
+    {
+      "epoch": 0.5961410512308716,
+      "grad_norm": 13.050141334533691,
+      "learning_rate": 0.00019999982976488586,
+      "loss": 0.8043,
+      "step": 672
+    },
+    {
+      "epoch": 0.5970281658904414,
+      "grad_norm": 17.245141983032227,
+      "learning_rate": 0.00019999982925019208,
+      "loss": 0.5685,
+      "step": 673
+    },
+    {
+      "epoch": 0.5979152805500111,
+      "grad_norm": 9.825310707092285,
+      "learning_rate": 0.00019999982873472139,
+      "loss": 0.403,
+      "step": 674
+    },
+    {
+      "epoch": 0.5988023952095808,
+      "grad_norm": 10.039129257202148,
+      "learning_rate": 0.00019999982821847383,
+      "loss": 0.6127,
+      "step": 675
+    },
+    {
+      "epoch": 0.5996895098691506,
+      "grad_norm": 7.191605091094971,
+      "learning_rate": 0.00019999982770144937,
+      "loss": 0.4647,
+      "step": 676
+    },
+    {
+      "epoch": 0.6005766245287203,
+      "grad_norm": 6.5292181968688965,
+      "learning_rate": 0.000199999827183648,
+      "loss": 0.5513,
+      "step": 677
+    },
+    {
+      "epoch": 0.6014637391882901,
+      "grad_norm": 7.8640666007995605,
+      "learning_rate": 0.00019999982666506972,
+      "loss": 0.3423,
+      "step": 678
+    },
+    {
+      "epoch": 0.6023508538478598,
+      "grad_norm": 36.11716842651367,
+      "learning_rate": 0.00019999982614571458,
+      "loss": 0.5195,
+      "step": 679
+    },
+    {
+      "epoch": 0.6032379685074296,
+      "grad_norm": 11.68887996673584,
+      "learning_rate": 0.00019999982562558252,
+      "loss": 0.4774,
+      "step": 680
+    },
+    {
+      "epoch": 0.6041250831669993,
+      "grad_norm": 11.403864860534668,
+      "learning_rate": 0.00019999982510467357,
+      "loss": 0.7514,
+      "step": 681
+    },
+    {
+      "epoch": 0.6050121978265691,
+      "grad_norm": 6.176864147186279,
+      "learning_rate": 0.00019999982458298774,
+      "loss": 0.4794,
+      "step": 682
+    },
+    {
+      "epoch": 0.6058993124861388,
+      "grad_norm": 9.716928482055664,
+      "learning_rate": 0.000199999824060525,
+      "loss": 0.5677,
+      "step": 683
+    },
+    {
+      "epoch": 0.6067864271457086,
+      "grad_norm": 6.658013343811035,
+      "learning_rate": 0.00019999982353728537,
+      "loss": 0.4459,
+      "step": 684
+    },
+    {
+      "epoch": 0.6076735418052783,
+      "grad_norm": 4.054922580718994,
+      "learning_rate": 0.00019999982301326886,
+      "loss": 0.296,
+      "step": 685
+    },
+    {
+      "epoch": 0.608560656464848,
+      "grad_norm": 7.6882710456848145,
+      "learning_rate": 0.0001999998224884754,
+      "loss": 0.5358,
+      "step": 686
+    },
+    {
+      "epoch": 0.6094477711244178,
+      "grad_norm": 10.745440483093262,
+      "learning_rate": 0.00019999982196290512,
+      "loss": 0.4966,
+      "step": 687
+    },
+    {
+      "epoch": 0.6103348857839875,
+      "grad_norm": 6.231947422027588,
+      "learning_rate": 0.0001999998214365579,
+      "loss": 0.4244,
+      "step": 688
+    },
+    {
+      "epoch": 0.6112220004435573,
+      "grad_norm": 6.25615930557251,
+      "learning_rate": 0.0001999998209094338,
+      "loss": 0.3549,
+      "step": 689
+    },
+    {
+      "epoch": 0.612109115103127,
+      "grad_norm": 5.80368185043335,
+      "learning_rate": 0.00019999982038153277,
+      "loss": 0.4522,
+      "step": 690
+    },
+    {
+      "epoch": 0.6129962297626969,
+      "grad_norm": 8.078680038452148,
+      "learning_rate": 0.0001999998198528549,
+      "loss": 0.5348,
+      "step": 691
+    },
+    {
+      "epoch": 0.6138833444222666,
+      "grad_norm": 8.55868148803711,
+      "learning_rate": 0.00019999981932340008,
+      "loss": 0.5859,
+      "step": 692
+    },
+    {
+      "epoch": 0.6147704590818364,
+      "grad_norm": 10.13481616973877,
+      "learning_rate": 0.00019999981879316838,
+      "loss": 0.5749,
+      "step": 693
+    },
+    {
+      "epoch": 0.6156575737414061,
+      "grad_norm": 9.58573055267334,
+      "learning_rate": 0.00019999981826215982,
+      "loss": 0.4413,
+      "step": 694
+    },
+    {
+      "epoch": 0.6165446884009759,
+      "grad_norm": 10.3484525680542,
+      "learning_rate": 0.00019999981773037432,
+      "loss": 0.3737,
+      "step": 695
+    },
+    {
+      "epoch": 0.6174318030605456,
+      "grad_norm": 5.4196672439575195,
+      "learning_rate": 0.00019999981719781196,
+      "loss": 0.3963,
+      "step": 696
+    },
+    {
+      "epoch": 0.6183189177201154,
+      "grad_norm": 11.233104705810547,
+      "learning_rate": 0.00019999981666447267,
+      "loss": 0.6738,
+      "step": 697
+    },
+    {
+      "epoch": 0.6192060323796851,
+      "grad_norm": 7.30211067199707,
+      "learning_rate": 0.0001999998161303565,
+      "loss": 0.4252,
+      "step": 698
+    },
+    {
+      "epoch": 0.6200931470392548,
+      "grad_norm": 12.224394798278809,
+      "learning_rate": 0.00019999981559546345,
+      "loss": 0.3892,
+      "step": 699
+    },
+    {
+      "epoch": 0.6209802616988246,
+      "grad_norm": 8.769978523254395,
+      "learning_rate": 0.0001999998150597935,
+      "loss": 0.4835,
+      "step": 700
+    },
+    {
+      "epoch": 0.6209802616988246,
+      "eval_loss": 0.4917793571949005,
+      "eval_runtime": 17.5512,
+      "eval_samples_per_second": 27.064,
+      "eval_steps_per_second": 13.56,
+      "step": 700
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.5767536410624e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null