Training in progress, step 700, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 47235968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abcb05ca92ec06fa9d499ee86e392b068c0b4a16b306c8b0b8ea0aa1596b0e2d
|
| 3 |
size 47235968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 24411220
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cbe2fca5d55fd4900aac8ab5212444f1c730addc7623246ee262e01f7108055
|
| 3 |
size 24411220
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bca3766410a42bb4ce032456de09de4a9260806e2a23e53e8723474208864451
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbad99671fe9e8fad7c9e5d1bd2f0b78776dc12ff7758703eb68552f8e997bf1
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 50,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4669,6 +4669,364 @@
|
|
| 4669 |
"eval_samples_per_second": 27.077,
|
| 4670 |
"eval_steps_per_second": 13.567,
|
| 4671 |
"step": 650
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4672 |
}
|
| 4673 |
],
|
| 4674 |
"logging_steps": 1,
|
|
@@ -4688,7 +5046,7 @@
|
|
| 4688 |
"attributes": {}
|
| 4689 |
}
|
| 4690 |
},
|
| 4691 |
-
"total_flos": 4.
|
| 4692 |
"train_batch_size": 2,
|
| 4693 |
"trial_name": null,
|
| 4694 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.6209802616988246,
|
| 5 |
"eval_steps": 50,
|
| 6 |
+
"global_step": 700,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4669 |
"eval_samples_per_second": 27.077,
|
| 4670 |
"eval_steps_per_second": 13.567,
|
| 4671 |
"step": 650
|
| 4672 |
+
},
|
| 4673 |
+
{
|
| 4674 |
+
"epoch": 0.5775116433799069,
|
| 4675 |
+
"grad_norm": 10.354761123657227,
|
| 4676 |
+
"learning_rate": 0.00019999984039399245,
|
| 4677 |
+
"loss": 0.3899,
|
| 4678 |
+
"step": 651
|
| 4679 |
+
},
|
| 4680 |
+
{
|
| 4681 |
+
"epoch": 0.5783987580394766,
|
| 4682 |
+
"grad_norm": 8.909992218017578,
|
| 4683 |
+
"learning_rate": 0.00019999983989561347,
|
| 4684 |
+
"loss": 0.398,
|
| 4685 |
+
"step": 652
|
| 4686 |
+
},
|
| 4687 |
+
{
|
| 4688 |
+
"epoch": 0.5792858726990463,
|
| 4689 |
+
"grad_norm": 7.4923906326293945,
|
| 4690 |
+
"learning_rate": 0.00019999983939645758,
|
| 4691 |
+
"loss": 0.3956,
|
| 4692 |
+
"step": 653
|
| 4693 |
+
},
|
| 4694 |
+
{
|
| 4695 |
+
"epoch": 0.5801729873586161,
|
| 4696 |
+
"grad_norm": 5.873891353607178,
|
| 4697 |
+
"learning_rate": 0.00019999983889652483,
|
| 4698 |
+
"loss": 0.368,
|
| 4699 |
+
"step": 654
|
| 4700 |
+
},
|
| 4701 |
+
{
|
| 4702 |
+
"epoch": 0.5810601020181858,
|
| 4703 |
+
"grad_norm": 7.360598087310791,
|
| 4704 |
+
"learning_rate": 0.00019999983839581517,
|
| 4705 |
+
"loss": 0.4163,
|
| 4706 |
+
"step": 655
|
| 4707 |
+
},
|
| 4708 |
+
{
|
| 4709 |
+
"epoch": 0.5819472166777556,
|
| 4710 |
+
"grad_norm": 10.175065994262695,
|
| 4711 |
+
"learning_rate": 0.0001999998378943286,
|
| 4712 |
+
"loss": 0.4937,
|
| 4713 |
+
"step": 656
|
| 4714 |
+
},
|
| 4715 |
+
{
|
| 4716 |
+
"epoch": 0.5828343313373253,
|
| 4717 |
+
"grad_norm": 10.308756828308105,
|
| 4718 |
+
"learning_rate": 0.00019999983739206516,
|
| 4719 |
+
"loss": 0.4576,
|
| 4720 |
+
"step": 657
|
| 4721 |
+
},
|
| 4722 |
+
{
|
| 4723 |
+
"epoch": 0.5837214459968951,
|
| 4724 |
+
"grad_norm": 6.791981220245361,
|
| 4725 |
+
"learning_rate": 0.0001999998368890248,
|
| 4726 |
+
"loss": 0.455,
|
| 4727 |
+
"step": 658
|
| 4728 |
+
},
|
| 4729 |
+
{
|
| 4730 |
+
"epoch": 0.5846085606564648,
|
| 4731 |
+
"grad_norm": 9.735336303710938,
|
| 4732 |
+
"learning_rate": 0.00019999983638520753,
|
| 4733 |
+
"loss": 0.5393,
|
| 4734 |
+
"step": 659
|
| 4735 |
+
},
|
| 4736 |
+
{
|
| 4737 |
+
"epoch": 0.5854956753160346,
|
| 4738 |
+
"grad_norm": 7.667912006378174,
|
| 4739 |
+
"learning_rate": 0.0001999998358806134,
|
| 4740 |
+
"loss": 0.5499,
|
| 4741 |
+
"step": 660
|
| 4742 |
+
},
|
| 4743 |
+
{
|
| 4744 |
+
"epoch": 0.5863827899756043,
|
| 4745 |
+
"grad_norm": 9.633892059326172,
|
| 4746 |
+
"learning_rate": 0.00019999983537524236,
|
| 4747 |
+
"loss": 0.4529,
|
| 4748 |
+
"step": 661
|
| 4749 |
+
},
|
| 4750 |
+
{
|
| 4751 |
+
"epoch": 0.5872699046351741,
|
| 4752 |
+
"grad_norm": 6.23357629776001,
|
| 4753 |
+
"learning_rate": 0.00019999983486909445,
|
| 4754 |
+
"loss": 0.414,
|
| 4755 |
+
"step": 662
|
| 4756 |
+
},
|
| 4757 |
+
{
|
| 4758 |
+
"epoch": 0.5881570192947438,
|
| 4759 |
+
"grad_norm": 11.319755554199219,
|
| 4760 |
+
"learning_rate": 0.0001999998343621696,
|
| 4761 |
+
"loss": 0.5764,
|
| 4762 |
+
"step": 663
|
| 4763 |
+
},
|
| 4764 |
+
{
|
| 4765 |
+
"epoch": 0.5890441339543135,
|
| 4766 |
+
"grad_norm": 13.453693389892578,
|
| 4767 |
+
"learning_rate": 0.0001999998338544679,
|
| 4768 |
+
"loss": 0.6609,
|
| 4769 |
+
"step": 664
|
| 4770 |
+
},
|
| 4771 |
+
{
|
| 4772 |
+
"epoch": 0.5899312486138834,
|
| 4773 |
+
"grad_norm": 18.999950408935547,
|
| 4774 |
+
"learning_rate": 0.00019999983334598927,
|
| 4775 |
+
"loss": 0.5154,
|
| 4776 |
+
"step": 665
|
| 4777 |
+
},
|
| 4778 |
+
{
|
| 4779 |
+
"epoch": 0.590818363273453,
|
| 4780 |
+
"grad_norm": 10.19577693939209,
|
| 4781 |
+
"learning_rate": 0.00019999983283673376,
|
| 4782 |
+
"loss": 0.5638,
|
| 4783 |
+
"step": 666
|
| 4784 |
+
},
|
| 4785 |
+
{
|
| 4786 |
+
"epoch": 0.5917054779330229,
|
| 4787 |
+
"grad_norm": 15.789012908935547,
|
| 4788 |
+
"learning_rate": 0.00019999983232670134,
|
| 4789 |
+
"loss": 0.5514,
|
| 4790 |
+
"step": 667
|
| 4791 |
+
},
|
| 4792 |
+
{
|
| 4793 |
+
"epoch": 0.5925925925925926,
|
| 4794 |
+
"grad_norm": 7.380650997161865,
|
| 4795 |
+
"learning_rate": 0.00019999983181589204,
|
| 4796 |
+
"loss": 0.5238,
|
| 4797 |
+
"step": 668
|
| 4798 |
+
},
|
| 4799 |
+
{
|
| 4800 |
+
"epoch": 0.5934797072521624,
|
| 4801 |
+
"grad_norm": 12.285819053649902,
|
| 4802 |
+
"learning_rate": 0.00019999983130430585,
|
| 4803 |
+
"loss": 0.6751,
|
| 4804 |
+
"step": 669
|
| 4805 |
+
},
|
| 4806 |
+
{
|
| 4807 |
+
"epoch": 0.5943668219117321,
|
| 4808 |
+
"grad_norm": 10.537954330444336,
|
| 4809 |
+
"learning_rate": 0.00019999983079194272,
|
| 4810 |
+
"loss": 0.5944,
|
| 4811 |
+
"step": 670
|
| 4812 |
+
},
|
| 4813 |
+
{
|
| 4814 |
+
"epoch": 0.5952539365713019,
|
| 4815 |
+
"grad_norm": 7.963987827301025,
|
| 4816 |
+
"learning_rate": 0.00019999983027880274,
|
| 4817 |
+
"loss": 0.5648,
|
| 4818 |
+
"step": 671
|
| 4819 |
+
},
|
| 4820 |
+
{
|
| 4821 |
+
"epoch": 0.5961410512308716,
|
| 4822 |
+
"grad_norm": 13.050141334533691,
|
| 4823 |
+
"learning_rate": 0.00019999982976488586,
|
| 4824 |
+
"loss": 0.8043,
|
| 4825 |
+
"step": 672
|
| 4826 |
+
},
|
| 4827 |
+
{
|
| 4828 |
+
"epoch": 0.5970281658904414,
|
| 4829 |
+
"grad_norm": 17.245141983032227,
|
| 4830 |
+
"learning_rate": 0.00019999982925019208,
|
| 4831 |
+
"loss": 0.5685,
|
| 4832 |
+
"step": 673
|
| 4833 |
+
},
|
| 4834 |
+
{
|
| 4835 |
+
"epoch": 0.5979152805500111,
|
| 4836 |
+
"grad_norm": 9.825310707092285,
|
| 4837 |
+
"learning_rate": 0.00019999982873472139,
|
| 4838 |
+
"loss": 0.403,
|
| 4839 |
+
"step": 674
|
| 4840 |
+
},
|
| 4841 |
+
{
|
| 4842 |
+
"epoch": 0.5988023952095808,
|
| 4843 |
+
"grad_norm": 10.039129257202148,
|
| 4844 |
+
"learning_rate": 0.00019999982821847383,
|
| 4845 |
+
"loss": 0.6127,
|
| 4846 |
+
"step": 675
|
| 4847 |
+
},
|
| 4848 |
+
{
|
| 4849 |
+
"epoch": 0.5996895098691506,
|
| 4850 |
+
"grad_norm": 7.191605091094971,
|
| 4851 |
+
"learning_rate": 0.00019999982770144937,
|
| 4852 |
+
"loss": 0.4647,
|
| 4853 |
+
"step": 676
|
| 4854 |
+
},
|
| 4855 |
+
{
|
| 4856 |
+
"epoch": 0.6005766245287203,
|
| 4857 |
+
"grad_norm": 6.5292181968688965,
|
| 4858 |
+
"learning_rate": 0.000199999827183648,
|
| 4859 |
+
"loss": 0.5513,
|
| 4860 |
+
"step": 677
|
| 4861 |
+
},
|
| 4862 |
+
{
|
| 4863 |
+
"epoch": 0.6014637391882901,
|
| 4864 |
+
"grad_norm": 7.8640666007995605,
|
| 4865 |
+
"learning_rate": 0.00019999982666506972,
|
| 4866 |
+
"loss": 0.3423,
|
| 4867 |
+
"step": 678
|
| 4868 |
+
},
|
| 4869 |
+
{
|
| 4870 |
+
"epoch": 0.6023508538478598,
|
| 4871 |
+
"grad_norm": 36.11716842651367,
|
| 4872 |
+
"learning_rate": 0.00019999982614571458,
|
| 4873 |
+
"loss": 0.5195,
|
| 4874 |
+
"step": 679
|
| 4875 |
+
},
|
| 4876 |
+
{
|
| 4877 |
+
"epoch": 0.6032379685074296,
|
| 4878 |
+
"grad_norm": 11.68887996673584,
|
| 4879 |
+
"learning_rate": 0.00019999982562558252,
|
| 4880 |
+
"loss": 0.4774,
|
| 4881 |
+
"step": 680
|
| 4882 |
+
},
|
| 4883 |
+
{
|
| 4884 |
+
"epoch": 0.6041250831669993,
|
| 4885 |
+
"grad_norm": 11.403864860534668,
|
| 4886 |
+
"learning_rate": 0.00019999982510467357,
|
| 4887 |
+
"loss": 0.7514,
|
| 4888 |
+
"step": 681
|
| 4889 |
+
},
|
| 4890 |
+
{
|
| 4891 |
+
"epoch": 0.6050121978265691,
|
| 4892 |
+
"grad_norm": 6.176864147186279,
|
| 4893 |
+
"learning_rate": 0.00019999982458298774,
|
| 4894 |
+
"loss": 0.4794,
|
| 4895 |
+
"step": 682
|
| 4896 |
+
},
|
| 4897 |
+
{
|
| 4898 |
+
"epoch": 0.6058993124861388,
|
| 4899 |
+
"grad_norm": 9.716928482055664,
|
| 4900 |
+
"learning_rate": 0.000199999824060525,
|
| 4901 |
+
"loss": 0.5677,
|
| 4902 |
+
"step": 683
|
| 4903 |
+
},
|
| 4904 |
+
{
|
| 4905 |
+
"epoch": 0.6067864271457086,
|
| 4906 |
+
"grad_norm": 6.658013343811035,
|
| 4907 |
+
"learning_rate": 0.00019999982353728537,
|
| 4908 |
+
"loss": 0.4459,
|
| 4909 |
+
"step": 684
|
| 4910 |
+
},
|
| 4911 |
+
{
|
| 4912 |
+
"epoch": 0.6076735418052783,
|
| 4913 |
+
"grad_norm": 4.054922580718994,
|
| 4914 |
+
"learning_rate": 0.00019999982301326886,
|
| 4915 |
+
"loss": 0.296,
|
| 4916 |
+
"step": 685
|
| 4917 |
+
},
|
| 4918 |
+
{
|
| 4919 |
+
"epoch": 0.608560656464848,
|
| 4920 |
+
"grad_norm": 7.6882710456848145,
|
| 4921 |
+
"learning_rate": 0.0001999998224884754,
|
| 4922 |
+
"loss": 0.5358,
|
| 4923 |
+
"step": 686
|
| 4924 |
+
},
|
| 4925 |
+
{
|
| 4926 |
+
"epoch": 0.6094477711244178,
|
| 4927 |
+
"grad_norm": 10.745440483093262,
|
| 4928 |
+
"learning_rate": 0.00019999982196290512,
|
| 4929 |
+
"loss": 0.4966,
|
| 4930 |
+
"step": 687
|
| 4931 |
+
},
|
| 4932 |
+
{
|
| 4933 |
+
"epoch": 0.6103348857839875,
|
| 4934 |
+
"grad_norm": 6.231947422027588,
|
| 4935 |
+
"learning_rate": 0.0001999998214365579,
|
| 4936 |
+
"loss": 0.4244,
|
| 4937 |
+
"step": 688
|
| 4938 |
+
},
|
| 4939 |
+
{
|
| 4940 |
+
"epoch": 0.6112220004435573,
|
| 4941 |
+
"grad_norm": 6.25615930557251,
|
| 4942 |
+
"learning_rate": 0.0001999998209094338,
|
| 4943 |
+
"loss": 0.3549,
|
| 4944 |
+
"step": 689
|
| 4945 |
+
},
|
| 4946 |
+
{
|
| 4947 |
+
"epoch": 0.612109115103127,
|
| 4948 |
+
"grad_norm": 5.80368185043335,
|
| 4949 |
+
"learning_rate": 0.00019999982038153277,
|
| 4950 |
+
"loss": 0.4522,
|
| 4951 |
+
"step": 690
|
| 4952 |
+
},
|
| 4953 |
+
{
|
| 4954 |
+
"epoch": 0.6129962297626969,
|
| 4955 |
+
"grad_norm": 8.078680038452148,
|
| 4956 |
+
"learning_rate": 0.0001999998198528549,
|
| 4957 |
+
"loss": 0.5348,
|
| 4958 |
+
"step": 691
|
| 4959 |
+
},
|
| 4960 |
+
{
|
| 4961 |
+
"epoch": 0.6138833444222666,
|
| 4962 |
+
"grad_norm": 8.55868148803711,
|
| 4963 |
+
"learning_rate": 0.00019999981932340008,
|
| 4964 |
+
"loss": 0.5859,
|
| 4965 |
+
"step": 692
|
| 4966 |
+
},
|
| 4967 |
+
{
|
| 4968 |
+
"epoch": 0.6147704590818364,
|
| 4969 |
+
"grad_norm": 10.13481616973877,
|
| 4970 |
+
"learning_rate": 0.00019999981879316838,
|
| 4971 |
+
"loss": 0.5749,
|
| 4972 |
+
"step": 693
|
| 4973 |
+
},
|
| 4974 |
+
{
|
| 4975 |
+
"epoch": 0.6156575737414061,
|
| 4976 |
+
"grad_norm": 9.58573055267334,
|
| 4977 |
+
"learning_rate": 0.00019999981826215982,
|
| 4978 |
+
"loss": 0.4413,
|
| 4979 |
+
"step": 694
|
| 4980 |
+
},
|
| 4981 |
+
{
|
| 4982 |
+
"epoch": 0.6165446884009759,
|
| 4983 |
+
"grad_norm": 10.3484525680542,
|
| 4984 |
+
"learning_rate": 0.00019999981773037432,
|
| 4985 |
+
"loss": 0.3737,
|
| 4986 |
+
"step": 695
|
| 4987 |
+
},
|
| 4988 |
+
{
|
| 4989 |
+
"epoch": 0.6174318030605456,
|
| 4990 |
+
"grad_norm": 5.4196672439575195,
|
| 4991 |
+
"learning_rate": 0.00019999981719781196,
|
| 4992 |
+
"loss": 0.3963,
|
| 4993 |
+
"step": 696
|
| 4994 |
+
},
|
| 4995 |
+
{
|
| 4996 |
+
"epoch": 0.6183189177201154,
|
| 4997 |
+
"grad_norm": 11.233104705810547,
|
| 4998 |
+
"learning_rate": 0.00019999981666447267,
|
| 4999 |
+
"loss": 0.6738,
|
| 5000 |
+
"step": 697
|
| 5001 |
+
},
|
| 5002 |
+
{
|
| 5003 |
+
"epoch": 0.6192060323796851,
|
| 5004 |
+
"grad_norm": 7.30211067199707,
|
| 5005 |
+
"learning_rate": 0.0001999998161303565,
|
| 5006 |
+
"loss": 0.4252,
|
| 5007 |
+
"step": 698
|
| 5008 |
+
},
|
| 5009 |
+
{
|
| 5010 |
+
"epoch": 0.6200931470392548,
|
| 5011 |
+
"grad_norm": 12.224394798278809,
|
| 5012 |
+
"learning_rate": 0.00019999981559546345,
|
| 5013 |
+
"loss": 0.3892,
|
| 5014 |
+
"step": 699
|
| 5015 |
+
},
|
| 5016 |
+
{
|
| 5017 |
+
"epoch": 0.6209802616988246,
|
| 5018 |
+
"grad_norm": 8.769978523254395,
|
| 5019 |
+
"learning_rate": 0.0001999998150597935,
|
| 5020 |
+
"loss": 0.4835,
|
| 5021 |
+
"step": 700
|
| 5022 |
+
},
|
| 5023 |
+
{
|
| 5024 |
+
"epoch": 0.6209802616988246,
|
| 5025 |
+
"eval_loss": 0.4917793571949005,
|
| 5026 |
+
"eval_runtime": 17.5512,
|
| 5027 |
+
"eval_samples_per_second": 27.064,
|
| 5028 |
+
"eval_steps_per_second": 13.56,
|
| 5029 |
+
"step": 700
|
| 5030 |
}
|
| 5031 |
],
|
| 5032 |
"logging_steps": 1,
|
|
|
|
| 5046 |
"attributes": {}
|
| 5047 |
}
|
| 5048 |
},
|
| 5049 |
+
"total_flos": 4.5767536410624e+16,
|
| 5050 |
"train_batch_size": 2,
|
| 5051 |
"trial_name": null,
|
| 5052 |
"trial_params": null
|