Training in progress, step 700, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 59933632
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:418bcebcdfb7bb46077a0a8d3c77de02f80d94b6485b8050123d8dc674da1fd0
|
| 3 |
size 59933632
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 31823460
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27cac1d8665684f8fb9ae47b03b08212872a04f9d41c081c6f66d7ddc18d1571
|
| 3 |
size 31823460
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:401e8bcffbfaba4d317e2a89edb4f1073b7d8a172738af37a2a13688139c01d3
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9d8a72170922eb22149c61e4763d188f7c858219b1f34e4777fff2bb46fb290
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4557,6 +4557,356 @@
|
|
| 4557 |
"learning_rate": 9.999999970901543e-05,
|
| 4558 |
"loss": 3.4294,
|
| 4559 |
"step": 650
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4560 |
}
|
| 4561 |
],
|
| 4562 |
"logging_steps": 1,
|
|
@@ -4576,7 +4926,7 @@
|
|
| 4576 |
"attributes": {}
|
| 4577 |
}
|
| 4578 |
},
|
| 4579 |
-
"total_flos": 1.
|
| 4580 |
"train_batch_size": 4,
|
| 4581 |
"trial_name": null,
|
| 4582 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.03726923025729079,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 700,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4557 |
"learning_rate": 9.999999970901543e-05,
|
| 4558 |
"loss": 3.4294,
|
| 4559 |
"step": 650
|
| 4560 |
+
},
|
| 4561 |
+
{
|
| 4562 |
+
"epoch": 0.03466038413928044,
|
| 4563 |
+
"grad_norm": 0.3088901937007904,
|
| 4564 |
+
"learning_rate": 9.999999970811245e-05,
|
| 4565 |
+
"loss": 2.7029,
|
| 4566 |
+
"step": 651
|
| 4567 |
+
},
|
| 4568 |
+
{
|
| 4569 |
+
"epoch": 0.034713625896790855,
|
| 4570 |
+
"grad_norm": 0.41135066747665405,
|
| 4571 |
+
"learning_rate": 9.999999970720808e-05,
|
| 4572 |
+
"loss": 2.7839,
|
| 4573 |
+
"step": 652
|
| 4574 |
+
},
|
| 4575 |
+
{
|
| 4576 |
+
"epoch": 0.034766867654301266,
|
| 4577 |
+
"grad_norm": 0.42486026883125305,
|
| 4578 |
+
"learning_rate": 9.999999970630231e-05,
|
| 4579 |
+
"loss": 2.8996,
|
| 4580 |
+
"step": 653
|
| 4581 |
+
},
|
| 4582 |
+
{
|
| 4583 |
+
"epoch": 0.034820109411811684,
|
| 4584 |
+
"grad_norm": 0.4523933529853821,
|
| 4585 |
+
"learning_rate": 9.999999970539513e-05,
|
| 4586 |
+
"loss": 2.8055,
|
| 4587 |
+
"step": 654
|
| 4588 |
+
},
|
| 4589 |
+
{
|
| 4590 |
+
"epoch": 0.0348733511693221,
|
| 4591 |
+
"grad_norm": 0.4108598828315735,
|
| 4592 |
+
"learning_rate": 9.999999970448655e-05,
|
| 4593 |
+
"loss": 2.7709,
|
| 4594 |
+
"step": 655
|
| 4595 |
+
},
|
| 4596 |
+
{
|
| 4597 |
+
"epoch": 0.03492659292683251,
|
| 4598 |
+
"grad_norm": 0.38042157888412476,
|
| 4599 |
+
"learning_rate": 9.999999970357658e-05,
|
| 4600 |
+
"loss": 2.8717,
|
| 4601 |
+
"step": 656
|
| 4602 |
+
},
|
| 4603 |
+
{
|
| 4604 |
+
"epoch": 0.03497983468434293,
|
| 4605 |
+
"grad_norm": 0.37943702936172485,
|
| 4606 |
+
"learning_rate": 9.999999970266523e-05,
|
| 4607 |
+
"loss": 2.6056,
|
| 4608 |
+
"step": 657
|
| 4609 |
+
},
|
| 4610 |
+
{
|
| 4611 |
+
"epoch": 0.03503307644185335,
|
| 4612 |
+
"grad_norm": 0.3546702265739441,
|
| 4613 |
+
"learning_rate": 9.999999970175244e-05,
|
| 4614 |
+
"loss": 2.8449,
|
| 4615 |
+
"step": 658
|
| 4616 |
+
},
|
| 4617 |
+
{
|
| 4618 |
+
"epoch": 0.03508631819936376,
|
| 4619 |
+
"grad_norm": 0.37441372871398926,
|
| 4620 |
+
"learning_rate": 9.999999970083828e-05,
|
| 4621 |
+
"loss": 2.7897,
|
| 4622 |
+
"step": 659
|
| 4623 |
+
},
|
| 4624 |
+
{
|
| 4625 |
+
"epoch": 0.03513955995687418,
|
| 4626 |
+
"grad_norm": 0.3513989746570587,
|
| 4627 |
+
"learning_rate": 9.999999969992271e-05,
|
| 4628 |
+
"loss": 2.7763,
|
| 4629 |
+
"step": 660
|
| 4630 |
+
},
|
| 4631 |
+
{
|
| 4632 |
+
"epoch": 0.03519280171438459,
|
| 4633 |
+
"grad_norm": 0.35834649205207825,
|
| 4634 |
+
"learning_rate": 9.999999969900575e-05,
|
| 4635 |
+
"loss": 2.7976,
|
| 4636 |
+
"step": 661
|
| 4637 |
+
},
|
| 4638 |
+
{
|
| 4639 |
+
"epoch": 0.03524604347189501,
|
| 4640 |
+
"grad_norm": 0.37155264616012573,
|
| 4641 |
+
"learning_rate": 9.999999969808738e-05,
|
| 4642 |
+
"loss": 2.8712,
|
| 4643 |
+
"step": 662
|
| 4644 |
+
},
|
| 4645 |
+
{
|
| 4646 |
+
"epoch": 0.035299285229405425,
|
| 4647 |
+
"grad_norm": 0.3758937120437622,
|
| 4648 |
+
"learning_rate": 9.999999969716762e-05,
|
| 4649 |
+
"loss": 2.7646,
|
| 4650 |
+
"step": 663
|
| 4651 |
+
},
|
| 4652 |
+
{
|
| 4653 |
+
"epoch": 0.035352526986915836,
|
| 4654 |
+
"grad_norm": 0.3764578700065613,
|
| 4655 |
+
"learning_rate": 9.999999969624645e-05,
|
| 4656 |
+
"loss": 2.8118,
|
| 4657 |
+
"step": 664
|
| 4658 |
+
},
|
| 4659 |
+
{
|
| 4660 |
+
"epoch": 0.035405768744426254,
|
| 4661 |
+
"grad_norm": 0.380533903837204,
|
| 4662 |
+
"learning_rate": 9.99999996953239e-05,
|
| 4663 |
+
"loss": 2.9565,
|
| 4664 |
+
"step": 665
|
| 4665 |
+
},
|
| 4666 |
+
{
|
| 4667 |
+
"epoch": 0.03545901050193667,
|
| 4668 |
+
"grad_norm": 0.36998698115348816,
|
| 4669 |
+
"learning_rate": 9.999999969439992e-05,
|
| 4670 |
+
"loss": 2.783,
|
| 4671 |
+
"step": 666
|
| 4672 |
+
},
|
| 4673 |
+
{
|
| 4674 |
+
"epoch": 0.03551225225944708,
|
| 4675 |
+
"grad_norm": 0.399178147315979,
|
| 4676 |
+
"learning_rate": 9.999999969347458e-05,
|
| 4677 |
+
"loss": 2.9048,
|
| 4678 |
+
"step": 667
|
| 4679 |
+
},
|
| 4680 |
+
{
|
| 4681 |
+
"epoch": 0.0355654940169575,
|
| 4682 |
+
"grad_norm": 0.40703439712524414,
|
| 4683 |
+
"learning_rate": 9.999999969254782e-05,
|
| 4684 |
+
"loss": 2.8838,
|
| 4685 |
+
"step": 668
|
| 4686 |
+
},
|
| 4687 |
+
{
|
| 4688 |
+
"epoch": 0.03561873577446791,
|
| 4689 |
+
"grad_norm": 0.39457255601882935,
|
| 4690 |
+
"learning_rate": 9.999999969161966e-05,
|
| 4691 |
+
"loss": 2.6732,
|
| 4692 |
+
"step": 669
|
| 4693 |
+
},
|
| 4694 |
+
{
|
| 4695 |
+
"epoch": 0.03567197753197833,
|
| 4696 |
+
"grad_norm": 0.4186328053474426,
|
| 4697 |
+
"learning_rate": 9.999999969069011e-05,
|
| 4698 |
+
"loss": 2.8089,
|
| 4699 |
+
"step": 670
|
| 4700 |
+
},
|
| 4701 |
+
{
|
| 4702 |
+
"epoch": 0.03572521928948875,
|
| 4703 |
+
"grad_norm": 0.4049818813800812,
|
| 4704 |
+
"learning_rate": 9.999999968975914e-05,
|
| 4705 |
+
"loss": 2.7544,
|
| 4706 |
+
"step": 671
|
| 4707 |
+
},
|
| 4708 |
+
{
|
| 4709 |
+
"epoch": 0.03577846104699916,
|
| 4710 |
+
"grad_norm": 0.41349250078201294,
|
| 4711 |
+
"learning_rate": 9.99999996888268e-05,
|
| 4712 |
+
"loss": 2.8557,
|
| 4713 |
+
"step": 672
|
| 4714 |
+
},
|
| 4715 |
+
{
|
| 4716 |
+
"epoch": 0.03583170280450958,
|
| 4717 |
+
"grad_norm": 0.384772390127182,
|
| 4718 |
+
"learning_rate": 9.999999968789304e-05,
|
| 4719 |
+
"loss": 2.7758,
|
| 4720 |
+
"step": 673
|
| 4721 |
+
},
|
| 4722 |
+
{
|
| 4723 |
+
"epoch": 0.035884944562019995,
|
| 4724 |
+
"grad_norm": 0.39242029190063477,
|
| 4725 |
+
"learning_rate": 9.999999968695789e-05,
|
| 4726 |
+
"loss": 2.8561,
|
| 4727 |
+
"step": 674
|
| 4728 |
+
},
|
| 4729 |
+
{
|
| 4730 |
+
"epoch": 0.035938186319530406,
|
| 4731 |
+
"grad_norm": 0.4232184886932373,
|
| 4732 |
+
"learning_rate": 9.999999968602134e-05,
|
| 4733 |
+
"loss": 2.7526,
|
| 4734 |
+
"step": 675
|
| 4735 |
+
},
|
| 4736 |
+
{
|
| 4737 |
+
"epoch": 0.035991428077040824,
|
| 4738 |
+
"grad_norm": 0.3954784870147705,
|
| 4739 |
+
"learning_rate": 9.999999968508339e-05,
|
| 4740 |
+
"loss": 2.8118,
|
| 4741 |
+
"step": 676
|
| 4742 |
+
},
|
| 4743 |
+
{
|
| 4744 |
+
"epoch": 0.036044669834551235,
|
| 4745 |
+
"grad_norm": 0.4440658688545227,
|
| 4746 |
+
"learning_rate": 9.999999968414405e-05,
|
| 4747 |
+
"loss": 2.8997,
|
| 4748 |
+
"step": 677
|
| 4749 |
+
},
|
| 4750 |
+
{
|
| 4751 |
+
"epoch": 0.03609791159206165,
|
| 4752 |
+
"grad_norm": 0.4090384244918823,
|
| 4753 |
+
"learning_rate": 9.99999996832033e-05,
|
| 4754 |
+
"loss": 2.7277,
|
| 4755 |
+
"step": 678
|
| 4756 |
+
},
|
| 4757 |
+
{
|
| 4758 |
+
"epoch": 0.03615115334957207,
|
| 4759 |
+
"grad_norm": 0.4622509479522705,
|
| 4760 |
+
"learning_rate": 9.999999968226114e-05,
|
| 4761 |
+
"loss": 2.8473,
|
| 4762 |
+
"step": 679
|
| 4763 |
+
},
|
| 4764 |
+
{
|
| 4765 |
+
"epoch": 0.03620439510708248,
|
| 4766 |
+
"grad_norm": 0.44071659445762634,
|
| 4767 |
+
"learning_rate": 9.999999968131761e-05,
|
| 4768 |
+
"loss": 2.942,
|
| 4769 |
+
"step": 680
|
| 4770 |
+
},
|
| 4771 |
+
{
|
| 4772 |
+
"epoch": 0.0362576368645929,
|
| 4773 |
+
"grad_norm": 0.5004546046257019,
|
| 4774 |
+
"learning_rate": 9.999999968037266e-05,
|
| 4775 |
+
"loss": 2.8794,
|
| 4776 |
+
"step": 681
|
| 4777 |
+
},
|
| 4778 |
+
{
|
| 4779 |
+
"epoch": 0.03631087862210332,
|
| 4780 |
+
"grad_norm": 0.4791366159915924,
|
| 4781 |
+
"learning_rate": 9.999999967942633e-05,
|
| 4782 |
+
"loss": 2.9765,
|
| 4783 |
+
"step": 682
|
| 4784 |
+
},
|
| 4785 |
+
{
|
| 4786 |
+
"epoch": 0.03636412037961373,
|
| 4787 |
+
"grad_norm": 0.4310838580131531,
|
| 4788 |
+
"learning_rate": 9.999999967847858e-05,
|
| 4789 |
+
"loss": 2.6606,
|
| 4790 |
+
"step": 683
|
| 4791 |
+
},
|
| 4792 |
+
{
|
| 4793 |
+
"epoch": 0.03641736213712415,
|
| 4794 |
+
"grad_norm": 0.43610477447509766,
|
| 4795 |
+
"learning_rate": 9.999999967752944e-05,
|
| 4796 |
+
"loss": 2.7102,
|
| 4797 |
+
"step": 684
|
| 4798 |
+
},
|
| 4799 |
+
{
|
| 4800 |
+
"epoch": 0.036470603894634565,
|
| 4801 |
+
"grad_norm": 0.5011301040649414,
|
| 4802 |
+
"learning_rate": 9.999999967657889e-05,
|
| 4803 |
+
"loss": 2.9686,
|
| 4804 |
+
"step": 685
|
| 4805 |
+
},
|
| 4806 |
+
{
|
| 4807 |
+
"epoch": 0.036523845652144976,
|
| 4808 |
+
"grad_norm": 0.48820289969444275,
|
| 4809 |
+
"learning_rate": 9.999999967562696e-05,
|
| 4810 |
+
"loss": 2.8583,
|
| 4811 |
+
"step": 686
|
| 4812 |
+
},
|
| 4813 |
+
{
|
| 4814 |
+
"epoch": 0.036577087409655394,
|
| 4815 |
+
"grad_norm": 0.49313730001449585,
|
| 4816 |
+
"learning_rate": 9.999999967467362e-05,
|
| 4817 |
+
"loss": 2.741,
|
| 4818 |
+
"step": 687
|
| 4819 |
+
},
|
| 4820 |
+
{
|
| 4821 |
+
"epoch": 0.036630329167165805,
|
| 4822 |
+
"grad_norm": 0.4905647933483124,
|
| 4823 |
+
"learning_rate": 9.999999967371889e-05,
|
| 4824 |
+
"loss": 2.8032,
|
| 4825 |
+
"step": 688
|
| 4826 |
+
},
|
| 4827 |
+
{
|
| 4828 |
+
"epoch": 0.03668357092467622,
|
| 4829 |
+
"grad_norm": 0.5020934343338013,
|
| 4830 |
+
"learning_rate": 9.999999967276276e-05,
|
| 4831 |
+
"loss": 2.8226,
|
| 4832 |
+
"step": 689
|
| 4833 |
+
},
|
| 4834 |
+
{
|
| 4835 |
+
"epoch": 0.03673681268218664,
|
| 4836 |
+
"grad_norm": 0.5333757400512695,
|
| 4837 |
+
"learning_rate": 9.999999967180522e-05,
|
| 4838 |
+
"loss": 2.8236,
|
| 4839 |
+
"step": 690
|
| 4840 |
+
},
|
| 4841 |
+
{
|
| 4842 |
+
"epoch": 0.03679005443969705,
|
| 4843 |
+
"grad_norm": 0.5454348921775818,
|
| 4844 |
+
"learning_rate": 9.99999996708463e-05,
|
| 4845 |
+
"loss": 2.9117,
|
| 4846 |
+
"step": 691
|
| 4847 |
+
},
|
| 4848 |
+
{
|
| 4849 |
+
"epoch": 0.03684329619720747,
|
| 4850 |
+
"grad_norm": 0.5079708099365234,
|
| 4851 |
+
"learning_rate": 9.999999966988596e-05,
|
| 4852 |
+
"loss": 2.8137,
|
| 4853 |
+
"step": 692
|
| 4854 |
+
},
|
| 4855 |
+
{
|
| 4856 |
+
"epoch": 0.03689653795471789,
|
| 4857 |
+
"grad_norm": 0.5753440260887146,
|
| 4858 |
+
"learning_rate": 9.999999966892422e-05,
|
| 4859 |
+
"loss": 2.9181,
|
| 4860 |
+
"step": 693
|
| 4861 |
+
},
|
| 4862 |
+
{
|
| 4863 |
+
"epoch": 0.0369497797122283,
|
| 4864 |
+
"grad_norm": 0.5435117483139038,
|
| 4865 |
+
"learning_rate": 9.99999996679611e-05,
|
| 4866 |
+
"loss": 2.9129,
|
| 4867 |
+
"step": 694
|
| 4868 |
+
},
|
| 4869 |
+
{
|
| 4870 |
+
"epoch": 0.03700302146973872,
|
| 4871 |
+
"grad_norm": 0.5819733142852783,
|
| 4872 |
+
"learning_rate": 9.999999966699656e-05,
|
| 4873 |
+
"loss": 3.0462,
|
| 4874 |
+
"step": 695
|
| 4875 |
+
},
|
| 4876 |
+
{
|
| 4877 |
+
"epoch": 0.03705626322724913,
|
| 4878 |
+
"grad_norm": 0.6745149493217468,
|
| 4879 |
+
"learning_rate": 9.999999966603065e-05,
|
| 4880 |
+
"loss": 3.2039,
|
| 4881 |
+
"step": 696
|
| 4882 |
+
},
|
| 4883 |
+
{
|
| 4884 |
+
"epoch": 0.037109504984759546,
|
| 4885 |
+
"grad_norm": 0.6611133217811584,
|
| 4886 |
+
"learning_rate": 9.999999966506331e-05,
|
| 4887 |
+
"loss": 2.9349,
|
| 4888 |
+
"step": 697
|
| 4889 |
+
},
|
| 4890 |
+
{
|
| 4891 |
+
"epoch": 0.037162746742269964,
|
| 4892 |
+
"grad_norm": 0.7144906520843506,
|
| 4893 |
+
"learning_rate": 9.999999966409459e-05,
|
| 4894 |
+
"loss": 2.9977,
|
| 4895 |
+
"step": 698
|
| 4896 |
+
},
|
| 4897 |
+
{
|
| 4898 |
+
"epoch": 0.037215988499780375,
|
| 4899 |
+
"grad_norm": 1.030449628829956,
|
| 4900 |
+
"learning_rate": 9.999999966312447e-05,
|
| 4901 |
+
"loss": 3.3691,
|
| 4902 |
+
"step": 699
|
| 4903 |
+
},
|
| 4904 |
+
{
|
| 4905 |
+
"epoch": 0.03726923025729079,
|
| 4906 |
+
"grad_norm": 1.110541820526123,
|
| 4907 |
+
"learning_rate": 9.999999966215294e-05,
|
| 4908 |
+
"loss": 3.0989,
|
| 4909 |
+
"step": 700
|
| 4910 |
}
|
| 4911 |
],
|
| 4912 |
"logging_steps": 1,
|
|
|
|
| 4926 |
"attributes": {}
|
| 4927 |
}
|
| 4928 |
},
|
| 4929 |
+
"total_flos": 1.9203841876977254e+17,
|
| 4930 |
"train_batch_size": 4,
|
| 4931 |
"trial_name": null,
|
| 4932 |
"trial_params": null
|