Spaces:
Running
Running
Commit
·
740e347
1
Parent(s):
6cbeb5d
update
Browse files
examples/tutorials/rlhf/gpt2_sst2_ppo/step_2_train_model_ddp.py
CHANGED
|
@@ -39,11 +39,11 @@ kl_coef:0.03
|
|
| 39 |
response_length: 256
|
| 40 |
|
| 41 |
nohup torchrun --nproc_per_node=2 step_2_train_model_ddp.py \
|
| 42 |
-
--learning_rate
|
| 43 |
--response_length 128 \
|
| 44 |
-
--kl_coef 0.
|
| 45 |
-
--cliprange 0.
|
| 46 |
-
--cliprange_value 0.
|
| 47 |
&
|
| 48 |
|
| 49 |
|
|
|
|
| 39 |
response_length: 256
|
| 40 |
|
| 41 |
nohup torchrun --nproc_per_node=2 step_2_train_model_ddp.py \
|
| 42 |
+
--learning_rate 5e-6 \
|
| 43 |
--response_length 128 \
|
| 44 |
+
--kl_coef 0.05 \
|
| 45 |
+
--cliprange 0.2 \
|
| 46 |
+
--cliprange_value 0.2 \
|
| 47 |
&
|
| 48 |
|
| 49 |
|
toolbox/trl/trainer/ppo_trainer.py
CHANGED
|
@@ -364,8 +364,8 @@ class PPOTrainer(Trainer):
|
|
| 364 |
top_k=0.0,
|
| 365 |
top_p=1.0,
|
| 366 |
do_sample=True,
|
| 367 |
-
pad_token_id=processing_class.pad_token_id, eos_token_id=processing_class.eos_token_id,
|
| 368 |
-
repetition_penalty=3.0,
|
| 369 |
)
|
| 370 |
|
| 371 |
accelerator.print("===training policy===")
|
|
@@ -692,8 +692,8 @@ class PPOTrainer(Trainer):
|
|
| 692 |
top_k=0.0,
|
| 693 |
top_p=1.0,
|
| 694 |
do_sample=True,
|
| 695 |
-
pad_token_id=processing_class.pad_token_id, eos_token_id=processing_class.eos_token_id,
|
| 696 |
-
repetition_penalty=3.0,
|
| 697 |
)
|
| 698 |
|
| 699 |
table = defaultdict(list)
|
|
|
|
| 364 |
top_k=0.0,
|
| 365 |
top_p=1.0,
|
| 366 |
do_sample=True,
|
| 367 |
+
# pad_token_id=processing_class.pad_token_id, eos_token_id=processing_class.eos_token_id,
|
| 368 |
+
# repetition_penalty=3.0,
|
| 369 |
)
|
| 370 |
|
| 371 |
accelerator.print("===training policy===")
|
|
|
|
| 692 |
top_k=0.0,
|
| 693 |
top_p=1.0,
|
| 694 |
do_sample=True,
|
| 695 |
+
# pad_token_id=processing_class.pad_token_id, eos_token_id=processing_class.eos_token_id,
|
| 696 |
+
# repetition_penalty=3.0,
|
| 697 |
)
|
| 698 |
|
| 699 |
table = defaultdict(list)
|