miyuki2026 committed on
Commit
740e347
·
1 Parent(s): 6cbeb5d
examples/tutorials/rlhf/gpt2_sst2_ppo/step_2_train_model_ddp.py CHANGED
@@ -39,11 +39,11 @@ kl_coef:0.03
39
  response_length: 256
40
 
41
  nohup torchrun --nproc_per_node=2 step_2_train_model_ddp.py \
42
- --learning_rate 1e-6 \
43
  --response_length 128 \
44
- --kl_coef 0.3 \
45
- --cliprange 0.05 \
46
- --cliprange_value 0.05 \
47
  &
48
 
49
 
 
39
  response_length: 256
40
 
41
  nohup torchrun --nproc_per_node=2 step_2_train_model_ddp.py \
42
+ --learning_rate 5e-6 \
43
  --response_length 128 \
44
+ --kl_coef 0.05 \
45
+ --cliprange 0.2 \
46
+ --cliprange_value 0.2 \
47
  &
48
 
49
 
toolbox/trl/trainer/ppo_trainer.py CHANGED
@@ -364,8 +364,8 @@ class PPOTrainer(Trainer):
364
  top_k=0.0,
365
  top_p=1.0,
366
  do_sample=True,
367
- pad_token_id=processing_class.pad_token_id, eos_token_id=processing_class.eos_token_id,
368
- repetition_penalty=3.0,
369
  )
370
 
371
  accelerator.print("===training policy===")
@@ -692,8 +692,8 @@ class PPOTrainer(Trainer):
692
  top_k=0.0,
693
  top_p=1.0,
694
  do_sample=True,
695
- pad_token_id=processing_class.pad_token_id, eos_token_id=processing_class.eos_token_id,
696
- repetition_penalty=3.0,
697
  )
698
 
699
  table = defaultdict(list)
 
364
  top_k=0.0,
365
  top_p=1.0,
366
  do_sample=True,
367
+ # pad_token_id=processing_class.pad_token_id, eos_token_id=processing_class.eos_token_id,
368
+ # repetition_penalty=3.0,
369
  )
370
 
371
  accelerator.print("===training policy===")
 
692
  top_k=0.0,
693
  top_p=1.0,
694
  do_sample=True,
695
+ # pad_token_id=processing_class.pad_token_id, eos_token_id=processing_class.eos_token_id,
696
+ # repetition_penalty=3.0,
697
  )
698
 
699
  table = defaultdict(list)