prapti19 committed
Commit f538515 · verified · 1 Parent(s): 11f11b7

Update README.md

Files changed (1):
  1. README.md +1 -99
README.md CHANGED
@@ -17,105 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
  <details><summary>See axolotl config</summary>

  axolotl version: `0.5.0`
- ```yaml
- strict: false
- base_model: microsoft/Phi-3.5-mini-instruct
- tokenizer_config: microsoft/Phi-3.5-mini-instruct
- model_type: AutoModelForCausalLM
- tokenizer_type: AutoTokenizer
-
- # Output configuration
- hub_model_id: collinear-ai/curator_math_phase1_sn_ensemble7_90325
- dataset_prepared_path: /workspace/prapti/curator_math_phase1_sn_ensemble7_90325
- output_dir: /workspace/prapti/curator_math_phase1_sn_ensemble7_90325
-
- # Format the dataset into the right instruction format.
- chat_template: phi_3
- datasets:
-   - path: collinear-ai/curator_math_phase1_sn_ensemble7_50325_sft
-     split: train
-     type: chat_template
-     chat_template: phi_3
-     field_messages: train_conv
-     message_field_role: role
-     message_field_content: content
- train_on_inputs: false #FALSE
-
- val_set_size: 0.05
- # Data packing
- sequence_len: 4096 #meghana can you please confirm this
- eval_sample_packing: false
- sample_packing: false
- pad_to_sequence_len: true
- group_by_length: false
-
- # Lora config
- adapter: qlora
- lora_model_dir:
- load_in_8bit: false
- load_in_4bit: true
- lora_r: 128
- lora_alpha: 64
- lora_dropout: 0.2
- lora_target_linear: true
- lora_fan_in_fan_out:
- lora_target_modules:
-   - gate_proj
-   - down_proj
-   - up_proj
-   - q_proj
-   - v_proj
-   - k_proj
-   - o_proj
- lora_modules_to_save:
-   - embed_tokens
-   - lm_head
-
-
- # Logging config
- wandb_project: sn-curators-downstream
- wandb_entity: nazneen
- wandb_name: curator_math_phase1_sn_ensemble7_90325
-
- # Trainer config
- gradient_accumulation_steps: 2
- micro_batch_size: 8
- num_epochs: 1
- optimizer: paged_adamw_8bit
- lr_scheduler: cosine
- learning_rate: 0.000005
-
- bfloat16: true
- bf16: true
- fp16:
- tf32: false
-
- gradient_checkpointing: true
- early_stopping_patience:
- resume_from_checkpoint:
- local_rank:
- logging_steps: 10
- xformers_attention:
- flash_attention: true
- save_safetensors: true
-
-
-
- warmup_steps: 50
- evals_per_epoch: 3
- eval_table_size: 3
- eval_max_new_tokens: 2048
- saves_per_epoch: 20
- debug:
- deepspeed:
- weight_decay: 0.02
- fsdp_config:
- special_tokens:
-   bos_token: "<s>"
-   eos_token: "<|endoftext|>"
-   unk_token: "<unk>"
-   pad_token: "<|endoftext|>"
- ```
+

  </details><br>
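For reference, the removed config trains a 4-bit QLoRA adapter on top of `microsoft/Phi-3.5-mini-instruct` and pushes it to the `hub_model_id` shown above. Below is a minimal sketch of loading such an adapter with `transformers` + `peft`; the loading path, quantization settings, and example prompt are assumptions for illustration, not something stated in this commit.

```python
# Minimal sketch (illustrative, not part of this commit): load the base model
# in 4-bit and attach the QLoRA adapter named in the removed config.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "microsoft/Phi-3.5-mini-instruct"  # base_model in the removed config
adapter_id = "collinear-ai/curator_math_phase1_sn_ensemble7_90325"  # hub_model_id in the removed config

# Mirror load_in_4bit: true / bf16: true from the config (assumed loading choice).
bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, quantization_config=bnb, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)  # attach the LoRA adapter weights

# Build a Phi-3-style chat prompt and generate.
messages = [{"role": "user", "content": "What is 17 * 24?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(base.device)
out = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```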