Text Generation · Transformers · GGUF · step3p5 · custom_code · imatrix · conversational
WinstonDeng committed · verified
Commit e982fcf · 1 Parent(s): 8f4ece3

rename split filename
.gitattributes CHANGED
@@ -45,3 +45,15 @@ step3p5_flash_Q4_K_S.gguf.part-008 filter=lfs diff=lfs merge=lfs -text
  step3p5_flash_Q4_K_S.gguf.part-009 filter=lfs diff=lfs merge=lfs -text
  step3p5_flash_Q4_K_S.gguf.part-010 filter=lfs diff=lfs merge=lfs -text
  step-bar-chart.png filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00001-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00002-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00003-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00004-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00005-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00006-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00007-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00008-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00009-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00010-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00011-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
+ step3p5_flash_Q4_K_S-00012-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
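
With the shards renamed from raw `.gguf.part-NNN` pieces to llama.cpp's `-NNNNN-of-NNNNN.gguf` split convention, a loader should be able to pick up the whole model from the first shard alone (raw `.part` files typically had to be concatenated back together before loading). A minimal sketch using llama-cpp-python, assuming the shards follow the gguf-split format, sit in the working directory, and that the llama.cpp build in use supports this architecture:

```python
# Minimal sketch: load a split GGUF by pointing at the first shard.
# Assumes gguf-split naming, so the remaining 11 shards are
# discovered automatically from the -00001-of-00012 file.
from llama_cpp import Llama

llm = Llama(
    model_path="step3p5_flash_Q4_K_S-00001-of-00012.gguf",  # first shard only
    n_ctx=8192,       # context window; tune to available memory
    n_gpu_layers=-1,  # offload all layers if built with GPU support
)

out = llm("Hello, my name is", max_tokens=32)
print(out["choices"][0]["text"])
```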
config.json ADDED
@@ -0,0 +1,312 @@
+ {
+   "architectures": [
+     "Step3p5ForCausalLM"
+   ],
+   "model_type": "step3p5",
+   "auto_map": {
+     "AutoConfig": "configuration_step3p5.Step3p5Config",
+     "AutoModelForCausalLM": "modeling_step3p5.Step3p5ForCausalLM"
+   },
+   "rope_scaling": {
+     "rope_type": "llama3",
+     "factor": 2.0,
+     "original_max_position_embeddings": 131072,
+     "low_freq_factor": 1.0,
+     "high_freq_factor": 32.0
+   },
+   "yarn_only_types": ["full_attention"],
+   "hidden_size": 4096,
+   "intermediate_size": 11264,
+   "num_hidden_layers": 45,
+   "max_seq_len": 262144,
+   "vocab_size": 128896,
+   "torch_dtype": "bfloat16",
+   "use_qk_norm": true,
+   "moe_layers_enum": "3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44",
+   "num_attention_heads": 64,
+   "num_attention_groups": 8,
+   "head_dim": 128,
+   "use_moe": true,
+   "moe_num_experts": 288,
+   "moe_top_k": 8,
+   "moe_intermediate_size": 1280,
+   "share_expert_dim": 1280,
+   "moe_layer_offset": 0,
+   "moe_every_n_layer": 1,
+   "norm_expert_weight": true,
+   "moe_router_activation": "sigmoid",
+   "moe_router_scaling_factor": 3.0,
+   "att_impl_type": "GQA",
+   "rope_theta": [
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0,
+     5000000.0,
+     10000.0,
+     10000.0,
+     10000.0
+   ],
+   "use_head_wise_attn_gate": true,
+   "sliding_window": 512,
+   "use_moe_router_bias": true,
+   "need_fp32_gate": true,
+   "sink": false,
+   "layer_types": [
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention"
+   ],
+   "use_rope_layers": [],
+   "num_nextn_predict_layers": 3,
+   "partial_rotary_factors": [
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0,
+     0.5,
+     1.0,
+     1.0,
+     1.0
+   ],
+   "eos_token_id": [
+     1,
+     2,
+     128007
+   ],
+   "bos_token_id": 0,
+   "attention_other_setting": {
+     "attention_type": "sliding_attention",
+     "num_attention_heads": 96,
+     "num_attention_groups": 8,
+     "head_dim": 128,
+     "true_head_dim": 128
+   },
+   "swiglu_limits": [
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     7,
+     7,
+     0.0,
+     0.0,
+     0.0
+   ],
+   "swiglu_limits_shared": [
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     0.0,
+     16,
+     0.0,
+     0.0,
+     0.0
+   ],
+   "zero_centered": true,
+   "max_position_embeddings": 262144
+ }
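
The config above is dense; a minimal sketch (not part of the commit, local filename assumed) that pulls a few headline numbers back out of it:

```python
# Minimal sketch: summarize the model layout from config.json.
# Uses only keys that appear verbatim in the diff above.
import json

with open("config.json") as f:
    cfg = json.load(f)

moe_layers = [int(i) for i in cfg["moe_layers_enum"].split(",")]
full_attn = cfg["layer_types"].count("full_attention")
sliding = cfg["layer_types"].count("sliding_attention")

print(f"hidden layers:    {cfg['num_hidden_layers']}")
print(f"MoE layers:       {len(moe_layers)} "
      f"(experts={cfg['moe_num_experts']}, top-k={cfg['moe_top_k']})")
print(f"attention layout: {full_attn} full / {sliding} sliding "
      f"(window={cfg['sliding_window']})")
print(f"max positions:    {cfg['max_position_embeddings']}")
```

Note that `layer_types` lists 48 entries against a `num_hidden_layers` of 45; the extra three presumably correspond to `num_nextn_predict_layers: 3`, so the sketch reports raw counts rather than assuming the two match.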
step3p5_flash_Q4_K_S-00001-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa1eb8748bd3ad496afa7d997682c7ca6a42ff692fbd7b5cbecf8a99f612ec72
+ size 9445174496
step3p5_flash_Q4_K_S-00002-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd437b69100c1f9a39dd9c70d600a0c94bf8ee86fbcd37b60ffffe9f3a4c5736
+ size 9559992320
step3p5_flash_Q4_K_S-00003-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:317a2f5e3dca7f752e4ef5c69bf2933d2894c90ac8b42cec7f00babed6b95eac
+ size 9616839232
step3p5_flash_Q4_K_S-00004-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1128d067b75d1b9f9bc278de6d34331825ab2a39ce5026213341adf487a78f14
+ size 9621574336
step3p5_flash_Q4_K_S-00005-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e31821f6a8dbbef8bc0141c791224de92d3b9cc73a9f9d9e41337ec25054513d
+ size 9559992352
step3p5_flash_Q4_K_S-00006-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf96a17dec83f05c6782457b41a1b2f8d1eb7ce046e66d60723420da4d455558
+ size 9616839232
step3p5_flash_Q4_K_S-00007-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:30c81d8878dc68f29a503ff552a4696c76c6f329dfb7981a55fa575c78baee79
+ size 9621574336
step3p5_flash_Q4_K_S-00008-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc6d4452e983962aadd970d8ef05dbdf6fc8d6c443a8a4f412bec4a8c73dfc80
+ size 9578940448
step3p5_flash_Q4_K_S-00009-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a6259afa226a62fd020d06b47f9c6a88c472afe702bb09173b5d4a95c581df92
+ size 9616839232
step3p5_flash_Q4_K_S-00010-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:df7170ae3e10983a3613755547f90767deccb03685d08254305b468556c70bd3
+ size 9621574336
step3p5_flash_Q4_K_S-00011-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1fb0e8c525c31c2c0802cb87c23843059496a6bf5d10240e8b180c6f5155587a
+ size 9559992352
step3p5_flash_Q4_K_S-00012-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78c6f73ce8a1bb6f8847edb596220ab508a239db4c17e7223c73a945c935149f
+ size 6079756896
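
Each shard above is stored as a Git LFS pointer: three lines carrying the spec version, the blob's `oid sha256:`, and its `size` in bytes. A minimal sketch, with hypothetical local paths, for checking a downloaded shard against its pointer:

```python
# Minimal sketch: verify a downloaded GGUF shard against its
# Git LFS pointer (the three-line "version / oid / size" text
# shown in the diffs above, saved to a local file).
import hashlib

def verify_lfs(pointer_path: str, blob_path: str, chunk: int = 1 << 20) -> bool:
    """True if the blob matches the sha256 oid and byte size in the pointer."""
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    h = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        while block := f.read(chunk):  # stream in 1 MiB chunks
            h.update(block)
            size += len(block)
    return h.hexdigest() == expected_oid and size == expected_size

# Hypothetical paths: pointer text saved next to the downloaded shard.
print(verify_lfs("step3p5_flash_Q4_K_S-00001-of-00012.gguf.pointer",
                 "step3p5_flash_Q4_K_S-00001-of-00012.gguf"))
```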