diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..aa7aacd0134a92c3c1943fdecc75cd8b7420cce6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..2ab98ef068d62829d17c5ade1827b9f013fa2bbf --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,86 @@ +[gMASK] +{%- if tools -%} +<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{% for tool in tools %} +{{ tool | tojson(ensure_ascii=False) }} +{% endfor %} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...{%- endif -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{- content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{%- set ns = namespace(last_user_index=-1) %} +{%- for m in messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{% for m in messages %} +{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }} +{%- elif m.role == 'assistant' -%} +<|assistant|> +{%- set reasoning_content = '' %} +{%- set content = visible_text(m.content) %} +{%- if m.reasoning_content is string %} + {%- set reasoning_content = m.reasoning_content %} +{%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} +{%- endif %} +{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%} +{{ '' + reasoning_content.strip() + ''}} +{%- else -%} +{{ '' }} +{%- endif -%} +{%- if content.strip() -%} +{{ content.strip() }} +{%- endif -%} +{% if m.tool_calls %} +{% for tc in m.tool_calls %} +{%- if tc.function %} + {%- set tc = tc.function %} +{%- endif %} +{{- '' + tc.name -}} +{% set _args = tc.arguments %}{% for k, v in _args.items() %}{{ k }}{{ v | tojson(ensure_ascii=False) if v is not string else v }}{% endfor %}{% endfor %} +{% endif %} +{%- elif m.role == 'tool' -%} +{%- if m.content is string -%} +{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|observation|>' }} +{%- endif %} +{{- '' }} +{{- m.content }} +{{- '' }} +{%- else -%} +<|observation|>{% for tr in m.content %} +{{ tr.output if tr.output is defined else tr }}{% endfor -%} +{% endif -%} +{%- elif m.role == 'system' -%} +<|system|>{{ visible_text(m.content) }} +{%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + <|assistant|>{{- '' if (enable_thinking is defined and not enable_thinking) else '' -}} +{%- endif -%} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bfa857c27555fd894cd2da4564f16d02a0808a3b --- /dev/null +++ b/config.json @@ -0,0 +1,2726 @@ +{ + "architectures": [ + "GlmMoeDsaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 0, + "dtype": "bfloat16", + "eos_token_id": [ + 154820, + 154827, + 154829 + ], + "ep_size": 1, + "first_k_dense_replace": 3, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 6144, + "index_head_dim": 128, + "index_n_heads": 32, + "index_topk": 2048, + "indexer_rope_interleave": true, + "initializer_range": 0.02, + "intermediate_size": 12288, + "kv_lora_rank": 512, + "max_position_embeddings": 202752, + "mlp_layer_types": [ + "dense", + "dense", + "dense", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse" + ], + "model_type": "glm_moe_dsa", + "moe_intermediate_size": 2048, + "moe_layer_freq": 1, + "n_group": 1, + "n_routed_experts": 256, + "n_shared_experts": 1, + "norm_topk_prob": true, + "num_attention_heads": 64, + "num_experts_per_tok": 8, + "num_hidden_layers": 78, + "num_key_value_heads": 64, + "num_nextn_predict_layers": 1, + "pad_token_id": 154820, + "pretraining_tp": 1, + "q_lora_rank": 2048, + "qk_head_dim": 256, + "qk_nope_head_dim": 192, + "qk_rope_head_dim": 64, + "quantization_config": { + "autoround_version": "0.10.0", + "bits": 4, + "data_type": "int", + "extra_config": { + "model.layers.0.mlp.down_proj": { + "bits": 8 + }, + "model.layers.0.mlp.gate_proj": { + "bits": 8 + }, + "model.layers.0.mlp.up_proj": { + "bits": 8 + }, + "model.layers.0.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.0.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.0.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.0.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.0.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.0.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.0.self_attn.wk": { + "bits": 8 + }, + "model.layers.0.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.1.mlp.down_proj": { + "bits": 8 + }, + "model.layers.1.mlp.gate_proj": { + "bits": 8 + }, + "model.layers.1.mlp.up_proj": { + "bits": 8 + }, + "model.layers.1.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.1.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.1.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.1.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.1.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.1.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.1.self_attn.wk": { + "bits": 8 + }, + "model.layers.1.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.10.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.10.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.10.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.10.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.10.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.10.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.10.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.10.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.10.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.10.self_attn.wk": { + "bits": 8 + }, + "model.layers.10.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.11.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.11.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.11.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.11.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.11.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.11.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.11.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.11.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.11.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.11.self_attn.wk": { + "bits": 8 + }, + "model.layers.11.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.12.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.12.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.12.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.12.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.12.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.12.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.12.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.12.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.12.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.12.self_attn.wk": { + "bits": 8 + }, + "model.layers.12.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.13.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.13.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.13.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.13.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.13.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.13.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.13.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.13.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.13.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.13.self_attn.wk": { + "bits": 8 + }, + "model.layers.13.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.14.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.14.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.14.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.14.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.14.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.14.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.14.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.14.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.14.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.14.self_attn.wk": { + "bits": 8 + }, + "model.layers.14.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.15.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.15.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.15.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.15.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.15.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.15.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.15.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.15.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.15.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.15.self_attn.wk": { + "bits": 8 + }, + "model.layers.15.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.16.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.16.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.16.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.16.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.16.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.16.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.16.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.16.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.16.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.16.self_attn.wk": { + "bits": 8 + }, + "model.layers.16.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.17.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.17.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.17.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.17.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.17.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.17.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.17.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.17.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.17.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.17.self_attn.wk": { + "bits": 8 + }, + "model.layers.17.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.18.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.18.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.18.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.18.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.18.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.18.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.18.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.18.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.18.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.18.self_attn.wk": { + "bits": 8 + }, + "model.layers.18.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.19.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.19.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.19.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.19.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.19.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.19.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.19.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.19.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.19.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.19.self_attn.wk": { + "bits": 8 + }, + "model.layers.19.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.2.mlp.down_proj": { + "bits": 8 + }, + "model.layers.2.mlp.gate_proj": { + "bits": 8 + }, + "model.layers.2.mlp.up_proj": { + "bits": 8 + }, + "model.layers.2.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.2.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.2.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.2.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.2.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.2.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.2.self_attn.wk": { + "bits": 8 + }, + "model.layers.2.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.20.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.20.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.20.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.20.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.20.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.20.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.20.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.20.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.20.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.20.self_attn.wk": { + "bits": 8 + }, + "model.layers.20.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.21.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.21.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.21.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.21.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.21.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.21.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.21.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.21.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.21.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.21.self_attn.wk": { + "bits": 8 + }, + "model.layers.21.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.22.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.22.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.22.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.22.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.22.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.22.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.22.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.22.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.22.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.22.self_attn.wk": { + "bits": 8 + }, + "model.layers.22.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.23.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.23.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.23.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.23.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.23.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.23.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.23.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.23.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.23.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.23.self_attn.wk": { + "bits": 8 + }, + "model.layers.23.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.24.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.24.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.24.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.24.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.24.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.24.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.24.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.24.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.24.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.24.self_attn.wk": { + "bits": 8 + }, + "model.layers.24.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.25.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.25.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.25.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.25.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.25.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.25.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.25.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.25.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.25.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.25.self_attn.wk": { + "bits": 8 + }, + "model.layers.25.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.26.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.26.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.26.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.26.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.26.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.26.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.26.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.26.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.26.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.26.self_attn.wk": { + "bits": 8 + }, + "model.layers.26.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.27.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.27.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.27.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.27.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.27.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.27.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.27.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.27.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.27.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.27.self_attn.wk": { + "bits": 8 + }, + "model.layers.27.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.28.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.28.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.28.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.28.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.28.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.28.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.28.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.28.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.28.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.28.self_attn.wk": { + "bits": 8 + }, + "model.layers.28.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.29.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.29.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.29.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.29.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.29.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.29.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.29.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.29.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.29.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.29.self_attn.wk": { + "bits": 8 + }, + "model.layers.29.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.3.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.3.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.3.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.3.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.3.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.3.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.3.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.3.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.3.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.3.self_attn.wk": { + "bits": 8 + }, + "model.layers.3.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.30.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.30.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.30.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.30.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.30.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.30.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.30.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.30.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.30.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.30.self_attn.wk": { + "bits": 8 + }, + "model.layers.30.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.31.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.31.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.31.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.31.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.31.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.31.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.31.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.31.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.31.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.31.self_attn.wk": { + "bits": 8 + }, + "model.layers.31.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.32.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.32.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.32.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.32.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.32.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.32.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.32.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.32.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.32.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.32.self_attn.wk": { + "bits": 8 + }, + "model.layers.32.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.33.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.33.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.33.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.33.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.33.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.33.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.33.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.33.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.33.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.33.self_attn.wk": { + "bits": 8 + }, + "model.layers.33.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.34.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.34.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.34.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.34.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.34.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.34.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.34.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.34.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.34.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.34.self_attn.wk": { + "bits": 8 + }, + "model.layers.34.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.35.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.35.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.35.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.35.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.35.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.35.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.35.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.35.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.35.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.35.self_attn.wk": { + "bits": 8 + }, + "model.layers.35.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.36.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.36.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.36.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.36.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.36.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.36.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.36.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.36.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.36.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.36.self_attn.wk": { + "bits": 8 + }, + "model.layers.36.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.37.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.37.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.37.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.37.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.37.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.37.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.37.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.37.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.37.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.37.self_attn.wk": { + "bits": 8 + }, + "model.layers.37.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.38.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.38.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.38.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.38.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.38.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.38.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.38.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.38.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.38.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.38.self_attn.wk": { + "bits": 8 + }, + "model.layers.38.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.39.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.39.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.39.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.39.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.39.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.39.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.39.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.39.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.39.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.39.self_attn.wk": { + "bits": 8 + }, + "model.layers.39.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.4.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.4.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.4.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.4.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.4.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.4.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.4.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.4.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.4.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.4.self_attn.wk": { + "bits": 8 + }, + "model.layers.4.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.40.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.40.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.40.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.40.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.40.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.40.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.40.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.40.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.40.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.40.self_attn.wk": { + "bits": 8 + }, + "model.layers.40.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.41.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.41.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.41.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.41.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.41.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.41.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.41.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.41.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.41.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.41.self_attn.wk": { + "bits": 8 + }, + "model.layers.41.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.42.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.42.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.42.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.42.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.42.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.42.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.42.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.42.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.42.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.42.self_attn.wk": { + "bits": 8 + }, + "model.layers.42.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.43.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.43.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.43.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.43.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.43.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.43.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.43.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.43.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.43.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.43.self_attn.wk": { + "bits": 8 + }, + "model.layers.43.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.44.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.44.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.44.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.44.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.44.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.44.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.44.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.44.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.44.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.44.self_attn.wk": { + "bits": 8 + }, + "model.layers.44.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.45.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.45.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.45.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.45.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.45.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.45.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.45.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.45.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.45.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.45.self_attn.wk": { + "bits": 8 + }, + "model.layers.45.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.46.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.46.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.46.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.46.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.46.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.46.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.46.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.46.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.46.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.46.self_attn.wk": { + "bits": 8 + }, + "model.layers.46.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.47.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.47.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.47.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.47.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.47.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.47.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.47.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.47.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.47.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.47.self_attn.wk": { + "bits": 8 + }, + "model.layers.47.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.48.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.48.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.48.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.48.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.48.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.48.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.48.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.48.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.48.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.48.self_attn.wk": { + "bits": 8 + }, + "model.layers.48.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.49.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.49.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.49.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.49.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.49.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.49.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.49.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.49.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.49.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.49.self_attn.wk": { + "bits": 8 + }, + "model.layers.49.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.5.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.5.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.5.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.5.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.5.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.5.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.5.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.5.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.5.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.5.self_attn.wk": { + "bits": 8 + }, + "model.layers.5.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.50.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.50.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.50.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.50.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.50.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.50.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.50.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.50.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.50.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.50.self_attn.wk": { + "bits": 8 + }, + "model.layers.50.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.51.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.51.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.51.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.51.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.51.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.51.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.51.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.51.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.51.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.51.self_attn.wk": { + "bits": 8 + }, + "model.layers.51.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.52.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.52.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.52.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.52.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.52.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.52.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.52.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.52.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.52.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.52.self_attn.wk": { + "bits": 8 + }, + "model.layers.52.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.53.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.53.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.53.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.53.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.53.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.53.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.53.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.53.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.53.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.53.self_attn.wk": { + "bits": 8 + }, + "model.layers.53.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.54.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.54.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.54.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.54.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.54.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.54.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.54.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.54.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.54.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.54.self_attn.wk": { + "bits": 8 + }, + "model.layers.54.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.55.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.55.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.55.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.55.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.55.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.55.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.55.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.55.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.55.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.55.self_attn.wk": { + "bits": 8 + }, + "model.layers.55.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.56.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.56.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.56.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.56.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.56.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.56.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.56.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.56.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.56.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.56.self_attn.wk": { + "bits": 8 + }, + "model.layers.56.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.57.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.57.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.57.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.57.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.57.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.57.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.57.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.57.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.57.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.57.self_attn.wk": { + "bits": 8 + }, + "model.layers.57.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.58.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.58.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.58.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.58.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.58.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.58.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.58.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.58.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.58.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.58.self_attn.wk": { + "bits": 8 + }, + "model.layers.58.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.59.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.59.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.59.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.59.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.59.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.59.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.59.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.59.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.59.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.59.self_attn.wk": { + "bits": 8 + }, + "model.layers.59.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.6.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.6.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.6.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.6.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.6.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.6.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.6.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.6.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.6.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.6.self_attn.wk": { + "bits": 8 + }, + "model.layers.6.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.60.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.60.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.60.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.60.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.60.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.60.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.60.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.60.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.60.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.60.self_attn.wk": { + "bits": 8 + }, + "model.layers.60.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.61.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.61.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.61.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.61.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.61.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.61.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.61.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.61.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.61.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.61.self_attn.wk": { + "bits": 8 + }, + "model.layers.61.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.62.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.62.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.62.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.62.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.62.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.62.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.62.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.62.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.62.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.62.self_attn.wk": { + "bits": 8 + }, + "model.layers.62.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.63.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.63.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.63.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.63.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.63.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.63.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.63.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.63.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.63.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.63.self_attn.wk": { + "bits": 8 + }, + "model.layers.63.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.64.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.64.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.64.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.64.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.64.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.64.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.64.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.64.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.64.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.64.self_attn.wk": { + "bits": 8 + }, + "model.layers.64.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.65.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.65.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.65.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.65.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.65.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.65.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.65.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.65.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.65.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.65.self_attn.wk": { + "bits": 8 + }, + "model.layers.65.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.66.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.66.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.66.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.66.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.66.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.66.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.66.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.66.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.66.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.66.self_attn.wk": { + "bits": 8 + }, + "model.layers.66.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.67.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.67.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.67.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.67.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.67.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.67.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.67.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.67.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.67.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.67.self_attn.wk": { + "bits": 8 + }, + "model.layers.67.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.68.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.68.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.68.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.68.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.68.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.68.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.68.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.68.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.68.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.68.self_attn.wk": { + "bits": 8 + }, + "model.layers.68.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.69.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.69.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.69.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.69.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.69.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.69.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.69.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.69.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.69.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.69.self_attn.wk": { + "bits": 8 + }, + "model.layers.69.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.7.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.7.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.7.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.7.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.7.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.7.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.7.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.7.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.7.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.7.self_attn.wk": { + "bits": 8 + }, + "model.layers.7.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.70.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.70.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.70.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.70.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.70.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.70.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.70.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.70.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.70.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.70.self_attn.wk": { + "bits": 8 + }, + "model.layers.70.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.71.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.71.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.71.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.71.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.71.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.71.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.71.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.71.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.71.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.71.self_attn.wk": { + "bits": 8 + }, + "model.layers.71.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.72.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.72.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.72.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.72.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.72.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.72.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.72.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.72.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.72.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.72.self_attn.wk": { + "bits": 8 + }, + "model.layers.72.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.73.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.73.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.73.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.73.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.73.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.73.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.73.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.73.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.73.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.73.self_attn.wk": { + "bits": 8 + }, + "model.layers.73.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.74.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.74.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.74.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.74.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.74.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.74.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.74.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.74.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.74.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.74.self_attn.wk": { + "bits": 8 + }, + "model.layers.74.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.75.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.75.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.75.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.75.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.75.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.75.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.75.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.75.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.75.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.75.self_attn.wk": { + "bits": 8 + }, + "model.layers.75.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.76.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.76.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.76.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.76.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.76.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.76.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.76.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.76.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.76.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.76.self_attn.wk": { + "bits": 8 + }, + "model.layers.76.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.77.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.77.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.77.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.77.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.77.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.77.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.77.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.77.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.77.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.77.self_attn.wk": { + "bits": 8 + }, + "model.layers.77.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.8.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.8.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.8.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.8.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.8.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.8.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.8.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.8.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.8.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.8.self_attn.wk": { + "bits": 8 + }, + "model.layers.8.self_attn.wq_b": { + "bits": 8 + }, + "model.layers.9.mlp.shared_experts.down_proj": { + "bits": 8 + }, + "model.layers.9.mlp.shared_experts.gate_proj": { + "bits": 8 + }, + "model.layers.9.mlp.shared_experts.up_proj": { + "bits": 8 + }, + "model.layers.9.self_attn.kv_a_proj_with_mqa": { + "bits": 8 + }, + "model.layers.9.self_attn.kv_b_proj": { + "bits": 8 + }, + "model.layers.9.self_attn.o_proj": { + "bits": 8 + }, + "model.layers.9.self_attn.q_a_proj": { + "bits": 8 + }, + "model.layers.9.self_attn.q_b_proj": { + "bits": 8 + }, + "model.layers.9.self_attn.weights_proj": { + "bits": 8 + }, + "model.layers.9.self_attn.wk": { + "bits": 8 + }, + "model.layers.9.self_attn.wq_b": { + "bits": 8 + } + }, + "group_size": 128, + "iters": 0, + "packing_format": "auto_round:auto_gptq", + "quant_method": "auto-round", + "sym": true + }, + "rms_norm_eps": 1e-05, + "rope_interleave": true, + "rope_parameters": { + "rope_theta": 1000000, + "rope_type": "default" + }, + "routed_scaling_factor": 2.5, + "scoring_func": "sigmoid", + "tie_word_embeddings": false, + "topk_group": 1, + "topk_method": "noaux_tc", + "transformers_version": "5.2.0.dev0", + "use_cache": true, + "v_head_dim": 256, + "vocab_size": 154880 +} diff --git a/model-00001-of-00076.safetensors b/model-00001-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..801663274476005bc715c86753600458d94a867d --- /dev/null +++ b/model-00001-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b83e9d0ad0a56dac2770a8af57bf576dce3cb904773a5422be0c5f9bfed25ab +size 5366923072 diff --git a/model-00002-of-00076.safetensors b/model-00002-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..46a548065a0e1004f2326554ad5071d418c46552 --- /dev/null +++ b/model-00002-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0fdede0711cdc57bd4c34d56034b6680502164ae686febb5a6e7a3de5653534 +size 5362713760 diff --git a/model-00003-of-00076.safetensors b/model-00003-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf32ee9b47e2059f46dbcb9d1c5cbd9a60a6ee5e --- /dev/null +++ b/model-00003-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1431458a88bb44a875b48f3d882e21835a9dba6da4440154fba9d59b2566c79d +size 5362713784 diff --git a/model-00004-of-00076.safetensors b/model-00004-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9f949507ad75b8935b78d8c43d53c4c7596f0db --- /dev/null +++ b/model-00004-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3847a69c2a3b79c1b84e6738d38e8d596cf80981aa838631ec4ff0b99128fe91 +size 5362713816 diff --git a/model-00005-of-00076.safetensors b/model-00005-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a7807bd0963c718a7e8cf78fd235b2479769941 --- /dev/null +++ b/model-00005-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99305ad6522849718bb58af19270edc62b9c1271823ca599723a7ee77b11d46f +size 5362713816 diff --git a/model-00006-of-00076.safetensors b/model-00006-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e39d1cf831dbfb17ba5e10b6e15428c51f8357b4 --- /dev/null +++ b/model-00006-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a49bcd95f32ed0db3d10ec7cc5968d2a2dd213da1e4a6d8fc27fbbbe4bc47e71 +size 5362713816 diff --git a/model-00007-of-00076.safetensors b/model-00007-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4897a825aa4418c1aafc72104d3b8691e372d15a --- /dev/null +++ b/model-00007-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f8a1faf2f6d31e8949884e62ae749d2e8c4b0578540c6375a5b2e10187fb569 +size 5362713816 diff --git a/model-00008-of-00076.safetensors b/model-00008-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be6f7e6077edb63dfcdf327f57ebe52cda968200 --- /dev/null +++ b/model-00008-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e157b39397847af0c220f68e6ecde1d99fd681db8a55887e98647f21d06a31f +size 5362715144 diff --git a/model-00009-of-00076.safetensors b/model-00009-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..231a05a887a51df7ed5d0c3002fb5b1d41012738 --- /dev/null +++ b/model-00009-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1e1b5087e555851b2309883a059b40ba660b8e8fb6a418f0939df152c2befb +size 5362715432 diff --git a/model-00010-of-00076.safetensors b/model-00010-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56adbcb3f87e0d6bdc49939916824df5c6b1cc00 --- /dev/null +++ b/model-00010-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1fdbf5319f5c8c0ea30a2efdcea4795857e977ac297ee32d6b5d3757adbbc98 +size 5362715432 diff --git a/model-00011-of-00076.safetensors b/model-00011-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7f1c4f5fd2b3f8adee454134f9a10b68125cbbe --- /dev/null +++ b/model-00011-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac7784feb4019ff13f6f60000365aca8b42216ffaf3fcb2bd15fa2221662e648 +size 5362715432 diff --git a/model-00012-of-00076.safetensors b/model-00012-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b0783f5d45948ce7d18be6da343de2c49f94cdd --- /dev/null +++ b/model-00012-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c1194b561e892c9bc098440d0dff6606f2be0254a27dfde18a761f5a551cb57 +size 5362715432 diff --git a/model-00013-of-00076.safetensors b/model-00013-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac02482b319a2811244045d8832b16d846633e75 --- /dev/null +++ b/model-00013-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf9e0b38c45a7b9544e4375926eef734bca8b0cb7c489565ca2333c8493977bc +size 5368859320 diff --git a/model-00014-of-00076.safetensors b/model-00014-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5479d3b41a7f0bb80a37aeac71a2697ee863d37 --- /dev/null +++ b/model-00014-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff7a47582aeb84db0329cd918c78047028c35b6460b3d8edbb2e1aa29cccdc80 +size 5356470896 diff --git a/model-00015-of-00076.safetensors b/model-00015-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77112b847374f8880df063adbbb6501a917ba257 --- /dev/null +++ b/model-00015-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:765ed1987eb01eb0e308c982c52985918b8e29f430cf3199cc4675ca1c9dc37f +size 5367674528 diff --git a/model-00016-of-00076.safetensors b/model-00016-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9d20829ca56be1b40bedd1f27f426cf8b4aff7c --- /dev/null +++ b/model-00016-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c14f17bcf6c62cda9c2acd7d2dcc453bfb06e8151888c21eff1a15d39a9faf6d +size 5366379680 diff --git a/model-00017-of-00076.safetensors b/model-00017-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1a3273f0f71af40064d6dffc3d949bbde95af6b --- /dev/null +++ b/model-00017-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade7714806556ed1250f38bbdbc1101cdb3398b45c82778e0d18f7f92cb9b36a +size 5368856712 diff --git a/model-00018-of-00076.safetensors b/model-00018-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ea71d1e9a753de81a43b56485dd54a933abceac --- /dev/null +++ b/model-00018-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b75c8a14e676558434f89230178c0b40244ae9698eff7e847f922da8d6f13a80 +size 5356568600 diff --git a/model-00019-of-00076.safetensors b/model-00019-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0014e461bb4e6ce1ce8b5e7c4ddfb555f3f48fb7 --- /dev/null +++ b/model-00019-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d3cf8e26ec03c8191ad641dfe6ae16812e434d7ed51c5751687cc2ad3080d0e +size 5368856728 diff --git a/model-00020-of-00076.safetensors b/model-00020-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..652c9b8bf93d81f7a2c1a548f9d5d60d4a7bd84c --- /dev/null +++ b/model-00020-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a06dac3bf746d1b0ba890a68e72c613a3cb076068841cee8f4e8b9dac44a46e +size 5356568600 diff --git a/model-00021-of-00076.safetensors b/model-00021-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5326d11d26e55013eb4ff44fafc67b4f47d872e2 --- /dev/null +++ b/model-00021-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94df641d686eadab356110218e69571c8f22813b0f71118a049ebdefce303ac2 +size 5368856728 diff --git a/model-00022-of-00076.safetensors b/model-00022-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36ebbe506a00e437f4a1de820eb2ae5ca297e239 --- /dev/null +++ b/model-00022-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b3fea9d60d692a2bd38426bf8fb90b769b836a1a846c46c97b369637e019c9 +size 5356568600 diff --git a/model-00023-of-00076.safetensors b/model-00023-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4957d5cb4a43d572f9818c2171f46c010a15f7eb --- /dev/null +++ b/model-00023-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b19b0b0ce7a233006cb6b896c4ce3a2f721113e143794eee2454fcd05477961 +size 5368856728 diff --git a/model-00024-of-00076.safetensors b/model-00024-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fcb0e3728116f18c808cae050a63bb5a96e5529 --- /dev/null +++ b/model-00024-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf773480a1d4638f8098e8bc3f47c5eade4aef2c5f24c928f52ab67ad270290 +size 5356568600 diff --git a/model-00025-of-00076.safetensors b/model-00025-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..657365905fd3d826bd1399ae1711896c15a1c6ee --- /dev/null +++ b/model-00025-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0307b2efa9b2086555fee2c9355f1d37c09f04699c4f774a51a0345c371f82ae +size 5368856728 diff --git a/model-00026-of-00076.safetensors b/model-00026-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e11eed4aa713c2c8faa25f5a99570226787cd2af --- /dev/null +++ b/model-00026-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e6231d0db0663c90be2b0b6693b5cb499900e34e8f829b08c05a4b666b41579 +size 5356568600 diff --git a/model-00027-of-00076.safetensors b/model-00027-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09babaaf33c2374567dc67e52503f57433a4ce13 --- /dev/null +++ b/model-00027-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac0df36d2e307cce9d461c594722eba2f46a19f24fd3c8dd59523cac389c85f +size 5368856728 diff --git a/model-00028-of-00076.safetensors b/model-00028-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a84ca13870a05aa9e6bc1f669a24a56e40427fe2 --- /dev/null +++ b/model-00028-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216acd17044e6d3da142ee2d560cf11c7e46b582b0c91baf402d0d6e277e909e +size 5356568600 diff --git a/model-00029-of-00076.safetensors b/model-00029-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c005cef64e8e0612da273008e2ddb017918436b --- /dev/null +++ b/model-00029-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2259be935a9e8e7732a90bcb53c2064ab05e42be611d246a5be30efbb7392725 +size 5368856736 diff --git a/model-00030-of-00076.safetensors b/model-00030-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e4290566f32d14b6567b5843f0f760a5a376ea6 --- /dev/null +++ b/model-00030-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2e5e8deac03ddb66e3fbc3feb2796da710d81703d9a2dcae1ebbbc6acb61e1f +size 5356568624 diff --git a/model-00031-of-00076.safetensors b/model-00031-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..685994b5162a986f29feba1a71ecbc6f65ddc8bb --- /dev/null +++ b/model-00031-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589b2668d458c8c9d2c07ac3235da199e560613a2218522db6169e2b2672af86 +size 5368856752 diff --git a/model-00032-of-00076.safetensors b/model-00032-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2aaacfe1f562e62a1a10eea1cfcf306d553855b --- /dev/null +++ b/model-00032-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91568559fe6de526fff2613e340a48945c8c72225c286ba63ca399bcfd283765 +size 5356568624 diff --git a/model-00033-of-00076.safetensors b/model-00033-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f2bab68e1d67860928ecefd802e92793d3407bf --- /dev/null +++ b/model-00033-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233f7746641c895661d3d81d25eabaea200f69ed63b0dbd254418430096052ac +size 5368856752 diff --git a/model-00034-of-00076.safetensors b/model-00034-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6266fdcfc1b477b737a372fbb9b1ac0a52be67c0 --- /dev/null +++ b/model-00034-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3459522256bdf4f13acfc381a7d3f5a7f1787f06943c870c20e1ea723709708 +size 5356568624 diff --git a/model-00035-of-00076.safetensors b/model-00035-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfe426f6271ec73963c4c119a9ab3bf2665b8670 --- /dev/null +++ b/model-00035-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:525a9afbc52bbf13fe23bb0d6ef5ed06d0dc44c324775b7b88aa3d29cd25c2a6 +size 5368856752 diff --git a/model-00036-of-00076.safetensors b/model-00036-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9906e70f0604baab1c32c86b6d006b2d3a95dcb4 --- /dev/null +++ b/model-00036-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8593a90dcc457f392be616c184f89698c331967017a0c5c929cb9dcf14817211 +size 5356568624 diff --git a/model-00037-of-00076.safetensors b/model-00037-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85ac145d9999554e3a4109d6828060e879e6949c --- /dev/null +++ b/model-00037-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ce7aecc9f4b28c5f3e1cfcf585dae5f7c47a9ca3cd0e0e34640437a00b045f +size 5368856752 diff --git a/model-00038-of-00076.safetensors b/model-00038-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25f5310166f69c42c7a81104c91775dd211bcacb --- /dev/null +++ b/model-00038-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3951004bad0f36dc24ab7e54b38cf44b3869a5613bf61f6f9a3badabb8f9236 +size 5356568624 diff --git a/model-00039-of-00076.safetensors b/model-00039-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..482c198afc495777e9587d90b536e0045457a51a --- /dev/null +++ b/model-00039-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2c9db6c9ff59673047bc8e7a20e4c18d2f4d926b10169b936154af91da6fe9 +size 5368856752 diff --git a/model-00040-of-00076.safetensors b/model-00040-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2711496eca91ca1bc4e13498d56e28a57edfbe87 --- /dev/null +++ b/model-00040-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c37da3518ab4cbcfd5757bd815b3e908441c69e65f6cc177f5dcb9946bc393a +size 5356568624 diff --git a/model-00041-of-00076.safetensors b/model-00041-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bec9371af65a368b8850370765f1fca49c8b5cc3 --- /dev/null +++ b/model-00041-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7397a584b016ffc46120481fb864fa3c882b7f0bef8711cbc3c0e9c7e7f921 +size 5368856752 diff --git a/model-00042-of-00076.safetensors b/model-00042-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8fcaaaf1c7ecfd4ff59d88aacb12e4d23839176e --- /dev/null +++ b/model-00042-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7848d34b3af5defceeb87a1c77ab21ddaccebd4db071148fe5cee4cf3b086c7b +size 5356568624 diff --git a/model-00043-of-00076.safetensors b/model-00043-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d47d29e591d323b51ae8bd75d9ab248a6ea5161c --- /dev/null +++ b/model-00043-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ecf7c5f6d3c4f353508036455df4f2abf286505902dc67c33fe4affee5dd07a +size 5368856752 diff --git a/model-00044-of-00076.safetensors b/model-00044-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b3de825e5554a16a5f6e96b11c96aa787774974 --- /dev/null +++ b/model-00044-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e18e5b3098a8380cbc43fc14d764d035eee5dc987a00ce23a0f6bf4cbf36936f +size 5356568624 diff --git a/model-00045-of-00076.safetensors b/model-00045-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c4bcf34b3ac3a9d2d02c9272a35f37ece94ef21 --- /dev/null +++ b/model-00045-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b9cb254a1e1eb1b192d1b10b3410bde1e05187cd02cbf502caa358b0b7286f3 +size 5368856752 diff --git a/model-00046-of-00076.safetensors b/model-00046-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bd7b7f4c78c35c40b5ceead0c8ce4a5b56cf6c8 --- /dev/null +++ b/model-00046-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445f7e4ee11539b998415705c161b16a65438ab4fa55edead111f3774a5c7895 +size 5356568624 diff --git a/model-00047-of-00076.safetensors b/model-00047-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff110e801a5dfb5a57ec809d193413235a7a15eb --- /dev/null +++ b/model-00047-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1d44e3f80307fccbe11a058e04aeb02490e3bfe185effeabd39fb35bcd5df9f +size 5368856752 diff --git a/model-00048-of-00076.safetensors b/model-00048-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb933edc4ba0cdb6e50779019c54a59a07e7179d --- /dev/null +++ b/model-00048-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f9311282e31e6b788b871dfbcdc590c08289253a4b003a8fd8490cb5b58832 +size 5356568624 diff --git a/model-00049-of-00076.safetensors b/model-00049-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58604943458e74380698fd06c8dc138ac1844ac9 --- /dev/null +++ b/model-00049-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:479a0b0acc2914871c983410a5e8a82a5ab25925e963df5e59ecb941353ccdcd +size 5368856752 diff --git a/model-00050-of-00076.safetensors b/model-00050-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c524e8a51cf9f09c62c46a741a6d3a19fe368664 --- /dev/null +++ b/model-00050-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be1e9b2fdde7d2f1e6838f679c07975bd5b2532cb483b046c36b9c8fac37922 +size 5363107616 diff --git a/model-00051-of-00076.safetensors b/model-00051-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90f94ae69b2f6ff0f77c99df884a5851c29c3125 --- /dev/null +++ b/model-00051-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:797a8844e2e54b8c67e43f5c95b630b6fb3abd08bf1ebfae825c8041e1192f04 +size 5362715368 diff --git a/model-00052-of-00076.safetensors b/model-00052-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..737251432437cee8f34ce80a4b5fda5b68aacfed --- /dev/null +++ b/model-00052-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:494239e85cfec11a0240f27320227a573f99654fbe88f7bd51381a91f205948c +size 5362715376 diff --git a/model-00053-of-00076.safetensors b/model-00053-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7046cc15ef0d81977d7488c60b46a9fdad2a58f0 --- /dev/null +++ b/model-00053-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c0e2903f6975055e3470ef02b190acbc063c496377174ef6c3cf977e1b7159 +size 5362715376 diff --git a/model-00054-of-00076.safetensors b/model-00054-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04776d976a8ebfe45916fd1d92ebdc89f76f3cb5 --- /dev/null +++ b/model-00054-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2121741fb0f3419a8a922e8c6404e72a34dba36601e99e1d63d8589e8db2b7 +size 5362715376 diff --git a/model-00055-of-00076.safetensors b/model-00055-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..873e78170cadc0178148d4c4f84872715a24f5fa --- /dev/null +++ b/model-00055-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd01b43534bce0bfd0b92ffe04ff0741a5cc143174a07522f36e87a2cae67e1b +size 5362715376 diff --git a/model-00056-of-00076.safetensors b/model-00056-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4af0d4cadd9047bfaf22fff21be06204f8b77b5a --- /dev/null +++ b/model-00056-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df27f6626c256f0defadd02ad72bba46dfe46d6b8efc346c041ac87007b687cd +size 5362715376 diff --git a/model-00057-of-00076.safetensors b/model-00057-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fc0e600d3715e65b1ea00b084c1435f2a14a1fe --- /dev/null +++ b/model-00057-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66731faa5796f8bd5617d3eaf93e81f6d6ce627ef1d6eb0e7a723f724df40fc9 +size 5362715416 diff --git a/model-00058-of-00076.safetensors b/model-00058-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee5f77eb5f75eb5b4b7bac085b932d9b081b6072 --- /dev/null +++ b/model-00058-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dafbc7dca86076337d8a187e947994797df360842e2d75fdfc31dba490174c3 +size 5362715432 diff --git a/model-00059-of-00076.safetensors b/model-00059-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59a4e4d10af3008cc336f2d0fd16bb3622a89cc6 --- /dev/null +++ b/model-00059-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:028b3b46bdbd1faa7b2e82d9a8db315b993168fcd59423818e7808a19d5ebdcb +size 5362715432 diff --git a/model-00060-of-00076.safetensors b/model-00060-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da38d13a85e68c0bfe4811469c49f0a87ba988da --- /dev/null +++ b/model-00060-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82ecf1b240abae7ffedc59528dd211ef00c90e7782115cee1d7fb2358f5b6617 +size 5362715432 diff --git a/model-00061-of-00076.safetensors b/model-00061-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a31b18e0399c5727b08e492a461a6bbd6add89e2 --- /dev/null +++ b/model-00061-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45a49880200f37f0c7c43f1d39163c2192e13635716b7d9d8aa9d666e52959b +size 5362715432 diff --git a/model-00062-of-00076.safetensors b/model-00062-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9d43b0b1cf2b99e6107d4968d6a278f77caf966 --- /dev/null +++ b/model-00062-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbb3bccaea26cf2249775e841debab40ff7af20c638bbf621a727664ab703f12 +size 5362715432 diff --git a/model-00063-of-00076.safetensors b/model-00063-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e36579191172104337bdf3b2de71f54b44c59870 --- /dev/null +++ b/model-00063-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:151f0f6b38218e1276bd96500b0cc04cc9bfaed8603be88957bde5714ec35311 +size 5362715432 diff --git a/model-00064-of-00076.safetensors b/model-00064-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a4e508ad4a653f3bbb1e549aee66a89ededad74 --- /dev/null +++ b/model-00064-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da28aa6b75aae2779b0b97a8eeaa8ca81e0f27fae7d28a79f7ac281ba4a21877 +size 5362715432 diff --git a/model-00065-of-00076.safetensors b/model-00065-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59c3b16baf66b1e18709022cdae68b00f98708e5 --- /dev/null +++ b/model-00065-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dda12884166d86ce214bfcb5c0f24e1e092d25e0c2d4edcce70ee0ea7d41c74e +size 5362715432 diff --git a/model-00066-of-00076.safetensors b/model-00066-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4126cd7dd0714dc9b28aaedbd9e7d51b10f8ced7 --- /dev/null +++ b/model-00066-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edcab54a182b3ae1479d0ec42e7508a2f2edeb583c28208d7ce2d7bf6b465138 +size 5362715432 diff --git a/model-00067-of-00076.safetensors b/model-00067-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6de87a7cc78bc163c8b16c4b14776986ab415b51 --- /dev/null +++ b/model-00067-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b057e57d73698f74dee96c68e393be4782ee42a2e75e5ba1a110c19476273a +size 5361927520 diff --git a/model-00068-of-00076.safetensors b/model-00068-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a244c6b54702df0713a73d3e6d8ff7ecb6f14b2e --- /dev/null +++ b/model-00068-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94e945e92bff10c8386c78ee35354ee009e9e81d9a9f58492ff20dff5337c8d1 +size 5317639560 diff --git a/model-00069-of-00076.safetensors b/model-00069-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7017d826d77e7df27e8abcfb5e6d1e83b9a0f379 --- /dev/null +++ b/model-00069-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7169ef3d90d290f86e9492c6d1a4cb198dddcd619df58c17378a1662cfe23f71 +size 5367674528 diff --git a/model-00070-of-00076.safetensors b/model-00070-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ea6d3f6db6f3f1477a1653538f0cd7545cb7ed1 --- /dev/null +++ b/model-00070-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d808b068d733204f44ce843ce9343af4b65f6104ce1ac51e9145e65972810231 +size 5366379680 diff --git a/model-00071-of-00076.safetensors b/model-00071-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bfb3a8ada297ca78f5860a84f57012bb52e3a3a --- /dev/null +++ b/model-00071-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f9f9055e320cbfa871126ab258ca42883a9f344eb3a41f17d45b31aeafc5252 +size 5368856712 diff --git a/model-00072-of-00076.safetensors b/model-00072-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..edfa7b68b186cd8b00dccf37e7ac0d0ec5807cb6 --- /dev/null +++ b/model-00072-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e06b5e481acac9534e0ab5525df163649aad971b71ca1aaf3fdf67ca3510ff07 +size 5356568600 diff --git a/model-00073-of-00076.safetensors b/model-00073-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a8f9edba5010ed61975955e814a9c75e21860af --- /dev/null +++ b/model-00073-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4c9f5847e60234454ecc4bc28a6fde6802cd176341fe5595d951634253c2ff +size 5368856728 diff --git a/model-00074-of-00076.safetensors b/model-00074-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00442b657f588beb7407644cccf13c0458e45a16 --- /dev/null +++ b/model-00074-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0008c2cfc50a4c1261e7f27c969994b9cdb339e81355e641f233c58cf057b2e8 +size 4693707096 diff --git a/model-00075-of-00076.safetensors b/model-00075-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b57ee18abf3a7a139f6d167b5d6cde7d7c111053 --- /dev/null +++ b/model-00075-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a70692fb5ff521b8c8b5c055f2939e87e32b82eadfc257b3eab9e776d5e3e0c8 +size 5368296872 diff --git a/model-00076-of-00076.safetensors b/model-00076-of-00076.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8c3340a12c1f2b47c4a4ec0ff17e3ef93bec193 --- /dev/null +++ b/model-00076-of-00076.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3013507d68a119122f3e34f6a603a00ec09ae96e705c7bf761bdd1122504cd3e +size 3370177832 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..f8821fc4527e670c4b4893656beb399908c6d215 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee5f6172a9195007aaa7f1b453c8519dcec74b0189695883ae418bb9f58250d3 +size 11022417 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c7edb3c9544185418004c88e219aae5cf34eedfa --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cfe2c099a7702a0921abc315ee039deb51e4a34b4818fc509bd27fa3dc4acc1 +size 20217541 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1723f7d90e3fb497303ec7b18f88cf5d05928f37 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,33 @@ +{ + "backend": "tokenizers", + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|endoftext|>", + "extra_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>" + ], + "is_local": true, + "model_max_length": 202752, + "model_specific_special_tokens": {}, + "pad_token": "<|endoftext|>", + "padding_side": "left", + "remove_space": false, + "tokenizer_class": "TokenizersBackend" +}