main: build = 4190 (494d7062)
main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 for x86_64-linux-gnu
main: quantizing '/mnt/data/models/ubergarm/MiniMax-M2.5-GGUF/MiniMax-M2.5-256x4.9B-BF16-00001-of-00010.gguf' to '/mnt/data/models/ubergarm/MiniMax-M2.5-GGUF/MiniMax-M2.5-Q8_0.gguf' as Q8_0 using 128 threads
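[editor's note] For reference, a minimal sketch of how a run like this could be driven from Python. The input/output paths, the Q8_0 target type, and the 128-thread count are taken from the log line above; the binary name and its presence on PATH are assumptions (the quantize tool is built as llama-quantize in recent llama.cpp trees), and llama-quantize takes its arguments positionally:

    import subprocess

    src = "/mnt/data/models/ubergarm/MiniMax-M2.5-GGUF/MiniMax-M2.5-256x4.9B-BF16-00001-of-00010.gguf"
    dst = "/mnt/data/models/ubergarm/MiniMax-M2.5-GGUF/MiniMax-M2.5-Q8_0.gguf"

    # Positional arguments: input GGUF, output GGUF, quantization type, thread count.
    subprocess.run(["llama-quantize", src, dst, "Q8_0", "128"], check=True)

Pointing it at the first shard of a split GGUF is enough; the loader picks up the remaining shards, as the "additional 9 GGUFs metadata loaded" line below shows.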
llama_model_loader: additional 9 GGUFs metadata loaded.
llama_model_loader: loaded meta data with 40 key-value pairs and 809 tensors from /mnt/data/models/ubergarm/MiniMax-M2.5-GGUF/MiniMax-M2.5-256x4.9B-BF16-00001-of-00010.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = minimax-m2
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
llama_model_loader: - kv 5: general.name str = MiniMax M2.5
llama_model_loader: - kv 6: general.size_label str = 256x4.9B
llama_model_loader: - kv 7: general.license str = other
llama_model_loader: - kv 8: general.license.name str = modified-mit
llama_model_loader: - kv 9: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
llama_model_loader: - kv 10: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 11: minimax-m2.block_count u32 = 62
llama_model_loader: - kv 12: minimax-m2.context_length u32 = 196608
llama_model_loader: - kv 13: minimax-m2.embedding_length u32 = 3072
llama_model_loader: - kv 14: minimax-m2.feed_forward_length u32 = 1536
llama_model_loader: - kv 15: minimax-m2.attention.head_count u32 = 48
llama_model_loader: - kv 16: minimax-m2.attention.head_count_kv u32 = 8
llama_model_loader: - kv 17: minimax-m2.rope.freq_base f32 = 5000000.000000
llama_model_loader: - kv 18: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 19: minimax-m2.expert_count u32 = 256
llama_model_loader: - kv 20: minimax-m2.expert_used_count u32 = 8
llama_model_loader: - kv 21: minimax-m2.expert_gating_func u32 = 2
llama_model_loader: - kv 22: minimax-m2.attention.key_length u32 = 128
llama_model_loader: - kv 23: minimax-m2.attention.value_length u32 = 128
llama_model_loader: - kv 24: general.file_type u32 = 32
llama_model_loader: - kv 25: minimax-m2.expert_feed_forward_length u32 = 1536
llama_model_loader: - kv 26: minimax-m2.rope.dimension_count u32 = 64
llama_model_loader: - kv 27: general.quantization_version u32 = 2
llama_model_loader: - kv 28: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 29: tokenizer.ggml.pre str = minimax-m2
llama_model_loader: - kv 30: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
llama_model_loader: - kv 31: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 32: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
llama_model_loader: - kv 33: tokenizer.ggml.bos_token_id u32 = 200034
llama_model_loader: - kv 34: tokenizer.ggml.eos_token_id u32 = 200020
llama_model_loader: - kv 35: tokenizer.ggml.unknown_token_id u32 = 200021
llama_model_loader: - kv 36: tokenizer.chat_template str = {# ----------‑‑‑ special token ...
llama_model_loader: - kv 37: split.no u16 = 0
llama_model_loader: - kv 38: split.count u16 = 10
llama_model_loader: - kv 39: split.tensors.count i32 = 809
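[editor's note] As a quick sanity check on what these hyperparameters imply at run time, the sketch below estimates the KV-cache footprint from the values dumped above (62 blocks, 8 KV heads, key/value length 128, 196608-token trained context). It assumes an unquantized f16 KV cache, the llama.cpp default; actual usage depends on the context size you actually load with:

    # Values copied from the metadata dump above.
    n_layer     = 62
    n_head_kv   = 8
    head_dim    = 128        # key_length == value_length == 128
    n_ctx_train = 196608
    bytes_f16   = 2

    # K and V each store n_head_kv * head_dim values per layer per token.
    kv_bytes_per_token = 2 * n_layer * n_head_kv * head_dim * bytes_f16
    print(kv_bytes_per_token / 1024)                    # 248 KiB per token
    print(kv_bytes_per_token * n_ctx_train / 1024**3)   # ~46.5 GiB at the full trained context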
llama_model_loader: - type f32: 373 tensors
llama_model_loader: - type bf16: 436 tensors
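[editor's note] The two type counts account for every tensor in the file. A small consistency check against the metadata above, assuming the per-block set of 13 tensors visible in the listing that follows plus three non-block tensors (the token embedding, and presumably a final norm and an output head, which this truncated dump does not reach):

    f32_tensors  = 373
    bf16_tensors = 436
    assert f32_tensors + bf16_tensors == 809   # matches split.tensors.count

    n_blocks = 62
    # Per block: 6 f32 tensors (4 norms, ffn_gate_inp, exp_probs_b.bias)
    #          + 7 bf16 tensors (q/k/v/output projections, gate/down/up expert mats).
    print(n_blocks * 13)      # 806; the remaining 3 tensors sit outside the blocks
    print(n_blocks * 6 + 1)   # 373 f32, the +1 assumed to be the final norm
    print(n_blocks * 7 + 2)   # 436 bf16, the +2 assumed to be token_embd and an output head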
[ 1/ 809] token_embd.weight - [ 3072, 200064, 1, 1], type = bf16, converting to q8_0 .. size = 1172.25 MiB -> 622.76 MiB |
|
|
[ 2/ 809] blk.0.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 3/ 809] blk.0.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 4/ 809] blk.0.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 5/ 809] blk.0.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 6/ 809] blk.0.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 7/ 809] blk.0.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 8/ 809] blk.0.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 9/ 809] blk.0.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 10/ 809] blk.0.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 11/ 809] blk.0.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 12/ 809] blk.0.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 13/ 809] blk.0.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 14/ 809] blk.0.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
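[editor's note] The "size = X MiB -> Y MiB" figures follow directly from the Q8_0 block layout: each block of 32 weights is stored as 32 int8 quants plus one f16 scale, i.e. 34 bytes per 32 weights (8.5 bits/weight) versus 64 bytes in bf16. A quick check against the token embedding and the first expert tensor above:

    def mib(n_bytes):
        return n_bytes / 1024**2

    def q8_0_bytes(n_elements):
        # QK8_0 = 32 weights per block; each block holds one f16 scale + 32 int8 quants.
        return n_elements // 32 * 34

    ffn_elems  = 3072 * 1536 * 256             # blk.0.ffn_gate_exps
    embd_elems = 3072 * 200064                 # token_embd

    print(mib(ffn_elems * 2),  mib(q8_0_bytes(ffn_elems)))    # 2304.0 -> 1224.0 MiB
    print(mib(embd_elems * 2), mib(q8_0_bytes(embd_elems)))   # 1172.25 -> ~622.76 MiB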
[ 15/ 809] blk.1.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 16/ 809] blk.1.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 17/ 809] blk.1.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 18/ 809] blk.1.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 19/ 809] blk.1.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 20/ 809] blk.1.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 21/ 809] blk.1.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 22/ 809] blk.1.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 23/ 809] blk.1.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 24/ 809] blk.1.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 25/ 809] blk.1.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 26/ 809] blk.1.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 27/ 809] blk.1.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 28/ 809] blk.2.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 29/ 809] blk.2.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 30/ 809] blk.2.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 31/ 809] blk.2.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 32/ 809] blk.2.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 33/ 809] blk.2.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 34/ 809] blk.2.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 35/ 809] blk.2.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 36/ 809] blk.2.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 37/ 809] blk.2.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 38/ 809] blk.2.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 39/ 809] blk.2.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 40/ 809] blk.2.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 41/ 809] blk.3.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 42/ 809] blk.3.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 43/ 809] blk.3.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 44/ 809] blk.3.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 45/ 809] blk.3.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 46/ 809] blk.3.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 47/ 809] blk.3.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 48/ 809] blk.3.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 49/ 809] blk.3.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 50/ 809] blk.3.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 51/ 809] blk.3.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 52/ 809] blk.3.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 53/ 809] blk.3.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 54/ 809] blk.4.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 55/ 809] blk.4.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 56/ 809] blk.4.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 57/ 809] blk.4.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 58/ 809] blk.4.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 59/ 809] blk.4.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 60/ 809] blk.4.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 61/ 809] blk.4.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 62/ 809] blk.4.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 63/ 809] blk.4.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 64/ 809] blk.4.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 65/ 809] blk.4.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 66/ 809] blk.4.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 67/ 809] blk.5.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 68/ 809] blk.5.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 69/ 809] blk.5.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 70/ 809] blk.5.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 71/ 809] blk.5.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 72/ 809] blk.5.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 73/ 809] blk.5.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 74/ 809] blk.5.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 75/ 809] blk.5.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 76/ 809] blk.5.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 77/ 809] blk.5.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 78/ 809] blk.5.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 79/ 809] blk.5.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 80/ 809] blk.6.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 81/ 809] blk.6.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 82/ 809] blk.6.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 83/ 809] blk.6.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 84/ 809] blk.6.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 85/ 809] blk.6.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 86/ 809] blk.6.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 87/ 809] blk.6.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 88/ 809] blk.6.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 89/ 809] blk.6.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 90/ 809] blk.6.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 91/ 809] blk.6.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 92/ 809] blk.6.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 93/ 809] blk.7.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 94/ 809] blk.7.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 95/ 809] blk.7.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 96/ 809] blk.7.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 97/ 809] blk.7.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 98/ 809] blk.7.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 99/ 809] blk.7.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 100/ 809] blk.7.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 101/ 809] blk.7.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 102/ 809] blk.7.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 103/ 809] blk.7.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 104/ 809] blk.7.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 105/ 809] blk.7.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 106/ 809] blk.8.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 107/ 809] blk.8.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 108/ 809] blk.8.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 109/ 809] blk.8.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 110/ 809] blk.8.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 111/ 809] blk.8.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 112/ 809] blk.8.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 113/ 809] blk.8.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 114/ 809] blk.8.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 115/ 809] blk.8.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 116/ 809] blk.8.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 117/ 809] blk.8.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 118/ 809] blk.8.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 119/ 809] blk.9.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 120/ 809] blk.9.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 121/ 809] blk.9.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 122/ 809] blk.9.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 123/ 809] blk.9.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 124/ 809] blk.9.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 125/ 809] blk.9.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 126/ 809] blk.9.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 127/ 809] blk.9.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 128/ 809] blk.9.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 129/ 809] blk.9.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 130/ 809] blk.9.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 131/ 809] blk.9.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 132/ 809] blk.10.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 133/ 809] blk.10.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 134/ 809] blk.10.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 135/ 809] blk.10.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 136/ 809] blk.10.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 137/ 809] blk.10.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 138/ 809] blk.10.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 139/ 809] blk.10.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 140/ 809] blk.10.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 141/ 809] blk.10.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 142/ 809] blk.10.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 143/ 809] blk.10.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 144/ 809] blk.10.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 145/ 809] blk.11.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 146/ 809] blk.11.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 147/ 809] blk.11.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 148/ 809] blk.11.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 149/ 809] blk.11.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 150/ 809] blk.11.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 151/ 809] blk.11.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 152/ 809] blk.11.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 153/ 809] blk.11.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 154/ 809] blk.11.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 155/ 809] blk.11.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 156/ 809] blk.11.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 157/ 809] blk.11.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 158/ 809] blk.12.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 159/ 809] blk.12.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 160/ 809] blk.12.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 161/ 809] blk.12.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 162/ 809] blk.12.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 163/ 809] blk.12.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 164/ 809] blk.12.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 165/ 809] blk.12.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 166/ 809] blk.12.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 167/ 809] blk.12.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 168/ 809] blk.12.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 169/ 809] blk.12.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 170/ 809] blk.12.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 171/ 809] blk.13.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 172/ 809] blk.13.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 173/ 809] blk.13.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 174/ 809] blk.13.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 175/ 809] blk.13.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 176/ 809] blk.13.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 177/ 809] blk.13.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 178/ 809] blk.13.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 179/ 809] blk.13.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 180/ 809] blk.13.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 181/ 809] blk.13.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 182/ 809] blk.13.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 183/ 809] blk.13.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 184/ 809] blk.14.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 185/ 809] blk.14.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 186/ 809] blk.14.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 187/ 809] blk.14.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 188/ 809] blk.14.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 189/ 809] blk.14.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 190/ 809] blk.14.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 191/ 809] blk.14.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 192/ 809] blk.14.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 193/ 809] blk.14.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 194/ 809] blk.14.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 195/ 809] blk.14.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 196/ 809] blk.14.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 197/ 809] blk.15.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 198/ 809] blk.15.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 199/ 809] blk.15.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 200/ 809] blk.15.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 201/ 809] blk.15.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 202/ 809] blk.15.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 203/ 809] blk.15.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 204/ 809] blk.15.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 205/ 809] blk.15.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 206/ 809] blk.15.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 207/ 809] blk.15.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 208/ 809] blk.15.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 209/ 809] blk.15.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 210/ 809] blk.16.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 211/ 809] blk.16.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 212/ 809] blk.16.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 213/ 809] blk.16.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 214/ 809] blk.16.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 215/ 809] blk.16.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 216/ 809] blk.16.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 217/ 809] blk.16.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 218/ 809] blk.16.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 219/ 809] blk.16.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 220/ 809] blk.16.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 221/ 809] blk.16.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 222/ 809] blk.16.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 223/ 809] blk.17.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 224/ 809] blk.17.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 225/ 809] blk.17.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 226/ 809] blk.17.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 227/ 809] blk.17.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 228/ 809] blk.17.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 229/ 809] blk.17.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 230/ 809] blk.17.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 231/ 809] blk.17.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 232/ 809] blk.17.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 233/ 809] blk.17.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 234/ 809] blk.17.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 235/ 809] blk.17.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 236/ 809] blk.18.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 237/ 809] blk.18.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 238/ 809] blk.18.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 239/ 809] blk.18.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 240/ 809] blk.18.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 241/ 809] blk.18.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 242/ 809] blk.18.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 243/ 809] blk.18.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 244/ 809] blk.18.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 245/ 809] blk.18.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 246/ 809] blk.18.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 247/ 809] blk.18.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 248/ 809] blk.18.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 249/ 809] blk.19.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 250/ 809] blk.19.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 251/ 809] blk.19.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 252/ 809] blk.19.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 253/ 809] blk.19.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 254/ 809] blk.19.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 255/ 809] blk.19.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 256/ 809] blk.19.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 257/ 809] blk.19.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 258/ 809] blk.19.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 259/ 809] blk.19.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 260/ 809] blk.19.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 261/ 809] blk.19.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 262/ 809] blk.20.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 263/ 809] blk.20.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 264/ 809] blk.20.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 265/ 809] blk.20.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 266/ 809] blk.20.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 267/ 809] blk.20.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 268/ 809] blk.20.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 269/ 809] blk.20.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 270/ 809] blk.20.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 271/ 809] blk.20.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 272/ 809] blk.20.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 273/ 809] blk.20.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 274/ 809] blk.20.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 275/ 809] blk.21.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 276/ 809] blk.21.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 277/ 809] blk.21.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 278/ 809] blk.21.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 279/ 809] blk.21.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 280/ 809] blk.21.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 281/ 809] blk.21.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 282/ 809] blk.21.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 283/ 809] blk.21.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 284/ 809] blk.21.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 285/ 809] blk.21.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 286/ 809] blk.21.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 287/ 809] blk.21.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 288/ 809] blk.22.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 289/ 809] blk.22.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 290/ 809] blk.22.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 291/ 809] blk.22.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 292/ 809] blk.22.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 293/ 809] blk.22.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 294/ 809] blk.22.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 295/ 809] blk.22.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 296/ 809] blk.22.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 297/ 809] blk.22.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 298/ 809] blk.22.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 299/ 809] blk.22.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 300/ 809] blk.22.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 301/ 809] blk.23.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 302/ 809] blk.23.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 303/ 809] blk.23.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 304/ 809] blk.23.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 305/ 809] blk.23.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 306/ 809] blk.23.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 307/ 809] blk.23.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 308/ 809] blk.23.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 309/ 809] blk.23.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 310/ 809] blk.23.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 311/ 809] blk.23.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 312/ 809] blk.23.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 313/ 809] blk.23.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 314/ 809] blk.24.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 315/ 809] blk.24.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 316/ 809] blk.24.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 317/ 809] blk.24.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 318/ 809] blk.24.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 319/ 809] blk.24.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 320/ 809] blk.24.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 321/ 809] blk.24.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 322/ 809] blk.24.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 323/ 809] blk.24.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 324/ 809] blk.24.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 325/ 809] blk.24.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 326/ 809] blk.24.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 327/ 809] blk.25.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 328/ 809] blk.25.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 329/ 809] blk.25.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 330/ 809] blk.25.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 331/ 809] blk.25.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 332/ 809] blk.25.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 333/ 809] blk.25.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 334/ 809] blk.25.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 335/ 809] blk.25.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 336/ 809] blk.25.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 337/ 809] blk.25.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 338/ 809] blk.25.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 339/ 809] blk.25.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 340/ 809] blk.26.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 341/ 809] blk.26.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 342/ 809] blk.26.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 343/ 809] blk.26.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 344/ 809] blk.26.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 345/ 809] blk.26.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 346/ 809] blk.26.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 347/ 809] blk.26.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 348/ 809] blk.26.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 349/ 809] blk.26.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 350/ 809] blk.26.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 351/ 809] blk.26.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 352/ 809] blk.26.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 353/ 809] blk.27.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 354/ 809] blk.27.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 355/ 809] blk.27.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 356/ 809] blk.27.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 357/ 809] blk.27.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 358/ 809] blk.27.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 359/ 809] blk.27.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 360/ 809] blk.27.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 361/ 809] blk.27.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 362/ 809] blk.27.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 363/ 809] blk.27.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 364/ 809] blk.27.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 365/ 809] blk.27.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 366/ 809] blk.28.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 367/ 809] blk.28.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 368/ 809] blk.28.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 369/ 809] blk.28.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 370/ 809] blk.28.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 371/ 809] blk.28.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 372/ 809] blk.28.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 373/ 809] blk.28.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 374/ 809] blk.28.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 375/ 809] blk.28.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 376/ 809] blk.28.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 377/ 809] blk.28.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 378/ 809] blk.28.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 379/ 809] blk.29.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 380/ 809] blk.29.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 381/ 809] blk.29.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 382/ 809] blk.29.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 383/ 809] blk.29.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 384/ 809] blk.29.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 385/ 809] blk.29.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 386/ 809] blk.29.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 387/ 809] blk.29.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 388/ 809] blk.29.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 389/ 809] blk.29.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 390/ 809] blk.29.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 391/ 809] blk.29.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 392/ 809] blk.30.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 393/ 809] blk.30.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 394/ 809] blk.30.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 395/ 809] blk.30.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 396/ 809] blk.30.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 397/ 809] blk.30.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 398/ 809] blk.30.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 399/ 809] blk.30.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 400/ 809] blk.30.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 401/ 809] blk.30.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 402/ 809] blk.30.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 403/ 809] blk.30.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 404/ 809] blk.30.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 405/ 809] blk.31.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 406/ 809] blk.31.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 407/ 809] blk.31.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 408/ 809] blk.31.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 409/ 809] blk.31.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 410/ 809] blk.31.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 411/ 809] blk.31.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 412/ 809] blk.31.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 413/ 809] blk.31.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 414/ 809] blk.31.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 415/ 809] blk.31.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 416/ 809] blk.31.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 417/ 809] blk.31.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 418/ 809] blk.32.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 419/ 809] blk.32.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 420/ 809] blk.32.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 421/ 809] blk.32.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 422/ 809] blk.32.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 423/ 809] blk.32.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 424/ 809] blk.32.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 425/ 809] blk.32.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 426/ 809] blk.32.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 427/ 809] blk.32.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 428/ 809] blk.32.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 429/ 809] blk.32.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 430/ 809] blk.32.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 431/ 809] blk.33.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 432/ 809] blk.33.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 433/ 809] blk.33.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 434/ 809] blk.33.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 435/ 809] blk.33.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 436/ 809] blk.33.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 437/ 809] blk.33.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 438/ 809] blk.33.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 439/ 809] blk.33.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 440/ 809] blk.33.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 441/ 809] blk.33.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 442/ 809] blk.33.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 443/ 809] blk.33.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 444/ 809] blk.34.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 445/ 809] blk.34.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 446/ 809] blk.34.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 447/ 809] blk.34.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 448/ 809] blk.34.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 449/ 809] blk.34.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 450/ 809] blk.34.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 451/ 809] blk.34.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 452/ 809] blk.34.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 453/ 809] blk.34.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 454/ 809] blk.34.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 455/ 809] blk.34.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 456/ 809] blk.34.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 457/ 809] blk.35.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 458/ 809] blk.35.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 459/ 809] blk.35.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 460/ 809] blk.35.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 461/ 809] blk.35.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 462/ 809] blk.35.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 463/ 809] blk.35.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 464/ 809] blk.35.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 465/ 809] blk.35.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 466/ 809] blk.35.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 467/ 809] blk.35.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 468/ 809] blk.35.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 469/ 809] blk.35.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 470/ 809] blk.36.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 471/ 809] blk.36.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 472/ 809] blk.36.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 473/ 809] blk.36.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 474/ 809] blk.36.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 475/ 809] blk.36.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 476/ 809] blk.36.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 477/ 809] blk.36.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 478/ 809] blk.36.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 479/ 809] blk.36.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 480/ 809] blk.36.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 481/ 809] blk.36.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 482/ 809] blk.36.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 483/ 809] blk.37.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 484/ 809] blk.37.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 485/ 809] blk.37.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 486/ 809] blk.37.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 487/ 809] blk.37.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 488/ 809] blk.37.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 489/ 809] blk.37.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 490/ 809] blk.37.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 491/ 809] blk.37.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 492/ 809] blk.37.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 493/ 809] blk.37.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 494/ 809] blk.37.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 495/ 809] blk.37.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 496/ 809] blk.38.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 497/ 809] blk.38.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 498/ 809] blk.38.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 499/ 809] blk.38.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 500/ 809] blk.38.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 501/ 809] blk.38.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 502/ 809] blk.38.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 503/ 809] blk.38.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 504/ 809] blk.38.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 505/ 809] blk.38.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 506/ 809] blk.38.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 507/ 809] blk.38.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 508/ 809] blk.38.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 509/ 809] blk.39.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 510/ 809] blk.39.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 511/ 809] blk.39.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 512/ 809] blk.39.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 513/ 809] blk.39.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 514/ 809] blk.39.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 515/ 809] blk.39.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 516/ 809] blk.39.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 517/ 809] blk.39.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 518/ 809] blk.39.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 519/ 809] blk.39.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 520/ 809] blk.39.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 521/ 809] blk.39.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 522/ 809] blk.40.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 523/ 809] blk.40.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 524/ 809] blk.40.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 525/ 809] blk.40.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 526/ 809] blk.40.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 527/ 809] blk.40.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 528/ 809] blk.40.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 529/ 809] blk.40.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 530/ 809] blk.40.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 531/ 809] blk.40.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 532/ 809] blk.40.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 533/ 809] blk.40.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 534/ 809] blk.40.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 535/ 809] blk.41.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 536/ 809] blk.41.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 537/ 809] blk.41.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 538/ 809] blk.41.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 539/ 809] blk.41.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 540/ 809] blk.41.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 541/ 809] blk.41.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 542/ 809] blk.41.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 543/ 809] blk.41.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 544/ 809] blk.41.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 545/ 809] blk.41.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 546/ 809] blk.41.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 547/ 809] blk.41.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 548/ 809] blk.42.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 549/ 809] blk.42.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 550/ 809] blk.42.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 551/ 809] blk.42.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 552/ 809] blk.42.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 553/ 809] blk.42.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 554/ 809] blk.42.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 555/ 809] blk.42.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 556/ 809] blk.42.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 557/ 809] blk.42.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 558/ 809] blk.42.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 559/ 809] blk.42.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 560/ 809] blk.42.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 561/ 809] blk.43.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 562/ 809] blk.43.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 563/ 809] blk.43.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 564/ 809] blk.43.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 565/ 809] blk.43.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 566/ 809] blk.43.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 567/ 809] blk.43.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 568/ 809] blk.43.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 569/ 809] blk.43.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 570/ 809] blk.43.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 571/ 809] blk.43.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 572/ 809] blk.43.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 573/ 809] blk.43.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 574/ 809] blk.44.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 575/ 809] blk.44.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 576/ 809] blk.44.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 577/ 809] blk.44.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 578/ 809] blk.44.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 579/ 809] blk.44.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 580/ 809] blk.44.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 581/ 809] blk.44.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 582/ 809] blk.44.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 583/ 809] blk.44.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 584/ 809] blk.44.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 585/ 809] blk.44.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 586/ 809] blk.44.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 587/ 809] blk.45.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 588/ 809] blk.45.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 589/ 809] blk.45.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 590/ 809] blk.45.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 591/ 809] blk.45.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 592/ 809] blk.45.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 593/ 809] blk.45.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 594/ 809] blk.45.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 595/ 809] blk.45.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 596/ 809] blk.45.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 597/ 809] blk.45.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 598/ 809] blk.45.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 599/ 809] blk.45.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 600/ 809] blk.46.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 601/ 809] blk.46.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 602/ 809] blk.46.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 603/ 809] blk.46.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 604/ 809] blk.46.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 605/ 809] blk.46.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 606/ 809] blk.46.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 607/ 809] blk.46.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 608/ 809] blk.46.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 609/ 809] blk.46.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 610/ 809] blk.46.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 611/ 809] blk.46.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 612/ 809] blk.46.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 613/ 809] blk.47.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 614/ 809] blk.47.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 615/ 809] blk.47.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 616/ 809] blk.47.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 617/ 809] blk.47.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 618/ 809] blk.47.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 619/ 809] blk.47.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 620/ 809] blk.47.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 621/ 809] blk.47.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 622/ 809] blk.47.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 623/ 809] blk.47.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 624/ 809] blk.47.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 625/ 809] blk.47.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 626/ 809] blk.48.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 627/ 809] blk.48.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 628/ 809] blk.48.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 629/ 809] blk.48.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 630/ 809] blk.48.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 631/ 809] blk.48.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 632/ 809] blk.48.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 633/ 809] blk.48.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 634/ 809] blk.48.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 635/ 809] blk.48.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 636/ 809] blk.48.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 637/ 809] blk.48.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 638/ 809] blk.48.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 639/ 809] blk.49.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 640/ 809] blk.49.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 641/ 809] blk.49.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 642/ 809] blk.49.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 643/ 809] blk.49.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 644/ 809] blk.49.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 645/ 809] blk.49.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 646/ 809] blk.49.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 647/ 809] blk.49.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 648/ 809] blk.49.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 649/ 809] blk.49.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 650/ 809] blk.49.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 651/ 809] blk.49.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 652/ 809] blk.50.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 653/ 809] blk.50.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 654/ 809] blk.50.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 655/ 809] blk.50.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 656/ 809] blk.50.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 657/ 809] blk.50.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 658/ 809] blk.50.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 659/ 809] blk.50.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 660/ 809] blk.50.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 661/ 809] blk.50.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 662/ 809] blk.50.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 663/ 809] blk.50.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 664/ 809] blk.50.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 665/ 809] blk.51.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 666/ 809] blk.51.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 667/ 809] blk.51.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 668/ 809] blk.51.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 669/ 809] blk.51.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 670/ 809] blk.51.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 671/ 809] blk.51.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 672/ 809] blk.51.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 673/ 809] blk.51.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 674/ 809] blk.51.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 675/ 809] blk.51.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 676/ 809] blk.51.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 677/ 809] blk.51.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 678/ 809] blk.52.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 679/ 809] blk.52.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 680/ 809] blk.52.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 681/ 809] blk.52.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 682/ 809] blk.52.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 683/ 809] blk.52.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 684/ 809] blk.52.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 685/ 809] blk.52.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 686/ 809] blk.52.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 687/ 809] blk.52.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 688/ 809] blk.52.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 689/ 809] blk.52.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 690/ 809] blk.52.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 691/ 809] blk.53.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 692/ 809] blk.53.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 693/ 809] blk.53.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 694/ 809] blk.53.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 695/ 809] blk.53.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 696/ 809] blk.53.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 697/ 809] blk.53.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 698/ 809] blk.53.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 699/ 809] blk.53.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 700/ 809] blk.53.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 701/ 809] blk.53.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 702/ 809] blk.53.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 703/ 809] blk.53.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 704/ 809] blk.54.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 705/ 809] blk.54.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 706/ 809] blk.54.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 707/ 809] blk.54.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 708/ 809] blk.54.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 709/ 809] blk.54.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 710/ 809] blk.54.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 711/ 809] blk.54.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 712/ 809] blk.54.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 713/ 809] blk.54.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 714/ 809] blk.54.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 715/ 809] blk.54.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 716/ 809] blk.54.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 717/ 809] blk.55.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 718/ 809] blk.55.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 719/ 809] blk.55.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 720/ 809] blk.55.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 721/ 809] blk.55.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 722/ 809] blk.55.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 723/ 809] blk.55.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 724/ 809] blk.55.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 725/ 809] blk.55.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 726/ 809] blk.55.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 727/ 809] blk.55.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 728/ 809] blk.55.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 729/ 809] blk.55.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 730/ 809] blk.56.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 731/ 809] blk.56.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 732/ 809] blk.56.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 733/ 809] blk.56.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 734/ 809] blk.56.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 735/ 809] blk.56.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 736/ 809] blk.56.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 737/ 809] blk.56.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 738/ 809] blk.56.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 739/ 809] blk.56.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 740/ 809] blk.56.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 741/ 809] blk.56.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 742/ 809] blk.56.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 743/ 809] blk.57.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 744/ 809] blk.57.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 745/ 809] blk.57.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 746/ 809] blk.57.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 747/ 809] blk.57.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 748/ 809] blk.57.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 749/ 809] blk.57.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 750/ 809] blk.57.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 751/ 809] blk.57.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 752/ 809] blk.57.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 753/ 809] blk.57.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 754/ 809] blk.57.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 755/ 809] blk.57.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 756/ 809] blk.58.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 757/ 809] blk.58.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 758/ 809] blk.58.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 759/ 809] blk.58.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 760/ 809] blk.58.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 761/ 809] blk.58.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 762/ 809] blk.58.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 763/ 809] blk.58.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 764/ 809] blk.58.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 765/ 809] blk.58.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 766/ 809] blk.58.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 767/ 809] blk.58.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 768/ 809] blk.58.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 769/ 809] blk.59.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 770/ 809] blk.59.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 771/ 809] blk.59.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 772/ 809] blk.59.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 773/ 809] blk.59.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 774/ 809] blk.59.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 775/ 809] blk.59.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 776/ 809] blk.59.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 777/ 809] blk.59.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 778/ 809] blk.59.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 779/ 809] blk.59.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 780/ 809] blk.59.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 781/ 809] blk.59.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 782/ 809] blk.60.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 783/ 809] blk.60.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 784/ 809] blk.60.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 785/ 809] blk.60.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 786/ 809] blk.60.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 787/ 809] blk.60.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 788/ 809] blk.60.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 789/ 809] blk.60.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 790/ 809] blk.60.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 791/ 809] blk.60.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 792/ 809] blk.60.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 793/ 809] blk.60.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 794/ 809] blk.60.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 795/ 809] blk.61.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB |
|
|
[ 796/ 809] blk.61.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB |
|
|
[ 797/ 809] blk.61.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 798/ 809] blk.61.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
|
|
[ 799/ 809] blk.61.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB |
|
|
[ 800/ 809] blk.61.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 801/ 809] blk.61.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 802/ 809] blk.61.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB |
|
|
[ 803/ 809] blk.61.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB |
|
|
[ 804/ 809] blk.61.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB |
|
|
[ 805/ 809] blk.61.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 806/ 809] blk.61.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 807/ 809] blk.61.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB |
|
|
[ 808/ 809] output.weight - [ 3072, 200064, 1, 1], type = bf16, converting to q8_0 .. size = 1172.25 MiB -> 622.76 MiB |
|
|
[ 809/ 809] output_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB |
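The per-tensor sizes in this log follow directly from the Q8_0 storage layout: ggml stores Q8_0 in blocks of 32 weights, each block holding 32 int8 quants plus one fp16 scale (34 bytes per block, i.e. 8.5 bits/weight), while the bf16 source uses 16 bits/weight. A minimal sketch that reproduces the logged numbers from the shapes printed above (element counts copied from the log; the 34-byte block is the standard ggml Q8_0 layout):

```python
# Reproduce the per-tensor sizes reported above for this Q8_0 run.
# Q8_0 (ggml): blocks of 32 weights, each stored as 32 x int8 + one fp16
# scale = 34 bytes per block, i.e. 8.5 bits/weight vs 16 bits/weight for bf16.

MIB = 1024 * 1024

def bf16_mib(n: int) -> float:
    return n * 2 / MIB                # 2 bytes per bf16 weight

def q8_0_mib(n: int) -> float:
    return (n // 32) * 34 / MIB       # 34 bytes per 32-weight block

# Element counts taken from the shapes in the log (identical for every block).
tensors = {
    "blk.*.attn_v.weight":      3072 * 1024,        #    6.00 MiB ->    3.19 MiB
    "blk.*.attn_output.weight": 6144 * 3072,        #   36.00 MiB ->   19.12 MiB
    "blk.*.ffn_up_exps.weight": 3072 * 1536 * 256,  # 2304.00 MiB -> 1224.00 MiB
    "output.weight":            3072 * 200064,      # 1172.25 MiB ->  622.76 MiB
}

for name, n in tensors.items():
    print(f"{name:26} {bf16_mib(n):8.2f} MiB -> {q8_0_mib(n):8.2f} MiB")
```

The small f32 tensors (attn_norm, ffn_norm, attn_q_norm, attn_k_norm, ffn_gate_inp, exp_probs_b) are carried over unchanged, which is why the log prints a plain size for them instead of a conversion arrow.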
|
|
llama_model_quantize_internal: model size = 436285.72 MB
llama_model_quantize_internal: quant size = 231865.49 MB

main: quantize time = 300315.66 ms
main: total time = 300315.66 ms
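As a quick sanity check on the summary, the overall ratio 231865.49 / 436285.72 ≈ 0.5314 sits just above Q8_0's 8.5/16 = 0.53125 because the f32 norm, ffn_gate_inp and exp_probs_b tensors are not quantized, and the elapsed time corresponds to roughly 1.4 GiB of bf16 input processed per second. A small sketch with the values copied from the summary:

```python
# Sanity-check the summary figures printed by llama-quantize above.
model_mb  = 436285.72          # bf16 input size (MB, as reported)
quant_mb  = 231865.49          # Q8_0 output size (MB, as reported)
elapsed_s = 300315.66 / 1e3    # quantize time in seconds

print(f"compression ratio: {quant_mb / model_mb:.4f}")   # ~0.5314 (pure Q8_0 would be 8.5/16 = 0.53125)
print(f"throughput       : {model_mb / 1024 / elapsed_s:.2f} GiB/s of bf16 read")   # ~1.42
```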
|
|
|