MiniMax-M2.5-GGUF / logs / quantize-MiniMax-M2.5-Q8_0.log
main: build = 4190 (494d7062)
main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 for x86_64-linux-gnu
main: quantizing '/mnt/data/models/ubergarm/MiniMax-M2.5-GGUF/MiniMax-M2.5-256x4.9B-BF16-00001-of-00010.gguf' to '/mnt/data/models/ubergarm/MiniMax-M2.5-GGUF/MiniMax-M2.5-Q8_0.gguf' as Q8_0 using 128 threads
llama_model_loader: additional 9 GGUFs metadata loaded.
llama_model_loader: loaded meta data with 40 key-value pairs and 809 tensors from /mnt/data/models/ubergarm/MiniMax-M2.5-GGUF/MiniMax-M2.5-256x4.9B-BF16-00001-of-00010.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = minimax-m2
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
llama_model_loader: - kv 5: general.name str = MiniMax M2.5
llama_model_loader: - kv 6: general.size_label str = 256x4.9B
llama_model_loader: - kv 7: general.license str = other
llama_model_loader: - kv 8: general.license.name str = modified-mit
llama_model_loader: - kv 9: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
llama_model_loader: - kv 10: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 11: minimax-m2.block_count u32 = 62
llama_model_loader: - kv 12: minimax-m2.context_length u32 = 196608
llama_model_loader: - kv 13: minimax-m2.embedding_length u32 = 3072
llama_model_loader: - kv 14: minimax-m2.feed_forward_length u32 = 1536
llama_model_loader: - kv 15: minimax-m2.attention.head_count u32 = 48
llama_model_loader: - kv 16: minimax-m2.attention.head_count_kv u32 = 8
llama_model_loader: - kv 17: minimax-m2.rope.freq_base f32 = 5000000.000000
llama_model_loader: - kv 18: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 19: minimax-m2.expert_count u32 = 256
llama_model_loader: - kv 20: minimax-m2.expert_used_count u32 = 8
llama_model_loader: - kv 21: minimax-m2.expert_gating_func u32 = 2
llama_model_loader: - kv 22: minimax-m2.attention.key_length u32 = 128
llama_model_loader: - kv 23: minimax-m2.attention.value_length u32 = 128
llama_model_loader: - kv 24: general.file_type u32 = 32
llama_model_loader: - kv 25: minimax-m2.expert_feed_forward_length u32 = 1536
llama_model_loader: - kv 26: minimax-m2.rope.dimension_count u32 = 64
llama_model_loader: - kv 27: general.quantization_version u32 = 2
llama_model_loader: - kv 28: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 29: tokenizer.ggml.pre str = minimax-m2
llama_model_loader: - kv 30: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
llama_model_loader: - kv 31: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 32: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
llama_model_loader: - kv 33: tokenizer.ggml.bos_token_id u32 = 200034
llama_model_loader: - kv 34: tokenizer.ggml.eos_token_id u32 = 200020
llama_model_loader: - kv 35: tokenizer.ggml.unknown_token_id u32 = 200021
llama_model_loader: - kv 36: tokenizer.chat_template str = {# ----------‑‑‑ special token ...
llama_model_loader: - kv 37: split.no u16 = 0
llama_model_loader: - kv 38: split.count u16 = 10
llama_model_loader: - kv 39: split.tensors.count i32 = 809
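The `split.no` / `split.count` metadata above corresponds to the shard-numbering scheme visible in the source filename (`...-00001-of-00010.gguf`). A minimal sketch of that naming convention, assuming only the zero-padded `NNNNN-of-NNNNN` pattern seen in this log (the helper name is hypothetical):

```python
def split_shard_name(prefix: str, shard_no: int, shard_count: int) -> str:
    """Build a split-GGUF shard filename in the 00001-of-00010 style
    used by the file referenced in this log (1-based shard numbers)."""
    return f"{prefix}-{shard_no:05d}-of-{shard_count:05d}.gguf"

# First of the 10 BF16 source shards named in the quantize command above:
print(split_shard_name("MiniMax-M2.5-256x4.9B-BF16", 1, 10))
```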
llama_model_loader: - type f32: 373 tensors
llama_model_loader: - type bf16: 436 tensors
[ 1/ 809] token_embd.weight - [ 3072, 200064, 1, 1], type = bf16, converting to q8_0 .. size = 1172.25 MiB -> 622.76 MiB
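The per-tensor size reductions reported above (e.g. 1172.25 MiB -> 622.76 MiB) follow from the Q8_0 storage layout: weights are grouped into blocks of 32, each stored as one fp16 scale plus 32 int8 values, i.e. 34 bytes per 32 weights versus 64 bytes in bf16. A sketch reproducing the first conversion line under that assumption:

```python
def bf16_bytes(n_weights: int) -> int:
    # bf16 stores 2 bytes per weight
    return n_weights * 2

def q8_0_bytes(n_weights: int) -> int:
    # Q8_0: blocks of 32 weights, each block = 2-byte fp16 scale + 32 int8 = 34 bytes
    assert n_weights % 32 == 0
    return (n_weights // 32) * 34

# token_embd.weight is [3072, 200064] per the log line above
n = 3072 * 200064
print(f"{bf16_bytes(n) / 2**20:.2f} MiB -> {q8_0_bytes(n) / 2**20:.2f} MiB")
# matches the logged 1172.25 MiB -> 622.76 MiB
```

The same 34/64 ratio explains the repeated 2304.00 MiB -> 1224.00 MiB lines for the expert FFN tensors.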
[ 2/ 809] blk.0.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 3/ 809] blk.0.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 4/ 809] blk.0.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 5/ 809] blk.0.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 6/ 809] blk.0.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 7/ 809] blk.0.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 8/ 809] blk.0.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 9/ 809] blk.0.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 10/ 809] blk.0.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 11/ 809] blk.0.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 12/ 809] blk.0.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 13/ 809] blk.0.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 14/ 809] blk.0.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 15/ 809] blk.1.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 16/ 809] blk.1.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 17/ 809] blk.1.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 18/ 809] blk.1.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 19/ 809] blk.1.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 20/ 809] blk.1.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 21/ 809] blk.1.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 22/ 809] blk.1.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 23/ 809] blk.1.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 24/ 809] blk.1.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 25/ 809] blk.1.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 26/ 809] blk.1.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 27/ 809] blk.1.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 28/ 809] blk.2.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 29/ 809] blk.2.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 30/ 809] blk.2.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 31/ 809] blk.2.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 32/ 809] blk.2.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 33/ 809] blk.2.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 34/ 809] blk.2.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 35/ 809] blk.2.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 36/ 809] blk.2.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 37/ 809] blk.2.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 38/ 809] blk.2.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 39/ 809] blk.2.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 40/ 809] blk.2.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 41/ 809] blk.3.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 42/ 809] blk.3.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 43/ 809] blk.3.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 44/ 809] blk.3.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 45/ 809] blk.3.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 46/ 809] blk.3.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 47/ 809] blk.3.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 48/ 809] blk.3.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 49/ 809] blk.3.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 50/ 809] blk.3.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 51/ 809] blk.3.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 52/ 809] blk.3.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 53/ 809] blk.3.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 54/ 809] blk.4.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 55/ 809] blk.4.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 56/ 809] blk.4.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 57/ 809] blk.4.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 58/ 809] blk.4.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 59/ 809] blk.4.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 60/ 809] blk.4.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 61/ 809] blk.4.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 62/ 809] blk.4.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 63/ 809] blk.4.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 64/ 809] blk.4.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 65/ 809] blk.4.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 66/ 809] blk.4.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 67/ 809] blk.5.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 68/ 809] blk.5.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 69/ 809] blk.5.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 70/ 809] blk.5.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 71/ 809] blk.5.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 72/ 809] blk.5.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 73/ 809] blk.5.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 74/ 809] blk.5.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 75/ 809] blk.5.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 76/ 809] blk.5.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 77/ 809] blk.5.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 78/ 809] blk.5.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 79/ 809] blk.5.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 80/ 809] blk.6.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 81/ 809] blk.6.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 82/ 809] blk.6.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 83/ 809] blk.6.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 84/ 809] blk.6.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 85/ 809] blk.6.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 86/ 809] blk.6.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 87/ 809] blk.6.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 88/ 809] blk.6.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 89/ 809] blk.6.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 90/ 809] blk.6.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 91/ 809] blk.6.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 92/ 809] blk.6.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 93/ 809] blk.7.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 94/ 809] blk.7.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 95/ 809] blk.7.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 96/ 809] blk.7.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 97/ 809] blk.7.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 98/ 809] blk.7.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 99/ 809] blk.7.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 100/ 809] blk.7.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 101/ 809] blk.7.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 102/ 809] blk.7.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 103/ 809] blk.7.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 104/ 809] blk.7.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 105/ 809] blk.7.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 106/ 809] blk.8.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 107/ 809] blk.8.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 108/ 809] blk.8.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 109/ 809] blk.8.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 110/ 809] blk.8.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 111/ 809] blk.8.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 112/ 809] blk.8.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 113/ 809] blk.8.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 114/ 809] blk.8.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 115/ 809] blk.8.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 116/ 809] blk.8.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 117/ 809] blk.8.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 118/ 809] blk.8.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 119/ 809] blk.9.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 120/ 809] blk.9.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 121/ 809] blk.9.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 122/ 809] blk.9.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 123/ 809] blk.9.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 124/ 809] blk.9.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 125/ 809] blk.9.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 126/ 809] blk.9.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 127/ 809] blk.9.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 128/ 809] blk.9.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 129/ 809] blk.9.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 130/ 809] blk.9.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 131/ 809] blk.9.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 132/ 809] blk.10.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 133/ 809] blk.10.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 134/ 809] blk.10.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 135/ 809] blk.10.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 136/ 809] blk.10.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 137/ 809] blk.10.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 138/ 809] blk.10.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 139/ 809] blk.10.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 140/ 809] blk.10.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 141/ 809] blk.10.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 142/ 809] blk.10.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 143/ 809] blk.10.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 144/ 809] blk.10.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 145/ 809] blk.11.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 146/ 809] blk.11.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 147/ 809] blk.11.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 148/ 809] blk.11.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 149/ 809] blk.11.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 150/ 809] blk.11.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 151/ 809] blk.11.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 152/ 809] blk.11.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 153/ 809] blk.11.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 154/ 809] blk.11.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 155/ 809] blk.11.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 156/ 809] blk.11.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 157/ 809] blk.11.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 158/ 809] blk.12.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 159/ 809] blk.12.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 160/ 809] blk.12.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 161/ 809] blk.12.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 162/ 809] blk.12.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 163/ 809] blk.12.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 164/ 809] blk.12.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 165/ 809] blk.12.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 166/ 809] blk.12.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 167/ 809] blk.12.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 168/ 809] blk.12.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 169/ 809] blk.12.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 170/ 809] blk.12.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 171/ 809] blk.13.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 172/ 809] blk.13.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 173/ 809] blk.13.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 174/ 809] blk.13.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 175/ 809] blk.13.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 176/ 809] blk.13.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 177/ 809] blk.13.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 178/ 809] blk.13.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 179/ 809] blk.13.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 180/ 809] blk.13.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 181/ 809] blk.13.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 182/ 809] blk.13.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 183/ 809] blk.13.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 184/ 809] blk.14.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 185/ 809] blk.14.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 186/ 809] blk.14.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 187/ 809] blk.14.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 188/ 809] blk.14.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 189/ 809] blk.14.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 190/ 809] blk.14.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 191/ 809] blk.14.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 192/ 809] blk.14.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 193/ 809] blk.14.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 194/ 809] blk.14.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 195/ 809] blk.14.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 196/ 809] blk.14.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 197/ 809] blk.15.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 198/ 809] blk.15.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 199/ 809] blk.15.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 200/ 809] blk.15.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 201/ 809] blk.15.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 202/ 809] blk.15.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 203/ 809] blk.15.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 204/ 809] blk.15.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 205/ 809] blk.15.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 206/ 809] blk.15.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 207/ 809] blk.15.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 208/ 809] blk.15.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 209/ 809] blk.15.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 210/ 809] blk.16.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 211/ 809] blk.16.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 212/ 809] blk.16.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 213/ 809] blk.16.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 214/ 809] blk.16.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 215/ 809] blk.16.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 216/ 809] blk.16.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 217/ 809] blk.16.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 218/ 809] blk.16.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 219/ 809] blk.16.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 220/ 809] blk.16.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 221/ 809] blk.16.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 222/ 809] blk.16.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 223/ 809] blk.17.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 224/ 809] blk.17.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 225/ 809] blk.17.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 226/ 809] blk.17.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 227/ 809] blk.17.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 228/ 809] blk.17.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 229/ 809] blk.17.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 230/ 809] blk.17.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 231/ 809] blk.17.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 232/ 809] blk.17.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 233/ 809] blk.17.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 234/ 809] blk.17.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 235/ 809] blk.17.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 236/ 809] blk.18.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 237/ 809] blk.18.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 238/ 809] blk.18.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 239/ 809] blk.18.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 240/ 809] blk.18.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 241/ 809] blk.18.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 242/ 809] blk.18.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 243/ 809] blk.18.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 244/ 809] blk.18.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 245/ 809] blk.18.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 246/ 809] blk.18.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 247/ 809] blk.18.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 248/ 809] blk.18.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 249/ 809] blk.19.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 250/ 809] blk.19.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 251/ 809] blk.19.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 252/ 809] blk.19.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 253/ 809] blk.19.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 254/ 809] blk.19.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 255/ 809] blk.19.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 256/ 809] blk.19.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 257/ 809] blk.19.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 258/ 809] blk.19.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 259/ 809] blk.19.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 260/ 809] blk.19.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 261/ 809] blk.19.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 262/ 809] blk.20.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 263/ 809] blk.20.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 264/ 809] blk.20.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 265/ 809] blk.20.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 266/ 809] blk.20.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 267/ 809] blk.20.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 268/ 809] blk.20.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 269/ 809] blk.20.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 270/ 809] blk.20.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 271/ 809] blk.20.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 272/ 809] blk.20.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 273/ 809] blk.20.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 274/ 809] blk.20.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 275/ 809] blk.21.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 276/ 809] blk.21.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 277/ 809] blk.21.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 278/ 809] blk.21.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 279/ 809] blk.21.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 280/ 809] blk.21.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 281/ 809] blk.21.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 282/ 809] blk.21.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 283/ 809] blk.21.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 284/ 809] blk.21.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 285/ 809] blk.21.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 286/ 809] blk.21.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 287/ 809] blk.21.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 288/ 809] blk.22.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 289/ 809] blk.22.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 290/ 809] blk.22.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 291/ 809] blk.22.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 292/ 809] blk.22.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 293/ 809] blk.22.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 294/ 809] blk.22.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 295/ 809] blk.22.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 296/ 809] blk.22.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 297/ 809] blk.22.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 298/ 809] blk.22.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 299/ 809] blk.22.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 300/ 809] blk.22.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 301/ 809] blk.23.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 302/ 809] blk.23.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 303/ 809] blk.23.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 304/ 809] blk.23.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 305/ 809] blk.23.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 306/ 809] blk.23.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 307/ 809] blk.23.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 308/ 809] blk.23.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 309/ 809] blk.23.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 310/ 809] blk.23.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 311/ 809] blk.23.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 312/ 809] blk.23.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 313/ 809] blk.23.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 314/ 809] blk.24.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 315/ 809] blk.24.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 316/ 809] blk.24.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 317/ 809] blk.24.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 318/ 809] blk.24.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 319/ 809] blk.24.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 320/ 809] blk.24.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 321/ 809] blk.24.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 322/ 809] blk.24.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 323/ 809] blk.24.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 324/ 809] blk.24.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 325/ 809] blk.24.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 326/ 809] blk.24.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 327/ 809] blk.25.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 328/ 809] blk.25.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 329/ 809] blk.25.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 330/ 809] blk.25.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 331/ 809] blk.25.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 332/ 809] blk.25.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 333/ 809] blk.25.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 334/ 809] blk.25.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 335/ 809] blk.25.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 336/ 809] blk.25.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 337/ 809] blk.25.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 338/ 809] blk.25.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 339/ 809] blk.25.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 340/ 809] blk.26.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 341/ 809] blk.26.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 342/ 809] blk.26.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 343/ 809] blk.26.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 344/ 809] blk.26.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 345/ 809] blk.26.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 346/ 809] blk.26.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 347/ 809] blk.26.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 348/ 809] blk.26.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 349/ 809] blk.26.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 350/ 809] blk.26.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 351/ 809] blk.26.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 352/ 809] blk.26.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 353/ 809] blk.27.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 354/ 809] blk.27.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 355/ 809] blk.27.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 356/ 809] blk.27.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 357/ 809] blk.27.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 358/ 809] blk.27.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 359/ 809] blk.27.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 360/ 809] blk.27.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 361/ 809] blk.27.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 362/ 809] blk.27.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 363/ 809] blk.27.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 364/ 809] blk.27.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 365/ 809] blk.27.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 366/ 809] blk.28.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 367/ 809] blk.28.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 368/ 809] blk.28.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 369/ 809] blk.28.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 370/ 809] blk.28.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 371/ 809] blk.28.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 372/ 809] blk.28.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 373/ 809] blk.28.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 374/ 809] blk.28.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 375/ 809] blk.28.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 376/ 809] blk.28.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 377/ 809] blk.28.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 378/ 809] blk.28.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 379/ 809] blk.29.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 380/ 809] blk.29.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 381/ 809] blk.29.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 382/ 809] blk.29.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 383/ 809] blk.29.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 384/ 809] blk.29.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 385/ 809] blk.29.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 386/ 809] blk.29.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 387/ 809] blk.29.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 388/ 809] blk.29.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 389/ 809] blk.29.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 390/ 809] blk.29.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 391/ 809] blk.29.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 392/ 809] blk.30.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 393/ 809] blk.30.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 394/ 809] blk.30.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 395/ 809] blk.30.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 396/ 809] blk.30.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 397/ 809] blk.30.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 398/ 809] blk.30.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 399/ 809] blk.30.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 400/ 809] blk.30.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 401/ 809] blk.30.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 402/ 809] blk.30.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 403/ 809] blk.30.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 404/ 809] blk.30.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 405/ 809] blk.31.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 406/ 809] blk.31.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 407/ 809] blk.31.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 408/ 809] blk.31.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 409/ 809] blk.31.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 410/ 809] blk.31.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 411/ 809] blk.31.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 412/ 809] blk.31.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 413/ 809] blk.31.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 414/ 809] blk.31.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 415/ 809] blk.31.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 416/ 809] blk.31.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 417/ 809] blk.31.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 418/ 809] blk.32.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 419/ 809] blk.32.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 420/ 809] blk.32.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 421/ 809] blk.32.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 422/ 809] blk.32.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 423/ 809] blk.32.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 424/ 809] blk.32.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 425/ 809] blk.32.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 426/ 809] blk.32.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 427/ 809] blk.32.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 428/ 809] blk.32.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 429/ 809] blk.32.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 430/ 809] blk.32.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 431/ 809] blk.33.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 432/ 809] blk.33.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 433/ 809] blk.33.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 434/ 809] blk.33.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 435/ 809] blk.33.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 436/ 809] blk.33.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 437/ 809] blk.33.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 438/ 809] blk.33.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 439/ 809] blk.33.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 440/ 809] blk.33.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 441/ 809] blk.33.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 442/ 809] blk.33.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 443/ 809] blk.33.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 444/ 809] blk.34.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 445/ 809] blk.34.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 446/ 809] blk.34.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 447/ 809] blk.34.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 448/ 809] blk.34.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 449/ 809] blk.34.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 450/ 809] blk.34.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 451/ 809] blk.34.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 452/ 809] blk.34.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 453/ 809] blk.34.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 454/ 809] blk.34.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 455/ 809] blk.34.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 456/ 809] blk.34.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 457/ 809] blk.35.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 458/ 809] blk.35.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 459/ 809] blk.35.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 460/ 809] blk.35.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 461/ 809] blk.35.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 462/ 809] blk.35.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 463/ 809] blk.35.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 464/ 809] blk.35.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 465/ 809] blk.35.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 466/ 809] blk.35.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 467/ 809] blk.35.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 468/ 809] blk.35.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 469/ 809] blk.35.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 470/ 809] blk.36.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 471/ 809] blk.36.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 472/ 809] blk.36.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 473/ 809] blk.36.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 474/ 809] blk.36.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 475/ 809] blk.36.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 476/ 809] blk.36.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 477/ 809] blk.36.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 478/ 809] blk.36.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 479/ 809] blk.36.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 480/ 809] blk.36.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 481/ 809] blk.36.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 482/ 809] blk.36.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 483/ 809] blk.37.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 484/ 809] blk.37.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 485/ 809] blk.37.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 486/ 809] blk.37.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 487/ 809] blk.37.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 488/ 809] blk.37.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 489/ 809] blk.37.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 490/ 809] blk.37.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 491/ 809] blk.37.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 492/ 809] blk.37.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 493/ 809] blk.37.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 494/ 809] blk.37.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 495/ 809] blk.37.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 496/ 809] blk.38.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 497/ 809] blk.38.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 498/ 809] blk.38.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 499/ 809] blk.38.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 500/ 809] blk.38.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 501/ 809] blk.38.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 502/ 809] blk.38.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 503/ 809] blk.38.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 504/ 809] blk.38.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 505/ 809] blk.38.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 506/ 809] blk.38.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 507/ 809] blk.38.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 508/ 809] blk.38.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 509/ 809] blk.39.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 510/ 809] blk.39.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 511/ 809] blk.39.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 512/ 809] blk.39.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 513/ 809] blk.39.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 514/ 809] blk.39.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 515/ 809] blk.39.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 516/ 809] blk.39.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 517/ 809] blk.39.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 518/ 809] blk.39.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 519/ 809] blk.39.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 520/ 809] blk.39.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 521/ 809] blk.39.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 522/ 809] blk.40.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 523/ 809] blk.40.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 524/ 809] blk.40.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 525/ 809] blk.40.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 526/ 809] blk.40.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 527/ 809] blk.40.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 528/ 809] blk.40.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 529/ 809] blk.40.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 530/ 809] blk.40.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 531/ 809] blk.40.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 532/ 809] blk.40.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 533/ 809] blk.40.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 534/ 809] blk.40.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 535/ 809] blk.41.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 536/ 809] blk.41.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 537/ 809] blk.41.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 538/ 809] blk.41.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 539/ 809] blk.41.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 540/ 809] blk.41.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 541/ 809] blk.41.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 542/ 809] blk.41.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 543/ 809] blk.41.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 544/ 809] blk.41.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 545/ 809] blk.41.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 546/ 809] blk.41.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 547/ 809] blk.41.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 548/ 809] blk.42.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 549/ 809] blk.42.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 550/ 809] blk.42.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 551/ 809] blk.42.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 552/ 809] blk.42.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 553/ 809] blk.42.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 554/ 809] blk.42.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 555/ 809] blk.42.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 556/ 809] blk.42.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 557/ 809] blk.42.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 558/ 809] blk.42.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 559/ 809] blk.42.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 560/ 809] blk.42.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 561/ 809] blk.43.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 562/ 809] blk.43.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 563/ 809] blk.43.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 564/ 809] blk.43.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 565/ 809] blk.43.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 566/ 809] blk.43.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 567/ 809] blk.43.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 568/ 809] blk.43.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 569/ 809] blk.43.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 570/ 809] blk.43.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 571/ 809] blk.43.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 572/ 809] blk.43.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 573/ 809] blk.43.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 574/ 809] blk.44.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 575/ 809] blk.44.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 576/ 809] blk.44.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 577/ 809] blk.44.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 578/ 809] blk.44.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 579/ 809] blk.44.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 580/ 809] blk.44.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 581/ 809] blk.44.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 582/ 809] blk.44.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 583/ 809] blk.44.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 584/ 809] blk.44.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 585/ 809] blk.44.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 586/ 809] blk.44.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 587/ 809] blk.45.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 588/ 809] blk.45.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 589/ 809] blk.45.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 590/ 809] blk.45.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 591/ 809] blk.45.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 592/ 809] blk.45.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 593/ 809] blk.45.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 594/ 809] blk.45.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 595/ 809] blk.45.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 596/ 809] blk.45.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 597/ 809] blk.45.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 598/ 809] blk.45.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 599/ 809] blk.45.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 600/ 809] blk.46.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 601/ 809] blk.46.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 602/ 809] blk.46.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 603/ 809] blk.46.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 604/ 809] blk.46.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 605/ 809] blk.46.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 606/ 809] blk.46.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 607/ 809] blk.46.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 608/ 809] blk.46.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 609/ 809] blk.46.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 610/ 809] blk.46.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 611/ 809] blk.46.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 612/ 809] blk.46.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 613/ 809] blk.47.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 614/ 809] blk.47.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 615/ 809] blk.47.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 616/ 809] blk.47.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 617/ 809] blk.47.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 618/ 809] blk.47.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 619/ 809] blk.47.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 620/ 809] blk.47.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 621/ 809] blk.47.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 622/ 809] blk.47.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 623/ 809] blk.47.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 624/ 809] blk.47.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 625/ 809] blk.47.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 626/ 809] blk.48.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 627/ 809] blk.48.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 628/ 809] blk.48.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 629/ 809] blk.48.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 630/ 809] blk.48.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 631/ 809] blk.48.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 632/ 809] blk.48.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 633/ 809] blk.48.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 634/ 809] blk.48.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 635/ 809] blk.48.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 636/ 809] blk.48.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 637/ 809] blk.48.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 638/ 809] blk.48.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 639/ 809] blk.49.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 640/ 809] blk.49.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 641/ 809] blk.49.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 642/ 809] blk.49.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 643/ 809] blk.49.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 644/ 809] blk.49.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 645/ 809] blk.49.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 646/ 809] blk.49.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 647/ 809] blk.49.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 648/ 809] blk.49.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 649/ 809] blk.49.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 650/ 809] blk.49.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 651/ 809] blk.49.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 652/ 809] blk.50.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 653/ 809] blk.50.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 654/ 809] blk.50.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 655/ 809] blk.50.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 656/ 809] blk.50.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 657/ 809] blk.50.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 658/ 809] blk.50.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 659/ 809] blk.50.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 660/ 809] blk.50.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 661/ 809] blk.50.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 662/ 809] blk.50.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 663/ 809] blk.50.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 664/ 809] blk.50.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 665/ 809] blk.51.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 666/ 809] blk.51.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 667/ 809] blk.51.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 668/ 809] blk.51.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 669/ 809] blk.51.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 670/ 809] blk.51.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 671/ 809] blk.51.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 672/ 809] blk.51.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 673/ 809] blk.51.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 674/ 809] blk.51.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 675/ 809] blk.51.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 676/ 809] blk.51.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 677/ 809] blk.51.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 678/ 809] blk.52.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 679/ 809] blk.52.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 680/ 809] blk.52.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 681/ 809] blk.52.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 682/ 809] blk.52.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 683/ 809] blk.52.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 684/ 809] blk.52.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 685/ 809] blk.52.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 686/ 809] blk.52.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 687/ 809] blk.52.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 688/ 809] blk.52.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 689/ 809] blk.52.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 690/ 809] blk.52.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 691/ 809] blk.53.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 692/ 809] blk.53.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 693/ 809] blk.53.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 694/ 809] blk.53.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 695/ 809] blk.53.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 696/ 809] blk.53.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 697/ 809] blk.53.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 698/ 809] blk.53.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 699/ 809] blk.53.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 700/ 809] blk.53.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 701/ 809] blk.53.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 702/ 809] blk.53.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 703/ 809] blk.53.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 704/ 809] blk.54.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 705/ 809] blk.54.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 706/ 809] blk.54.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 707/ 809] blk.54.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 708/ 809] blk.54.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 709/ 809] blk.54.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 710/ 809] blk.54.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 711/ 809] blk.54.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 712/ 809] blk.54.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 713/ 809] blk.54.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 714/ 809] blk.54.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 715/ 809] blk.54.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 716/ 809] blk.54.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 717/ 809] blk.55.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 718/ 809] blk.55.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 719/ 809] blk.55.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 720/ 809] blk.55.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 721/ 809] blk.55.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 722/ 809] blk.55.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 723/ 809] blk.55.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 724/ 809] blk.55.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 725/ 809] blk.55.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 726/ 809] blk.55.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 727/ 809] blk.55.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 728/ 809] blk.55.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 729/ 809] blk.55.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 730/ 809] blk.56.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 731/ 809] blk.56.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 732/ 809] blk.56.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 733/ 809] blk.56.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 734/ 809] blk.56.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 735/ 809] blk.56.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 736/ 809] blk.56.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 737/ 809] blk.56.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 738/ 809] blk.56.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 739/ 809] blk.56.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 740/ 809] blk.56.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 741/ 809] blk.56.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 742/ 809] blk.56.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 743/ 809] blk.57.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 744/ 809] blk.57.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 745/ 809] blk.57.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 746/ 809] blk.57.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 747/ 809] blk.57.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 748/ 809] blk.57.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 749/ 809] blk.57.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 750/ 809] blk.57.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 751/ 809] blk.57.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 752/ 809] blk.57.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 753/ 809] blk.57.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 754/ 809] blk.57.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 755/ 809] blk.57.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 756/ 809] blk.58.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 757/ 809] blk.58.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 758/ 809] blk.58.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 759/ 809] blk.58.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 760/ 809] blk.58.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 761/ 809] blk.58.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 762/ 809] blk.58.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 763/ 809] blk.58.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 764/ 809] blk.58.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 765/ 809] blk.58.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 766/ 809] blk.58.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 767/ 809] blk.58.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 768/ 809] blk.58.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 769/ 809] blk.59.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 770/ 809] blk.59.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 771/ 809] blk.59.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 772/ 809] blk.59.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 773/ 809] blk.59.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 774/ 809] blk.59.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 775/ 809] blk.59.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 776/ 809] blk.59.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 777/ 809] blk.59.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 778/ 809] blk.59.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 779/ 809] blk.59.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 780/ 809] blk.59.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 781/ 809] blk.59.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 782/ 809] blk.60.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 783/ 809] blk.60.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 784/ 809] blk.60.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 785/ 809] blk.60.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 786/ 809] blk.60.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 787/ 809] blk.60.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 788/ 809] blk.60.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 789/ 809] blk.60.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 790/ 809] blk.60.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 791/ 809] blk.60.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 792/ 809] blk.60.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 793/ 809] blk.60.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 794/ 809] blk.60.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 795/ 809] blk.61.exp_probs_b.bias - [ 256, 1, 1, 1], type = f32, size = 0.001 MB
[ 796/ 809] blk.61.ffn_gate_inp.weight - [ 3072, 256, 1, 1], type = f32, size = 3.000 MB
[ 797/ 809] blk.61.attn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 798/ 809] blk.61.ffn_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
[ 799/ 809] blk.61.attn_k_norm.weight - [ 1024, 1, 1, 1], type = f32, size = 0.004 MB
[ 800/ 809] blk.61.attn_k.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 801/ 809] blk.61.attn_output.weight - [ 6144, 3072, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 802/ 809] blk.61.attn_q_norm.weight - [ 6144, 1, 1, 1], type = f32, size = 0.023 MB
[ 803/ 809] blk.61.attn_q.weight - [ 3072, 6144, 1, 1], type = bf16, converting to q8_0 .. size = 36.00 MiB -> 19.12 MiB
[ 804/ 809] blk.61.attn_v.weight - [ 3072, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 6.00 MiB -> 3.19 MiB
[ 805/ 809] blk.61.ffn_gate_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 806/ 809] blk.61.ffn_down_exps.weight - [ 1536, 3072, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 807/ 809] blk.61.ffn_up_exps.weight - [ 3072, 1536, 256, 1], type = bf16, converting to q8_0 .. size = 2304.00 MiB -> 1224.00 MiB
[ 808/ 809] output.weight - [ 3072, 200064, 1, 1], type = bf16, converting to q8_0 .. size = 1172.25 MiB -> 622.76 MiB
[ 809/ 809] output_norm.weight - [ 3072, 1, 1, 1], type = f32, size = 0.012 MB
llama_model_quantize_internal: model size = 436285.72 MB
llama_model_quantize_internal: quant size = 231865.49 MB
main: quantize time = 300315.66 ms
main: total time = 300315.66 ms
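The size reductions logged above follow directly from the Q8_0 block layout: each block of 32 weights is stored as one fp16 scale plus 32 int8 values, i.e. 34 bytes per block (8.5 bits/weight) versus 2 bytes/weight for the BF16 source. A minimal sketch reproducing the logged per-tensor numbers (the helper names are illustrative, not part of llama.cpp):

```python
# Sanity-check the Q8_0 sizes reported in the log above.
# Q8_0 stores each 32-weight block as a 2-byte fp16 scale + 32 int8
# values = 34 bytes per block (8.5 bits/weight); BF16 is 2 bytes/weight.

def q8_0_mib(n_weights: int) -> float:
    """Size in MiB of a tensor quantized to Q8_0 (34 bytes per 32-weight block)."""
    assert n_weights % 32 == 0
    return (n_weights // 32) * 34 / (1024 ** 2)

def bf16_mib(n_weights: int) -> float:
    """Size in MiB of a BF16 tensor (2 bytes per weight)."""
    return n_weights * 2 / (1024 ** 2)

# One routed-expert FFN tensor: [3072, 1536, 256] -> 2304.00 MiB -> 1224.00 MiB
n_exp = 3072 * 1536 * 256
print(f"expert FFN: {bf16_mib(n_exp):.2f} MiB -> {q8_0_mib(n_exp):.2f} MiB")

# Output head: [3072, 200064] -> 1172.25 MiB -> 622.76 MiB
n_out = 3072 * 200064
print(f"output:     {bf16_mib(n_out):.2f} MiB -> {q8_0_mib(n_out):.2f} MiB")
```

The overall ratio in the summary (436285.72 MB → 231865.49 MB, about 0.53×) sits just above the pure 8.5/16 = 0.531 ratio because the f32 norm weights, gate inputs, and expert bias tensors are kept unquantized.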