{ "base": { "dir": "/ai/text-generation/models/MiniMaxAI_MiniMax-M2.5-2.0bpw-h6-exl3", "bpw": 2.018558002020811 }, "alts": [ { "dir": "/ai/text-generation/models/MiniMaxAI_MiniMax-M2.5-3.0bpw-h6-exl3", "bpw": 3.018343639748571 } ], "groups": [ { "idx": 0, "layers": [ "model.layers.0.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0003256261348724143, "dbits": 18874368 } ] }, { "idx": 1, "layers": [ "model.layers.0.self_attn.k_proj", "model.layers.0.self_attn.v_proj" ], "candidates": [ { "dkld": -0.013142330944538094, "dbits": 6291456 } ] }, { "idx": 2, "layers": [ "model.layers.0.self_attn.o_proj" ], "candidates": [ { "dkld": 0.06301209330558777, "dbits": 18874368 } ] }, { "idx": 3, "layers": [ "model.layers.0.block_sparse_moe.experts.0.w1", "model.layers.0.block_sparse_moe.experts.1.w1", "model.layers.0.block_sparse_moe.experts.2.w1", "model.layers.0.block_sparse_moe.experts.3.w1", "model.layers.0.block_sparse_moe.experts.4.w1", "model.layers.0.block_sparse_moe.experts.5.w1", "model.layers.0.block_sparse_moe.experts.6.w1", "model.layers.0.block_sparse_moe.experts.7.w1", "model.layers.0.block_sparse_moe.experts.8.w1", "model.layers.0.block_sparse_moe.experts.9.w1", "model.layers.0.block_sparse_moe.experts.10.w1", "model.layers.0.block_sparse_moe.experts.11.w1", "model.layers.0.block_sparse_moe.experts.12.w1", "model.layers.0.block_sparse_moe.experts.13.w1", "model.layers.0.block_sparse_moe.experts.14.w1", "model.layers.0.block_sparse_moe.experts.15.w1", "model.layers.0.block_sparse_moe.experts.16.w1", "model.layers.0.block_sparse_moe.experts.17.w1", "model.layers.0.block_sparse_moe.experts.18.w1", "model.layers.0.block_sparse_moe.experts.19.w1", "model.layers.0.block_sparse_moe.experts.20.w1", "model.layers.0.block_sparse_moe.experts.21.w1", "model.layers.0.block_sparse_moe.experts.22.w1", "model.layers.0.block_sparse_moe.experts.23.w1", "model.layers.0.block_sparse_moe.experts.24.w1", "model.layers.0.block_sparse_moe.experts.25.w1", "model.layers.0.block_sparse_moe.experts.26.w1", "model.layers.0.block_sparse_moe.experts.27.w1", "model.layers.0.block_sparse_moe.experts.28.w1", "model.layers.0.block_sparse_moe.experts.29.w1", "model.layers.0.block_sparse_moe.experts.30.w1", "model.layers.0.block_sparse_moe.experts.31.w1", "model.layers.0.block_sparse_moe.experts.32.w1", "model.layers.0.block_sparse_moe.experts.33.w1", "model.layers.0.block_sparse_moe.experts.34.w1", "model.layers.0.block_sparse_moe.experts.35.w1", "model.layers.0.block_sparse_moe.experts.36.w1", "model.layers.0.block_sparse_moe.experts.37.w1", "model.layers.0.block_sparse_moe.experts.38.w1", "model.layers.0.block_sparse_moe.experts.39.w1", "model.layers.0.block_sparse_moe.experts.40.w1", "model.layers.0.block_sparse_moe.experts.41.w1", "model.layers.0.block_sparse_moe.experts.42.w1", "model.layers.0.block_sparse_moe.experts.43.w1", "model.layers.0.block_sparse_moe.experts.44.w1", "model.layers.0.block_sparse_moe.experts.45.w1", "model.layers.0.block_sparse_moe.experts.46.w1", "model.layers.0.block_sparse_moe.experts.47.w1", "model.layers.0.block_sparse_moe.experts.48.w1", "model.layers.0.block_sparse_moe.experts.49.w1", "model.layers.0.block_sparse_moe.experts.50.w1", "model.layers.0.block_sparse_moe.experts.51.w1", "model.layers.0.block_sparse_moe.experts.52.w1", "model.layers.0.block_sparse_moe.experts.53.w1", "model.layers.0.block_sparse_moe.experts.54.w1", "model.layers.0.block_sparse_moe.experts.55.w1", "model.layers.0.block_sparse_moe.experts.56.w1", "model.layers.0.block_sparse_moe.experts.57.w1", "model.layers.0.block_sparse_moe.experts.58.w1", "model.layers.0.block_sparse_moe.experts.59.w1", "model.layers.0.block_sparse_moe.experts.60.w1", "model.layers.0.block_sparse_moe.experts.61.w1", "model.layers.0.block_sparse_moe.experts.62.w1", "model.layers.0.block_sparse_moe.experts.63.w1", "model.layers.0.block_sparse_moe.experts.64.w1", "model.layers.0.block_sparse_moe.experts.65.w1", "model.layers.0.block_sparse_moe.experts.66.w1", "model.layers.0.block_sparse_moe.experts.67.w1", "model.layers.0.block_sparse_moe.experts.68.w1", "model.layers.0.block_sparse_moe.experts.69.w1", "model.layers.0.block_sparse_moe.experts.70.w1", "model.layers.0.block_sparse_moe.experts.71.w1", "model.layers.0.block_sparse_moe.experts.72.w1", "model.layers.0.block_sparse_moe.experts.73.w1", "model.layers.0.block_sparse_moe.experts.74.w1", "model.layers.0.block_sparse_moe.experts.75.w1", "model.layers.0.block_sparse_moe.experts.76.w1", "model.layers.0.block_sparse_moe.experts.77.w1", "model.layers.0.block_sparse_moe.experts.78.w1", "model.layers.0.block_sparse_moe.experts.79.w1", "model.layers.0.block_sparse_moe.experts.80.w1", "model.layers.0.block_sparse_moe.experts.81.w1", "model.layers.0.block_sparse_moe.experts.82.w1", "model.layers.0.block_sparse_moe.experts.83.w1", "model.layers.0.block_sparse_moe.experts.84.w1", "model.layers.0.block_sparse_moe.experts.85.w1", "model.layers.0.block_sparse_moe.experts.86.w1", "model.layers.0.block_sparse_moe.experts.87.w1", "model.layers.0.block_sparse_moe.experts.88.w1", "model.layers.0.block_sparse_moe.experts.89.w1", "model.layers.0.block_sparse_moe.experts.90.w1", "model.layers.0.block_sparse_moe.experts.91.w1", "model.layers.0.block_sparse_moe.experts.92.w1", "model.layers.0.block_sparse_moe.experts.93.w1", "model.layers.0.block_sparse_moe.experts.94.w1", "model.layers.0.block_sparse_moe.experts.95.w1", "model.layers.0.block_sparse_moe.experts.96.w1", "model.layers.0.block_sparse_moe.experts.97.w1", "model.layers.0.block_sparse_moe.experts.98.w1", "model.layers.0.block_sparse_moe.experts.99.w1", "model.layers.0.block_sparse_moe.experts.100.w1", "model.layers.0.block_sparse_moe.experts.101.w1", "model.layers.0.block_sparse_moe.experts.102.w1", "model.layers.0.block_sparse_moe.experts.103.w1", "model.layers.0.block_sparse_moe.experts.104.w1", "model.layers.0.block_sparse_moe.experts.105.w1", "model.layers.0.block_sparse_moe.experts.106.w1", "model.layers.0.block_sparse_moe.experts.107.w1", "model.layers.0.block_sparse_moe.experts.108.w1", "model.layers.0.block_sparse_moe.experts.109.w1", "model.layers.0.block_sparse_moe.experts.110.w1", "model.layers.0.block_sparse_moe.experts.111.w1", "model.layers.0.block_sparse_moe.experts.112.w1", "model.layers.0.block_sparse_moe.experts.113.w1", "model.layers.0.block_sparse_moe.experts.114.w1", "model.layers.0.block_sparse_moe.experts.115.w1", "model.layers.0.block_sparse_moe.experts.116.w1", "model.layers.0.block_sparse_moe.experts.117.w1", "model.layers.0.block_sparse_moe.experts.118.w1", "model.layers.0.block_sparse_moe.experts.119.w1", "model.layers.0.block_sparse_moe.experts.120.w1", "model.layers.0.block_sparse_moe.experts.121.w1", "model.layers.0.block_sparse_moe.experts.122.w1", "model.layers.0.block_sparse_moe.experts.123.w1", "model.layers.0.block_sparse_moe.experts.124.w1", "model.layers.0.block_sparse_moe.experts.125.w1", "model.layers.0.block_sparse_moe.experts.126.w1", "model.layers.0.block_sparse_moe.experts.127.w1", "model.layers.0.block_sparse_moe.experts.128.w1", "model.layers.0.block_sparse_moe.experts.129.w1", "model.layers.0.block_sparse_moe.experts.130.w1", "model.layers.0.block_sparse_moe.experts.131.w1", "model.layers.0.block_sparse_moe.experts.132.w1", "model.layers.0.block_sparse_moe.experts.133.w1", "model.layers.0.block_sparse_moe.experts.134.w1", "model.layers.0.block_sparse_moe.experts.135.w1", "model.layers.0.block_sparse_moe.experts.136.w1", "model.layers.0.block_sparse_moe.experts.137.w1", "model.layers.0.block_sparse_moe.experts.138.w1", "model.layers.0.block_sparse_moe.experts.139.w1", "model.layers.0.block_sparse_moe.experts.140.w1", "model.layers.0.block_sparse_moe.experts.141.w1", "model.layers.0.block_sparse_moe.experts.142.w1", "model.layers.0.block_sparse_moe.experts.143.w1", "model.layers.0.block_sparse_moe.experts.144.w1", "model.layers.0.block_sparse_moe.experts.145.w1", "model.layers.0.block_sparse_moe.experts.146.w1", "model.layers.0.block_sparse_moe.experts.147.w1", "model.layers.0.block_sparse_moe.experts.148.w1", "model.layers.0.block_sparse_moe.experts.149.w1", "model.layers.0.block_sparse_moe.experts.150.w1", "model.layers.0.block_sparse_moe.experts.151.w1", "model.layers.0.block_sparse_moe.experts.152.w1", "model.layers.0.block_sparse_moe.experts.153.w1", "model.layers.0.block_sparse_moe.experts.154.w1", "model.layers.0.block_sparse_moe.experts.155.w1", "model.layers.0.block_sparse_moe.experts.156.w1", "model.layers.0.block_sparse_moe.experts.157.w1", "model.layers.0.block_sparse_moe.experts.158.w1", "model.layers.0.block_sparse_moe.experts.159.w1", "model.layers.0.block_sparse_moe.experts.160.w1", "model.layers.0.block_sparse_moe.experts.161.w1", "model.layers.0.block_sparse_moe.experts.162.w1", "model.layers.0.block_sparse_moe.experts.163.w1", "model.layers.0.block_sparse_moe.experts.164.w1", "model.layers.0.block_sparse_moe.experts.165.w1", "model.layers.0.block_sparse_moe.experts.166.w1", "model.layers.0.block_sparse_moe.experts.167.w1", "model.layers.0.block_sparse_moe.experts.168.w1", "model.layers.0.block_sparse_moe.experts.169.w1", "model.layers.0.block_sparse_moe.experts.170.w1", "model.layers.0.block_sparse_moe.experts.171.w1", "model.layers.0.block_sparse_moe.experts.172.w1", "model.layers.0.block_sparse_moe.experts.173.w1", "model.layers.0.block_sparse_moe.experts.174.w1", "model.layers.0.block_sparse_moe.experts.175.w1", "model.layers.0.block_sparse_moe.experts.176.w1", "model.layers.0.block_sparse_moe.experts.177.w1", "model.layers.0.block_sparse_moe.experts.178.w1", "model.layers.0.block_sparse_moe.experts.179.w1", "model.layers.0.block_sparse_moe.experts.180.w1", "model.layers.0.block_sparse_moe.experts.181.w1", "model.layers.0.block_sparse_moe.experts.182.w1", "model.layers.0.block_sparse_moe.experts.183.w1", "model.layers.0.block_sparse_moe.experts.184.w1", "model.layers.0.block_sparse_moe.experts.185.w1", "model.layers.0.block_sparse_moe.experts.186.w1", "model.layers.0.block_sparse_moe.experts.187.w1", "model.layers.0.block_sparse_moe.experts.188.w1", "model.layers.0.block_sparse_moe.experts.189.w1", "model.layers.0.block_sparse_moe.experts.190.w1", "model.layers.0.block_sparse_moe.experts.191.w1", "model.layers.0.block_sparse_moe.experts.192.w1", "model.layers.0.block_sparse_moe.experts.193.w1", "model.layers.0.block_sparse_moe.experts.194.w1", "model.layers.0.block_sparse_moe.experts.195.w1", "model.layers.0.block_sparse_moe.experts.196.w1", "model.layers.0.block_sparse_moe.experts.197.w1", "model.layers.0.block_sparse_moe.experts.198.w1", "model.layers.0.block_sparse_moe.experts.199.w1", "model.layers.0.block_sparse_moe.experts.200.w1", "model.layers.0.block_sparse_moe.experts.201.w1", "model.layers.0.block_sparse_moe.experts.202.w1", "model.layers.0.block_sparse_moe.experts.203.w1", "model.layers.0.block_sparse_moe.experts.204.w1", "model.layers.0.block_sparse_moe.experts.205.w1", "model.layers.0.block_sparse_moe.experts.206.w1", "model.layers.0.block_sparse_moe.experts.207.w1", "model.layers.0.block_sparse_moe.experts.208.w1", "model.layers.0.block_sparse_moe.experts.209.w1", "model.layers.0.block_sparse_moe.experts.210.w1", "model.layers.0.block_sparse_moe.experts.211.w1", "model.layers.0.block_sparse_moe.experts.212.w1", "model.layers.0.block_sparse_moe.experts.213.w1", "model.layers.0.block_sparse_moe.experts.214.w1", "model.layers.0.block_sparse_moe.experts.215.w1", "model.layers.0.block_sparse_moe.experts.216.w1", "model.layers.0.block_sparse_moe.experts.217.w1", "model.layers.0.block_sparse_moe.experts.218.w1", "model.layers.0.block_sparse_moe.experts.219.w1", "model.layers.0.block_sparse_moe.experts.220.w1", "model.layers.0.block_sparse_moe.experts.221.w1", "model.layers.0.block_sparse_moe.experts.222.w1", "model.layers.0.block_sparse_moe.experts.223.w1", "model.layers.0.block_sparse_moe.experts.224.w1", "model.layers.0.block_sparse_moe.experts.225.w1", "model.layers.0.block_sparse_moe.experts.226.w1", "model.layers.0.block_sparse_moe.experts.227.w1", "model.layers.0.block_sparse_moe.experts.228.w1", "model.layers.0.block_sparse_moe.experts.229.w1", "model.layers.0.block_sparse_moe.experts.230.w1", "model.layers.0.block_sparse_moe.experts.231.w1", "model.layers.0.block_sparse_moe.experts.232.w1", "model.layers.0.block_sparse_moe.experts.233.w1", "model.layers.0.block_sparse_moe.experts.234.w1", "model.layers.0.block_sparse_moe.experts.235.w1", "model.layers.0.block_sparse_moe.experts.236.w1", "model.layers.0.block_sparse_moe.experts.237.w1", "model.layers.0.block_sparse_moe.experts.238.w1", "model.layers.0.block_sparse_moe.experts.239.w1", "model.layers.0.block_sparse_moe.experts.240.w1", "model.layers.0.block_sparse_moe.experts.241.w1", "model.layers.0.block_sparse_moe.experts.242.w1", "model.layers.0.block_sparse_moe.experts.243.w1", "model.layers.0.block_sparse_moe.experts.244.w1", "model.layers.0.block_sparse_moe.experts.245.w1", "model.layers.0.block_sparse_moe.experts.246.w1", "model.layers.0.block_sparse_moe.experts.247.w1", "model.layers.0.block_sparse_moe.experts.248.w1", "model.layers.0.block_sparse_moe.experts.249.w1", "model.layers.0.block_sparse_moe.experts.250.w1", "model.layers.0.block_sparse_moe.experts.251.w1", "model.layers.0.block_sparse_moe.experts.252.w1", "model.layers.0.block_sparse_moe.experts.253.w1", "model.layers.0.block_sparse_moe.experts.254.w1", "model.layers.0.block_sparse_moe.experts.255.w1", "model.layers.0.block_sparse_moe.experts.0.w3", "model.layers.0.block_sparse_moe.experts.1.w3", "model.layers.0.block_sparse_moe.experts.2.w3", "model.layers.0.block_sparse_moe.experts.3.w3", "model.layers.0.block_sparse_moe.experts.4.w3", "model.layers.0.block_sparse_moe.experts.5.w3", "model.layers.0.block_sparse_moe.experts.6.w3", "model.layers.0.block_sparse_moe.experts.7.w3", "model.layers.0.block_sparse_moe.experts.8.w3", "model.layers.0.block_sparse_moe.experts.9.w3", "model.layers.0.block_sparse_moe.experts.10.w3", "model.layers.0.block_sparse_moe.experts.11.w3", "model.layers.0.block_sparse_moe.experts.12.w3", "model.layers.0.block_sparse_moe.experts.13.w3", "model.layers.0.block_sparse_moe.experts.14.w3", "model.layers.0.block_sparse_moe.experts.15.w3", "model.layers.0.block_sparse_moe.experts.16.w3", "model.layers.0.block_sparse_moe.experts.17.w3", "model.layers.0.block_sparse_moe.experts.18.w3", "model.layers.0.block_sparse_moe.experts.19.w3", "model.layers.0.block_sparse_moe.experts.20.w3", "model.layers.0.block_sparse_moe.experts.21.w3", "model.layers.0.block_sparse_moe.experts.22.w3", "model.layers.0.block_sparse_moe.experts.23.w3", "model.layers.0.block_sparse_moe.experts.24.w3", "model.layers.0.block_sparse_moe.experts.25.w3", "model.layers.0.block_sparse_moe.experts.26.w3", "model.layers.0.block_sparse_moe.experts.27.w3", "model.layers.0.block_sparse_moe.experts.28.w3", "model.layers.0.block_sparse_moe.experts.29.w3", "model.layers.0.block_sparse_moe.experts.30.w3", "model.layers.0.block_sparse_moe.experts.31.w3", "model.layers.0.block_sparse_moe.experts.32.w3", "model.layers.0.block_sparse_moe.experts.33.w3", "model.layers.0.block_sparse_moe.experts.34.w3", "model.layers.0.block_sparse_moe.experts.35.w3", "model.layers.0.block_sparse_moe.experts.36.w3", "model.layers.0.block_sparse_moe.experts.37.w3", "model.layers.0.block_sparse_moe.experts.38.w3", "model.layers.0.block_sparse_moe.experts.39.w3", "model.layers.0.block_sparse_moe.experts.40.w3", "model.layers.0.block_sparse_moe.experts.41.w3", "model.layers.0.block_sparse_moe.experts.42.w3", "model.layers.0.block_sparse_moe.experts.43.w3", "model.layers.0.block_sparse_moe.experts.44.w3", "model.layers.0.block_sparse_moe.experts.45.w3", "model.layers.0.block_sparse_moe.experts.46.w3", "model.layers.0.block_sparse_moe.experts.47.w3", "model.layers.0.block_sparse_moe.experts.48.w3", "model.layers.0.block_sparse_moe.experts.49.w3", "model.layers.0.block_sparse_moe.experts.50.w3", "model.layers.0.block_sparse_moe.experts.51.w3", "model.layers.0.block_sparse_moe.experts.52.w3", "model.layers.0.block_sparse_moe.experts.53.w3", "model.layers.0.block_sparse_moe.experts.54.w3", "model.layers.0.block_sparse_moe.experts.55.w3", "model.layers.0.block_sparse_moe.experts.56.w3", "model.layers.0.block_sparse_moe.experts.57.w3", "model.layers.0.block_sparse_moe.experts.58.w3", "model.layers.0.block_sparse_moe.experts.59.w3", "model.layers.0.block_sparse_moe.experts.60.w3", "model.layers.0.block_sparse_moe.experts.61.w3", "model.layers.0.block_sparse_moe.experts.62.w3", "model.layers.0.block_sparse_moe.experts.63.w3", "model.layers.0.block_sparse_moe.experts.64.w3", "model.layers.0.block_sparse_moe.experts.65.w3", "model.layers.0.block_sparse_moe.experts.66.w3", "model.layers.0.block_sparse_moe.experts.67.w3", "model.layers.0.block_sparse_moe.experts.68.w3", "model.layers.0.block_sparse_moe.experts.69.w3", "model.layers.0.block_sparse_moe.experts.70.w3", "model.layers.0.block_sparse_moe.experts.71.w3", "model.layers.0.block_sparse_moe.experts.72.w3", "model.layers.0.block_sparse_moe.experts.73.w3", "model.layers.0.block_sparse_moe.experts.74.w3", "model.layers.0.block_sparse_moe.experts.75.w3", "model.layers.0.block_sparse_moe.experts.76.w3", "model.layers.0.block_sparse_moe.experts.77.w3", "model.layers.0.block_sparse_moe.experts.78.w3", "model.layers.0.block_sparse_moe.experts.79.w3", "model.layers.0.block_sparse_moe.experts.80.w3", "model.layers.0.block_sparse_moe.experts.81.w3", "model.layers.0.block_sparse_moe.experts.82.w3", "model.layers.0.block_sparse_moe.experts.83.w3", "model.layers.0.block_sparse_moe.experts.84.w3", "model.layers.0.block_sparse_moe.experts.85.w3", "model.layers.0.block_sparse_moe.experts.86.w3", "model.layers.0.block_sparse_moe.experts.87.w3", "model.layers.0.block_sparse_moe.experts.88.w3", "model.layers.0.block_sparse_moe.experts.89.w3", "model.layers.0.block_sparse_moe.experts.90.w3", "model.layers.0.block_sparse_moe.experts.91.w3", "model.layers.0.block_sparse_moe.experts.92.w3", "model.layers.0.block_sparse_moe.experts.93.w3", "model.layers.0.block_sparse_moe.experts.94.w3", "model.layers.0.block_sparse_moe.experts.95.w3", "model.layers.0.block_sparse_moe.experts.96.w3", "model.layers.0.block_sparse_moe.experts.97.w3", "model.layers.0.block_sparse_moe.experts.98.w3", "model.layers.0.block_sparse_moe.experts.99.w3", "model.layers.0.block_sparse_moe.experts.100.w3", "model.layers.0.block_sparse_moe.experts.101.w3", "model.layers.0.block_sparse_moe.experts.102.w3", "model.layers.0.block_sparse_moe.experts.103.w3", "model.layers.0.block_sparse_moe.experts.104.w3", "model.layers.0.block_sparse_moe.experts.105.w3", "model.layers.0.block_sparse_moe.experts.106.w3", "model.layers.0.block_sparse_moe.experts.107.w3", "model.layers.0.block_sparse_moe.experts.108.w3", "model.layers.0.block_sparse_moe.experts.109.w3", "model.layers.0.block_sparse_moe.experts.110.w3", "model.layers.0.block_sparse_moe.experts.111.w3", "model.layers.0.block_sparse_moe.experts.112.w3", "model.layers.0.block_sparse_moe.experts.113.w3", "model.layers.0.block_sparse_moe.experts.114.w3", "model.layers.0.block_sparse_moe.experts.115.w3", "model.layers.0.block_sparse_moe.experts.116.w3", "model.layers.0.block_sparse_moe.experts.117.w3", "model.layers.0.block_sparse_moe.experts.118.w3", "model.layers.0.block_sparse_moe.experts.119.w3", "model.layers.0.block_sparse_moe.experts.120.w3", "model.layers.0.block_sparse_moe.experts.121.w3", "model.layers.0.block_sparse_moe.experts.122.w3", "model.layers.0.block_sparse_moe.experts.123.w3", "model.layers.0.block_sparse_moe.experts.124.w3", "model.layers.0.block_sparse_moe.experts.125.w3", "model.layers.0.block_sparse_moe.experts.126.w3", "model.layers.0.block_sparse_moe.experts.127.w3", "model.layers.0.block_sparse_moe.experts.128.w3", "model.layers.0.block_sparse_moe.experts.129.w3", "model.layers.0.block_sparse_moe.experts.130.w3", "model.layers.0.block_sparse_moe.experts.131.w3", "model.layers.0.block_sparse_moe.experts.132.w3", "model.layers.0.block_sparse_moe.experts.133.w3", "model.layers.0.block_sparse_moe.experts.134.w3", "model.layers.0.block_sparse_moe.experts.135.w3", "model.layers.0.block_sparse_moe.experts.136.w3", "model.layers.0.block_sparse_moe.experts.137.w3", "model.layers.0.block_sparse_moe.experts.138.w3", "model.layers.0.block_sparse_moe.experts.139.w3", "model.layers.0.block_sparse_moe.experts.140.w3", "model.layers.0.block_sparse_moe.experts.141.w3", "model.layers.0.block_sparse_moe.experts.142.w3", "model.layers.0.block_sparse_moe.experts.143.w3", "model.layers.0.block_sparse_moe.experts.144.w3", "model.layers.0.block_sparse_moe.experts.145.w3", "model.layers.0.block_sparse_moe.experts.146.w3", "model.layers.0.block_sparse_moe.experts.147.w3", "model.layers.0.block_sparse_moe.experts.148.w3", "model.layers.0.block_sparse_moe.experts.149.w3", "model.layers.0.block_sparse_moe.experts.150.w3", "model.layers.0.block_sparse_moe.experts.151.w3", "model.layers.0.block_sparse_moe.experts.152.w3", "model.layers.0.block_sparse_moe.experts.153.w3", "model.layers.0.block_sparse_moe.experts.154.w3", "model.layers.0.block_sparse_moe.experts.155.w3", "model.layers.0.block_sparse_moe.experts.156.w3", "model.layers.0.block_sparse_moe.experts.157.w3", "model.layers.0.block_sparse_moe.experts.158.w3", "model.layers.0.block_sparse_moe.experts.159.w3", "model.layers.0.block_sparse_moe.experts.160.w3", "model.layers.0.block_sparse_moe.experts.161.w3", "model.layers.0.block_sparse_moe.experts.162.w3", "model.layers.0.block_sparse_moe.experts.163.w3", "model.layers.0.block_sparse_moe.experts.164.w3", "model.layers.0.block_sparse_moe.experts.165.w3", "model.layers.0.block_sparse_moe.experts.166.w3", "model.layers.0.block_sparse_moe.experts.167.w3", "model.layers.0.block_sparse_moe.experts.168.w3", "model.layers.0.block_sparse_moe.experts.169.w3", "model.layers.0.block_sparse_moe.experts.170.w3", "model.layers.0.block_sparse_moe.experts.171.w3", "model.layers.0.block_sparse_moe.experts.172.w3", "model.layers.0.block_sparse_moe.experts.173.w3", "model.layers.0.block_sparse_moe.experts.174.w3", "model.layers.0.block_sparse_moe.experts.175.w3", "model.layers.0.block_sparse_moe.experts.176.w3", "model.layers.0.block_sparse_moe.experts.177.w3", "model.layers.0.block_sparse_moe.experts.178.w3", "model.layers.0.block_sparse_moe.experts.179.w3", "model.layers.0.block_sparse_moe.experts.180.w3", "model.layers.0.block_sparse_moe.experts.181.w3", "model.layers.0.block_sparse_moe.experts.182.w3", "model.layers.0.block_sparse_moe.experts.183.w3", "model.layers.0.block_sparse_moe.experts.184.w3", "model.layers.0.block_sparse_moe.experts.185.w3", "model.layers.0.block_sparse_moe.experts.186.w3", "model.layers.0.block_sparse_moe.experts.187.w3", "model.layers.0.block_sparse_moe.experts.188.w3", "model.layers.0.block_sparse_moe.experts.189.w3", "model.layers.0.block_sparse_moe.experts.190.w3", "model.layers.0.block_sparse_moe.experts.191.w3", "model.layers.0.block_sparse_moe.experts.192.w3", "model.layers.0.block_sparse_moe.experts.193.w3", "model.layers.0.block_sparse_moe.experts.194.w3", "model.layers.0.block_sparse_moe.experts.195.w3", "model.layers.0.block_sparse_moe.experts.196.w3", "model.layers.0.block_sparse_moe.experts.197.w3", "model.layers.0.block_sparse_moe.experts.198.w3", "model.layers.0.block_sparse_moe.experts.199.w3", "model.layers.0.block_sparse_moe.experts.200.w3", "model.layers.0.block_sparse_moe.experts.201.w3", "model.layers.0.block_sparse_moe.experts.202.w3", "model.layers.0.block_sparse_moe.experts.203.w3", "model.layers.0.block_sparse_moe.experts.204.w3", "model.layers.0.block_sparse_moe.experts.205.w3", "model.layers.0.block_sparse_moe.experts.206.w3", "model.layers.0.block_sparse_moe.experts.207.w3", "model.layers.0.block_sparse_moe.experts.208.w3", "model.layers.0.block_sparse_moe.experts.209.w3", "model.layers.0.block_sparse_moe.experts.210.w3", "model.layers.0.block_sparse_moe.experts.211.w3", "model.layers.0.block_sparse_moe.experts.212.w3", "model.layers.0.block_sparse_moe.experts.213.w3", "model.layers.0.block_sparse_moe.experts.214.w3", "model.layers.0.block_sparse_moe.experts.215.w3", "model.layers.0.block_sparse_moe.experts.216.w3", "model.layers.0.block_sparse_moe.experts.217.w3", "model.layers.0.block_sparse_moe.experts.218.w3", "model.layers.0.block_sparse_moe.experts.219.w3", "model.layers.0.block_sparse_moe.experts.220.w3", "model.layers.0.block_sparse_moe.experts.221.w3", "model.layers.0.block_sparse_moe.experts.222.w3", "model.layers.0.block_sparse_moe.experts.223.w3", "model.layers.0.block_sparse_moe.experts.224.w3", "model.layers.0.block_sparse_moe.experts.225.w3", "model.layers.0.block_sparse_moe.experts.226.w3", "model.layers.0.block_sparse_moe.experts.227.w3", "model.layers.0.block_sparse_moe.experts.228.w3", "model.layers.0.block_sparse_moe.experts.229.w3", "model.layers.0.block_sparse_moe.experts.230.w3", "model.layers.0.block_sparse_moe.experts.231.w3", "model.layers.0.block_sparse_moe.experts.232.w3", "model.layers.0.block_sparse_moe.experts.233.w3", "model.layers.0.block_sparse_moe.experts.234.w3", "model.layers.0.block_sparse_moe.experts.235.w3", "model.layers.0.block_sparse_moe.experts.236.w3", "model.layers.0.block_sparse_moe.experts.237.w3", "model.layers.0.block_sparse_moe.experts.238.w3", "model.layers.0.block_sparse_moe.experts.239.w3", "model.layers.0.block_sparse_moe.experts.240.w3", "model.layers.0.block_sparse_moe.experts.241.w3", "model.layers.0.block_sparse_moe.experts.242.w3", "model.layers.0.block_sparse_moe.experts.243.w3", "model.layers.0.block_sparse_moe.experts.244.w3", "model.layers.0.block_sparse_moe.experts.245.w3", "model.layers.0.block_sparse_moe.experts.246.w3", "model.layers.0.block_sparse_moe.experts.247.w3", "model.layers.0.block_sparse_moe.experts.248.w3", "model.layers.0.block_sparse_moe.experts.249.w3", "model.layers.0.block_sparse_moe.experts.250.w3", "model.layers.0.block_sparse_moe.experts.251.w3", "model.layers.0.block_sparse_moe.experts.252.w3", "model.layers.0.block_sparse_moe.experts.253.w3", "model.layers.0.block_sparse_moe.experts.254.w3", "model.layers.0.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0033428072929382324, "dbits": 2415919104 } ] }, { "idx": 4, "layers": [ "model.layers.0.block_sparse_moe.experts.0.w2", "model.layers.0.block_sparse_moe.experts.1.w2", "model.layers.0.block_sparse_moe.experts.2.w2", "model.layers.0.block_sparse_moe.experts.3.w2", "model.layers.0.block_sparse_moe.experts.4.w2", "model.layers.0.block_sparse_moe.experts.5.w2", "model.layers.0.block_sparse_moe.experts.6.w2", "model.layers.0.block_sparse_moe.experts.7.w2", "model.layers.0.block_sparse_moe.experts.8.w2", "model.layers.0.block_sparse_moe.experts.9.w2", "model.layers.0.block_sparse_moe.experts.10.w2", "model.layers.0.block_sparse_moe.experts.11.w2", "model.layers.0.block_sparse_moe.experts.12.w2", "model.layers.0.block_sparse_moe.experts.13.w2", "model.layers.0.block_sparse_moe.experts.14.w2", "model.layers.0.block_sparse_moe.experts.15.w2", "model.layers.0.block_sparse_moe.experts.16.w2", "model.layers.0.block_sparse_moe.experts.17.w2", "model.layers.0.block_sparse_moe.experts.18.w2", "model.layers.0.block_sparse_moe.experts.19.w2", "model.layers.0.block_sparse_moe.experts.20.w2", "model.layers.0.block_sparse_moe.experts.21.w2", "model.layers.0.block_sparse_moe.experts.22.w2", "model.layers.0.block_sparse_moe.experts.23.w2", "model.layers.0.block_sparse_moe.experts.24.w2", "model.layers.0.block_sparse_moe.experts.25.w2", "model.layers.0.block_sparse_moe.experts.26.w2", "model.layers.0.block_sparse_moe.experts.27.w2", "model.layers.0.block_sparse_moe.experts.28.w2", "model.layers.0.block_sparse_moe.experts.29.w2", "model.layers.0.block_sparse_moe.experts.30.w2", "model.layers.0.block_sparse_moe.experts.31.w2", "model.layers.0.block_sparse_moe.experts.32.w2", "model.layers.0.block_sparse_moe.experts.33.w2", "model.layers.0.block_sparse_moe.experts.34.w2", "model.layers.0.block_sparse_moe.experts.35.w2", "model.layers.0.block_sparse_moe.experts.36.w2", "model.layers.0.block_sparse_moe.experts.37.w2", "model.layers.0.block_sparse_moe.experts.38.w2", "model.layers.0.block_sparse_moe.experts.39.w2", "model.layers.0.block_sparse_moe.experts.40.w2", "model.layers.0.block_sparse_moe.experts.41.w2", "model.layers.0.block_sparse_moe.experts.42.w2", "model.layers.0.block_sparse_moe.experts.43.w2", "model.layers.0.block_sparse_moe.experts.44.w2", "model.layers.0.block_sparse_moe.experts.45.w2", "model.layers.0.block_sparse_moe.experts.46.w2", "model.layers.0.block_sparse_moe.experts.47.w2", "model.layers.0.block_sparse_moe.experts.48.w2", "model.layers.0.block_sparse_moe.experts.49.w2", "model.layers.0.block_sparse_moe.experts.50.w2", "model.layers.0.block_sparse_moe.experts.51.w2", "model.layers.0.block_sparse_moe.experts.52.w2", "model.layers.0.block_sparse_moe.experts.53.w2", "model.layers.0.block_sparse_moe.experts.54.w2", "model.layers.0.block_sparse_moe.experts.55.w2", "model.layers.0.block_sparse_moe.experts.56.w2", "model.layers.0.block_sparse_moe.experts.57.w2", "model.layers.0.block_sparse_moe.experts.58.w2", "model.layers.0.block_sparse_moe.experts.59.w2", "model.layers.0.block_sparse_moe.experts.60.w2", "model.layers.0.block_sparse_moe.experts.61.w2", "model.layers.0.block_sparse_moe.experts.62.w2", "model.layers.0.block_sparse_moe.experts.63.w2", "model.layers.0.block_sparse_moe.experts.64.w2", "model.layers.0.block_sparse_moe.experts.65.w2", "model.layers.0.block_sparse_moe.experts.66.w2", "model.layers.0.block_sparse_moe.experts.67.w2", "model.layers.0.block_sparse_moe.experts.68.w2", "model.layers.0.block_sparse_moe.experts.69.w2", "model.layers.0.block_sparse_moe.experts.70.w2", "model.layers.0.block_sparse_moe.experts.71.w2", "model.layers.0.block_sparse_moe.experts.72.w2", "model.layers.0.block_sparse_moe.experts.73.w2", "model.layers.0.block_sparse_moe.experts.74.w2", "model.layers.0.block_sparse_moe.experts.75.w2", "model.layers.0.block_sparse_moe.experts.76.w2", "model.layers.0.block_sparse_moe.experts.77.w2", "model.layers.0.block_sparse_moe.experts.78.w2", "model.layers.0.block_sparse_moe.experts.79.w2", "model.layers.0.block_sparse_moe.experts.80.w2", "model.layers.0.block_sparse_moe.experts.81.w2", "model.layers.0.block_sparse_moe.experts.82.w2", "model.layers.0.block_sparse_moe.experts.83.w2", "model.layers.0.block_sparse_moe.experts.84.w2", "model.layers.0.block_sparse_moe.experts.85.w2", "model.layers.0.block_sparse_moe.experts.86.w2", "model.layers.0.block_sparse_moe.experts.87.w2", "model.layers.0.block_sparse_moe.experts.88.w2", "model.layers.0.block_sparse_moe.experts.89.w2", "model.layers.0.block_sparse_moe.experts.90.w2", "model.layers.0.block_sparse_moe.experts.91.w2", "model.layers.0.block_sparse_moe.experts.92.w2", "model.layers.0.block_sparse_moe.experts.93.w2", "model.layers.0.block_sparse_moe.experts.94.w2", "model.layers.0.block_sparse_moe.experts.95.w2", "model.layers.0.block_sparse_moe.experts.96.w2", "model.layers.0.block_sparse_moe.experts.97.w2", "model.layers.0.block_sparse_moe.experts.98.w2", "model.layers.0.block_sparse_moe.experts.99.w2", "model.layers.0.block_sparse_moe.experts.100.w2", "model.layers.0.block_sparse_moe.experts.101.w2", "model.layers.0.block_sparse_moe.experts.102.w2", "model.layers.0.block_sparse_moe.experts.103.w2", "model.layers.0.block_sparse_moe.experts.104.w2", "model.layers.0.block_sparse_moe.experts.105.w2", "model.layers.0.block_sparse_moe.experts.106.w2", "model.layers.0.block_sparse_moe.experts.107.w2", "model.layers.0.block_sparse_moe.experts.108.w2", "model.layers.0.block_sparse_moe.experts.109.w2", "model.layers.0.block_sparse_moe.experts.110.w2", "model.layers.0.block_sparse_moe.experts.111.w2", "model.layers.0.block_sparse_moe.experts.112.w2", "model.layers.0.block_sparse_moe.experts.113.w2", "model.layers.0.block_sparse_moe.experts.114.w2", "model.layers.0.block_sparse_moe.experts.115.w2", "model.layers.0.block_sparse_moe.experts.116.w2", "model.layers.0.block_sparse_moe.experts.117.w2", "model.layers.0.block_sparse_moe.experts.118.w2", "model.layers.0.block_sparse_moe.experts.119.w2", "model.layers.0.block_sparse_moe.experts.120.w2", "model.layers.0.block_sparse_moe.experts.121.w2", "model.layers.0.block_sparse_moe.experts.122.w2", "model.layers.0.block_sparse_moe.experts.123.w2", "model.layers.0.block_sparse_moe.experts.124.w2", "model.layers.0.block_sparse_moe.experts.125.w2", "model.layers.0.block_sparse_moe.experts.126.w2", "model.layers.0.block_sparse_moe.experts.127.w2", "model.layers.0.block_sparse_moe.experts.128.w2", "model.layers.0.block_sparse_moe.experts.129.w2", "model.layers.0.block_sparse_moe.experts.130.w2", "model.layers.0.block_sparse_moe.experts.131.w2", "model.layers.0.block_sparse_moe.experts.132.w2", "model.layers.0.block_sparse_moe.experts.133.w2", "model.layers.0.block_sparse_moe.experts.134.w2", "model.layers.0.block_sparse_moe.experts.135.w2", "model.layers.0.block_sparse_moe.experts.136.w2", "model.layers.0.block_sparse_moe.experts.137.w2", "model.layers.0.block_sparse_moe.experts.138.w2", "model.layers.0.block_sparse_moe.experts.139.w2", "model.layers.0.block_sparse_moe.experts.140.w2", "model.layers.0.block_sparse_moe.experts.141.w2", "model.layers.0.block_sparse_moe.experts.142.w2", "model.layers.0.block_sparse_moe.experts.143.w2", "model.layers.0.block_sparse_moe.experts.144.w2", "model.layers.0.block_sparse_moe.experts.145.w2", "model.layers.0.block_sparse_moe.experts.146.w2", "model.layers.0.block_sparse_moe.experts.147.w2", "model.layers.0.block_sparse_moe.experts.148.w2", "model.layers.0.block_sparse_moe.experts.149.w2", "model.layers.0.block_sparse_moe.experts.150.w2", "model.layers.0.block_sparse_moe.experts.151.w2", "model.layers.0.block_sparse_moe.experts.152.w2", "model.layers.0.block_sparse_moe.experts.153.w2", "model.layers.0.block_sparse_moe.experts.154.w2", "model.layers.0.block_sparse_moe.experts.155.w2", "model.layers.0.block_sparse_moe.experts.156.w2", "model.layers.0.block_sparse_moe.experts.157.w2", "model.layers.0.block_sparse_moe.experts.158.w2", "model.layers.0.block_sparse_moe.experts.159.w2", "model.layers.0.block_sparse_moe.experts.160.w2", "model.layers.0.block_sparse_moe.experts.161.w2", "model.layers.0.block_sparse_moe.experts.162.w2", "model.layers.0.block_sparse_moe.experts.163.w2", "model.layers.0.block_sparse_moe.experts.164.w2", "model.layers.0.block_sparse_moe.experts.165.w2", "model.layers.0.block_sparse_moe.experts.166.w2", "model.layers.0.block_sparse_moe.experts.167.w2", "model.layers.0.block_sparse_moe.experts.168.w2", "model.layers.0.block_sparse_moe.experts.169.w2", "model.layers.0.block_sparse_moe.experts.170.w2", "model.layers.0.block_sparse_moe.experts.171.w2", "model.layers.0.block_sparse_moe.experts.172.w2", "model.layers.0.block_sparse_moe.experts.173.w2", "model.layers.0.block_sparse_moe.experts.174.w2", "model.layers.0.block_sparse_moe.experts.175.w2", "model.layers.0.block_sparse_moe.experts.176.w2", "model.layers.0.block_sparse_moe.experts.177.w2", "model.layers.0.block_sparse_moe.experts.178.w2", "model.layers.0.block_sparse_moe.experts.179.w2", "model.layers.0.block_sparse_moe.experts.180.w2", "model.layers.0.block_sparse_moe.experts.181.w2", "model.layers.0.block_sparse_moe.experts.182.w2", "model.layers.0.block_sparse_moe.experts.183.w2", "model.layers.0.block_sparse_moe.experts.184.w2", "model.layers.0.block_sparse_moe.experts.185.w2", "model.layers.0.block_sparse_moe.experts.186.w2", "model.layers.0.block_sparse_moe.experts.187.w2", "model.layers.0.block_sparse_moe.experts.188.w2", "model.layers.0.block_sparse_moe.experts.189.w2", "model.layers.0.block_sparse_moe.experts.190.w2", "model.layers.0.block_sparse_moe.experts.191.w2", "model.layers.0.block_sparse_moe.experts.192.w2", "model.layers.0.block_sparse_moe.experts.193.w2", "model.layers.0.block_sparse_moe.experts.194.w2", "model.layers.0.block_sparse_moe.experts.195.w2", "model.layers.0.block_sparse_moe.experts.196.w2", "model.layers.0.block_sparse_moe.experts.197.w2", "model.layers.0.block_sparse_moe.experts.198.w2", "model.layers.0.block_sparse_moe.experts.199.w2", "model.layers.0.block_sparse_moe.experts.200.w2", "model.layers.0.block_sparse_moe.experts.201.w2", "model.layers.0.block_sparse_moe.experts.202.w2", "model.layers.0.block_sparse_moe.experts.203.w2", "model.layers.0.block_sparse_moe.experts.204.w2", "model.layers.0.block_sparse_moe.experts.205.w2", "model.layers.0.block_sparse_moe.experts.206.w2", "model.layers.0.block_sparse_moe.experts.207.w2", "model.layers.0.block_sparse_moe.experts.208.w2", "model.layers.0.block_sparse_moe.experts.209.w2", "model.layers.0.block_sparse_moe.experts.210.w2", "model.layers.0.block_sparse_moe.experts.211.w2", "model.layers.0.block_sparse_moe.experts.212.w2", "model.layers.0.block_sparse_moe.experts.213.w2", "model.layers.0.block_sparse_moe.experts.214.w2", "model.layers.0.block_sparse_moe.experts.215.w2", "model.layers.0.block_sparse_moe.experts.216.w2", "model.layers.0.block_sparse_moe.experts.217.w2", "model.layers.0.block_sparse_moe.experts.218.w2", "model.layers.0.block_sparse_moe.experts.219.w2", "model.layers.0.block_sparse_moe.experts.220.w2", "model.layers.0.block_sparse_moe.experts.221.w2", "model.layers.0.block_sparse_moe.experts.222.w2", "model.layers.0.block_sparse_moe.experts.223.w2", "model.layers.0.block_sparse_moe.experts.224.w2", "model.layers.0.block_sparse_moe.experts.225.w2", "model.layers.0.block_sparse_moe.experts.226.w2", "model.layers.0.block_sparse_moe.experts.227.w2", "model.layers.0.block_sparse_moe.experts.228.w2", "model.layers.0.block_sparse_moe.experts.229.w2", "model.layers.0.block_sparse_moe.experts.230.w2", "model.layers.0.block_sparse_moe.experts.231.w2", "model.layers.0.block_sparse_moe.experts.232.w2", "model.layers.0.block_sparse_moe.experts.233.w2", "model.layers.0.block_sparse_moe.experts.234.w2", "model.layers.0.block_sparse_moe.experts.235.w2", "model.layers.0.block_sparse_moe.experts.236.w2", "model.layers.0.block_sparse_moe.experts.237.w2", "model.layers.0.block_sparse_moe.experts.238.w2", "model.layers.0.block_sparse_moe.experts.239.w2", "model.layers.0.block_sparse_moe.experts.240.w2", "model.layers.0.block_sparse_moe.experts.241.w2", "model.layers.0.block_sparse_moe.experts.242.w2", "model.layers.0.block_sparse_moe.experts.243.w2", "model.layers.0.block_sparse_moe.experts.244.w2", "model.layers.0.block_sparse_moe.experts.245.w2", "model.layers.0.block_sparse_moe.experts.246.w2", "model.layers.0.block_sparse_moe.experts.247.w2", "model.layers.0.block_sparse_moe.experts.248.w2", "model.layers.0.block_sparse_moe.experts.249.w2", "model.layers.0.block_sparse_moe.experts.250.w2", "model.layers.0.block_sparse_moe.experts.251.w2", "model.layers.0.block_sparse_moe.experts.252.w2", "model.layers.0.block_sparse_moe.experts.253.w2", "model.layers.0.block_sparse_moe.experts.254.w2", "model.layers.0.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.001223462820053145, "dbits": 1207959552 } ] }, { "idx": 5, "layers": [ "model.layers.1.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00019778907299039528, "dbits": 18874368 } ] }, { "idx": 6, "layers": [ "model.layers.1.self_attn.k_proj", "model.layers.1.self_attn.v_proj" ], "candidates": [ { "dkld": 0.015861159563064664, "dbits": 6291456 } ] }, { "idx": 7, "layers": [ "model.layers.1.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0016512989997864436, "dbits": 18874368 } ] }, { "idx": 8, "layers": [ "model.layers.1.block_sparse_moe.experts.0.w1", "model.layers.1.block_sparse_moe.experts.1.w1", "model.layers.1.block_sparse_moe.experts.2.w1", "model.layers.1.block_sparse_moe.experts.3.w1", "model.layers.1.block_sparse_moe.experts.4.w1", "model.layers.1.block_sparse_moe.experts.5.w1", "model.layers.1.block_sparse_moe.experts.6.w1", "model.layers.1.block_sparse_moe.experts.7.w1", "model.layers.1.block_sparse_moe.experts.8.w1", "model.layers.1.block_sparse_moe.experts.9.w1", "model.layers.1.block_sparse_moe.experts.10.w1", "model.layers.1.block_sparse_moe.experts.11.w1", "model.layers.1.block_sparse_moe.experts.12.w1", "model.layers.1.block_sparse_moe.experts.13.w1", "model.layers.1.block_sparse_moe.experts.14.w1", "model.layers.1.block_sparse_moe.experts.15.w1", "model.layers.1.block_sparse_moe.experts.16.w1", "model.layers.1.block_sparse_moe.experts.17.w1", "model.layers.1.block_sparse_moe.experts.18.w1", "model.layers.1.block_sparse_moe.experts.19.w1", "model.layers.1.block_sparse_moe.experts.20.w1", "model.layers.1.block_sparse_moe.experts.21.w1", "model.layers.1.block_sparse_moe.experts.22.w1", "model.layers.1.block_sparse_moe.experts.23.w1", "model.layers.1.block_sparse_moe.experts.24.w1", "model.layers.1.block_sparse_moe.experts.25.w1", "model.layers.1.block_sparse_moe.experts.26.w1", "model.layers.1.block_sparse_moe.experts.27.w1", "model.layers.1.block_sparse_moe.experts.28.w1", "model.layers.1.block_sparse_moe.experts.29.w1", "model.layers.1.block_sparse_moe.experts.30.w1", "model.layers.1.block_sparse_moe.experts.31.w1", "model.layers.1.block_sparse_moe.experts.32.w1", "model.layers.1.block_sparse_moe.experts.33.w1", "model.layers.1.block_sparse_moe.experts.34.w1", "model.layers.1.block_sparse_moe.experts.35.w1", "model.layers.1.block_sparse_moe.experts.36.w1", "model.layers.1.block_sparse_moe.experts.37.w1", "model.layers.1.block_sparse_moe.experts.38.w1", "model.layers.1.block_sparse_moe.experts.39.w1", "model.layers.1.block_sparse_moe.experts.40.w1", "model.layers.1.block_sparse_moe.experts.41.w1", "model.layers.1.block_sparse_moe.experts.42.w1", "model.layers.1.block_sparse_moe.experts.43.w1", "model.layers.1.block_sparse_moe.experts.44.w1", "model.layers.1.block_sparse_moe.experts.45.w1", "model.layers.1.block_sparse_moe.experts.46.w1", "model.layers.1.block_sparse_moe.experts.47.w1", "model.layers.1.block_sparse_moe.experts.48.w1", "model.layers.1.block_sparse_moe.experts.49.w1", "model.layers.1.block_sparse_moe.experts.50.w1", "model.layers.1.block_sparse_moe.experts.51.w1", "model.layers.1.block_sparse_moe.experts.52.w1", "model.layers.1.block_sparse_moe.experts.53.w1", "model.layers.1.block_sparse_moe.experts.54.w1", "model.layers.1.block_sparse_moe.experts.55.w1", "model.layers.1.block_sparse_moe.experts.56.w1", "model.layers.1.block_sparse_moe.experts.57.w1", "model.layers.1.block_sparse_moe.experts.58.w1", "model.layers.1.block_sparse_moe.experts.59.w1", "model.layers.1.block_sparse_moe.experts.60.w1", "model.layers.1.block_sparse_moe.experts.61.w1", "model.layers.1.block_sparse_moe.experts.62.w1", "model.layers.1.block_sparse_moe.experts.63.w1", "model.layers.1.block_sparse_moe.experts.64.w1", "model.layers.1.block_sparse_moe.experts.65.w1", "model.layers.1.block_sparse_moe.experts.66.w1", "model.layers.1.block_sparse_moe.experts.67.w1", "model.layers.1.block_sparse_moe.experts.68.w1", "model.layers.1.block_sparse_moe.experts.69.w1", "model.layers.1.block_sparse_moe.experts.70.w1", "model.layers.1.block_sparse_moe.experts.71.w1", "model.layers.1.block_sparse_moe.experts.72.w1", "model.layers.1.block_sparse_moe.experts.73.w1", "model.layers.1.block_sparse_moe.experts.74.w1", "model.layers.1.block_sparse_moe.experts.75.w1", "model.layers.1.block_sparse_moe.experts.76.w1", "model.layers.1.block_sparse_moe.experts.77.w1", "model.layers.1.block_sparse_moe.experts.78.w1", "model.layers.1.block_sparse_moe.experts.79.w1", "model.layers.1.block_sparse_moe.experts.80.w1", "model.layers.1.block_sparse_moe.experts.81.w1", "model.layers.1.block_sparse_moe.experts.82.w1", "model.layers.1.block_sparse_moe.experts.83.w1", "model.layers.1.block_sparse_moe.experts.84.w1", "model.layers.1.block_sparse_moe.experts.85.w1", "model.layers.1.block_sparse_moe.experts.86.w1", "model.layers.1.block_sparse_moe.experts.87.w1", "model.layers.1.block_sparse_moe.experts.88.w1", "model.layers.1.block_sparse_moe.experts.89.w1", "model.layers.1.block_sparse_moe.experts.90.w1", "model.layers.1.block_sparse_moe.experts.91.w1", "model.layers.1.block_sparse_moe.experts.92.w1", "model.layers.1.block_sparse_moe.experts.93.w1", "model.layers.1.block_sparse_moe.experts.94.w1", "model.layers.1.block_sparse_moe.experts.95.w1", "model.layers.1.block_sparse_moe.experts.96.w1", "model.layers.1.block_sparse_moe.experts.97.w1", "model.layers.1.block_sparse_moe.experts.98.w1", "model.layers.1.block_sparse_moe.experts.99.w1", "model.layers.1.block_sparse_moe.experts.100.w1", "model.layers.1.block_sparse_moe.experts.101.w1", "model.layers.1.block_sparse_moe.experts.102.w1", "model.layers.1.block_sparse_moe.experts.103.w1", "model.layers.1.block_sparse_moe.experts.104.w1", "model.layers.1.block_sparse_moe.experts.105.w1", "model.layers.1.block_sparse_moe.experts.106.w1", "model.layers.1.block_sparse_moe.experts.107.w1", "model.layers.1.block_sparse_moe.experts.108.w1", "model.layers.1.block_sparse_moe.experts.109.w1", "model.layers.1.block_sparse_moe.experts.110.w1", "model.layers.1.block_sparse_moe.experts.111.w1", "model.layers.1.block_sparse_moe.experts.112.w1", "model.layers.1.block_sparse_moe.experts.113.w1", "model.layers.1.block_sparse_moe.experts.114.w1", "model.layers.1.block_sparse_moe.experts.115.w1", "model.layers.1.block_sparse_moe.experts.116.w1", "model.layers.1.block_sparse_moe.experts.117.w1", "model.layers.1.block_sparse_moe.experts.118.w1", "model.layers.1.block_sparse_moe.experts.119.w1", "model.layers.1.block_sparse_moe.experts.120.w1", "model.layers.1.block_sparse_moe.experts.121.w1", "model.layers.1.block_sparse_moe.experts.122.w1", "model.layers.1.block_sparse_moe.experts.123.w1", "model.layers.1.block_sparse_moe.experts.124.w1", "model.layers.1.block_sparse_moe.experts.125.w1", "model.layers.1.block_sparse_moe.experts.126.w1", "model.layers.1.block_sparse_moe.experts.127.w1", "model.layers.1.block_sparse_moe.experts.128.w1", "model.layers.1.block_sparse_moe.experts.129.w1", "model.layers.1.block_sparse_moe.experts.130.w1", "model.layers.1.block_sparse_moe.experts.131.w1", "model.layers.1.block_sparse_moe.experts.132.w1", "model.layers.1.block_sparse_moe.experts.133.w1", "model.layers.1.block_sparse_moe.experts.134.w1", "model.layers.1.block_sparse_moe.experts.135.w1", "model.layers.1.block_sparse_moe.experts.136.w1", "model.layers.1.block_sparse_moe.experts.137.w1", "model.layers.1.block_sparse_moe.experts.138.w1", "model.layers.1.block_sparse_moe.experts.139.w1", "model.layers.1.block_sparse_moe.experts.140.w1", "model.layers.1.block_sparse_moe.experts.141.w1", "model.layers.1.block_sparse_moe.experts.142.w1", "model.layers.1.block_sparse_moe.experts.143.w1", "model.layers.1.block_sparse_moe.experts.144.w1", "model.layers.1.block_sparse_moe.experts.145.w1", "model.layers.1.block_sparse_moe.experts.146.w1", "model.layers.1.block_sparse_moe.experts.147.w1", "model.layers.1.block_sparse_moe.experts.148.w1", "model.layers.1.block_sparse_moe.experts.149.w1", "model.layers.1.block_sparse_moe.experts.150.w1", "model.layers.1.block_sparse_moe.experts.151.w1", "model.layers.1.block_sparse_moe.experts.152.w1", "model.layers.1.block_sparse_moe.experts.153.w1", "model.layers.1.block_sparse_moe.experts.154.w1", "model.layers.1.block_sparse_moe.experts.155.w1", "model.layers.1.block_sparse_moe.experts.156.w1", "model.layers.1.block_sparse_moe.experts.157.w1", "model.layers.1.block_sparse_moe.experts.158.w1", "model.layers.1.block_sparse_moe.experts.159.w1", "model.layers.1.block_sparse_moe.experts.160.w1", "model.layers.1.block_sparse_moe.experts.161.w1", "model.layers.1.block_sparse_moe.experts.162.w1", "model.layers.1.block_sparse_moe.experts.163.w1", "model.layers.1.block_sparse_moe.experts.164.w1", "model.layers.1.block_sparse_moe.experts.165.w1", "model.layers.1.block_sparse_moe.experts.166.w1", "model.layers.1.block_sparse_moe.experts.167.w1", "model.layers.1.block_sparse_moe.experts.168.w1", "model.layers.1.block_sparse_moe.experts.169.w1", "model.layers.1.block_sparse_moe.experts.170.w1", "model.layers.1.block_sparse_moe.experts.171.w1", "model.layers.1.block_sparse_moe.experts.172.w1", "model.layers.1.block_sparse_moe.experts.173.w1", "model.layers.1.block_sparse_moe.experts.174.w1", "model.layers.1.block_sparse_moe.experts.175.w1", "model.layers.1.block_sparse_moe.experts.176.w1", "model.layers.1.block_sparse_moe.experts.177.w1", "model.layers.1.block_sparse_moe.experts.178.w1", "model.layers.1.block_sparse_moe.experts.179.w1", "model.layers.1.block_sparse_moe.experts.180.w1", "model.layers.1.block_sparse_moe.experts.181.w1", "model.layers.1.block_sparse_moe.experts.182.w1", "model.layers.1.block_sparse_moe.experts.183.w1", "model.layers.1.block_sparse_moe.experts.184.w1", "model.layers.1.block_sparse_moe.experts.185.w1", "model.layers.1.block_sparse_moe.experts.186.w1", "model.layers.1.block_sparse_moe.experts.187.w1", "model.layers.1.block_sparse_moe.experts.188.w1", "model.layers.1.block_sparse_moe.experts.189.w1", "model.layers.1.block_sparse_moe.experts.190.w1", "model.layers.1.block_sparse_moe.experts.191.w1", "model.layers.1.block_sparse_moe.experts.192.w1", "model.layers.1.block_sparse_moe.experts.193.w1", "model.layers.1.block_sparse_moe.experts.194.w1", "model.layers.1.block_sparse_moe.experts.195.w1", "model.layers.1.block_sparse_moe.experts.196.w1", "model.layers.1.block_sparse_moe.experts.197.w1", "model.layers.1.block_sparse_moe.experts.198.w1", "model.layers.1.block_sparse_moe.experts.199.w1", "model.layers.1.block_sparse_moe.experts.200.w1", "model.layers.1.block_sparse_moe.experts.201.w1", "model.layers.1.block_sparse_moe.experts.202.w1", "model.layers.1.block_sparse_moe.experts.203.w1", "model.layers.1.block_sparse_moe.experts.204.w1", "model.layers.1.block_sparse_moe.experts.205.w1", "model.layers.1.block_sparse_moe.experts.206.w1", "model.layers.1.block_sparse_moe.experts.207.w1", "model.layers.1.block_sparse_moe.experts.208.w1", "model.layers.1.block_sparse_moe.experts.209.w1", "model.layers.1.block_sparse_moe.experts.210.w1", "model.layers.1.block_sparse_moe.experts.211.w1", "model.layers.1.block_sparse_moe.experts.212.w1", "model.layers.1.block_sparse_moe.experts.213.w1", "model.layers.1.block_sparse_moe.experts.214.w1", "model.layers.1.block_sparse_moe.experts.215.w1", "model.layers.1.block_sparse_moe.experts.216.w1", "model.layers.1.block_sparse_moe.experts.217.w1", "model.layers.1.block_sparse_moe.experts.218.w1", "model.layers.1.block_sparse_moe.experts.219.w1", "model.layers.1.block_sparse_moe.experts.220.w1", "model.layers.1.block_sparse_moe.experts.221.w1", "model.layers.1.block_sparse_moe.experts.222.w1", "model.layers.1.block_sparse_moe.experts.223.w1", "model.layers.1.block_sparse_moe.experts.224.w1", "model.layers.1.block_sparse_moe.experts.225.w1", "model.layers.1.block_sparse_moe.experts.226.w1", "model.layers.1.block_sparse_moe.experts.227.w1", "model.layers.1.block_sparse_moe.experts.228.w1", "model.layers.1.block_sparse_moe.experts.229.w1", "model.layers.1.block_sparse_moe.experts.230.w1", "model.layers.1.block_sparse_moe.experts.231.w1", "model.layers.1.block_sparse_moe.experts.232.w1", "model.layers.1.block_sparse_moe.experts.233.w1", "model.layers.1.block_sparse_moe.experts.234.w1", "model.layers.1.block_sparse_moe.experts.235.w1", "model.layers.1.block_sparse_moe.experts.236.w1", "model.layers.1.block_sparse_moe.experts.237.w1", "model.layers.1.block_sparse_moe.experts.238.w1", "model.layers.1.block_sparse_moe.experts.239.w1", "model.layers.1.block_sparse_moe.experts.240.w1", "model.layers.1.block_sparse_moe.experts.241.w1", "model.layers.1.block_sparse_moe.experts.242.w1", "model.layers.1.block_sparse_moe.experts.243.w1", "model.layers.1.block_sparse_moe.experts.244.w1", "model.layers.1.block_sparse_moe.experts.245.w1", "model.layers.1.block_sparse_moe.experts.246.w1", "model.layers.1.block_sparse_moe.experts.247.w1", "model.layers.1.block_sparse_moe.experts.248.w1", "model.layers.1.block_sparse_moe.experts.249.w1", "model.layers.1.block_sparse_moe.experts.250.w1", "model.layers.1.block_sparse_moe.experts.251.w1", "model.layers.1.block_sparse_moe.experts.252.w1", "model.layers.1.block_sparse_moe.experts.253.w1", "model.layers.1.block_sparse_moe.experts.254.w1", "model.layers.1.block_sparse_moe.experts.255.w1", "model.layers.1.block_sparse_moe.experts.0.w3", "model.layers.1.block_sparse_moe.experts.1.w3", "model.layers.1.block_sparse_moe.experts.2.w3", "model.layers.1.block_sparse_moe.experts.3.w3", "model.layers.1.block_sparse_moe.experts.4.w3", "model.layers.1.block_sparse_moe.experts.5.w3", "model.layers.1.block_sparse_moe.experts.6.w3", "model.layers.1.block_sparse_moe.experts.7.w3", "model.layers.1.block_sparse_moe.experts.8.w3", "model.layers.1.block_sparse_moe.experts.9.w3", "model.layers.1.block_sparse_moe.experts.10.w3", "model.layers.1.block_sparse_moe.experts.11.w3", "model.layers.1.block_sparse_moe.experts.12.w3", "model.layers.1.block_sparse_moe.experts.13.w3", "model.layers.1.block_sparse_moe.experts.14.w3", "model.layers.1.block_sparse_moe.experts.15.w3", "model.layers.1.block_sparse_moe.experts.16.w3", "model.layers.1.block_sparse_moe.experts.17.w3", "model.layers.1.block_sparse_moe.experts.18.w3", "model.layers.1.block_sparse_moe.experts.19.w3", "model.layers.1.block_sparse_moe.experts.20.w3", "model.layers.1.block_sparse_moe.experts.21.w3", "model.layers.1.block_sparse_moe.experts.22.w3", "model.layers.1.block_sparse_moe.experts.23.w3", "model.layers.1.block_sparse_moe.experts.24.w3", "model.layers.1.block_sparse_moe.experts.25.w3", "model.layers.1.block_sparse_moe.experts.26.w3", "model.layers.1.block_sparse_moe.experts.27.w3", "model.layers.1.block_sparse_moe.experts.28.w3", "model.layers.1.block_sparse_moe.experts.29.w3", "model.layers.1.block_sparse_moe.experts.30.w3", "model.layers.1.block_sparse_moe.experts.31.w3", "model.layers.1.block_sparse_moe.experts.32.w3", "model.layers.1.block_sparse_moe.experts.33.w3", "model.layers.1.block_sparse_moe.experts.34.w3", "model.layers.1.block_sparse_moe.experts.35.w3", "model.layers.1.block_sparse_moe.experts.36.w3", "model.layers.1.block_sparse_moe.experts.37.w3", "model.layers.1.block_sparse_moe.experts.38.w3", "model.layers.1.block_sparse_moe.experts.39.w3", "model.layers.1.block_sparse_moe.experts.40.w3", "model.layers.1.block_sparse_moe.experts.41.w3", "model.layers.1.block_sparse_moe.experts.42.w3", "model.layers.1.block_sparse_moe.experts.43.w3", "model.layers.1.block_sparse_moe.experts.44.w3", "model.layers.1.block_sparse_moe.experts.45.w3", "model.layers.1.block_sparse_moe.experts.46.w3", "model.layers.1.block_sparse_moe.experts.47.w3", "model.layers.1.block_sparse_moe.experts.48.w3", "model.layers.1.block_sparse_moe.experts.49.w3", "model.layers.1.block_sparse_moe.experts.50.w3", "model.layers.1.block_sparse_moe.experts.51.w3", "model.layers.1.block_sparse_moe.experts.52.w3", "model.layers.1.block_sparse_moe.experts.53.w3", "model.layers.1.block_sparse_moe.experts.54.w3", "model.layers.1.block_sparse_moe.experts.55.w3", "model.layers.1.block_sparse_moe.experts.56.w3", "model.layers.1.block_sparse_moe.experts.57.w3", "model.layers.1.block_sparse_moe.experts.58.w3", "model.layers.1.block_sparse_moe.experts.59.w3", "model.layers.1.block_sparse_moe.experts.60.w3", "model.layers.1.block_sparse_moe.experts.61.w3", "model.layers.1.block_sparse_moe.experts.62.w3", "model.layers.1.block_sparse_moe.experts.63.w3", "model.layers.1.block_sparse_moe.experts.64.w3", "model.layers.1.block_sparse_moe.experts.65.w3", "model.layers.1.block_sparse_moe.experts.66.w3", "model.layers.1.block_sparse_moe.experts.67.w3", "model.layers.1.block_sparse_moe.experts.68.w3", "model.layers.1.block_sparse_moe.experts.69.w3", "model.layers.1.block_sparse_moe.experts.70.w3", "model.layers.1.block_sparse_moe.experts.71.w3", "model.layers.1.block_sparse_moe.experts.72.w3", "model.layers.1.block_sparse_moe.experts.73.w3", "model.layers.1.block_sparse_moe.experts.74.w3", "model.layers.1.block_sparse_moe.experts.75.w3", "model.layers.1.block_sparse_moe.experts.76.w3", "model.layers.1.block_sparse_moe.experts.77.w3", "model.layers.1.block_sparse_moe.experts.78.w3", "model.layers.1.block_sparse_moe.experts.79.w3", "model.layers.1.block_sparse_moe.experts.80.w3", "model.layers.1.block_sparse_moe.experts.81.w3", "model.layers.1.block_sparse_moe.experts.82.w3", "model.layers.1.block_sparse_moe.experts.83.w3", "model.layers.1.block_sparse_moe.experts.84.w3", "model.layers.1.block_sparse_moe.experts.85.w3", "model.layers.1.block_sparse_moe.experts.86.w3", "model.layers.1.block_sparse_moe.experts.87.w3", "model.layers.1.block_sparse_moe.experts.88.w3", "model.layers.1.block_sparse_moe.experts.89.w3", "model.layers.1.block_sparse_moe.experts.90.w3", "model.layers.1.block_sparse_moe.experts.91.w3", "model.layers.1.block_sparse_moe.experts.92.w3", "model.layers.1.block_sparse_moe.experts.93.w3", "model.layers.1.block_sparse_moe.experts.94.w3", "model.layers.1.block_sparse_moe.experts.95.w3", "model.layers.1.block_sparse_moe.experts.96.w3", "model.layers.1.block_sparse_moe.experts.97.w3", "model.layers.1.block_sparse_moe.experts.98.w3", "model.layers.1.block_sparse_moe.experts.99.w3", "model.layers.1.block_sparse_moe.experts.100.w3", "model.layers.1.block_sparse_moe.experts.101.w3", "model.layers.1.block_sparse_moe.experts.102.w3", "model.layers.1.block_sparse_moe.experts.103.w3", "model.layers.1.block_sparse_moe.experts.104.w3", "model.layers.1.block_sparse_moe.experts.105.w3", "model.layers.1.block_sparse_moe.experts.106.w3", "model.layers.1.block_sparse_moe.experts.107.w3", "model.layers.1.block_sparse_moe.experts.108.w3", "model.layers.1.block_sparse_moe.experts.109.w3", "model.layers.1.block_sparse_moe.experts.110.w3", "model.layers.1.block_sparse_moe.experts.111.w3", "model.layers.1.block_sparse_moe.experts.112.w3", "model.layers.1.block_sparse_moe.experts.113.w3", "model.layers.1.block_sparse_moe.experts.114.w3", "model.layers.1.block_sparse_moe.experts.115.w3", "model.layers.1.block_sparse_moe.experts.116.w3", "model.layers.1.block_sparse_moe.experts.117.w3", "model.layers.1.block_sparse_moe.experts.118.w3", "model.layers.1.block_sparse_moe.experts.119.w3", "model.layers.1.block_sparse_moe.experts.120.w3", "model.layers.1.block_sparse_moe.experts.121.w3", "model.layers.1.block_sparse_moe.experts.122.w3", "model.layers.1.block_sparse_moe.experts.123.w3", "model.layers.1.block_sparse_moe.experts.124.w3", "model.layers.1.block_sparse_moe.experts.125.w3", "model.layers.1.block_sparse_moe.experts.126.w3", "model.layers.1.block_sparse_moe.experts.127.w3", "model.layers.1.block_sparse_moe.experts.128.w3", "model.layers.1.block_sparse_moe.experts.129.w3", "model.layers.1.block_sparse_moe.experts.130.w3", "model.layers.1.block_sparse_moe.experts.131.w3", "model.layers.1.block_sparse_moe.experts.132.w3", "model.layers.1.block_sparse_moe.experts.133.w3", "model.layers.1.block_sparse_moe.experts.134.w3", "model.layers.1.block_sparse_moe.experts.135.w3", "model.layers.1.block_sparse_moe.experts.136.w3", "model.layers.1.block_sparse_moe.experts.137.w3", "model.layers.1.block_sparse_moe.experts.138.w3", "model.layers.1.block_sparse_moe.experts.139.w3", "model.layers.1.block_sparse_moe.experts.140.w3", "model.layers.1.block_sparse_moe.experts.141.w3", "model.layers.1.block_sparse_moe.experts.142.w3", "model.layers.1.block_sparse_moe.experts.143.w3", "model.layers.1.block_sparse_moe.experts.144.w3", "model.layers.1.block_sparse_moe.experts.145.w3", "model.layers.1.block_sparse_moe.experts.146.w3", "model.layers.1.block_sparse_moe.experts.147.w3", "model.layers.1.block_sparse_moe.experts.148.w3", "model.layers.1.block_sparse_moe.experts.149.w3", "model.layers.1.block_sparse_moe.experts.150.w3", "model.layers.1.block_sparse_moe.experts.151.w3", "model.layers.1.block_sparse_moe.experts.152.w3", "model.layers.1.block_sparse_moe.experts.153.w3", "model.layers.1.block_sparse_moe.experts.154.w3", "model.layers.1.block_sparse_moe.experts.155.w3", "model.layers.1.block_sparse_moe.experts.156.w3", "model.layers.1.block_sparse_moe.experts.157.w3", "model.layers.1.block_sparse_moe.experts.158.w3", "model.layers.1.block_sparse_moe.experts.159.w3", "model.layers.1.block_sparse_moe.experts.160.w3", "model.layers.1.block_sparse_moe.experts.161.w3", "model.layers.1.block_sparse_moe.experts.162.w3", "model.layers.1.block_sparse_moe.experts.163.w3", "model.layers.1.block_sparse_moe.experts.164.w3", "model.layers.1.block_sparse_moe.experts.165.w3", "model.layers.1.block_sparse_moe.experts.166.w3", "model.layers.1.block_sparse_moe.experts.167.w3", "model.layers.1.block_sparse_moe.experts.168.w3", "model.layers.1.block_sparse_moe.experts.169.w3", "model.layers.1.block_sparse_moe.experts.170.w3", "model.layers.1.block_sparse_moe.experts.171.w3", "model.layers.1.block_sparse_moe.experts.172.w3", "model.layers.1.block_sparse_moe.experts.173.w3", "model.layers.1.block_sparse_moe.experts.174.w3", "model.layers.1.block_sparse_moe.experts.175.w3", "model.layers.1.block_sparse_moe.experts.176.w3", "model.layers.1.block_sparse_moe.experts.177.w3", "model.layers.1.block_sparse_moe.experts.178.w3", "model.layers.1.block_sparse_moe.experts.179.w3", "model.layers.1.block_sparse_moe.experts.180.w3", "model.layers.1.block_sparse_moe.experts.181.w3", "model.layers.1.block_sparse_moe.experts.182.w3", "model.layers.1.block_sparse_moe.experts.183.w3", "model.layers.1.block_sparse_moe.experts.184.w3", "model.layers.1.block_sparse_moe.experts.185.w3", "model.layers.1.block_sparse_moe.experts.186.w3", "model.layers.1.block_sparse_moe.experts.187.w3", "model.layers.1.block_sparse_moe.experts.188.w3", "model.layers.1.block_sparse_moe.experts.189.w3", "model.layers.1.block_sparse_moe.experts.190.w3", "model.layers.1.block_sparse_moe.experts.191.w3", "model.layers.1.block_sparse_moe.experts.192.w3", "model.layers.1.block_sparse_moe.experts.193.w3", "model.layers.1.block_sparse_moe.experts.194.w3", "model.layers.1.block_sparse_moe.experts.195.w3", "model.layers.1.block_sparse_moe.experts.196.w3", "model.layers.1.block_sparse_moe.experts.197.w3", "model.layers.1.block_sparse_moe.experts.198.w3", "model.layers.1.block_sparse_moe.experts.199.w3", "model.layers.1.block_sparse_moe.experts.200.w3", "model.layers.1.block_sparse_moe.experts.201.w3", "model.layers.1.block_sparse_moe.experts.202.w3", "model.layers.1.block_sparse_moe.experts.203.w3", "model.layers.1.block_sparse_moe.experts.204.w3", "model.layers.1.block_sparse_moe.experts.205.w3", "model.layers.1.block_sparse_moe.experts.206.w3", "model.layers.1.block_sparse_moe.experts.207.w3", "model.layers.1.block_sparse_moe.experts.208.w3", "model.layers.1.block_sparse_moe.experts.209.w3", "model.layers.1.block_sparse_moe.experts.210.w3", "model.layers.1.block_sparse_moe.experts.211.w3", "model.layers.1.block_sparse_moe.experts.212.w3", "model.layers.1.block_sparse_moe.experts.213.w3", "model.layers.1.block_sparse_moe.experts.214.w3", "model.layers.1.block_sparse_moe.experts.215.w3", "model.layers.1.block_sparse_moe.experts.216.w3", "model.layers.1.block_sparse_moe.experts.217.w3", "model.layers.1.block_sparse_moe.experts.218.w3", "model.layers.1.block_sparse_moe.experts.219.w3", "model.layers.1.block_sparse_moe.experts.220.w3", "model.layers.1.block_sparse_moe.experts.221.w3", "model.layers.1.block_sparse_moe.experts.222.w3", "model.layers.1.block_sparse_moe.experts.223.w3", "model.layers.1.block_sparse_moe.experts.224.w3", "model.layers.1.block_sparse_moe.experts.225.w3", "model.layers.1.block_sparse_moe.experts.226.w3", "model.layers.1.block_sparse_moe.experts.227.w3", "model.layers.1.block_sparse_moe.experts.228.w3", "model.layers.1.block_sparse_moe.experts.229.w3", "model.layers.1.block_sparse_moe.experts.230.w3", "model.layers.1.block_sparse_moe.experts.231.w3", "model.layers.1.block_sparse_moe.experts.232.w3", "model.layers.1.block_sparse_moe.experts.233.w3", "model.layers.1.block_sparse_moe.experts.234.w3", "model.layers.1.block_sparse_moe.experts.235.w3", "model.layers.1.block_sparse_moe.experts.236.w3", "model.layers.1.block_sparse_moe.experts.237.w3", "model.layers.1.block_sparse_moe.experts.238.w3", "model.layers.1.block_sparse_moe.experts.239.w3", "model.layers.1.block_sparse_moe.experts.240.w3", "model.layers.1.block_sparse_moe.experts.241.w3", "model.layers.1.block_sparse_moe.experts.242.w3", "model.layers.1.block_sparse_moe.experts.243.w3", "model.layers.1.block_sparse_moe.experts.244.w3", "model.layers.1.block_sparse_moe.experts.245.w3", "model.layers.1.block_sparse_moe.experts.246.w3", "model.layers.1.block_sparse_moe.experts.247.w3", "model.layers.1.block_sparse_moe.experts.248.w3", "model.layers.1.block_sparse_moe.experts.249.w3", "model.layers.1.block_sparse_moe.experts.250.w3", "model.layers.1.block_sparse_moe.experts.251.w3", "model.layers.1.block_sparse_moe.experts.252.w3", "model.layers.1.block_sparse_moe.experts.253.w3", "model.layers.1.block_sparse_moe.experts.254.w3", "model.layers.1.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0016111284494400024, "dbits": 2415919104 } ] }, { "idx": 9, "layers": [ "model.layers.1.block_sparse_moe.experts.0.w2", "model.layers.1.block_sparse_moe.experts.1.w2", "model.layers.1.block_sparse_moe.experts.2.w2", "model.layers.1.block_sparse_moe.experts.3.w2", "model.layers.1.block_sparse_moe.experts.4.w2", "model.layers.1.block_sparse_moe.experts.5.w2", "model.layers.1.block_sparse_moe.experts.6.w2", "model.layers.1.block_sparse_moe.experts.7.w2", "model.layers.1.block_sparse_moe.experts.8.w2", "model.layers.1.block_sparse_moe.experts.9.w2", "model.layers.1.block_sparse_moe.experts.10.w2", "model.layers.1.block_sparse_moe.experts.11.w2", "model.layers.1.block_sparse_moe.experts.12.w2", "model.layers.1.block_sparse_moe.experts.13.w2", "model.layers.1.block_sparse_moe.experts.14.w2", "model.layers.1.block_sparse_moe.experts.15.w2", "model.layers.1.block_sparse_moe.experts.16.w2", "model.layers.1.block_sparse_moe.experts.17.w2", "model.layers.1.block_sparse_moe.experts.18.w2", "model.layers.1.block_sparse_moe.experts.19.w2", "model.layers.1.block_sparse_moe.experts.20.w2", "model.layers.1.block_sparse_moe.experts.21.w2", "model.layers.1.block_sparse_moe.experts.22.w2", "model.layers.1.block_sparse_moe.experts.23.w2", "model.layers.1.block_sparse_moe.experts.24.w2", "model.layers.1.block_sparse_moe.experts.25.w2", "model.layers.1.block_sparse_moe.experts.26.w2", "model.layers.1.block_sparse_moe.experts.27.w2", "model.layers.1.block_sparse_moe.experts.28.w2", "model.layers.1.block_sparse_moe.experts.29.w2", "model.layers.1.block_sparse_moe.experts.30.w2", "model.layers.1.block_sparse_moe.experts.31.w2", "model.layers.1.block_sparse_moe.experts.32.w2", "model.layers.1.block_sparse_moe.experts.33.w2", "model.layers.1.block_sparse_moe.experts.34.w2", "model.layers.1.block_sparse_moe.experts.35.w2", "model.layers.1.block_sparse_moe.experts.36.w2", "model.layers.1.block_sparse_moe.experts.37.w2", "model.layers.1.block_sparse_moe.experts.38.w2", "model.layers.1.block_sparse_moe.experts.39.w2", "model.layers.1.block_sparse_moe.experts.40.w2", "model.layers.1.block_sparse_moe.experts.41.w2", "model.layers.1.block_sparse_moe.experts.42.w2", "model.layers.1.block_sparse_moe.experts.43.w2", "model.layers.1.block_sparse_moe.experts.44.w2", "model.layers.1.block_sparse_moe.experts.45.w2", "model.layers.1.block_sparse_moe.experts.46.w2", "model.layers.1.block_sparse_moe.experts.47.w2", "model.layers.1.block_sparse_moe.experts.48.w2", "model.layers.1.block_sparse_moe.experts.49.w2", "model.layers.1.block_sparse_moe.experts.50.w2", "model.layers.1.block_sparse_moe.experts.51.w2", "model.layers.1.block_sparse_moe.experts.52.w2", "model.layers.1.block_sparse_moe.experts.53.w2", "model.layers.1.block_sparse_moe.experts.54.w2", "model.layers.1.block_sparse_moe.experts.55.w2", "model.layers.1.block_sparse_moe.experts.56.w2", "model.layers.1.block_sparse_moe.experts.57.w2", "model.layers.1.block_sparse_moe.experts.58.w2", "model.layers.1.block_sparse_moe.experts.59.w2", "model.layers.1.block_sparse_moe.experts.60.w2", "model.layers.1.block_sparse_moe.experts.61.w2", "model.layers.1.block_sparse_moe.experts.62.w2", "model.layers.1.block_sparse_moe.experts.63.w2", "model.layers.1.block_sparse_moe.experts.64.w2", "model.layers.1.block_sparse_moe.experts.65.w2", "model.layers.1.block_sparse_moe.experts.66.w2", "model.layers.1.block_sparse_moe.experts.67.w2", "model.layers.1.block_sparse_moe.experts.68.w2", "model.layers.1.block_sparse_moe.experts.69.w2", "model.layers.1.block_sparse_moe.experts.70.w2", "model.layers.1.block_sparse_moe.experts.71.w2", "model.layers.1.block_sparse_moe.experts.72.w2", "model.layers.1.block_sparse_moe.experts.73.w2", "model.layers.1.block_sparse_moe.experts.74.w2", "model.layers.1.block_sparse_moe.experts.75.w2", "model.layers.1.block_sparse_moe.experts.76.w2", "model.layers.1.block_sparse_moe.experts.77.w2", "model.layers.1.block_sparse_moe.experts.78.w2", "model.layers.1.block_sparse_moe.experts.79.w2", "model.layers.1.block_sparse_moe.experts.80.w2", "model.layers.1.block_sparse_moe.experts.81.w2", "model.layers.1.block_sparse_moe.experts.82.w2", "model.layers.1.block_sparse_moe.experts.83.w2", "model.layers.1.block_sparse_moe.experts.84.w2", "model.layers.1.block_sparse_moe.experts.85.w2", "model.layers.1.block_sparse_moe.experts.86.w2", "model.layers.1.block_sparse_moe.experts.87.w2", "model.layers.1.block_sparse_moe.experts.88.w2", "model.layers.1.block_sparse_moe.experts.89.w2", "model.layers.1.block_sparse_moe.experts.90.w2", "model.layers.1.block_sparse_moe.experts.91.w2", "model.layers.1.block_sparse_moe.experts.92.w2", "model.layers.1.block_sparse_moe.experts.93.w2", "model.layers.1.block_sparse_moe.experts.94.w2", "model.layers.1.block_sparse_moe.experts.95.w2", "model.layers.1.block_sparse_moe.experts.96.w2", "model.layers.1.block_sparse_moe.experts.97.w2", "model.layers.1.block_sparse_moe.experts.98.w2", "model.layers.1.block_sparse_moe.experts.99.w2", "model.layers.1.block_sparse_moe.experts.100.w2", "model.layers.1.block_sparse_moe.experts.101.w2", "model.layers.1.block_sparse_moe.experts.102.w2", "model.layers.1.block_sparse_moe.experts.103.w2", "model.layers.1.block_sparse_moe.experts.104.w2", "model.layers.1.block_sparse_moe.experts.105.w2", "model.layers.1.block_sparse_moe.experts.106.w2", "model.layers.1.block_sparse_moe.experts.107.w2", "model.layers.1.block_sparse_moe.experts.108.w2", "model.layers.1.block_sparse_moe.experts.109.w2", "model.layers.1.block_sparse_moe.experts.110.w2", "model.layers.1.block_sparse_moe.experts.111.w2", "model.layers.1.block_sparse_moe.experts.112.w2", "model.layers.1.block_sparse_moe.experts.113.w2", "model.layers.1.block_sparse_moe.experts.114.w2", "model.layers.1.block_sparse_moe.experts.115.w2", "model.layers.1.block_sparse_moe.experts.116.w2", "model.layers.1.block_sparse_moe.experts.117.w2", "model.layers.1.block_sparse_moe.experts.118.w2", "model.layers.1.block_sparse_moe.experts.119.w2", "model.layers.1.block_sparse_moe.experts.120.w2", "model.layers.1.block_sparse_moe.experts.121.w2", "model.layers.1.block_sparse_moe.experts.122.w2", "model.layers.1.block_sparse_moe.experts.123.w2", "model.layers.1.block_sparse_moe.experts.124.w2", "model.layers.1.block_sparse_moe.experts.125.w2", "model.layers.1.block_sparse_moe.experts.126.w2", "model.layers.1.block_sparse_moe.experts.127.w2", "model.layers.1.block_sparse_moe.experts.128.w2", "model.layers.1.block_sparse_moe.experts.129.w2", "model.layers.1.block_sparse_moe.experts.130.w2", "model.layers.1.block_sparse_moe.experts.131.w2", "model.layers.1.block_sparse_moe.experts.132.w2", "model.layers.1.block_sparse_moe.experts.133.w2", "model.layers.1.block_sparse_moe.experts.134.w2", "model.layers.1.block_sparse_moe.experts.135.w2", "model.layers.1.block_sparse_moe.experts.136.w2", "model.layers.1.block_sparse_moe.experts.137.w2", "model.layers.1.block_sparse_moe.experts.138.w2", "model.layers.1.block_sparse_moe.experts.139.w2", "model.layers.1.block_sparse_moe.experts.140.w2", "model.layers.1.block_sparse_moe.experts.141.w2", "model.layers.1.block_sparse_moe.experts.142.w2", "model.layers.1.block_sparse_moe.experts.143.w2", "model.layers.1.block_sparse_moe.experts.144.w2", "model.layers.1.block_sparse_moe.experts.145.w2", "model.layers.1.block_sparse_moe.experts.146.w2", "model.layers.1.block_sparse_moe.experts.147.w2", "model.layers.1.block_sparse_moe.experts.148.w2", "model.layers.1.block_sparse_moe.experts.149.w2", "model.layers.1.block_sparse_moe.experts.150.w2", "model.layers.1.block_sparse_moe.experts.151.w2", "model.layers.1.block_sparse_moe.experts.152.w2", "model.layers.1.block_sparse_moe.experts.153.w2", "model.layers.1.block_sparse_moe.experts.154.w2", "model.layers.1.block_sparse_moe.experts.155.w2", "model.layers.1.block_sparse_moe.experts.156.w2", "model.layers.1.block_sparse_moe.experts.157.w2", "model.layers.1.block_sparse_moe.experts.158.w2", "model.layers.1.block_sparse_moe.experts.159.w2", "model.layers.1.block_sparse_moe.experts.160.w2", "model.layers.1.block_sparse_moe.experts.161.w2", "model.layers.1.block_sparse_moe.experts.162.w2", "model.layers.1.block_sparse_moe.experts.163.w2", "model.layers.1.block_sparse_moe.experts.164.w2", "model.layers.1.block_sparse_moe.experts.165.w2", "model.layers.1.block_sparse_moe.experts.166.w2", "model.layers.1.block_sparse_moe.experts.167.w2", "model.layers.1.block_sparse_moe.experts.168.w2", "model.layers.1.block_sparse_moe.experts.169.w2", "model.layers.1.block_sparse_moe.experts.170.w2", "model.layers.1.block_sparse_moe.experts.171.w2", "model.layers.1.block_sparse_moe.experts.172.w2", "model.layers.1.block_sparse_moe.experts.173.w2", "model.layers.1.block_sparse_moe.experts.174.w2", "model.layers.1.block_sparse_moe.experts.175.w2", "model.layers.1.block_sparse_moe.experts.176.w2", "model.layers.1.block_sparse_moe.experts.177.w2", "model.layers.1.block_sparse_moe.experts.178.w2", "model.layers.1.block_sparse_moe.experts.179.w2", "model.layers.1.block_sparse_moe.experts.180.w2", "model.layers.1.block_sparse_moe.experts.181.w2", "model.layers.1.block_sparse_moe.experts.182.w2", "model.layers.1.block_sparse_moe.experts.183.w2", "model.layers.1.block_sparse_moe.experts.184.w2", "model.layers.1.block_sparse_moe.experts.185.w2", "model.layers.1.block_sparse_moe.experts.186.w2", "model.layers.1.block_sparse_moe.experts.187.w2", "model.layers.1.block_sparse_moe.experts.188.w2", "model.layers.1.block_sparse_moe.experts.189.w2", "model.layers.1.block_sparse_moe.experts.190.w2", "model.layers.1.block_sparse_moe.experts.191.w2", "model.layers.1.block_sparse_moe.experts.192.w2", "model.layers.1.block_sparse_moe.experts.193.w2", "model.layers.1.block_sparse_moe.experts.194.w2", "model.layers.1.block_sparse_moe.experts.195.w2", "model.layers.1.block_sparse_moe.experts.196.w2", "model.layers.1.block_sparse_moe.experts.197.w2", "model.layers.1.block_sparse_moe.experts.198.w2", "model.layers.1.block_sparse_moe.experts.199.w2", "model.layers.1.block_sparse_moe.experts.200.w2", "model.layers.1.block_sparse_moe.experts.201.w2", "model.layers.1.block_sparse_moe.experts.202.w2", "model.layers.1.block_sparse_moe.experts.203.w2", "model.layers.1.block_sparse_moe.experts.204.w2", "model.layers.1.block_sparse_moe.experts.205.w2", "model.layers.1.block_sparse_moe.experts.206.w2", "model.layers.1.block_sparse_moe.experts.207.w2", "model.layers.1.block_sparse_moe.experts.208.w2", "model.layers.1.block_sparse_moe.experts.209.w2", "model.layers.1.block_sparse_moe.experts.210.w2", "model.layers.1.block_sparse_moe.experts.211.w2", "model.layers.1.block_sparse_moe.experts.212.w2", "model.layers.1.block_sparse_moe.experts.213.w2", "model.layers.1.block_sparse_moe.experts.214.w2", "model.layers.1.block_sparse_moe.experts.215.w2", "model.layers.1.block_sparse_moe.experts.216.w2", "model.layers.1.block_sparse_moe.experts.217.w2", "model.layers.1.block_sparse_moe.experts.218.w2", "model.layers.1.block_sparse_moe.experts.219.w2", "model.layers.1.block_sparse_moe.experts.220.w2", "model.layers.1.block_sparse_moe.experts.221.w2", "model.layers.1.block_sparse_moe.experts.222.w2", "model.layers.1.block_sparse_moe.experts.223.w2", "model.layers.1.block_sparse_moe.experts.224.w2", "model.layers.1.block_sparse_moe.experts.225.w2", "model.layers.1.block_sparse_moe.experts.226.w2", "model.layers.1.block_sparse_moe.experts.227.w2", "model.layers.1.block_sparse_moe.experts.228.w2", "model.layers.1.block_sparse_moe.experts.229.w2", "model.layers.1.block_sparse_moe.experts.230.w2", "model.layers.1.block_sparse_moe.experts.231.w2", "model.layers.1.block_sparse_moe.experts.232.w2", "model.layers.1.block_sparse_moe.experts.233.w2", "model.layers.1.block_sparse_moe.experts.234.w2", "model.layers.1.block_sparse_moe.experts.235.w2", "model.layers.1.block_sparse_moe.experts.236.w2", "model.layers.1.block_sparse_moe.experts.237.w2", "model.layers.1.block_sparse_moe.experts.238.w2", "model.layers.1.block_sparse_moe.experts.239.w2", "model.layers.1.block_sparse_moe.experts.240.w2", "model.layers.1.block_sparse_moe.experts.241.w2", "model.layers.1.block_sparse_moe.experts.242.w2", "model.layers.1.block_sparse_moe.experts.243.w2", "model.layers.1.block_sparse_moe.experts.244.w2", "model.layers.1.block_sparse_moe.experts.245.w2", "model.layers.1.block_sparse_moe.experts.246.w2", "model.layers.1.block_sparse_moe.experts.247.w2", "model.layers.1.block_sparse_moe.experts.248.w2", "model.layers.1.block_sparse_moe.experts.249.w2", "model.layers.1.block_sparse_moe.experts.250.w2", "model.layers.1.block_sparse_moe.experts.251.w2", "model.layers.1.block_sparse_moe.experts.252.w2", "model.layers.1.block_sparse_moe.experts.253.w2", "model.layers.1.block_sparse_moe.experts.254.w2", "model.layers.1.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00308053195476532, "dbits": 1207959552 } ] }, { "idx": 10, "layers": [ "model.layers.2.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0006501704454422441, "dbits": 18874368 } ] }, { "idx": 11, "layers": [ "model.layers.2.self_attn.k_proj", "model.layers.2.self_attn.v_proj" ], "candidates": [ { "dkld": 0.004151302576065108, "dbits": 6291456 } ] }, { "idx": 12, "layers": [ "model.layers.2.self_attn.o_proj" ], "candidates": [ { "dkld": -0.005829766392707825, "dbits": 18874368 } ] }, { "idx": 13, "layers": [ "model.layers.2.block_sparse_moe.experts.0.w1", "model.layers.2.block_sparse_moe.experts.1.w1", "model.layers.2.block_sparse_moe.experts.2.w1", "model.layers.2.block_sparse_moe.experts.3.w1", "model.layers.2.block_sparse_moe.experts.4.w1", "model.layers.2.block_sparse_moe.experts.5.w1", "model.layers.2.block_sparse_moe.experts.6.w1", "model.layers.2.block_sparse_moe.experts.7.w1", "model.layers.2.block_sparse_moe.experts.8.w1", "model.layers.2.block_sparse_moe.experts.9.w1", "model.layers.2.block_sparse_moe.experts.10.w1", "model.layers.2.block_sparse_moe.experts.11.w1", "model.layers.2.block_sparse_moe.experts.12.w1", "model.layers.2.block_sparse_moe.experts.13.w1", "model.layers.2.block_sparse_moe.experts.14.w1", "model.layers.2.block_sparse_moe.experts.15.w1", "model.layers.2.block_sparse_moe.experts.16.w1", "model.layers.2.block_sparse_moe.experts.17.w1", "model.layers.2.block_sparse_moe.experts.18.w1", "model.layers.2.block_sparse_moe.experts.19.w1", "model.layers.2.block_sparse_moe.experts.20.w1", "model.layers.2.block_sparse_moe.experts.21.w1", "model.layers.2.block_sparse_moe.experts.22.w1", "model.layers.2.block_sparse_moe.experts.23.w1", "model.layers.2.block_sparse_moe.experts.24.w1", "model.layers.2.block_sparse_moe.experts.25.w1", "model.layers.2.block_sparse_moe.experts.26.w1", "model.layers.2.block_sparse_moe.experts.27.w1", "model.layers.2.block_sparse_moe.experts.28.w1", "model.layers.2.block_sparse_moe.experts.29.w1", "model.layers.2.block_sparse_moe.experts.30.w1", "model.layers.2.block_sparse_moe.experts.31.w1", "model.layers.2.block_sparse_moe.experts.32.w1", "model.layers.2.block_sparse_moe.experts.33.w1", "model.layers.2.block_sparse_moe.experts.34.w1", "model.layers.2.block_sparse_moe.experts.35.w1", "model.layers.2.block_sparse_moe.experts.36.w1", "model.layers.2.block_sparse_moe.experts.37.w1", "model.layers.2.block_sparse_moe.experts.38.w1", "model.layers.2.block_sparse_moe.experts.39.w1", "model.layers.2.block_sparse_moe.experts.40.w1", "model.layers.2.block_sparse_moe.experts.41.w1", "model.layers.2.block_sparse_moe.experts.42.w1", "model.layers.2.block_sparse_moe.experts.43.w1", "model.layers.2.block_sparse_moe.experts.44.w1", "model.layers.2.block_sparse_moe.experts.45.w1", "model.layers.2.block_sparse_moe.experts.46.w1", "model.layers.2.block_sparse_moe.experts.47.w1", "model.layers.2.block_sparse_moe.experts.48.w1", "model.layers.2.block_sparse_moe.experts.49.w1", "model.layers.2.block_sparse_moe.experts.50.w1", "model.layers.2.block_sparse_moe.experts.51.w1", "model.layers.2.block_sparse_moe.experts.52.w1", "model.layers.2.block_sparse_moe.experts.53.w1", "model.layers.2.block_sparse_moe.experts.54.w1", "model.layers.2.block_sparse_moe.experts.55.w1", "model.layers.2.block_sparse_moe.experts.56.w1", "model.layers.2.block_sparse_moe.experts.57.w1", "model.layers.2.block_sparse_moe.experts.58.w1", "model.layers.2.block_sparse_moe.experts.59.w1", "model.layers.2.block_sparse_moe.experts.60.w1", "model.layers.2.block_sparse_moe.experts.61.w1", "model.layers.2.block_sparse_moe.experts.62.w1", "model.layers.2.block_sparse_moe.experts.63.w1", "model.layers.2.block_sparse_moe.experts.64.w1", "model.layers.2.block_sparse_moe.experts.65.w1", "model.layers.2.block_sparse_moe.experts.66.w1", "model.layers.2.block_sparse_moe.experts.67.w1", "model.layers.2.block_sparse_moe.experts.68.w1", "model.layers.2.block_sparse_moe.experts.69.w1", "model.layers.2.block_sparse_moe.experts.70.w1", "model.layers.2.block_sparse_moe.experts.71.w1", "model.layers.2.block_sparse_moe.experts.72.w1", "model.layers.2.block_sparse_moe.experts.73.w1", "model.layers.2.block_sparse_moe.experts.74.w1", "model.layers.2.block_sparse_moe.experts.75.w1", "model.layers.2.block_sparse_moe.experts.76.w1", "model.layers.2.block_sparse_moe.experts.77.w1", "model.layers.2.block_sparse_moe.experts.78.w1", "model.layers.2.block_sparse_moe.experts.79.w1", "model.layers.2.block_sparse_moe.experts.80.w1", "model.layers.2.block_sparse_moe.experts.81.w1", "model.layers.2.block_sparse_moe.experts.82.w1", "model.layers.2.block_sparse_moe.experts.83.w1", "model.layers.2.block_sparse_moe.experts.84.w1", "model.layers.2.block_sparse_moe.experts.85.w1", "model.layers.2.block_sparse_moe.experts.86.w1", "model.layers.2.block_sparse_moe.experts.87.w1", "model.layers.2.block_sparse_moe.experts.88.w1", "model.layers.2.block_sparse_moe.experts.89.w1", "model.layers.2.block_sparse_moe.experts.90.w1", "model.layers.2.block_sparse_moe.experts.91.w1", "model.layers.2.block_sparse_moe.experts.92.w1", "model.layers.2.block_sparse_moe.experts.93.w1", "model.layers.2.block_sparse_moe.experts.94.w1", "model.layers.2.block_sparse_moe.experts.95.w1", "model.layers.2.block_sparse_moe.experts.96.w1", "model.layers.2.block_sparse_moe.experts.97.w1", "model.layers.2.block_sparse_moe.experts.98.w1", "model.layers.2.block_sparse_moe.experts.99.w1", "model.layers.2.block_sparse_moe.experts.100.w1", "model.layers.2.block_sparse_moe.experts.101.w1", "model.layers.2.block_sparse_moe.experts.102.w1", "model.layers.2.block_sparse_moe.experts.103.w1", "model.layers.2.block_sparse_moe.experts.104.w1", "model.layers.2.block_sparse_moe.experts.105.w1", "model.layers.2.block_sparse_moe.experts.106.w1", "model.layers.2.block_sparse_moe.experts.107.w1", "model.layers.2.block_sparse_moe.experts.108.w1", "model.layers.2.block_sparse_moe.experts.109.w1", "model.layers.2.block_sparse_moe.experts.110.w1", "model.layers.2.block_sparse_moe.experts.111.w1", "model.layers.2.block_sparse_moe.experts.112.w1", "model.layers.2.block_sparse_moe.experts.113.w1", "model.layers.2.block_sparse_moe.experts.114.w1", "model.layers.2.block_sparse_moe.experts.115.w1", "model.layers.2.block_sparse_moe.experts.116.w1", "model.layers.2.block_sparse_moe.experts.117.w1", "model.layers.2.block_sparse_moe.experts.118.w1", "model.layers.2.block_sparse_moe.experts.119.w1", "model.layers.2.block_sparse_moe.experts.120.w1", "model.layers.2.block_sparse_moe.experts.121.w1", "model.layers.2.block_sparse_moe.experts.122.w1", "model.layers.2.block_sparse_moe.experts.123.w1", "model.layers.2.block_sparse_moe.experts.124.w1", "model.layers.2.block_sparse_moe.experts.125.w1", "model.layers.2.block_sparse_moe.experts.126.w1", "model.layers.2.block_sparse_moe.experts.127.w1", "model.layers.2.block_sparse_moe.experts.128.w1", "model.layers.2.block_sparse_moe.experts.129.w1", "model.layers.2.block_sparse_moe.experts.130.w1", "model.layers.2.block_sparse_moe.experts.131.w1", "model.layers.2.block_sparse_moe.experts.132.w1", "model.layers.2.block_sparse_moe.experts.133.w1", "model.layers.2.block_sparse_moe.experts.134.w1", "model.layers.2.block_sparse_moe.experts.135.w1", "model.layers.2.block_sparse_moe.experts.136.w1", "model.layers.2.block_sparse_moe.experts.137.w1", "model.layers.2.block_sparse_moe.experts.138.w1", "model.layers.2.block_sparse_moe.experts.139.w1", "model.layers.2.block_sparse_moe.experts.140.w1", "model.layers.2.block_sparse_moe.experts.141.w1", "model.layers.2.block_sparse_moe.experts.142.w1", "model.layers.2.block_sparse_moe.experts.143.w1", "model.layers.2.block_sparse_moe.experts.144.w1", "model.layers.2.block_sparse_moe.experts.145.w1", "model.layers.2.block_sparse_moe.experts.146.w1", "model.layers.2.block_sparse_moe.experts.147.w1", "model.layers.2.block_sparse_moe.experts.148.w1", "model.layers.2.block_sparse_moe.experts.149.w1", "model.layers.2.block_sparse_moe.experts.150.w1", "model.layers.2.block_sparse_moe.experts.151.w1", "model.layers.2.block_sparse_moe.experts.152.w1", "model.layers.2.block_sparse_moe.experts.153.w1", "model.layers.2.block_sparse_moe.experts.154.w1", "model.layers.2.block_sparse_moe.experts.155.w1", "model.layers.2.block_sparse_moe.experts.156.w1", "model.layers.2.block_sparse_moe.experts.157.w1", "model.layers.2.block_sparse_moe.experts.158.w1", "model.layers.2.block_sparse_moe.experts.159.w1", "model.layers.2.block_sparse_moe.experts.160.w1", "model.layers.2.block_sparse_moe.experts.161.w1", "model.layers.2.block_sparse_moe.experts.162.w1", "model.layers.2.block_sparse_moe.experts.163.w1", "model.layers.2.block_sparse_moe.experts.164.w1", "model.layers.2.block_sparse_moe.experts.165.w1", "model.layers.2.block_sparse_moe.experts.166.w1", "model.layers.2.block_sparse_moe.experts.167.w1", "model.layers.2.block_sparse_moe.experts.168.w1", "model.layers.2.block_sparse_moe.experts.169.w1", "model.layers.2.block_sparse_moe.experts.170.w1", "model.layers.2.block_sparse_moe.experts.171.w1", "model.layers.2.block_sparse_moe.experts.172.w1", "model.layers.2.block_sparse_moe.experts.173.w1", "model.layers.2.block_sparse_moe.experts.174.w1", "model.layers.2.block_sparse_moe.experts.175.w1", "model.layers.2.block_sparse_moe.experts.176.w1", "model.layers.2.block_sparse_moe.experts.177.w1", "model.layers.2.block_sparse_moe.experts.178.w1", "model.layers.2.block_sparse_moe.experts.179.w1", "model.layers.2.block_sparse_moe.experts.180.w1", "model.layers.2.block_sparse_moe.experts.181.w1", "model.layers.2.block_sparse_moe.experts.182.w1", "model.layers.2.block_sparse_moe.experts.183.w1", "model.layers.2.block_sparse_moe.experts.184.w1", "model.layers.2.block_sparse_moe.experts.185.w1", "model.layers.2.block_sparse_moe.experts.186.w1", "model.layers.2.block_sparse_moe.experts.187.w1", "model.layers.2.block_sparse_moe.experts.188.w1", "model.layers.2.block_sparse_moe.experts.189.w1", "model.layers.2.block_sparse_moe.experts.190.w1", "model.layers.2.block_sparse_moe.experts.191.w1", "model.layers.2.block_sparse_moe.experts.192.w1", "model.layers.2.block_sparse_moe.experts.193.w1", "model.layers.2.block_sparse_moe.experts.194.w1", "model.layers.2.block_sparse_moe.experts.195.w1", "model.layers.2.block_sparse_moe.experts.196.w1", "model.layers.2.block_sparse_moe.experts.197.w1", "model.layers.2.block_sparse_moe.experts.198.w1", "model.layers.2.block_sparse_moe.experts.199.w1", "model.layers.2.block_sparse_moe.experts.200.w1", "model.layers.2.block_sparse_moe.experts.201.w1", "model.layers.2.block_sparse_moe.experts.202.w1", "model.layers.2.block_sparse_moe.experts.203.w1", "model.layers.2.block_sparse_moe.experts.204.w1", "model.layers.2.block_sparse_moe.experts.205.w1", "model.layers.2.block_sparse_moe.experts.206.w1", "model.layers.2.block_sparse_moe.experts.207.w1", "model.layers.2.block_sparse_moe.experts.208.w1", "model.layers.2.block_sparse_moe.experts.209.w1", "model.layers.2.block_sparse_moe.experts.210.w1", "model.layers.2.block_sparse_moe.experts.211.w1", "model.layers.2.block_sparse_moe.experts.212.w1", "model.layers.2.block_sparse_moe.experts.213.w1", "model.layers.2.block_sparse_moe.experts.214.w1", "model.layers.2.block_sparse_moe.experts.215.w1", "model.layers.2.block_sparse_moe.experts.216.w1", "model.layers.2.block_sparse_moe.experts.217.w1", "model.layers.2.block_sparse_moe.experts.218.w1", "model.layers.2.block_sparse_moe.experts.219.w1", "model.layers.2.block_sparse_moe.experts.220.w1", "model.layers.2.block_sparse_moe.experts.221.w1", "model.layers.2.block_sparse_moe.experts.222.w1", "model.layers.2.block_sparse_moe.experts.223.w1", "model.layers.2.block_sparse_moe.experts.224.w1", "model.layers.2.block_sparse_moe.experts.225.w1", "model.layers.2.block_sparse_moe.experts.226.w1", "model.layers.2.block_sparse_moe.experts.227.w1", "model.layers.2.block_sparse_moe.experts.228.w1", "model.layers.2.block_sparse_moe.experts.229.w1", "model.layers.2.block_sparse_moe.experts.230.w1", "model.layers.2.block_sparse_moe.experts.231.w1", "model.layers.2.block_sparse_moe.experts.232.w1", "model.layers.2.block_sparse_moe.experts.233.w1", "model.layers.2.block_sparse_moe.experts.234.w1", "model.layers.2.block_sparse_moe.experts.235.w1", "model.layers.2.block_sparse_moe.experts.236.w1", "model.layers.2.block_sparse_moe.experts.237.w1", "model.layers.2.block_sparse_moe.experts.238.w1", "model.layers.2.block_sparse_moe.experts.239.w1", "model.layers.2.block_sparse_moe.experts.240.w1", "model.layers.2.block_sparse_moe.experts.241.w1", "model.layers.2.block_sparse_moe.experts.242.w1", "model.layers.2.block_sparse_moe.experts.243.w1", "model.layers.2.block_sparse_moe.experts.244.w1", "model.layers.2.block_sparse_moe.experts.245.w1", "model.layers.2.block_sparse_moe.experts.246.w1", "model.layers.2.block_sparse_moe.experts.247.w1", "model.layers.2.block_sparse_moe.experts.248.w1", "model.layers.2.block_sparse_moe.experts.249.w1", "model.layers.2.block_sparse_moe.experts.250.w1", "model.layers.2.block_sparse_moe.experts.251.w1", "model.layers.2.block_sparse_moe.experts.252.w1", "model.layers.2.block_sparse_moe.experts.253.w1", "model.layers.2.block_sparse_moe.experts.254.w1", "model.layers.2.block_sparse_moe.experts.255.w1", "model.layers.2.block_sparse_moe.experts.0.w3", "model.layers.2.block_sparse_moe.experts.1.w3", "model.layers.2.block_sparse_moe.experts.2.w3", "model.layers.2.block_sparse_moe.experts.3.w3", "model.layers.2.block_sparse_moe.experts.4.w3", "model.layers.2.block_sparse_moe.experts.5.w3", "model.layers.2.block_sparse_moe.experts.6.w3", "model.layers.2.block_sparse_moe.experts.7.w3", "model.layers.2.block_sparse_moe.experts.8.w3", "model.layers.2.block_sparse_moe.experts.9.w3", "model.layers.2.block_sparse_moe.experts.10.w3", "model.layers.2.block_sparse_moe.experts.11.w3", "model.layers.2.block_sparse_moe.experts.12.w3", "model.layers.2.block_sparse_moe.experts.13.w3", "model.layers.2.block_sparse_moe.experts.14.w3", "model.layers.2.block_sparse_moe.experts.15.w3", "model.layers.2.block_sparse_moe.experts.16.w3", "model.layers.2.block_sparse_moe.experts.17.w3", "model.layers.2.block_sparse_moe.experts.18.w3", "model.layers.2.block_sparse_moe.experts.19.w3", "model.layers.2.block_sparse_moe.experts.20.w3", "model.layers.2.block_sparse_moe.experts.21.w3", "model.layers.2.block_sparse_moe.experts.22.w3", "model.layers.2.block_sparse_moe.experts.23.w3", "model.layers.2.block_sparse_moe.experts.24.w3", "model.layers.2.block_sparse_moe.experts.25.w3", "model.layers.2.block_sparse_moe.experts.26.w3", "model.layers.2.block_sparse_moe.experts.27.w3", "model.layers.2.block_sparse_moe.experts.28.w3", "model.layers.2.block_sparse_moe.experts.29.w3", "model.layers.2.block_sparse_moe.experts.30.w3", "model.layers.2.block_sparse_moe.experts.31.w3", "model.layers.2.block_sparse_moe.experts.32.w3", "model.layers.2.block_sparse_moe.experts.33.w3", "model.layers.2.block_sparse_moe.experts.34.w3", "model.layers.2.block_sparse_moe.experts.35.w3", "model.layers.2.block_sparse_moe.experts.36.w3", "model.layers.2.block_sparse_moe.experts.37.w3", "model.layers.2.block_sparse_moe.experts.38.w3", "model.layers.2.block_sparse_moe.experts.39.w3", "model.layers.2.block_sparse_moe.experts.40.w3", "model.layers.2.block_sparse_moe.experts.41.w3", "model.layers.2.block_sparse_moe.experts.42.w3", "model.layers.2.block_sparse_moe.experts.43.w3", "model.layers.2.block_sparse_moe.experts.44.w3", "model.layers.2.block_sparse_moe.experts.45.w3", "model.layers.2.block_sparse_moe.experts.46.w3", "model.layers.2.block_sparse_moe.experts.47.w3", "model.layers.2.block_sparse_moe.experts.48.w3", "model.layers.2.block_sparse_moe.experts.49.w3", "model.layers.2.block_sparse_moe.experts.50.w3", "model.layers.2.block_sparse_moe.experts.51.w3", "model.layers.2.block_sparse_moe.experts.52.w3", "model.layers.2.block_sparse_moe.experts.53.w3", "model.layers.2.block_sparse_moe.experts.54.w3", "model.layers.2.block_sparse_moe.experts.55.w3", "model.layers.2.block_sparse_moe.experts.56.w3", "model.layers.2.block_sparse_moe.experts.57.w3", "model.layers.2.block_sparse_moe.experts.58.w3", "model.layers.2.block_sparse_moe.experts.59.w3", "model.layers.2.block_sparse_moe.experts.60.w3", "model.layers.2.block_sparse_moe.experts.61.w3", "model.layers.2.block_sparse_moe.experts.62.w3", "model.layers.2.block_sparse_moe.experts.63.w3", "model.layers.2.block_sparse_moe.experts.64.w3", "model.layers.2.block_sparse_moe.experts.65.w3", "model.layers.2.block_sparse_moe.experts.66.w3", "model.layers.2.block_sparse_moe.experts.67.w3", "model.layers.2.block_sparse_moe.experts.68.w3", "model.layers.2.block_sparse_moe.experts.69.w3", "model.layers.2.block_sparse_moe.experts.70.w3", "model.layers.2.block_sparse_moe.experts.71.w3", "model.layers.2.block_sparse_moe.experts.72.w3", "model.layers.2.block_sparse_moe.experts.73.w3", "model.layers.2.block_sparse_moe.experts.74.w3", "model.layers.2.block_sparse_moe.experts.75.w3", "model.layers.2.block_sparse_moe.experts.76.w3", "model.layers.2.block_sparse_moe.experts.77.w3", "model.layers.2.block_sparse_moe.experts.78.w3", "model.layers.2.block_sparse_moe.experts.79.w3", "model.layers.2.block_sparse_moe.experts.80.w3", "model.layers.2.block_sparse_moe.experts.81.w3", "model.layers.2.block_sparse_moe.experts.82.w3", "model.layers.2.block_sparse_moe.experts.83.w3", "model.layers.2.block_sparse_moe.experts.84.w3", "model.layers.2.block_sparse_moe.experts.85.w3", "model.layers.2.block_sparse_moe.experts.86.w3", "model.layers.2.block_sparse_moe.experts.87.w3", "model.layers.2.block_sparse_moe.experts.88.w3", "model.layers.2.block_sparse_moe.experts.89.w3", "model.layers.2.block_sparse_moe.experts.90.w3", "model.layers.2.block_sparse_moe.experts.91.w3", "model.layers.2.block_sparse_moe.experts.92.w3", "model.layers.2.block_sparse_moe.experts.93.w3", "model.layers.2.block_sparse_moe.experts.94.w3", "model.layers.2.block_sparse_moe.experts.95.w3", "model.layers.2.block_sparse_moe.experts.96.w3", "model.layers.2.block_sparse_moe.experts.97.w3", "model.layers.2.block_sparse_moe.experts.98.w3", "model.layers.2.block_sparse_moe.experts.99.w3", "model.layers.2.block_sparse_moe.experts.100.w3", "model.layers.2.block_sparse_moe.experts.101.w3", "model.layers.2.block_sparse_moe.experts.102.w3", "model.layers.2.block_sparse_moe.experts.103.w3", "model.layers.2.block_sparse_moe.experts.104.w3", "model.layers.2.block_sparse_moe.experts.105.w3", "model.layers.2.block_sparse_moe.experts.106.w3", "model.layers.2.block_sparse_moe.experts.107.w3", "model.layers.2.block_sparse_moe.experts.108.w3", "model.layers.2.block_sparse_moe.experts.109.w3", "model.layers.2.block_sparse_moe.experts.110.w3", "model.layers.2.block_sparse_moe.experts.111.w3", "model.layers.2.block_sparse_moe.experts.112.w3", "model.layers.2.block_sparse_moe.experts.113.w3", "model.layers.2.block_sparse_moe.experts.114.w3", "model.layers.2.block_sparse_moe.experts.115.w3", "model.layers.2.block_sparse_moe.experts.116.w3", "model.layers.2.block_sparse_moe.experts.117.w3", "model.layers.2.block_sparse_moe.experts.118.w3", "model.layers.2.block_sparse_moe.experts.119.w3", "model.layers.2.block_sparse_moe.experts.120.w3", "model.layers.2.block_sparse_moe.experts.121.w3", "model.layers.2.block_sparse_moe.experts.122.w3", "model.layers.2.block_sparse_moe.experts.123.w3", "model.layers.2.block_sparse_moe.experts.124.w3", "model.layers.2.block_sparse_moe.experts.125.w3", "model.layers.2.block_sparse_moe.experts.126.w3", "model.layers.2.block_sparse_moe.experts.127.w3", "model.layers.2.block_sparse_moe.experts.128.w3", "model.layers.2.block_sparse_moe.experts.129.w3", "model.layers.2.block_sparse_moe.experts.130.w3", "model.layers.2.block_sparse_moe.experts.131.w3", "model.layers.2.block_sparse_moe.experts.132.w3", "model.layers.2.block_sparse_moe.experts.133.w3", "model.layers.2.block_sparse_moe.experts.134.w3", "model.layers.2.block_sparse_moe.experts.135.w3", "model.layers.2.block_sparse_moe.experts.136.w3", "model.layers.2.block_sparse_moe.experts.137.w3", "model.layers.2.block_sparse_moe.experts.138.w3", "model.layers.2.block_sparse_moe.experts.139.w3", "model.layers.2.block_sparse_moe.experts.140.w3", "model.layers.2.block_sparse_moe.experts.141.w3", "model.layers.2.block_sparse_moe.experts.142.w3", "model.layers.2.block_sparse_moe.experts.143.w3", "model.layers.2.block_sparse_moe.experts.144.w3", "model.layers.2.block_sparse_moe.experts.145.w3", "model.layers.2.block_sparse_moe.experts.146.w3", "model.layers.2.block_sparse_moe.experts.147.w3", "model.layers.2.block_sparse_moe.experts.148.w3", "model.layers.2.block_sparse_moe.experts.149.w3", "model.layers.2.block_sparse_moe.experts.150.w3", "model.layers.2.block_sparse_moe.experts.151.w3", "model.layers.2.block_sparse_moe.experts.152.w3", "model.layers.2.block_sparse_moe.experts.153.w3", "model.layers.2.block_sparse_moe.experts.154.w3", "model.layers.2.block_sparse_moe.experts.155.w3", "model.layers.2.block_sparse_moe.experts.156.w3", "model.layers.2.block_sparse_moe.experts.157.w3", "model.layers.2.block_sparse_moe.experts.158.w3", "model.layers.2.block_sparse_moe.experts.159.w3", "model.layers.2.block_sparse_moe.experts.160.w3", "model.layers.2.block_sparse_moe.experts.161.w3", "model.layers.2.block_sparse_moe.experts.162.w3", "model.layers.2.block_sparse_moe.experts.163.w3", "model.layers.2.block_sparse_moe.experts.164.w3", "model.layers.2.block_sparse_moe.experts.165.w3", "model.layers.2.block_sparse_moe.experts.166.w3", "model.layers.2.block_sparse_moe.experts.167.w3", "model.layers.2.block_sparse_moe.experts.168.w3", "model.layers.2.block_sparse_moe.experts.169.w3", "model.layers.2.block_sparse_moe.experts.170.w3", "model.layers.2.block_sparse_moe.experts.171.w3", "model.layers.2.block_sparse_moe.experts.172.w3", "model.layers.2.block_sparse_moe.experts.173.w3", "model.layers.2.block_sparse_moe.experts.174.w3", "model.layers.2.block_sparse_moe.experts.175.w3", "model.layers.2.block_sparse_moe.experts.176.w3", "model.layers.2.block_sparse_moe.experts.177.w3", "model.layers.2.block_sparse_moe.experts.178.w3", "model.layers.2.block_sparse_moe.experts.179.w3", "model.layers.2.block_sparse_moe.experts.180.w3", "model.layers.2.block_sparse_moe.experts.181.w3", "model.layers.2.block_sparse_moe.experts.182.w3", "model.layers.2.block_sparse_moe.experts.183.w3", "model.layers.2.block_sparse_moe.experts.184.w3", "model.layers.2.block_sparse_moe.experts.185.w3", "model.layers.2.block_sparse_moe.experts.186.w3", "model.layers.2.block_sparse_moe.experts.187.w3", "model.layers.2.block_sparse_moe.experts.188.w3", "model.layers.2.block_sparse_moe.experts.189.w3", "model.layers.2.block_sparse_moe.experts.190.w3", "model.layers.2.block_sparse_moe.experts.191.w3", "model.layers.2.block_sparse_moe.experts.192.w3", "model.layers.2.block_sparse_moe.experts.193.w3", "model.layers.2.block_sparse_moe.experts.194.w3", "model.layers.2.block_sparse_moe.experts.195.w3", "model.layers.2.block_sparse_moe.experts.196.w3", "model.layers.2.block_sparse_moe.experts.197.w3", "model.layers.2.block_sparse_moe.experts.198.w3", "model.layers.2.block_sparse_moe.experts.199.w3", "model.layers.2.block_sparse_moe.experts.200.w3", "model.layers.2.block_sparse_moe.experts.201.w3", "model.layers.2.block_sparse_moe.experts.202.w3", "model.layers.2.block_sparse_moe.experts.203.w3", "model.layers.2.block_sparse_moe.experts.204.w3", "model.layers.2.block_sparse_moe.experts.205.w3", "model.layers.2.block_sparse_moe.experts.206.w3", "model.layers.2.block_sparse_moe.experts.207.w3", "model.layers.2.block_sparse_moe.experts.208.w3", "model.layers.2.block_sparse_moe.experts.209.w3", "model.layers.2.block_sparse_moe.experts.210.w3", "model.layers.2.block_sparse_moe.experts.211.w3", "model.layers.2.block_sparse_moe.experts.212.w3", "model.layers.2.block_sparse_moe.experts.213.w3", "model.layers.2.block_sparse_moe.experts.214.w3", "model.layers.2.block_sparse_moe.experts.215.w3", "model.layers.2.block_sparse_moe.experts.216.w3", "model.layers.2.block_sparse_moe.experts.217.w3", "model.layers.2.block_sparse_moe.experts.218.w3", "model.layers.2.block_sparse_moe.experts.219.w3", "model.layers.2.block_sparse_moe.experts.220.w3", "model.layers.2.block_sparse_moe.experts.221.w3", "model.layers.2.block_sparse_moe.experts.222.w3", "model.layers.2.block_sparse_moe.experts.223.w3", "model.layers.2.block_sparse_moe.experts.224.w3", "model.layers.2.block_sparse_moe.experts.225.w3", "model.layers.2.block_sparse_moe.experts.226.w3", "model.layers.2.block_sparse_moe.experts.227.w3", "model.layers.2.block_sparse_moe.experts.228.w3", "model.layers.2.block_sparse_moe.experts.229.w3", "model.layers.2.block_sparse_moe.experts.230.w3", "model.layers.2.block_sparse_moe.experts.231.w3", "model.layers.2.block_sparse_moe.experts.232.w3", "model.layers.2.block_sparse_moe.experts.233.w3", "model.layers.2.block_sparse_moe.experts.234.w3", "model.layers.2.block_sparse_moe.experts.235.w3", "model.layers.2.block_sparse_moe.experts.236.w3", "model.layers.2.block_sparse_moe.experts.237.w3", "model.layers.2.block_sparse_moe.experts.238.w3", "model.layers.2.block_sparse_moe.experts.239.w3", "model.layers.2.block_sparse_moe.experts.240.w3", "model.layers.2.block_sparse_moe.experts.241.w3", "model.layers.2.block_sparse_moe.experts.242.w3", "model.layers.2.block_sparse_moe.experts.243.w3", "model.layers.2.block_sparse_moe.experts.244.w3", "model.layers.2.block_sparse_moe.experts.245.w3", "model.layers.2.block_sparse_moe.experts.246.w3", "model.layers.2.block_sparse_moe.experts.247.w3", "model.layers.2.block_sparse_moe.experts.248.w3", "model.layers.2.block_sparse_moe.experts.249.w3", "model.layers.2.block_sparse_moe.experts.250.w3", "model.layers.2.block_sparse_moe.experts.251.w3", "model.layers.2.block_sparse_moe.experts.252.w3", "model.layers.2.block_sparse_moe.experts.253.w3", "model.layers.2.block_sparse_moe.experts.254.w3", "model.layers.2.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0042592793703079, "dbits": 2415919104 } ] }, { "idx": 14, "layers": [ "model.layers.2.block_sparse_moe.experts.0.w2", "model.layers.2.block_sparse_moe.experts.1.w2", "model.layers.2.block_sparse_moe.experts.2.w2", "model.layers.2.block_sparse_moe.experts.3.w2", "model.layers.2.block_sparse_moe.experts.4.w2", "model.layers.2.block_sparse_moe.experts.5.w2", "model.layers.2.block_sparse_moe.experts.6.w2", "model.layers.2.block_sparse_moe.experts.7.w2", "model.layers.2.block_sparse_moe.experts.8.w2", "model.layers.2.block_sparse_moe.experts.9.w2", "model.layers.2.block_sparse_moe.experts.10.w2", "model.layers.2.block_sparse_moe.experts.11.w2", "model.layers.2.block_sparse_moe.experts.12.w2", "model.layers.2.block_sparse_moe.experts.13.w2", "model.layers.2.block_sparse_moe.experts.14.w2", "model.layers.2.block_sparse_moe.experts.15.w2", "model.layers.2.block_sparse_moe.experts.16.w2", "model.layers.2.block_sparse_moe.experts.17.w2", "model.layers.2.block_sparse_moe.experts.18.w2", "model.layers.2.block_sparse_moe.experts.19.w2", "model.layers.2.block_sparse_moe.experts.20.w2", "model.layers.2.block_sparse_moe.experts.21.w2", "model.layers.2.block_sparse_moe.experts.22.w2", "model.layers.2.block_sparse_moe.experts.23.w2", "model.layers.2.block_sparse_moe.experts.24.w2", "model.layers.2.block_sparse_moe.experts.25.w2", "model.layers.2.block_sparse_moe.experts.26.w2", "model.layers.2.block_sparse_moe.experts.27.w2", "model.layers.2.block_sparse_moe.experts.28.w2", "model.layers.2.block_sparse_moe.experts.29.w2", "model.layers.2.block_sparse_moe.experts.30.w2", "model.layers.2.block_sparse_moe.experts.31.w2", "model.layers.2.block_sparse_moe.experts.32.w2", "model.layers.2.block_sparse_moe.experts.33.w2", "model.layers.2.block_sparse_moe.experts.34.w2", "model.layers.2.block_sparse_moe.experts.35.w2", "model.layers.2.block_sparse_moe.experts.36.w2", "model.layers.2.block_sparse_moe.experts.37.w2", "model.layers.2.block_sparse_moe.experts.38.w2", "model.layers.2.block_sparse_moe.experts.39.w2", "model.layers.2.block_sparse_moe.experts.40.w2", "model.layers.2.block_sparse_moe.experts.41.w2", "model.layers.2.block_sparse_moe.experts.42.w2", "model.layers.2.block_sparse_moe.experts.43.w2", "model.layers.2.block_sparse_moe.experts.44.w2", "model.layers.2.block_sparse_moe.experts.45.w2", "model.layers.2.block_sparse_moe.experts.46.w2", "model.layers.2.block_sparse_moe.experts.47.w2", "model.layers.2.block_sparse_moe.experts.48.w2", "model.layers.2.block_sparse_moe.experts.49.w2", "model.layers.2.block_sparse_moe.experts.50.w2", "model.layers.2.block_sparse_moe.experts.51.w2", "model.layers.2.block_sparse_moe.experts.52.w2", "model.layers.2.block_sparse_moe.experts.53.w2", "model.layers.2.block_sparse_moe.experts.54.w2", "model.layers.2.block_sparse_moe.experts.55.w2", "model.layers.2.block_sparse_moe.experts.56.w2", "model.layers.2.block_sparse_moe.experts.57.w2", "model.layers.2.block_sparse_moe.experts.58.w2", "model.layers.2.block_sparse_moe.experts.59.w2", "model.layers.2.block_sparse_moe.experts.60.w2", "model.layers.2.block_sparse_moe.experts.61.w2", "model.layers.2.block_sparse_moe.experts.62.w2", "model.layers.2.block_sparse_moe.experts.63.w2", "model.layers.2.block_sparse_moe.experts.64.w2", "model.layers.2.block_sparse_moe.experts.65.w2", "model.layers.2.block_sparse_moe.experts.66.w2", "model.layers.2.block_sparse_moe.experts.67.w2", "model.layers.2.block_sparse_moe.experts.68.w2", "model.layers.2.block_sparse_moe.experts.69.w2", "model.layers.2.block_sparse_moe.experts.70.w2", "model.layers.2.block_sparse_moe.experts.71.w2", "model.layers.2.block_sparse_moe.experts.72.w2", "model.layers.2.block_sparse_moe.experts.73.w2", "model.layers.2.block_sparse_moe.experts.74.w2", "model.layers.2.block_sparse_moe.experts.75.w2", "model.layers.2.block_sparse_moe.experts.76.w2", "model.layers.2.block_sparse_moe.experts.77.w2", "model.layers.2.block_sparse_moe.experts.78.w2", "model.layers.2.block_sparse_moe.experts.79.w2", "model.layers.2.block_sparse_moe.experts.80.w2", "model.layers.2.block_sparse_moe.experts.81.w2", "model.layers.2.block_sparse_moe.experts.82.w2", "model.layers.2.block_sparse_moe.experts.83.w2", "model.layers.2.block_sparse_moe.experts.84.w2", "model.layers.2.block_sparse_moe.experts.85.w2", "model.layers.2.block_sparse_moe.experts.86.w2", "model.layers.2.block_sparse_moe.experts.87.w2", "model.layers.2.block_sparse_moe.experts.88.w2", "model.layers.2.block_sparse_moe.experts.89.w2", "model.layers.2.block_sparse_moe.experts.90.w2", "model.layers.2.block_sparse_moe.experts.91.w2", "model.layers.2.block_sparse_moe.experts.92.w2", "model.layers.2.block_sparse_moe.experts.93.w2", "model.layers.2.block_sparse_moe.experts.94.w2", "model.layers.2.block_sparse_moe.experts.95.w2", "model.layers.2.block_sparse_moe.experts.96.w2", "model.layers.2.block_sparse_moe.experts.97.w2", "model.layers.2.block_sparse_moe.experts.98.w2", "model.layers.2.block_sparse_moe.experts.99.w2", "model.layers.2.block_sparse_moe.experts.100.w2", "model.layers.2.block_sparse_moe.experts.101.w2", "model.layers.2.block_sparse_moe.experts.102.w2", "model.layers.2.block_sparse_moe.experts.103.w2", "model.layers.2.block_sparse_moe.experts.104.w2", "model.layers.2.block_sparse_moe.experts.105.w2", "model.layers.2.block_sparse_moe.experts.106.w2", "model.layers.2.block_sparse_moe.experts.107.w2", "model.layers.2.block_sparse_moe.experts.108.w2", "model.layers.2.block_sparse_moe.experts.109.w2", "model.layers.2.block_sparse_moe.experts.110.w2", "model.layers.2.block_sparse_moe.experts.111.w2", "model.layers.2.block_sparse_moe.experts.112.w2", "model.layers.2.block_sparse_moe.experts.113.w2", "model.layers.2.block_sparse_moe.experts.114.w2", "model.layers.2.block_sparse_moe.experts.115.w2", "model.layers.2.block_sparse_moe.experts.116.w2", "model.layers.2.block_sparse_moe.experts.117.w2", "model.layers.2.block_sparse_moe.experts.118.w2", "model.layers.2.block_sparse_moe.experts.119.w2", "model.layers.2.block_sparse_moe.experts.120.w2", "model.layers.2.block_sparse_moe.experts.121.w2", "model.layers.2.block_sparse_moe.experts.122.w2", "model.layers.2.block_sparse_moe.experts.123.w2", "model.layers.2.block_sparse_moe.experts.124.w2", "model.layers.2.block_sparse_moe.experts.125.w2", "model.layers.2.block_sparse_moe.experts.126.w2", "model.layers.2.block_sparse_moe.experts.127.w2", "model.layers.2.block_sparse_moe.experts.128.w2", "model.layers.2.block_sparse_moe.experts.129.w2", "model.layers.2.block_sparse_moe.experts.130.w2", "model.layers.2.block_sparse_moe.experts.131.w2", "model.layers.2.block_sparse_moe.experts.132.w2", "model.layers.2.block_sparse_moe.experts.133.w2", "model.layers.2.block_sparse_moe.experts.134.w2", "model.layers.2.block_sparse_moe.experts.135.w2", "model.layers.2.block_sparse_moe.experts.136.w2", "model.layers.2.block_sparse_moe.experts.137.w2", "model.layers.2.block_sparse_moe.experts.138.w2", "model.layers.2.block_sparse_moe.experts.139.w2", "model.layers.2.block_sparse_moe.experts.140.w2", "model.layers.2.block_sparse_moe.experts.141.w2", "model.layers.2.block_sparse_moe.experts.142.w2", "model.layers.2.block_sparse_moe.experts.143.w2", "model.layers.2.block_sparse_moe.experts.144.w2", "model.layers.2.block_sparse_moe.experts.145.w2", "model.layers.2.block_sparse_moe.experts.146.w2", "model.layers.2.block_sparse_moe.experts.147.w2", "model.layers.2.block_sparse_moe.experts.148.w2", "model.layers.2.block_sparse_moe.experts.149.w2", "model.layers.2.block_sparse_moe.experts.150.w2", "model.layers.2.block_sparse_moe.experts.151.w2", "model.layers.2.block_sparse_moe.experts.152.w2", "model.layers.2.block_sparse_moe.experts.153.w2", "model.layers.2.block_sparse_moe.experts.154.w2", "model.layers.2.block_sparse_moe.experts.155.w2", "model.layers.2.block_sparse_moe.experts.156.w2", "model.layers.2.block_sparse_moe.experts.157.w2", "model.layers.2.block_sparse_moe.experts.158.w2", "model.layers.2.block_sparse_moe.experts.159.w2", "model.layers.2.block_sparse_moe.experts.160.w2", "model.layers.2.block_sparse_moe.experts.161.w2", "model.layers.2.block_sparse_moe.experts.162.w2", "model.layers.2.block_sparse_moe.experts.163.w2", "model.layers.2.block_sparse_moe.experts.164.w2", "model.layers.2.block_sparse_moe.experts.165.w2", "model.layers.2.block_sparse_moe.experts.166.w2", "model.layers.2.block_sparse_moe.experts.167.w2", "model.layers.2.block_sparse_moe.experts.168.w2", "model.layers.2.block_sparse_moe.experts.169.w2", "model.layers.2.block_sparse_moe.experts.170.w2", "model.layers.2.block_sparse_moe.experts.171.w2", "model.layers.2.block_sparse_moe.experts.172.w2", "model.layers.2.block_sparse_moe.experts.173.w2", "model.layers.2.block_sparse_moe.experts.174.w2", "model.layers.2.block_sparse_moe.experts.175.w2", "model.layers.2.block_sparse_moe.experts.176.w2", "model.layers.2.block_sparse_moe.experts.177.w2", "model.layers.2.block_sparse_moe.experts.178.w2", "model.layers.2.block_sparse_moe.experts.179.w2", "model.layers.2.block_sparse_moe.experts.180.w2", "model.layers.2.block_sparse_moe.experts.181.w2", "model.layers.2.block_sparse_moe.experts.182.w2", "model.layers.2.block_sparse_moe.experts.183.w2", "model.layers.2.block_sparse_moe.experts.184.w2", "model.layers.2.block_sparse_moe.experts.185.w2", "model.layers.2.block_sparse_moe.experts.186.w2", "model.layers.2.block_sparse_moe.experts.187.w2", "model.layers.2.block_sparse_moe.experts.188.w2", "model.layers.2.block_sparse_moe.experts.189.w2", "model.layers.2.block_sparse_moe.experts.190.w2", "model.layers.2.block_sparse_moe.experts.191.w2", "model.layers.2.block_sparse_moe.experts.192.w2", "model.layers.2.block_sparse_moe.experts.193.w2", "model.layers.2.block_sparse_moe.experts.194.w2", "model.layers.2.block_sparse_moe.experts.195.w2", "model.layers.2.block_sparse_moe.experts.196.w2", "model.layers.2.block_sparse_moe.experts.197.w2", "model.layers.2.block_sparse_moe.experts.198.w2", "model.layers.2.block_sparse_moe.experts.199.w2", "model.layers.2.block_sparse_moe.experts.200.w2", "model.layers.2.block_sparse_moe.experts.201.w2", "model.layers.2.block_sparse_moe.experts.202.w2", "model.layers.2.block_sparse_moe.experts.203.w2", "model.layers.2.block_sparse_moe.experts.204.w2", "model.layers.2.block_sparse_moe.experts.205.w2", "model.layers.2.block_sparse_moe.experts.206.w2", "model.layers.2.block_sparse_moe.experts.207.w2", "model.layers.2.block_sparse_moe.experts.208.w2", "model.layers.2.block_sparse_moe.experts.209.w2", "model.layers.2.block_sparse_moe.experts.210.w2", "model.layers.2.block_sparse_moe.experts.211.w2", "model.layers.2.block_sparse_moe.experts.212.w2", "model.layers.2.block_sparse_moe.experts.213.w2", "model.layers.2.block_sparse_moe.experts.214.w2", "model.layers.2.block_sparse_moe.experts.215.w2", "model.layers.2.block_sparse_moe.experts.216.w2", "model.layers.2.block_sparse_moe.experts.217.w2", "model.layers.2.block_sparse_moe.experts.218.w2", "model.layers.2.block_sparse_moe.experts.219.w2", "model.layers.2.block_sparse_moe.experts.220.w2", "model.layers.2.block_sparse_moe.experts.221.w2", "model.layers.2.block_sparse_moe.experts.222.w2", "model.layers.2.block_sparse_moe.experts.223.w2", "model.layers.2.block_sparse_moe.experts.224.w2", "model.layers.2.block_sparse_moe.experts.225.w2", "model.layers.2.block_sparse_moe.experts.226.w2", "model.layers.2.block_sparse_moe.experts.227.w2", "model.layers.2.block_sparse_moe.experts.228.w2", "model.layers.2.block_sparse_moe.experts.229.w2", "model.layers.2.block_sparse_moe.experts.230.w2", "model.layers.2.block_sparse_moe.experts.231.w2", "model.layers.2.block_sparse_moe.experts.232.w2", "model.layers.2.block_sparse_moe.experts.233.w2", "model.layers.2.block_sparse_moe.experts.234.w2", "model.layers.2.block_sparse_moe.experts.235.w2", "model.layers.2.block_sparse_moe.experts.236.w2", "model.layers.2.block_sparse_moe.experts.237.w2", "model.layers.2.block_sparse_moe.experts.238.w2", "model.layers.2.block_sparse_moe.experts.239.w2", "model.layers.2.block_sparse_moe.experts.240.w2", "model.layers.2.block_sparse_moe.experts.241.w2", "model.layers.2.block_sparse_moe.experts.242.w2", "model.layers.2.block_sparse_moe.experts.243.w2", "model.layers.2.block_sparse_moe.experts.244.w2", "model.layers.2.block_sparse_moe.experts.245.w2", "model.layers.2.block_sparse_moe.experts.246.w2", "model.layers.2.block_sparse_moe.experts.247.w2", "model.layers.2.block_sparse_moe.experts.248.w2", "model.layers.2.block_sparse_moe.experts.249.w2", "model.layers.2.block_sparse_moe.experts.250.w2", "model.layers.2.block_sparse_moe.experts.251.w2", "model.layers.2.block_sparse_moe.experts.252.w2", "model.layers.2.block_sparse_moe.experts.253.w2", "model.layers.2.block_sparse_moe.experts.254.w2", "model.layers.2.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0007763624191283736, "dbits": 1207959552 } ] }, { "idx": 15, "layers": [ "model.layers.3.self_attn.q_proj" ], "candidates": [ { "dkld": 5.602836609108763e-07, "dbits": 18874368 } ] }, { "idx": 16, "layers": [ "model.layers.3.self_attn.k_proj", "model.layers.3.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0020090699195862705, "dbits": 6291456 } ] }, { "idx": 17, "layers": [ "model.layers.3.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0012249499559402466, "dbits": 18874368 } ] }, { "idx": 18, "layers": [ "model.layers.3.block_sparse_moe.experts.0.w1", "model.layers.3.block_sparse_moe.experts.1.w1", "model.layers.3.block_sparse_moe.experts.2.w1", "model.layers.3.block_sparse_moe.experts.3.w1", "model.layers.3.block_sparse_moe.experts.4.w1", "model.layers.3.block_sparse_moe.experts.5.w1", "model.layers.3.block_sparse_moe.experts.6.w1", "model.layers.3.block_sparse_moe.experts.7.w1", "model.layers.3.block_sparse_moe.experts.8.w1", "model.layers.3.block_sparse_moe.experts.9.w1", "model.layers.3.block_sparse_moe.experts.10.w1", "model.layers.3.block_sparse_moe.experts.11.w1", "model.layers.3.block_sparse_moe.experts.12.w1", "model.layers.3.block_sparse_moe.experts.13.w1", "model.layers.3.block_sparse_moe.experts.14.w1", "model.layers.3.block_sparse_moe.experts.15.w1", "model.layers.3.block_sparse_moe.experts.16.w1", "model.layers.3.block_sparse_moe.experts.17.w1", "model.layers.3.block_sparse_moe.experts.18.w1", "model.layers.3.block_sparse_moe.experts.19.w1", "model.layers.3.block_sparse_moe.experts.20.w1", "model.layers.3.block_sparse_moe.experts.21.w1", "model.layers.3.block_sparse_moe.experts.22.w1", "model.layers.3.block_sparse_moe.experts.23.w1", "model.layers.3.block_sparse_moe.experts.24.w1", "model.layers.3.block_sparse_moe.experts.25.w1", "model.layers.3.block_sparse_moe.experts.26.w1", "model.layers.3.block_sparse_moe.experts.27.w1", "model.layers.3.block_sparse_moe.experts.28.w1", "model.layers.3.block_sparse_moe.experts.29.w1", "model.layers.3.block_sparse_moe.experts.30.w1", "model.layers.3.block_sparse_moe.experts.31.w1", "model.layers.3.block_sparse_moe.experts.32.w1", "model.layers.3.block_sparse_moe.experts.33.w1", "model.layers.3.block_sparse_moe.experts.34.w1", "model.layers.3.block_sparse_moe.experts.35.w1", "model.layers.3.block_sparse_moe.experts.36.w1", "model.layers.3.block_sparse_moe.experts.37.w1", "model.layers.3.block_sparse_moe.experts.38.w1", "model.layers.3.block_sparse_moe.experts.39.w1", "model.layers.3.block_sparse_moe.experts.40.w1", "model.layers.3.block_sparse_moe.experts.41.w1", "model.layers.3.block_sparse_moe.experts.42.w1", "model.layers.3.block_sparse_moe.experts.43.w1", "model.layers.3.block_sparse_moe.experts.44.w1", "model.layers.3.block_sparse_moe.experts.45.w1", "model.layers.3.block_sparse_moe.experts.46.w1", "model.layers.3.block_sparse_moe.experts.47.w1", "model.layers.3.block_sparse_moe.experts.48.w1", "model.layers.3.block_sparse_moe.experts.49.w1", "model.layers.3.block_sparse_moe.experts.50.w1", "model.layers.3.block_sparse_moe.experts.51.w1", "model.layers.3.block_sparse_moe.experts.52.w1", "model.layers.3.block_sparse_moe.experts.53.w1", "model.layers.3.block_sparse_moe.experts.54.w1", "model.layers.3.block_sparse_moe.experts.55.w1", "model.layers.3.block_sparse_moe.experts.56.w1", "model.layers.3.block_sparse_moe.experts.57.w1", "model.layers.3.block_sparse_moe.experts.58.w1", "model.layers.3.block_sparse_moe.experts.59.w1", "model.layers.3.block_sparse_moe.experts.60.w1", "model.layers.3.block_sparse_moe.experts.61.w1", "model.layers.3.block_sparse_moe.experts.62.w1", "model.layers.3.block_sparse_moe.experts.63.w1", "model.layers.3.block_sparse_moe.experts.64.w1", "model.layers.3.block_sparse_moe.experts.65.w1", "model.layers.3.block_sparse_moe.experts.66.w1", "model.layers.3.block_sparse_moe.experts.67.w1", "model.layers.3.block_sparse_moe.experts.68.w1", "model.layers.3.block_sparse_moe.experts.69.w1", "model.layers.3.block_sparse_moe.experts.70.w1", "model.layers.3.block_sparse_moe.experts.71.w1", "model.layers.3.block_sparse_moe.experts.72.w1", "model.layers.3.block_sparse_moe.experts.73.w1", "model.layers.3.block_sparse_moe.experts.74.w1", "model.layers.3.block_sparse_moe.experts.75.w1", "model.layers.3.block_sparse_moe.experts.76.w1", "model.layers.3.block_sparse_moe.experts.77.w1", "model.layers.3.block_sparse_moe.experts.78.w1", "model.layers.3.block_sparse_moe.experts.79.w1", "model.layers.3.block_sparse_moe.experts.80.w1", "model.layers.3.block_sparse_moe.experts.81.w1", "model.layers.3.block_sparse_moe.experts.82.w1", "model.layers.3.block_sparse_moe.experts.83.w1", "model.layers.3.block_sparse_moe.experts.84.w1", "model.layers.3.block_sparse_moe.experts.85.w1", "model.layers.3.block_sparse_moe.experts.86.w1", "model.layers.3.block_sparse_moe.experts.87.w1", "model.layers.3.block_sparse_moe.experts.88.w1", "model.layers.3.block_sparse_moe.experts.89.w1", "model.layers.3.block_sparse_moe.experts.90.w1", "model.layers.3.block_sparse_moe.experts.91.w1", "model.layers.3.block_sparse_moe.experts.92.w1", "model.layers.3.block_sparse_moe.experts.93.w1", "model.layers.3.block_sparse_moe.experts.94.w1", "model.layers.3.block_sparse_moe.experts.95.w1", "model.layers.3.block_sparse_moe.experts.96.w1", "model.layers.3.block_sparse_moe.experts.97.w1", "model.layers.3.block_sparse_moe.experts.98.w1", "model.layers.3.block_sparse_moe.experts.99.w1", "model.layers.3.block_sparse_moe.experts.100.w1", "model.layers.3.block_sparse_moe.experts.101.w1", "model.layers.3.block_sparse_moe.experts.102.w1", "model.layers.3.block_sparse_moe.experts.103.w1", "model.layers.3.block_sparse_moe.experts.104.w1", "model.layers.3.block_sparse_moe.experts.105.w1", "model.layers.3.block_sparse_moe.experts.106.w1", "model.layers.3.block_sparse_moe.experts.107.w1", "model.layers.3.block_sparse_moe.experts.108.w1", "model.layers.3.block_sparse_moe.experts.109.w1", "model.layers.3.block_sparse_moe.experts.110.w1", "model.layers.3.block_sparse_moe.experts.111.w1", "model.layers.3.block_sparse_moe.experts.112.w1", "model.layers.3.block_sparse_moe.experts.113.w1", "model.layers.3.block_sparse_moe.experts.114.w1", "model.layers.3.block_sparse_moe.experts.115.w1", "model.layers.3.block_sparse_moe.experts.116.w1", "model.layers.3.block_sparse_moe.experts.117.w1", "model.layers.3.block_sparse_moe.experts.118.w1", "model.layers.3.block_sparse_moe.experts.119.w1", "model.layers.3.block_sparse_moe.experts.120.w1", "model.layers.3.block_sparse_moe.experts.121.w1", "model.layers.3.block_sparse_moe.experts.122.w1", "model.layers.3.block_sparse_moe.experts.123.w1", "model.layers.3.block_sparse_moe.experts.124.w1", "model.layers.3.block_sparse_moe.experts.125.w1", "model.layers.3.block_sparse_moe.experts.126.w1", "model.layers.3.block_sparse_moe.experts.127.w1", "model.layers.3.block_sparse_moe.experts.128.w1", "model.layers.3.block_sparse_moe.experts.129.w1", "model.layers.3.block_sparse_moe.experts.130.w1", "model.layers.3.block_sparse_moe.experts.131.w1", "model.layers.3.block_sparse_moe.experts.132.w1", "model.layers.3.block_sparse_moe.experts.133.w1", "model.layers.3.block_sparse_moe.experts.134.w1", "model.layers.3.block_sparse_moe.experts.135.w1", "model.layers.3.block_sparse_moe.experts.136.w1", "model.layers.3.block_sparse_moe.experts.137.w1", "model.layers.3.block_sparse_moe.experts.138.w1", "model.layers.3.block_sparse_moe.experts.139.w1", "model.layers.3.block_sparse_moe.experts.140.w1", "model.layers.3.block_sparse_moe.experts.141.w1", "model.layers.3.block_sparse_moe.experts.142.w1", "model.layers.3.block_sparse_moe.experts.143.w1", "model.layers.3.block_sparse_moe.experts.144.w1", "model.layers.3.block_sparse_moe.experts.145.w1", "model.layers.3.block_sparse_moe.experts.146.w1", "model.layers.3.block_sparse_moe.experts.147.w1", "model.layers.3.block_sparse_moe.experts.148.w1", "model.layers.3.block_sparse_moe.experts.149.w1", "model.layers.3.block_sparse_moe.experts.150.w1", "model.layers.3.block_sparse_moe.experts.151.w1", "model.layers.3.block_sparse_moe.experts.152.w1", "model.layers.3.block_sparse_moe.experts.153.w1", "model.layers.3.block_sparse_moe.experts.154.w1", "model.layers.3.block_sparse_moe.experts.155.w1", "model.layers.3.block_sparse_moe.experts.156.w1", "model.layers.3.block_sparse_moe.experts.157.w1", "model.layers.3.block_sparse_moe.experts.158.w1", "model.layers.3.block_sparse_moe.experts.159.w1", "model.layers.3.block_sparse_moe.experts.160.w1", "model.layers.3.block_sparse_moe.experts.161.w1", "model.layers.3.block_sparse_moe.experts.162.w1", "model.layers.3.block_sparse_moe.experts.163.w1", "model.layers.3.block_sparse_moe.experts.164.w1", "model.layers.3.block_sparse_moe.experts.165.w1", "model.layers.3.block_sparse_moe.experts.166.w1", "model.layers.3.block_sparse_moe.experts.167.w1", "model.layers.3.block_sparse_moe.experts.168.w1", "model.layers.3.block_sparse_moe.experts.169.w1", "model.layers.3.block_sparse_moe.experts.170.w1", "model.layers.3.block_sparse_moe.experts.171.w1", "model.layers.3.block_sparse_moe.experts.172.w1", "model.layers.3.block_sparse_moe.experts.173.w1", "model.layers.3.block_sparse_moe.experts.174.w1", "model.layers.3.block_sparse_moe.experts.175.w1", "model.layers.3.block_sparse_moe.experts.176.w1", "model.layers.3.block_sparse_moe.experts.177.w1", "model.layers.3.block_sparse_moe.experts.178.w1", "model.layers.3.block_sparse_moe.experts.179.w1", "model.layers.3.block_sparse_moe.experts.180.w1", "model.layers.3.block_sparse_moe.experts.181.w1", "model.layers.3.block_sparse_moe.experts.182.w1", "model.layers.3.block_sparse_moe.experts.183.w1", "model.layers.3.block_sparse_moe.experts.184.w1", "model.layers.3.block_sparse_moe.experts.185.w1", "model.layers.3.block_sparse_moe.experts.186.w1", "model.layers.3.block_sparse_moe.experts.187.w1", "model.layers.3.block_sparse_moe.experts.188.w1", "model.layers.3.block_sparse_moe.experts.189.w1", "model.layers.3.block_sparse_moe.experts.190.w1", "model.layers.3.block_sparse_moe.experts.191.w1", "model.layers.3.block_sparse_moe.experts.192.w1", "model.layers.3.block_sparse_moe.experts.193.w1", "model.layers.3.block_sparse_moe.experts.194.w1", "model.layers.3.block_sparse_moe.experts.195.w1", "model.layers.3.block_sparse_moe.experts.196.w1", "model.layers.3.block_sparse_moe.experts.197.w1", "model.layers.3.block_sparse_moe.experts.198.w1", "model.layers.3.block_sparse_moe.experts.199.w1", "model.layers.3.block_sparse_moe.experts.200.w1", "model.layers.3.block_sparse_moe.experts.201.w1", "model.layers.3.block_sparse_moe.experts.202.w1", "model.layers.3.block_sparse_moe.experts.203.w1", "model.layers.3.block_sparse_moe.experts.204.w1", "model.layers.3.block_sparse_moe.experts.205.w1", "model.layers.3.block_sparse_moe.experts.206.w1", "model.layers.3.block_sparse_moe.experts.207.w1", "model.layers.3.block_sparse_moe.experts.208.w1", "model.layers.3.block_sparse_moe.experts.209.w1", "model.layers.3.block_sparse_moe.experts.210.w1", "model.layers.3.block_sparse_moe.experts.211.w1", "model.layers.3.block_sparse_moe.experts.212.w1", "model.layers.3.block_sparse_moe.experts.213.w1", "model.layers.3.block_sparse_moe.experts.214.w1", "model.layers.3.block_sparse_moe.experts.215.w1", "model.layers.3.block_sparse_moe.experts.216.w1", "model.layers.3.block_sparse_moe.experts.217.w1", "model.layers.3.block_sparse_moe.experts.218.w1", "model.layers.3.block_sparse_moe.experts.219.w1", "model.layers.3.block_sparse_moe.experts.220.w1", "model.layers.3.block_sparse_moe.experts.221.w1", "model.layers.3.block_sparse_moe.experts.222.w1", "model.layers.3.block_sparse_moe.experts.223.w1", "model.layers.3.block_sparse_moe.experts.224.w1", "model.layers.3.block_sparse_moe.experts.225.w1", "model.layers.3.block_sparse_moe.experts.226.w1", "model.layers.3.block_sparse_moe.experts.227.w1", "model.layers.3.block_sparse_moe.experts.228.w1", "model.layers.3.block_sparse_moe.experts.229.w1", "model.layers.3.block_sparse_moe.experts.230.w1", "model.layers.3.block_sparse_moe.experts.231.w1", "model.layers.3.block_sparse_moe.experts.232.w1", "model.layers.3.block_sparse_moe.experts.233.w1", "model.layers.3.block_sparse_moe.experts.234.w1", "model.layers.3.block_sparse_moe.experts.235.w1", "model.layers.3.block_sparse_moe.experts.236.w1", "model.layers.3.block_sparse_moe.experts.237.w1", "model.layers.3.block_sparse_moe.experts.238.w1", "model.layers.3.block_sparse_moe.experts.239.w1", "model.layers.3.block_sparse_moe.experts.240.w1", "model.layers.3.block_sparse_moe.experts.241.w1", "model.layers.3.block_sparse_moe.experts.242.w1", "model.layers.3.block_sparse_moe.experts.243.w1", "model.layers.3.block_sparse_moe.experts.244.w1", "model.layers.3.block_sparse_moe.experts.245.w1", "model.layers.3.block_sparse_moe.experts.246.w1", "model.layers.3.block_sparse_moe.experts.247.w1", "model.layers.3.block_sparse_moe.experts.248.w1", "model.layers.3.block_sparse_moe.experts.249.w1", "model.layers.3.block_sparse_moe.experts.250.w1", "model.layers.3.block_sparse_moe.experts.251.w1", "model.layers.3.block_sparse_moe.experts.252.w1", "model.layers.3.block_sparse_moe.experts.253.w1", "model.layers.3.block_sparse_moe.experts.254.w1", "model.layers.3.block_sparse_moe.experts.255.w1", "model.layers.3.block_sparse_moe.experts.0.w3", "model.layers.3.block_sparse_moe.experts.1.w3", "model.layers.3.block_sparse_moe.experts.2.w3", "model.layers.3.block_sparse_moe.experts.3.w3", "model.layers.3.block_sparse_moe.experts.4.w3", "model.layers.3.block_sparse_moe.experts.5.w3", "model.layers.3.block_sparse_moe.experts.6.w3", "model.layers.3.block_sparse_moe.experts.7.w3", "model.layers.3.block_sparse_moe.experts.8.w3", "model.layers.3.block_sparse_moe.experts.9.w3", "model.layers.3.block_sparse_moe.experts.10.w3", "model.layers.3.block_sparse_moe.experts.11.w3", "model.layers.3.block_sparse_moe.experts.12.w3", "model.layers.3.block_sparse_moe.experts.13.w3", "model.layers.3.block_sparse_moe.experts.14.w3", "model.layers.3.block_sparse_moe.experts.15.w3", "model.layers.3.block_sparse_moe.experts.16.w3", "model.layers.3.block_sparse_moe.experts.17.w3", "model.layers.3.block_sparse_moe.experts.18.w3", "model.layers.3.block_sparse_moe.experts.19.w3", "model.layers.3.block_sparse_moe.experts.20.w3", "model.layers.3.block_sparse_moe.experts.21.w3", "model.layers.3.block_sparse_moe.experts.22.w3", "model.layers.3.block_sparse_moe.experts.23.w3", "model.layers.3.block_sparse_moe.experts.24.w3", "model.layers.3.block_sparse_moe.experts.25.w3", "model.layers.3.block_sparse_moe.experts.26.w3", "model.layers.3.block_sparse_moe.experts.27.w3", "model.layers.3.block_sparse_moe.experts.28.w3", "model.layers.3.block_sparse_moe.experts.29.w3", "model.layers.3.block_sparse_moe.experts.30.w3", "model.layers.3.block_sparse_moe.experts.31.w3", "model.layers.3.block_sparse_moe.experts.32.w3", "model.layers.3.block_sparse_moe.experts.33.w3", "model.layers.3.block_sparse_moe.experts.34.w3", "model.layers.3.block_sparse_moe.experts.35.w3", "model.layers.3.block_sparse_moe.experts.36.w3", "model.layers.3.block_sparse_moe.experts.37.w3", "model.layers.3.block_sparse_moe.experts.38.w3", "model.layers.3.block_sparse_moe.experts.39.w3", "model.layers.3.block_sparse_moe.experts.40.w3", "model.layers.3.block_sparse_moe.experts.41.w3", "model.layers.3.block_sparse_moe.experts.42.w3", "model.layers.3.block_sparse_moe.experts.43.w3", "model.layers.3.block_sparse_moe.experts.44.w3", "model.layers.3.block_sparse_moe.experts.45.w3", "model.layers.3.block_sparse_moe.experts.46.w3", "model.layers.3.block_sparse_moe.experts.47.w3", "model.layers.3.block_sparse_moe.experts.48.w3", "model.layers.3.block_sparse_moe.experts.49.w3", "model.layers.3.block_sparse_moe.experts.50.w3", "model.layers.3.block_sparse_moe.experts.51.w3", "model.layers.3.block_sparse_moe.experts.52.w3", "model.layers.3.block_sparse_moe.experts.53.w3", "model.layers.3.block_sparse_moe.experts.54.w3", "model.layers.3.block_sparse_moe.experts.55.w3", "model.layers.3.block_sparse_moe.experts.56.w3", "model.layers.3.block_sparse_moe.experts.57.w3", "model.layers.3.block_sparse_moe.experts.58.w3", "model.layers.3.block_sparse_moe.experts.59.w3", "model.layers.3.block_sparse_moe.experts.60.w3", "model.layers.3.block_sparse_moe.experts.61.w3", "model.layers.3.block_sparse_moe.experts.62.w3", "model.layers.3.block_sparse_moe.experts.63.w3", "model.layers.3.block_sparse_moe.experts.64.w3", "model.layers.3.block_sparse_moe.experts.65.w3", "model.layers.3.block_sparse_moe.experts.66.w3", "model.layers.3.block_sparse_moe.experts.67.w3", "model.layers.3.block_sparse_moe.experts.68.w3", "model.layers.3.block_sparse_moe.experts.69.w3", "model.layers.3.block_sparse_moe.experts.70.w3", "model.layers.3.block_sparse_moe.experts.71.w3", "model.layers.3.block_sparse_moe.experts.72.w3", "model.layers.3.block_sparse_moe.experts.73.w3", "model.layers.3.block_sparse_moe.experts.74.w3", "model.layers.3.block_sparse_moe.experts.75.w3", "model.layers.3.block_sparse_moe.experts.76.w3", "model.layers.3.block_sparse_moe.experts.77.w3", "model.layers.3.block_sparse_moe.experts.78.w3", "model.layers.3.block_sparse_moe.experts.79.w3", "model.layers.3.block_sparse_moe.experts.80.w3", "model.layers.3.block_sparse_moe.experts.81.w3", "model.layers.3.block_sparse_moe.experts.82.w3", "model.layers.3.block_sparse_moe.experts.83.w3", "model.layers.3.block_sparse_moe.experts.84.w3", "model.layers.3.block_sparse_moe.experts.85.w3", "model.layers.3.block_sparse_moe.experts.86.w3", "model.layers.3.block_sparse_moe.experts.87.w3", "model.layers.3.block_sparse_moe.experts.88.w3", "model.layers.3.block_sparse_moe.experts.89.w3", "model.layers.3.block_sparse_moe.experts.90.w3", "model.layers.3.block_sparse_moe.experts.91.w3", "model.layers.3.block_sparse_moe.experts.92.w3", "model.layers.3.block_sparse_moe.experts.93.w3", "model.layers.3.block_sparse_moe.experts.94.w3", "model.layers.3.block_sparse_moe.experts.95.w3", "model.layers.3.block_sparse_moe.experts.96.w3", "model.layers.3.block_sparse_moe.experts.97.w3", "model.layers.3.block_sparse_moe.experts.98.w3", "model.layers.3.block_sparse_moe.experts.99.w3", "model.layers.3.block_sparse_moe.experts.100.w3", "model.layers.3.block_sparse_moe.experts.101.w3", "model.layers.3.block_sparse_moe.experts.102.w3", "model.layers.3.block_sparse_moe.experts.103.w3", "model.layers.3.block_sparse_moe.experts.104.w3", "model.layers.3.block_sparse_moe.experts.105.w3", "model.layers.3.block_sparse_moe.experts.106.w3", "model.layers.3.block_sparse_moe.experts.107.w3", "model.layers.3.block_sparse_moe.experts.108.w3", "model.layers.3.block_sparse_moe.experts.109.w3", "model.layers.3.block_sparse_moe.experts.110.w3", "model.layers.3.block_sparse_moe.experts.111.w3", "model.layers.3.block_sparse_moe.experts.112.w3", "model.layers.3.block_sparse_moe.experts.113.w3", "model.layers.3.block_sparse_moe.experts.114.w3", "model.layers.3.block_sparse_moe.experts.115.w3", "model.layers.3.block_sparse_moe.experts.116.w3", "model.layers.3.block_sparse_moe.experts.117.w3", "model.layers.3.block_sparse_moe.experts.118.w3", "model.layers.3.block_sparse_moe.experts.119.w3", "model.layers.3.block_sparse_moe.experts.120.w3", "model.layers.3.block_sparse_moe.experts.121.w3", "model.layers.3.block_sparse_moe.experts.122.w3", "model.layers.3.block_sparse_moe.experts.123.w3", "model.layers.3.block_sparse_moe.experts.124.w3", "model.layers.3.block_sparse_moe.experts.125.w3", "model.layers.3.block_sparse_moe.experts.126.w3", "model.layers.3.block_sparse_moe.experts.127.w3", "model.layers.3.block_sparse_moe.experts.128.w3", "model.layers.3.block_sparse_moe.experts.129.w3", "model.layers.3.block_sparse_moe.experts.130.w3", "model.layers.3.block_sparse_moe.experts.131.w3", "model.layers.3.block_sparse_moe.experts.132.w3", "model.layers.3.block_sparse_moe.experts.133.w3", "model.layers.3.block_sparse_moe.experts.134.w3", "model.layers.3.block_sparse_moe.experts.135.w3", "model.layers.3.block_sparse_moe.experts.136.w3", "model.layers.3.block_sparse_moe.experts.137.w3", "model.layers.3.block_sparse_moe.experts.138.w3", "model.layers.3.block_sparse_moe.experts.139.w3", "model.layers.3.block_sparse_moe.experts.140.w3", "model.layers.3.block_sparse_moe.experts.141.w3", "model.layers.3.block_sparse_moe.experts.142.w3", "model.layers.3.block_sparse_moe.experts.143.w3", "model.layers.3.block_sparse_moe.experts.144.w3", "model.layers.3.block_sparse_moe.experts.145.w3", "model.layers.3.block_sparse_moe.experts.146.w3", "model.layers.3.block_sparse_moe.experts.147.w3", "model.layers.3.block_sparse_moe.experts.148.w3", "model.layers.3.block_sparse_moe.experts.149.w3", "model.layers.3.block_sparse_moe.experts.150.w3", "model.layers.3.block_sparse_moe.experts.151.w3", "model.layers.3.block_sparse_moe.experts.152.w3", "model.layers.3.block_sparse_moe.experts.153.w3", "model.layers.3.block_sparse_moe.experts.154.w3", "model.layers.3.block_sparse_moe.experts.155.w3", "model.layers.3.block_sparse_moe.experts.156.w3", "model.layers.3.block_sparse_moe.experts.157.w3", "model.layers.3.block_sparse_moe.experts.158.w3", "model.layers.3.block_sparse_moe.experts.159.w3", "model.layers.3.block_sparse_moe.experts.160.w3", "model.layers.3.block_sparse_moe.experts.161.w3", "model.layers.3.block_sparse_moe.experts.162.w3", "model.layers.3.block_sparse_moe.experts.163.w3", "model.layers.3.block_sparse_moe.experts.164.w3", "model.layers.3.block_sparse_moe.experts.165.w3", "model.layers.3.block_sparse_moe.experts.166.w3", "model.layers.3.block_sparse_moe.experts.167.w3", "model.layers.3.block_sparse_moe.experts.168.w3", "model.layers.3.block_sparse_moe.experts.169.w3", "model.layers.3.block_sparse_moe.experts.170.w3", "model.layers.3.block_sparse_moe.experts.171.w3", "model.layers.3.block_sparse_moe.experts.172.w3", "model.layers.3.block_sparse_moe.experts.173.w3", "model.layers.3.block_sparse_moe.experts.174.w3", "model.layers.3.block_sparse_moe.experts.175.w3", "model.layers.3.block_sparse_moe.experts.176.w3", "model.layers.3.block_sparse_moe.experts.177.w3", "model.layers.3.block_sparse_moe.experts.178.w3", "model.layers.3.block_sparse_moe.experts.179.w3", "model.layers.3.block_sparse_moe.experts.180.w3", "model.layers.3.block_sparse_moe.experts.181.w3", "model.layers.3.block_sparse_moe.experts.182.w3", "model.layers.3.block_sparse_moe.experts.183.w3", "model.layers.3.block_sparse_moe.experts.184.w3", "model.layers.3.block_sparse_moe.experts.185.w3", "model.layers.3.block_sparse_moe.experts.186.w3", "model.layers.3.block_sparse_moe.experts.187.w3", "model.layers.3.block_sparse_moe.experts.188.w3", "model.layers.3.block_sparse_moe.experts.189.w3", "model.layers.3.block_sparse_moe.experts.190.w3", "model.layers.3.block_sparse_moe.experts.191.w3", "model.layers.3.block_sparse_moe.experts.192.w3", "model.layers.3.block_sparse_moe.experts.193.w3", "model.layers.3.block_sparse_moe.experts.194.w3", "model.layers.3.block_sparse_moe.experts.195.w3", "model.layers.3.block_sparse_moe.experts.196.w3", "model.layers.3.block_sparse_moe.experts.197.w3", "model.layers.3.block_sparse_moe.experts.198.w3", "model.layers.3.block_sparse_moe.experts.199.w3", "model.layers.3.block_sparse_moe.experts.200.w3", "model.layers.3.block_sparse_moe.experts.201.w3", "model.layers.3.block_sparse_moe.experts.202.w3", "model.layers.3.block_sparse_moe.experts.203.w3", "model.layers.3.block_sparse_moe.experts.204.w3", "model.layers.3.block_sparse_moe.experts.205.w3", "model.layers.3.block_sparse_moe.experts.206.w3", "model.layers.3.block_sparse_moe.experts.207.w3", "model.layers.3.block_sparse_moe.experts.208.w3", "model.layers.3.block_sparse_moe.experts.209.w3", "model.layers.3.block_sparse_moe.experts.210.w3", "model.layers.3.block_sparse_moe.experts.211.w3", "model.layers.3.block_sparse_moe.experts.212.w3", "model.layers.3.block_sparse_moe.experts.213.w3", "model.layers.3.block_sparse_moe.experts.214.w3", "model.layers.3.block_sparse_moe.experts.215.w3", "model.layers.3.block_sparse_moe.experts.216.w3", "model.layers.3.block_sparse_moe.experts.217.w3", "model.layers.3.block_sparse_moe.experts.218.w3", "model.layers.3.block_sparse_moe.experts.219.w3", "model.layers.3.block_sparse_moe.experts.220.w3", "model.layers.3.block_sparse_moe.experts.221.w3", "model.layers.3.block_sparse_moe.experts.222.w3", "model.layers.3.block_sparse_moe.experts.223.w3", "model.layers.3.block_sparse_moe.experts.224.w3", "model.layers.3.block_sparse_moe.experts.225.w3", "model.layers.3.block_sparse_moe.experts.226.w3", "model.layers.3.block_sparse_moe.experts.227.w3", "model.layers.3.block_sparse_moe.experts.228.w3", "model.layers.3.block_sparse_moe.experts.229.w3", "model.layers.3.block_sparse_moe.experts.230.w3", "model.layers.3.block_sparse_moe.experts.231.w3", "model.layers.3.block_sparse_moe.experts.232.w3", "model.layers.3.block_sparse_moe.experts.233.w3", "model.layers.3.block_sparse_moe.experts.234.w3", "model.layers.3.block_sparse_moe.experts.235.w3", "model.layers.3.block_sparse_moe.experts.236.w3", "model.layers.3.block_sparse_moe.experts.237.w3", "model.layers.3.block_sparse_moe.experts.238.w3", "model.layers.3.block_sparse_moe.experts.239.w3", "model.layers.3.block_sparse_moe.experts.240.w3", "model.layers.3.block_sparse_moe.experts.241.w3", "model.layers.3.block_sparse_moe.experts.242.w3", "model.layers.3.block_sparse_moe.experts.243.w3", "model.layers.3.block_sparse_moe.experts.244.w3", "model.layers.3.block_sparse_moe.experts.245.w3", "model.layers.3.block_sparse_moe.experts.246.w3", "model.layers.3.block_sparse_moe.experts.247.w3", "model.layers.3.block_sparse_moe.experts.248.w3", "model.layers.3.block_sparse_moe.experts.249.w3", "model.layers.3.block_sparse_moe.experts.250.w3", "model.layers.3.block_sparse_moe.experts.251.w3", "model.layers.3.block_sparse_moe.experts.252.w3", "model.layers.3.block_sparse_moe.experts.253.w3", "model.layers.3.block_sparse_moe.experts.254.w3", "model.layers.3.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.006625518202781677, "dbits": 2415919104 } ] }, { "idx": 19, "layers": [ "model.layers.3.block_sparse_moe.experts.0.w2", "model.layers.3.block_sparse_moe.experts.1.w2", "model.layers.3.block_sparse_moe.experts.2.w2", "model.layers.3.block_sparse_moe.experts.3.w2", "model.layers.3.block_sparse_moe.experts.4.w2", "model.layers.3.block_sparse_moe.experts.5.w2", "model.layers.3.block_sparse_moe.experts.6.w2", "model.layers.3.block_sparse_moe.experts.7.w2", "model.layers.3.block_sparse_moe.experts.8.w2", "model.layers.3.block_sparse_moe.experts.9.w2", "model.layers.3.block_sparse_moe.experts.10.w2", "model.layers.3.block_sparse_moe.experts.11.w2", "model.layers.3.block_sparse_moe.experts.12.w2", "model.layers.3.block_sparse_moe.experts.13.w2", "model.layers.3.block_sparse_moe.experts.14.w2", "model.layers.3.block_sparse_moe.experts.15.w2", "model.layers.3.block_sparse_moe.experts.16.w2", "model.layers.3.block_sparse_moe.experts.17.w2", "model.layers.3.block_sparse_moe.experts.18.w2", "model.layers.3.block_sparse_moe.experts.19.w2", "model.layers.3.block_sparse_moe.experts.20.w2", "model.layers.3.block_sparse_moe.experts.21.w2", "model.layers.3.block_sparse_moe.experts.22.w2", "model.layers.3.block_sparse_moe.experts.23.w2", "model.layers.3.block_sparse_moe.experts.24.w2", "model.layers.3.block_sparse_moe.experts.25.w2", "model.layers.3.block_sparse_moe.experts.26.w2", "model.layers.3.block_sparse_moe.experts.27.w2", "model.layers.3.block_sparse_moe.experts.28.w2", "model.layers.3.block_sparse_moe.experts.29.w2", "model.layers.3.block_sparse_moe.experts.30.w2", "model.layers.3.block_sparse_moe.experts.31.w2", "model.layers.3.block_sparse_moe.experts.32.w2", "model.layers.3.block_sparse_moe.experts.33.w2", "model.layers.3.block_sparse_moe.experts.34.w2", "model.layers.3.block_sparse_moe.experts.35.w2", "model.layers.3.block_sparse_moe.experts.36.w2", "model.layers.3.block_sparse_moe.experts.37.w2", "model.layers.3.block_sparse_moe.experts.38.w2", "model.layers.3.block_sparse_moe.experts.39.w2", "model.layers.3.block_sparse_moe.experts.40.w2", "model.layers.3.block_sparse_moe.experts.41.w2", "model.layers.3.block_sparse_moe.experts.42.w2", "model.layers.3.block_sparse_moe.experts.43.w2", "model.layers.3.block_sparse_moe.experts.44.w2", "model.layers.3.block_sparse_moe.experts.45.w2", "model.layers.3.block_sparse_moe.experts.46.w2", "model.layers.3.block_sparse_moe.experts.47.w2", "model.layers.3.block_sparse_moe.experts.48.w2", "model.layers.3.block_sparse_moe.experts.49.w2", "model.layers.3.block_sparse_moe.experts.50.w2", "model.layers.3.block_sparse_moe.experts.51.w2", "model.layers.3.block_sparse_moe.experts.52.w2", "model.layers.3.block_sparse_moe.experts.53.w2", "model.layers.3.block_sparse_moe.experts.54.w2", "model.layers.3.block_sparse_moe.experts.55.w2", "model.layers.3.block_sparse_moe.experts.56.w2", "model.layers.3.block_sparse_moe.experts.57.w2", "model.layers.3.block_sparse_moe.experts.58.w2", "model.layers.3.block_sparse_moe.experts.59.w2", "model.layers.3.block_sparse_moe.experts.60.w2", "model.layers.3.block_sparse_moe.experts.61.w2", "model.layers.3.block_sparse_moe.experts.62.w2", "model.layers.3.block_sparse_moe.experts.63.w2", "model.layers.3.block_sparse_moe.experts.64.w2", "model.layers.3.block_sparse_moe.experts.65.w2", "model.layers.3.block_sparse_moe.experts.66.w2", "model.layers.3.block_sparse_moe.experts.67.w2", "model.layers.3.block_sparse_moe.experts.68.w2", "model.layers.3.block_sparse_moe.experts.69.w2", "model.layers.3.block_sparse_moe.experts.70.w2", "model.layers.3.block_sparse_moe.experts.71.w2", "model.layers.3.block_sparse_moe.experts.72.w2", "model.layers.3.block_sparse_moe.experts.73.w2", "model.layers.3.block_sparse_moe.experts.74.w2", "model.layers.3.block_sparse_moe.experts.75.w2", "model.layers.3.block_sparse_moe.experts.76.w2", "model.layers.3.block_sparse_moe.experts.77.w2", "model.layers.3.block_sparse_moe.experts.78.w2", "model.layers.3.block_sparse_moe.experts.79.w2", "model.layers.3.block_sparse_moe.experts.80.w2", "model.layers.3.block_sparse_moe.experts.81.w2", "model.layers.3.block_sparse_moe.experts.82.w2", "model.layers.3.block_sparse_moe.experts.83.w2", "model.layers.3.block_sparse_moe.experts.84.w2", "model.layers.3.block_sparse_moe.experts.85.w2", "model.layers.3.block_sparse_moe.experts.86.w2", "model.layers.3.block_sparse_moe.experts.87.w2", "model.layers.3.block_sparse_moe.experts.88.w2", "model.layers.3.block_sparse_moe.experts.89.w2", "model.layers.3.block_sparse_moe.experts.90.w2", "model.layers.3.block_sparse_moe.experts.91.w2", "model.layers.3.block_sparse_moe.experts.92.w2", "model.layers.3.block_sparse_moe.experts.93.w2", "model.layers.3.block_sparse_moe.experts.94.w2", "model.layers.3.block_sparse_moe.experts.95.w2", "model.layers.3.block_sparse_moe.experts.96.w2", "model.layers.3.block_sparse_moe.experts.97.w2", "model.layers.3.block_sparse_moe.experts.98.w2", "model.layers.3.block_sparse_moe.experts.99.w2", "model.layers.3.block_sparse_moe.experts.100.w2", "model.layers.3.block_sparse_moe.experts.101.w2", "model.layers.3.block_sparse_moe.experts.102.w2", "model.layers.3.block_sparse_moe.experts.103.w2", "model.layers.3.block_sparse_moe.experts.104.w2", "model.layers.3.block_sparse_moe.experts.105.w2", "model.layers.3.block_sparse_moe.experts.106.w2", "model.layers.3.block_sparse_moe.experts.107.w2", "model.layers.3.block_sparse_moe.experts.108.w2", "model.layers.3.block_sparse_moe.experts.109.w2", "model.layers.3.block_sparse_moe.experts.110.w2", "model.layers.3.block_sparse_moe.experts.111.w2", "model.layers.3.block_sparse_moe.experts.112.w2", "model.layers.3.block_sparse_moe.experts.113.w2", "model.layers.3.block_sparse_moe.experts.114.w2", "model.layers.3.block_sparse_moe.experts.115.w2", "model.layers.3.block_sparse_moe.experts.116.w2", "model.layers.3.block_sparse_moe.experts.117.w2", "model.layers.3.block_sparse_moe.experts.118.w2", "model.layers.3.block_sparse_moe.experts.119.w2", "model.layers.3.block_sparse_moe.experts.120.w2", "model.layers.3.block_sparse_moe.experts.121.w2", "model.layers.3.block_sparse_moe.experts.122.w2", "model.layers.3.block_sparse_moe.experts.123.w2", "model.layers.3.block_sparse_moe.experts.124.w2", "model.layers.3.block_sparse_moe.experts.125.w2", "model.layers.3.block_sparse_moe.experts.126.w2", "model.layers.3.block_sparse_moe.experts.127.w2", "model.layers.3.block_sparse_moe.experts.128.w2", "model.layers.3.block_sparse_moe.experts.129.w2", "model.layers.3.block_sparse_moe.experts.130.w2", "model.layers.3.block_sparse_moe.experts.131.w2", "model.layers.3.block_sparse_moe.experts.132.w2", "model.layers.3.block_sparse_moe.experts.133.w2", "model.layers.3.block_sparse_moe.experts.134.w2", "model.layers.3.block_sparse_moe.experts.135.w2", "model.layers.3.block_sparse_moe.experts.136.w2", "model.layers.3.block_sparse_moe.experts.137.w2", "model.layers.3.block_sparse_moe.experts.138.w2", "model.layers.3.block_sparse_moe.experts.139.w2", "model.layers.3.block_sparse_moe.experts.140.w2", "model.layers.3.block_sparse_moe.experts.141.w2", "model.layers.3.block_sparse_moe.experts.142.w2", "model.layers.3.block_sparse_moe.experts.143.w2", "model.layers.3.block_sparse_moe.experts.144.w2", "model.layers.3.block_sparse_moe.experts.145.w2", "model.layers.3.block_sparse_moe.experts.146.w2", "model.layers.3.block_sparse_moe.experts.147.w2", "model.layers.3.block_sparse_moe.experts.148.w2", "model.layers.3.block_sparse_moe.experts.149.w2", "model.layers.3.block_sparse_moe.experts.150.w2", "model.layers.3.block_sparse_moe.experts.151.w2", "model.layers.3.block_sparse_moe.experts.152.w2", "model.layers.3.block_sparse_moe.experts.153.w2", "model.layers.3.block_sparse_moe.experts.154.w2", "model.layers.3.block_sparse_moe.experts.155.w2", "model.layers.3.block_sparse_moe.experts.156.w2", "model.layers.3.block_sparse_moe.experts.157.w2", "model.layers.3.block_sparse_moe.experts.158.w2", "model.layers.3.block_sparse_moe.experts.159.w2", "model.layers.3.block_sparse_moe.experts.160.w2", "model.layers.3.block_sparse_moe.experts.161.w2", "model.layers.3.block_sparse_moe.experts.162.w2", "model.layers.3.block_sparse_moe.experts.163.w2", "model.layers.3.block_sparse_moe.experts.164.w2", "model.layers.3.block_sparse_moe.experts.165.w2", "model.layers.3.block_sparse_moe.experts.166.w2", "model.layers.3.block_sparse_moe.experts.167.w2", "model.layers.3.block_sparse_moe.experts.168.w2", "model.layers.3.block_sparse_moe.experts.169.w2", "model.layers.3.block_sparse_moe.experts.170.w2", "model.layers.3.block_sparse_moe.experts.171.w2", "model.layers.3.block_sparse_moe.experts.172.w2", "model.layers.3.block_sparse_moe.experts.173.w2", "model.layers.3.block_sparse_moe.experts.174.w2", "model.layers.3.block_sparse_moe.experts.175.w2", "model.layers.3.block_sparse_moe.experts.176.w2", "model.layers.3.block_sparse_moe.experts.177.w2", "model.layers.3.block_sparse_moe.experts.178.w2", "model.layers.3.block_sparse_moe.experts.179.w2", "model.layers.3.block_sparse_moe.experts.180.w2", "model.layers.3.block_sparse_moe.experts.181.w2", "model.layers.3.block_sparse_moe.experts.182.w2", "model.layers.3.block_sparse_moe.experts.183.w2", "model.layers.3.block_sparse_moe.experts.184.w2", "model.layers.3.block_sparse_moe.experts.185.w2", "model.layers.3.block_sparse_moe.experts.186.w2", "model.layers.3.block_sparse_moe.experts.187.w2", "model.layers.3.block_sparse_moe.experts.188.w2", "model.layers.3.block_sparse_moe.experts.189.w2", "model.layers.3.block_sparse_moe.experts.190.w2", "model.layers.3.block_sparse_moe.experts.191.w2", "model.layers.3.block_sparse_moe.experts.192.w2", "model.layers.3.block_sparse_moe.experts.193.w2", "model.layers.3.block_sparse_moe.experts.194.w2", "model.layers.3.block_sparse_moe.experts.195.w2", "model.layers.3.block_sparse_moe.experts.196.w2", "model.layers.3.block_sparse_moe.experts.197.w2", "model.layers.3.block_sparse_moe.experts.198.w2", "model.layers.3.block_sparse_moe.experts.199.w2", "model.layers.3.block_sparse_moe.experts.200.w2", "model.layers.3.block_sparse_moe.experts.201.w2", "model.layers.3.block_sparse_moe.experts.202.w2", "model.layers.3.block_sparse_moe.experts.203.w2", "model.layers.3.block_sparse_moe.experts.204.w2", "model.layers.3.block_sparse_moe.experts.205.w2", "model.layers.3.block_sparse_moe.experts.206.w2", "model.layers.3.block_sparse_moe.experts.207.w2", "model.layers.3.block_sparse_moe.experts.208.w2", "model.layers.3.block_sparse_moe.experts.209.w2", "model.layers.3.block_sparse_moe.experts.210.w2", "model.layers.3.block_sparse_moe.experts.211.w2", "model.layers.3.block_sparse_moe.experts.212.w2", "model.layers.3.block_sparse_moe.experts.213.w2", "model.layers.3.block_sparse_moe.experts.214.w2", "model.layers.3.block_sparse_moe.experts.215.w2", "model.layers.3.block_sparse_moe.experts.216.w2", "model.layers.3.block_sparse_moe.experts.217.w2", "model.layers.3.block_sparse_moe.experts.218.w2", "model.layers.3.block_sparse_moe.experts.219.w2", "model.layers.3.block_sparse_moe.experts.220.w2", "model.layers.3.block_sparse_moe.experts.221.w2", "model.layers.3.block_sparse_moe.experts.222.w2", "model.layers.3.block_sparse_moe.experts.223.w2", "model.layers.3.block_sparse_moe.experts.224.w2", "model.layers.3.block_sparse_moe.experts.225.w2", "model.layers.3.block_sparse_moe.experts.226.w2", "model.layers.3.block_sparse_moe.experts.227.w2", "model.layers.3.block_sparse_moe.experts.228.w2", "model.layers.3.block_sparse_moe.experts.229.w2", "model.layers.3.block_sparse_moe.experts.230.w2", "model.layers.3.block_sparse_moe.experts.231.w2", "model.layers.3.block_sparse_moe.experts.232.w2", "model.layers.3.block_sparse_moe.experts.233.w2", "model.layers.3.block_sparse_moe.experts.234.w2", "model.layers.3.block_sparse_moe.experts.235.w2", "model.layers.3.block_sparse_moe.experts.236.w2", "model.layers.3.block_sparse_moe.experts.237.w2", "model.layers.3.block_sparse_moe.experts.238.w2", "model.layers.3.block_sparse_moe.experts.239.w2", "model.layers.3.block_sparse_moe.experts.240.w2", "model.layers.3.block_sparse_moe.experts.241.w2", "model.layers.3.block_sparse_moe.experts.242.w2", "model.layers.3.block_sparse_moe.experts.243.w2", "model.layers.3.block_sparse_moe.experts.244.w2", "model.layers.3.block_sparse_moe.experts.245.w2", "model.layers.3.block_sparse_moe.experts.246.w2", "model.layers.3.block_sparse_moe.experts.247.w2", "model.layers.3.block_sparse_moe.experts.248.w2", "model.layers.3.block_sparse_moe.experts.249.w2", "model.layers.3.block_sparse_moe.experts.250.w2", "model.layers.3.block_sparse_moe.experts.251.w2", "model.layers.3.block_sparse_moe.experts.252.w2", "model.layers.3.block_sparse_moe.experts.253.w2", "model.layers.3.block_sparse_moe.experts.254.w2", "model.layers.3.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -1.7344951629416627e-06, "dbits": 1207959552 } ] }, { "idx": 20, "layers": [ "model.layers.4.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0012144118547440241, "dbits": 18874368 } ] }, { "idx": 21, "layers": [ "model.layers.4.self_attn.k_proj", "model.layers.4.self_attn.v_proj" ], "candidates": [ { "dkld": 0.007871416211128301, "dbits": 6291456 } ] }, { "idx": 22, "layers": [ "model.layers.4.self_attn.o_proj" ], "candidates": [ { "dkld": 0.001679575443267911, "dbits": 18874368 } ] }, { "idx": 23, "layers": [ "model.layers.4.block_sparse_moe.experts.0.w1", "model.layers.4.block_sparse_moe.experts.1.w1", "model.layers.4.block_sparse_moe.experts.2.w1", "model.layers.4.block_sparse_moe.experts.3.w1", "model.layers.4.block_sparse_moe.experts.4.w1", "model.layers.4.block_sparse_moe.experts.5.w1", "model.layers.4.block_sparse_moe.experts.6.w1", "model.layers.4.block_sparse_moe.experts.7.w1", "model.layers.4.block_sparse_moe.experts.8.w1", "model.layers.4.block_sparse_moe.experts.9.w1", "model.layers.4.block_sparse_moe.experts.10.w1", "model.layers.4.block_sparse_moe.experts.11.w1", "model.layers.4.block_sparse_moe.experts.12.w1", "model.layers.4.block_sparse_moe.experts.13.w1", "model.layers.4.block_sparse_moe.experts.14.w1", "model.layers.4.block_sparse_moe.experts.15.w1", "model.layers.4.block_sparse_moe.experts.16.w1", "model.layers.4.block_sparse_moe.experts.17.w1", "model.layers.4.block_sparse_moe.experts.18.w1", "model.layers.4.block_sparse_moe.experts.19.w1", "model.layers.4.block_sparse_moe.experts.20.w1", "model.layers.4.block_sparse_moe.experts.21.w1", "model.layers.4.block_sparse_moe.experts.22.w1", "model.layers.4.block_sparse_moe.experts.23.w1", "model.layers.4.block_sparse_moe.experts.24.w1", "model.layers.4.block_sparse_moe.experts.25.w1", "model.layers.4.block_sparse_moe.experts.26.w1", "model.layers.4.block_sparse_moe.experts.27.w1", "model.layers.4.block_sparse_moe.experts.28.w1", "model.layers.4.block_sparse_moe.experts.29.w1", "model.layers.4.block_sparse_moe.experts.30.w1", "model.layers.4.block_sparse_moe.experts.31.w1", "model.layers.4.block_sparse_moe.experts.32.w1", "model.layers.4.block_sparse_moe.experts.33.w1", "model.layers.4.block_sparse_moe.experts.34.w1", "model.layers.4.block_sparse_moe.experts.35.w1", "model.layers.4.block_sparse_moe.experts.36.w1", "model.layers.4.block_sparse_moe.experts.37.w1", "model.layers.4.block_sparse_moe.experts.38.w1", "model.layers.4.block_sparse_moe.experts.39.w1", "model.layers.4.block_sparse_moe.experts.40.w1", "model.layers.4.block_sparse_moe.experts.41.w1", "model.layers.4.block_sparse_moe.experts.42.w1", "model.layers.4.block_sparse_moe.experts.43.w1", "model.layers.4.block_sparse_moe.experts.44.w1", "model.layers.4.block_sparse_moe.experts.45.w1", "model.layers.4.block_sparse_moe.experts.46.w1", "model.layers.4.block_sparse_moe.experts.47.w1", "model.layers.4.block_sparse_moe.experts.48.w1", "model.layers.4.block_sparse_moe.experts.49.w1", "model.layers.4.block_sparse_moe.experts.50.w1", "model.layers.4.block_sparse_moe.experts.51.w1", "model.layers.4.block_sparse_moe.experts.52.w1", "model.layers.4.block_sparse_moe.experts.53.w1", "model.layers.4.block_sparse_moe.experts.54.w1", "model.layers.4.block_sparse_moe.experts.55.w1", "model.layers.4.block_sparse_moe.experts.56.w1", "model.layers.4.block_sparse_moe.experts.57.w1", "model.layers.4.block_sparse_moe.experts.58.w1", "model.layers.4.block_sparse_moe.experts.59.w1", "model.layers.4.block_sparse_moe.experts.60.w1", "model.layers.4.block_sparse_moe.experts.61.w1", "model.layers.4.block_sparse_moe.experts.62.w1", "model.layers.4.block_sparse_moe.experts.63.w1", "model.layers.4.block_sparse_moe.experts.64.w1", "model.layers.4.block_sparse_moe.experts.65.w1", "model.layers.4.block_sparse_moe.experts.66.w1", "model.layers.4.block_sparse_moe.experts.67.w1", "model.layers.4.block_sparse_moe.experts.68.w1", "model.layers.4.block_sparse_moe.experts.69.w1", "model.layers.4.block_sparse_moe.experts.70.w1", "model.layers.4.block_sparse_moe.experts.71.w1", "model.layers.4.block_sparse_moe.experts.72.w1", "model.layers.4.block_sparse_moe.experts.73.w1", "model.layers.4.block_sparse_moe.experts.74.w1", "model.layers.4.block_sparse_moe.experts.75.w1", "model.layers.4.block_sparse_moe.experts.76.w1", "model.layers.4.block_sparse_moe.experts.77.w1", "model.layers.4.block_sparse_moe.experts.78.w1", "model.layers.4.block_sparse_moe.experts.79.w1", "model.layers.4.block_sparse_moe.experts.80.w1", "model.layers.4.block_sparse_moe.experts.81.w1", "model.layers.4.block_sparse_moe.experts.82.w1", "model.layers.4.block_sparse_moe.experts.83.w1", "model.layers.4.block_sparse_moe.experts.84.w1", "model.layers.4.block_sparse_moe.experts.85.w1", "model.layers.4.block_sparse_moe.experts.86.w1", "model.layers.4.block_sparse_moe.experts.87.w1", "model.layers.4.block_sparse_moe.experts.88.w1", "model.layers.4.block_sparse_moe.experts.89.w1", "model.layers.4.block_sparse_moe.experts.90.w1", "model.layers.4.block_sparse_moe.experts.91.w1", "model.layers.4.block_sparse_moe.experts.92.w1", "model.layers.4.block_sparse_moe.experts.93.w1", "model.layers.4.block_sparse_moe.experts.94.w1", "model.layers.4.block_sparse_moe.experts.95.w1", "model.layers.4.block_sparse_moe.experts.96.w1", "model.layers.4.block_sparse_moe.experts.97.w1", "model.layers.4.block_sparse_moe.experts.98.w1", "model.layers.4.block_sparse_moe.experts.99.w1", "model.layers.4.block_sparse_moe.experts.100.w1", "model.layers.4.block_sparse_moe.experts.101.w1", "model.layers.4.block_sparse_moe.experts.102.w1", "model.layers.4.block_sparse_moe.experts.103.w1", "model.layers.4.block_sparse_moe.experts.104.w1", "model.layers.4.block_sparse_moe.experts.105.w1", "model.layers.4.block_sparse_moe.experts.106.w1", "model.layers.4.block_sparse_moe.experts.107.w1", "model.layers.4.block_sparse_moe.experts.108.w1", "model.layers.4.block_sparse_moe.experts.109.w1", "model.layers.4.block_sparse_moe.experts.110.w1", "model.layers.4.block_sparse_moe.experts.111.w1", "model.layers.4.block_sparse_moe.experts.112.w1", "model.layers.4.block_sparse_moe.experts.113.w1", "model.layers.4.block_sparse_moe.experts.114.w1", "model.layers.4.block_sparse_moe.experts.115.w1", "model.layers.4.block_sparse_moe.experts.116.w1", "model.layers.4.block_sparse_moe.experts.117.w1", "model.layers.4.block_sparse_moe.experts.118.w1", "model.layers.4.block_sparse_moe.experts.119.w1", "model.layers.4.block_sparse_moe.experts.120.w1", "model.layers.4.block_sparse_moe.experts.121.w1", "model.layers.4.block_sparse_moe.experts.122.w1", "model.layers.4.block_sparse_moe.experts.123.w1", "model.layers.4.block_sparse_moe.experts.124.w1", "model.layers.4.block_sparse_moe.experts.125.w1", "model.layers.4.block_sparse_moe.experts.126.w1", "model.layers.4.block_sparse_moe.experts.127.w1", "model.layers.4.block_sparse_moe.experts.128.w1", "model.layers.4.block_sparse_moe.experts.129.w1", "model.layers.4.block_sparse_moe.experts.130.w1", "model.layers.4.block_sparse_moe.experts.131.w1", "model.layers.4.block_sparse_moe.experts.132.w1", "model.layers.4.block_sparse_moe.experts.133.w1", "model.layers.4.block_sparse_moe.experts.134.w1", "model.layers.4.block_sparse_moe.experts.135.w1", "model.layers.4.block_sparse_moe.experts.136.w1", "model.layers.4.block_sparse_moe.experts.137.w1", "model.layers.4.block_sparse_moe.experts.138.w1", "model.layers.4.block_sparse_moe.experts.139.w1", "model.layers.4.block_sparse_moe.experts.140.w1", "model.layers.4.block_sparse_moe.experts.141.w1", "model.layers.4.block_sparse_moe.experts.142.w1", "model.layers.4.block_sparse_moe.experts.143.w1", "model.layers.4.block_sparse_moe.experts.144.w1", "model.layers.4.block_sparse_moe.experts.145.w1", "model.layers.4.block_sparse_moe.experts.146.w1", "model.layers.4.block_sparse_moe.experts.147.w1", "model.layers.4.block_sparse_moe.experts.148.w1", "model.layers.4.block_sparse_moe.experts.149.w1", "model.layers.4.block_sparse_moe.experts.150.w1", "model.layers.4.block_sparse_moe.experts.151.w1", "model.layers.4.block_sparse_moe.experts.152.w1", "model.layers.4.block_sparse_moe.experts.153.w1", "model.layers.4.block_sparse_moe.experts.154.w1", "model.layers.4.block_sparse_moe.experts.155.w1", "model.layers.4.block_sparse_moe.experts.156.w1", "model.layers.4.block_sparse_moe.experts.157.w1", "model.layers.4.block_sparse_moe.experts.158.w1", "model.layers.4.block_sparse_moe.experts.159.w1", "model.layers.4.block_sparse_moe.experts.160.w1", "model.layers.4.block_sparse_moe.experts.161.w1", "model.layers.4.block_sparse_moe.experts.162.w1", "model.layers.4.block_sparse_moe.experts.163.w1", "model.layers.4.block_sparse_moe.experts.164.w1", "model.layers.4.block_sparse_moe.experts.165.w1", "model.layers.4.block_sparse_moe.experts.166.w1", "model.layers.4.block_sparse_moe.experts.167.w1", "model.layers.4.block_sparse_moe.experts.168.w1", "model.layers.4.block_sparse_moe.experts.169.w1", "model.layers.4.block_sparse_moe.experts.170.w1", "model.layers.4.block_sparse_moe.experts.171.w1", "model.layers.4.block_sparse_moe.experts.172.w1", "model.layers.4.block_sparse_moe.experts.173.w1", "model.layers.4.block_sparse_moe.experts.174.w1", "model.layers.4.block_sparse_moe.experts.175.w1", "model.layers.4.block_sparse_moe.experts.176.w1", "model.layers.4.block_sparse_moe.experts.177.w1", "model.layers.4.block_sparse_moe.experts.178.w1", "model.layers.4.block_sparse_moe.experts.179.w1", "model.layers.4.block_sparse_moe.experts.180.w1", "model.layers.4.block_sparse_moe.experts.181.w1", "model.layers.4.block_sparse_moe.experts.182.w1", "model.layers.4.block_sparse_moe.experts.183.w1", "model.layers.4.block_sparse_moe.experts.184.w1", "model.layers.4.block_sparse_moe.experts.185.w1", "model.layers.4.block_sparse_moe.experts.186.w1", "model.layers.4.block_sparse_moe.experts.187.w1", "model.layers.4.block_sparse_moe.experts.188.w1", "model.layers.4.block_sparse_moe.experts.189.w1", "model.layers.4.block_sparse_moe.experts.190.w1", "model.layers.4.block_sparse_moe.experts.191.w1", "model.layers.4.block_sparse_moe.experts.192.w1", "model.layers.4.block_sparse_moe.experts.193.w1", "model.layers.4.block_sparse_moe.experts.194.w1", "model.layers.4.block_sparse_moe.experts.195.w1", "model.layers.4.block_sparse_moe.experts.196.w1", "model.layers.4.block_sparse_moe.experts.197.w1", "model.layers.4.block_sparse_moe.experts.198.w1", "model.layers.4.block_sparse_moe.experts.199.w1", "model.layers.4.block_sparse_moe.experts.200.w1", "model.layers.4.block_sparse_moe.experts.201.w1", "model.layers.4.block_sparse_moe.experts.202.w1", "model.layers.4.block_sparse_moe.experts.203.w1", "model.layers.4.block_sparse_moe.experts.204.w1", "model.layers.4.block_sparse_moe.experts.205.w1", "model.layers.4.block_sparse_moe.experts.206.w1", "model.layers.4.block_sparse_moe.experts.207.w1", "model.layers.4.block_sparse_moe.experts.208.w1", "model.layers.4.block_sparse_moe.experts.209.w1", "model.layers.4.block_sparse_moe.experts.210.w1", "model.layers.4.block_sparse_moe.experts.211.w1", "model.layers.4.block_sparse_moe.experts.212.w1", "model.layers.4.block_sparse_moe.experts.213.w1", "model.layers.4.block_sparse_moe.experts.214.w1", "model.layers.4.block_sparse_moe.experts.215.w1", "model.layers.4.block_sparse_moe.experts.216.w1", "model.layers.4.block_sparse_moe.experts.217.w1", "model.layers.4.block_sparse_moe.experts.218.w1", "model.layers.4.block_sparse_moe.experts.219.w1", "model.layers.4.block_sparse_moe.experts.220.w1", "model.layers.4.block_sparse_moe.experts.221.w1", "model.layers.4.block_sparse_moe.experts.222.w1", "model.layers.4.block_sparse_moe.experts.223.w1", "model.layers.4.block_sparse_moe.experts.224.w1", "model.layers.4.block_sparse_moe.experts.225.w1", "model.layers.4.block_sparse_moe.experts.226.w1", "model.layers.4.block_sparse_moe.experts.227.w1", "model.layers.4.block_sparse_moe.experts.228.w1", "model.layers.4.block_sparse_moe.experts.229.w1", "model.layers.4.block_sparse_moe.experts.230.w1", "model.layers.4.block_sparse_moe.experts.231.w1", "model.layers.4.block_sparse_moe.experts.232.w1", "model.layers.4.block_sparse_moe.experts.233.w1", "model.layers.4.block_sparse_moe.experts.234.w1", "model.layers.4.block_sparse_moe.experts.235.w1", "model.layers.4.block_sparse_moe.experts.236.w1", "model.layers.4.block_sparse_moe.experts.237.w1", "model.layers.4.block_sparse_moe.experts.238.w1", "model.layers.4.block_sparse_moe.experts.239.w1", "model.layers.4.block_sparse_moe.experts.240.w1", "model.layers.4.block_sparse_moe.experts.241.w1", "model.layers.4.block_sparse_moe.experts.242.w1", "model.layers.4.block_sparse_moe.experts.243.w1", "model.layers.4.block_sparse_moe.experts.244.w1", "model.layers.4.block_sparse_moe.experts.245.w1", "model.layers.4.block_sparse_moe.experts.246.w1", "model.layers.4.block_sparse_moe.experts.247.w1", "model.layers.4.block_sparse_moe.experts.248.w1", "model.layers.4.block_sparse_moe.experts.249.w1", "model.layers.4.block_sparse_moe.experts.250.w1", "model.layers.4.block_sparse_moe.experts.251.w1", "model.layers.4.block_sparse_moe.experts.252.w1", "model.layers.4.block_sparse_moe.experts.253.w1", "model.layers.4.block_sparse_moe.experts.254.w1", "model.layers.4.block_sparse_moe.experts.255.w1", "model.layers.4.block_sparse_moe.experts.0.w3", "model.layers.4.block_sparse_moe.experts.1.w3", "model.layers.4.block_sparse_moe.experts.2.w3", "model.layers.4.block_sparse_moe.experts.3.w3", "model.layers.4.block_sparse_moe.experts.4.w3", "model.layers.4.block_sparse_moe.experts.5.w3", "model.layers.4.block_sparse_moe.experts.6.w3", "model.layers.4.block_sparse_moe.experts.7.w3", "model.layers.4.block_sparse_moe.experts.8.w3", "model.layers.4.block_sparse_moe.experts.9.w3", "model.layers.4.block_sparse_moe.experts.10.w3", "model.layers.4.block_sparse_moe.experts.11.w3", "model.layers.4.block_sparse_moe.experts.12.w3", "model.layers.4.block_sparse_moe.experts.13.w3", "model.layers.4.block_sparse_moe.experts.14.w3", "model.layers.4.block_sparse_moe.experts.15.w3", "model.layers.4.block_sparse_moe.experts.16.w3", "model.layers.4.block_sparse_moe.experts.17.w3", "model.layers.4.block_sparse_moe.experts.18.w3", "model.layers.4.block_sparse_moe.experts.19.w3", "model.layers.4.block_sparse_moe.experts.20.w3", "model.layers.4.block_sparse_moe.experts.21.w3", "model.layers.4.block_sparse_moe.experts.22.w3", "model.layers.4.block_sparse_moe.experts.23.w3", "model.layers.4.block_sparse_moe.experts.24.w3", "model.layers.4.block_sparse_moe.experts.25.w3", "model.layers.4.block_sparse_moe.experts.26.w3", "model.layers.4.block_sparse_moe.experts.27.w3", "model.layers.4.block_sparse_moe.experts.28.w3", "model.layers.4.block_sparse_moe.experts.29.w3", "model.layers.4.block_sparse_moe.experts.30.w3", "model.layers.4.block_sparse_moe.experts.31.w3", "model.layers.4.block_sparse_moe.experts.32.w3", "model.layers.4.block_sparse_moe.experts.33.w3", "model.layers.4.block_sparse_moe.experts.34.w3", "model.layers.4.block_sparse_moe.experts.35.w3", "model.layers.4.block_sparse_moe.experts.36.w3", "model.layers.4.block_sparse_moe.experts.37.w3", "model.layers.4.block_sparse_moe.experts.38.w3", "model.layers.4.block_sparse_moe.experts.39.w3", "model.layers.4.block_sparse_moe.experts.40.w3", "model.layers.4.block_sparse_moe.experts.41.w3", "model.layers.4.block_sparse_moe.experts.42.w3", "model.layers.4.block_sparse_moe.experts.43.w3", "model.layers.4.block_sparse_moe.experts.44.w3", "model.layers.4.block_sparse_moe.experts.45.w3", "model.layers.4.block_sparse_moe.experts.46.w3", "model.layers.4.block_sparse_moe.experts.47.w3", "model.layers.4.block_sparse_moe.experts.48.w3", "model.layers.4.block_sparse_moe.experts.49.w3", "model.layers.4.block_sparse_moe.experts.50.w3", "model.layers.4.block_sparse_moe.experts.51.w3", "model.layers.4.block_sparse_moe.experts.52.w3", "model.layers.4.block_sparse_moe.experts.53.w3", "model.layers.4.block_sparse_moe.experts.54.w3", "model.layers.4.block_sparse_moe.experts.55.w3", "model.layers.4.block_sparse_moe.experts.56.w3", "model.layers.4.block_sparse_moe.experts.57.w3", "model.layers.4.block_sparse_moe.experts.58.w3", "model.layers.4.block_sparse_moe.experts.59.w3", "model.layers.4.block_sparse_moe.experts.60.w3", "model.layers.4.block_sparse_moe.experts.61.w3", "model.layers.4.block_sparse_moe.experts.62.w3", "model.layers.4.block_sparse_moe.experts.63.w3", "model.layers.4.block_sparse_moe.experts.64.w3", "model.layers.4.block_sparse_moe.experts.65.w3", "model.layers.4.block_sparse_moe.experts.66.w3", "model.layers.4.block_sparse_moe.experts.67.w3", "model.layers.4.block_sparse_moe.experts.68.w3", "model.layers.4.block_sparse_moe.experts.69.w3", "model.layers.4.block_sparse_moe.experts.70.w3", "model.layers.4.block_sparse_moe.experts.71.w3", "model.layers.4.block_sparse_moe.experts.72.w3", "model.layers.4.block_sparse_moe.experts.73.w3", "model.layers.4.block_sparse_moe.experts.74.w3", "model.layers.4.block_sparse_moe.experts.75.w3", "model.layers.4.block_sparse_moe.experts.76.w3", "model.layers.4.block_sparse_moe.experts.77.w3", "model.layers.4.block_sparse_moe.experts.78.w3", "model.layers.4.block_sparse_moe.experts.79.w3", "model.layers.4.block_sparse_moe.experts.80.w3", "model.layers.4.block_sparse_moe.experts.81.w3", "model.layers.4.block_sparse_moe.experts.82.w3", "model.layers.4.block_sparse_moe.experts.83.w3", "model.layers.4.block_sparse_moe.experts.84.w3", "model.layers.4.block_sparse_moe.experts.85.w3", "model.layers.4.block_sparse_moe.experts.86.w3", "model.layers.4.block_sparse_moe.experts.87.w3", "model.layers.4.block_sparse_moe.experts.88.w3", "model.layers.4.block_sparse_moe.experts.89.w3", "model.layers.4.block_sparse_moe.experts.90.w3", "model.layers.4.block_sparse_moe.experts.91.w3", "model.layers.4.block_sparse_moe.experts.92.w3", "model.layers.4.block_sparse_moe.experts.93.w3", "model.layers.4.block_sparse_moe.experts.94.w3", "model.layers.4.block_sparse_moe.experts.95.w3", "model.layers.4.block_sparse_moe.experts.96.w3", "model.layers.4.block_sparse_moe.experts.97.w3", "model.layers.4.block_sparse_moe.experts.98.w3", "model.layers.4.block_sparse_moe.experts.99.w3", "model.layers.4.block_sparse_moe.experts.100.w3", "model.layers.4.block_sparse_moe.experts.101.w3", "model.layers.4.block_sparse_moe.experts.102.w3", "model.layers.4.block_sparse_moe.experts.103.w3", "model.layers.4.block_sparse_moe.experts.104.w3", "model.layers.4.block_sparse_moe.experts.105.w3", "model.layers.4.block_sparse_moe.experts.106.w3", "model.layers.4.block_sparse_moe.experts.107.w3", "model.layers.4.block_sparse_moe.experts.108.w3", "model.layers.4.block_sparse_moe.experts.109.w3", "model.layers.4.block_sparse_moe.experts.110.w3", "model.layers.4.block_sparse_moe.experts.111.w3", "model.layers.4.block_sparse_moe.experts.112.w3", "model.layers.4.block_sparse_moe.experts.113.w3", "model.layers.4.block_sparse_moe.experts.114.w3", "model.layers.4.block_sparse_moe.experts.115.w3", "model.layers.4.block_sparse_moe.experts.116.w3", "model.layers.4.block_sparse_moe.experts.117.w3", "model.layers.4.block_sparse_moe.experts.118.w3", "model.layers.4.block_sparse_moe.experts.119.w3", "model.layers.4.block_sparse_moe.experts.120.w3", "model.layers.4.block_sparse_moe.experts.121.w3", "model.layers.4.block_sparse_moe.experts.122.w3", "model.layers.4.block_sparse_moe.experts.123.w3", "model.layers.4.block_sparse_moe.experts.124.w3", "model.layers.4.block_sparse_moe.experts.125.w3", "model.layers.4.block_sparse_moe.experts.126.w3", "model.layers.4.block_sparse_moe.experts.127.w3", "model.layers.4.block_sparse_moe.experts.128.w3", "model.layers.4.block_sparse_moe.experts.129.w3", "model.layers.4.block_sparse_moe.experts.130.w3", "model.layers.4.block_sparse_moe.experts.131.w3", "model.layers.4.block_sparse_moe.experts.132.w3", "model.layers.4.block_sparse_moe.experts.133.w3", "model.layers.4.block_sparse_moe.experts.134.w3", "model.layers.4.block_sparse_moe.experts.135.w3", "model.layers.4.block_sparse_moe.experts.136.w3", "model.layers.4.block_sparse_moe.experts.137.w3", "model.layers.4.block_sparse_moe.experts.138.w3", "model.layers.4.block_sparse_moe.experts.139.w3", "model.layers.4.block_sparse_moe.experts.140.w3", "model.layers.4.block_sparse_moe.experts.141.w3", "model.layers.4.block_sparse_moe.experts.142.w3", "model.layers.4.block_sparse_moe.experts.143.w3", "model.layers.4.block_sparse_moe.experts.144.w3", "model.layers.4.block_sparse_moe.experts.145.w3", "model.layers.4.block_sparse_moe.experts.146.w3", "model.layers.4.block_sparse_moe.experts.147.w3", "model.layers.4.block_sparse_moe.experts.148.w3", "model.layers.4.block_sparse_moe.experts.149.w3", "model.layers.4.block_sparse_moe.experts.150.w3", "model.layers.4.block_sparse_moe.experts.151.w3", "model.layers.4.block_sparse_moe.experts.152.w3", "model.layers.4.block_sparse_moe.experts.153.w3", "model.layers.4.block_sparse_moe.experts.154.w3", "model.layers.4.block_sparse_moe.experts.155.w3", "model.layers.4.block_sparse_moe.experts.156.w3", "model.layers.4.block_sparse_moe.experts.157.w3", "model.layers.4.block_sparse_moe.experts.158.w3", "model.layers.4.block_sparse_moe.experts.159.w3", "model.layers.4.block_sparse_moe.experts.160.w3", "model.layers.4.block_sparse_moe.experts.161.w3", "model.layers.4.block_sparse_moe.experts.162.w3", "model.layers.4.block_sparse_moe.experts.163.w3", "model.layers.4.block_sparse_moe.experts.164.w3", "model.layers.4.block_sparse_moe.experts.165.w3", "model.layers.4.block_sparse_moe.experts.166.w3", "model.layers.4.block_sparse_moe.experts.167.w3", "model.layers.4.block_sparse_moe.experts.168.w3", "model.layers.4.block_sparse_moe.experts.169.w3", "model.layers.4.block_sparse_moe.experts.170.w3", "model.layers.4.block_sparse_moe.experts.171.w3", "model.layers.4.block_sparse_moe.experts.172.w3", "model.layers.4.block_sparse_moe.experts.173.w3", "model.layers.4.block_sparse_moe.experts.174.w3", "model.layers.4.block_sparse_moe.experts.175.w3", "model.layers.4.block_sparse_moe.experts.176.w3", "model.layers.4.block_sparse_moe.experts.177.w3", "model.layers.4.block_sparse_moe.experts.178.w3", "model.layers.4.block_sparse_moe.experts.179.w3", "model.layers.4.block_sparse_moe.experts.180.w3", "model.layers.4.block_sparse_moe.experts.181.w3", "model.layers.4.block_sparse_moe.experts.182.w3", "model.layers.4.block_sparse_moe.experts.183.w3", "model.layers.4.block_sparse_moe.experts.184.w3", "model.layers.4.block_sparse_moe.experts.185.w3", "model.layers.4.block_sparse_moe.experts.186.w3", "model.layers.4.block_sparse_moe.experts.187.w3", "model.layers.4.block_sparse_moe.experts.188.w3", "model.layers.4.block_sparse_moe.experts.189.w3", "model.layers.4.block_sparse_moe.experts.190.w3", "model.layers.4.block_sparse_moe.experts.191.w3", "model.layers.4.block_sparse_moe.experts.192.w3", "model.layers.4.block_sparse_moe.experts.193.w3", "model.layers.4.block_sparse_moe.experts.194.w3", "model.layers.4.block_sparse_moe.experts.195.w3", "model.layers.4.block_sparse_moe.experts.196.w3", "model.layers.4.block_sparse_moe.experts.197.w3", "model.layers.4.block_sparse_moe.experts.198.w3", "model.layers.4.block_sparse_moe.experts.199.w3", "model.layers.4.block_sparse_moe.experts.200.w3", "model.layers.4.block_sparse_moe.experts.201.w3", "model.layers.4.block_sparse_moe.experts.202.w3", "model.layers.4.block_sparse_moe.experts.203.w3", "model.layers.4.block_sparse_moe.experts.204.w3", "model.layers.4.block_sparse_moe.experts.205.w3", "model.layers.4.block_sparse_moe.experts.206.w3", "model.layers.4.block_sparse_moe.experts.207.w3", "model.layers.4.block_sparse_moe.experts.208.w3", "model.layers.4.block_sparse_moe.experts.209.w3", "model.layers.4.block_sparse_moe.experts.210.w3", "model.layers.4.block_sparse_moe.experts.211.w3", "model.layers.4.block_sparse_moe.experts.212.w3", "model.layers.4.block_sparse_moe.experts.213.w3", "model.layers.4.block_sparse_moe.experts.214.w3", "model.layers.4.block_sparse_moe.experts.215.w3", "model.layers.4.block_sparse_moe.experts.216.w3", "model.layers.4.block_sparse_moe.experts.217.w3", "model.layers.4.block_sparse_moe.experts.218.w3", "model.layers.4.block_sparse_moe.experts.219.w3", "model.layers.4.block_sparse_moe.experts.220.w3", "model.layers.4.block_sparse_moe.experts.221.w3", "model.layers.4.block_sparse_moe.experts.222.w3", "model.layers.4.block_sparse_moe.experts.223.w3", "model.layers.4.block_sparse_moe.experts.224.w3", "model.layers.4.block_sparse_moe.experts.225.w3", "model.layers.4.block_sparse_moe.experts.226.w3", "model.layers.4.block_sparse_moe.experts.227.w3", "model.layers.4.block_sparse_moe.experts.228.w3", "model.layers.4.block_sparse_moe.experts.229.w3", "model.layers.4.block_sparse_moe.experts.230.w3", "model.layers.4.block_sparse_moe.experts.231.w3", "model.layers.4.block_sparse_moe.experts.232.w3", "model.layers.4.block_sparse_moe.experts.233.w3", "model.layers.4.block_sparse_moe.experts.234.w3", "model.layers.4.block_sparse_moe.experts.235.w3", "model.layers.4.block_sparse_moe.experts.236.w3", "model.layers.4.block_sparse_moe.experts.237.w3", "model.layers.4.block_sparse_moe.experts.238.w3", "model.layers.4.block_sparse_moe.experts.239.w3", "model.layers.4.block_sparse_moe.experts.240.w3", "model.layers.4.block_sparse_moe.experts.241.w3", "model.layers.4.block_sparse_moe.experts.242.w3", "model.layers.4.block_sparse_moe.experts.243.w3", "model.layers.4.block_sparse_moe.experts.244.w3", "model.layers.4.block_sparse_moe.experts.245.w3", "model.layers.4.block_sparse_moe.experts.246.w3", "model.layers.4.block_sparse_moe.experts.247.w3", "model.layers.4.block_sparse_moe.experts.248.w3", "model.layers.4.block_sparse_moe.experts.249.w3", "model.layers.4.block_sparse_moe.experts.250.w3", "model.layers.4.block_sparse_moe.experts.251.w3", "model.layers.4.block_sparse_moe.experts.252.w3", "model.layers.4.block_sparse_moe.experts.253.w3", "model.layers.4.block_sparse_moe.experts.254.w3", "model.layers.4.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0016158729791641013, "dbits": 2415919104 } ] }, { "idx": 24, "layers": [ "model.layers.4.block_sparse_moe.experts.0.w2", "model.layers.4.block_sparse_moe.experts.1.w2", "model.layers.4.block_sparse_moe.experts.2.w2", "model.layers.4.block_sparse_moe.experts.3.w2", "model.layers.4.block_sparse_moe.experts.4.w2", "model.layers.4.block_sparse_moe.experts.5.w2", "model.layers.4.block_sparse_moe.experts.6.w2", "model.layers.4.block_sparse_moe.experts.7.w2", "model.layers.4.block_sparse_moe.experts.8.w2", "model.layers.4.block_sparse_moe.experts.9.w2", "model.layers.4.block_sparse_moe.experts.10.w2", "model.layers.4.block_sparse_moe.experts.11.w2", "model.layers.4.block_sparse_moe.experts.12.w2", "model.layers.4.block_sparse_moe.experts.13.w2", "model.layers.4.block_sparse_moe.experts.14.w2", "model.layers.4.block_sparse_moe.experts.15.w2", "model.layers.4.block_sparse_moe.experts.16.w2", "model.layers.4.block_sparse_moe.experts.17.w2", "model.layers.4.block_sparse_moe.experts.18.w2", "model.layers.4.block_sparse_moe.experts.19.w2", "model.layers.4.block_sparse_moe.experts.20.w2", "model.layers.4.block_sparse_moe.experts.21.w2", "model.layers.4.block_sparse_moe.experts.22.w2", "model.layers.4.block_sparse_moe.experts.23.w2", "model.layers.4.block_sparse_moe.experts.24.w2", "model.layers.4.block_sparse_moe.experts.25.w2", "model.layers.4.block_sparse_moe.experts.26.w2", "model.layers.4.block_sparse_moe.experts.27.w2", "model.layers.4.block_sparse_moe.experts.28.w2", "model.layers.4.block_sparse_moe.experts.29.w2", "model.layers.4.block_sparse_moe.experts.30.w2", "model.layers.4.block_sparse_moe.experts.31.w2", "model.layers.4.block_sparse_moe.experts.32.w2", "model.layers.4.block_sparse_moe.experts.33.w2", "model.layers.4.block_sparse_moe.experts.34.w2", "model.layers.4.block_sparse_moe.experts.35.w2", "model.layers.4.block_sparse_moe.experts.36.w2", "model.layers.4.block_sparse_moe.experts.37.w2", "model.layers.4.block_sparse_moe.experts.38.w2", "model.layers.4.block_sparse_moe.experts.39.w2", "model.layers.4.block_sparse_moe.experts.40.w2", "model.layers.4.block_sparse_moe.experts.41.w2", "model.layers.4.block_sparse_moe.experts.42.w2", "model.layers.4.block_sparse_moe.experts.43.w2", "model.layers.4.block_sparse_moe.experts.44.w2", "model.layers.4.block_sparse_moe.experts.45.w2", "model.layers.4.block_sparse_moe.experts.46.w2", "model.layers.4.block_sparse_moe.experts.47.w2", "model.layers.4.block_sparse_moe.experts.48.w2", "model.layers.4.block_sparse_moe.experts.49.w2", "model.layers.4.block_sparse_moe.experts.50.w2", "model.layers.4.block_sparse_moe.experts.51.w2", "model.layers.4.block_sparse_moe.experts.52.w2", "model.layers.4.block_sparse_moe.experts.53.w2", "model.layers.4.block_sparse_moe.experts.54.w2", "model.layers.4.block_sparse_moe.experts.55.w2", "model.layers.4.block_sparse_moe.experts.56.w2", "model.layers.4.block_sparse_moe.experts.57.w2", "model.layers.4.block_sparse_moe.experts.58.w2", "model.layers.4.block_sparse_moe.experts.59.w2", "model.layers.4.block_sparse_moe.experts.60.w2", "model.layers.4.block_sparse_moe.experts.61.w2", "model.layers.4.block_sparse_moe.experts.62.w2", "model.layers.4.block_sparse_moe.experts.63.w2", "model.layers.4.block_sparse_moe.experts.64.w2", "model.layers.4.block_sparse_moe.experts.65.w2", "model.layers.4.block_sparse_moe.experts.66.w2", "model.layers.4.block_sparse_moe.experts.67.w2", "model.layers.4.block_sparse_moe.experts.68.w2", "model.layers.4.block_sparse_moe.experts.69.w2", "model.layers.4.block_sparse_moe.experts.70.w2", "model.layers.4.block_sparse_moe.experts.71.w2", "model.layers.4.block_sparse_moe.experts.72.w2", "model.layers.4.block_sparse_moe.experts.73.w2", "model.layers.4.block_sparse_moe.experts.74.w2", "model.layers.4.block_sparse_moe.experts.75.w2", "model.layers.4.block_sparse_moe.experts.76.w2", "model.layers.4.block_sparse_moe.experts.77.w2", "model.layers.4.block_sparse_moe.experts.78.w2", "model.layers.4.block_sparse_moe.experts.79.w2", "model.layers.4.block_sparse_moe.experts.80.w2", "model.layers.4.block_sparse_moe.experts.81.w2", "model.layers.4.block_sparse_moe.experts.82.w2", "model.layers.4.block_sparse_moe.experts.83.w2", "model.layers.4.block_sparse_moe.experts.84.w2", "model.layers.4.block_sparse_moe.experts.85.w2", "model.layers.4.block_sparse_moe.experts.86.w2", "model.layers.4.block_sparse_moe.experts.87.w2", "model.layers.4.block_sparse_moe.experts.88.w2", "model.layers.4.block_sparse_moe.experts.89.w2", "model.layers.4.block_sparse_moe.experts.90.w2", "model.layers.4.block_sparse_moe.experts.91.w2", "model.layers.4.block_sparse_moe.experts.92.w2", "model.layers.4.block_sparse_moe.experts.93.w2", "model.layers.4.block_sparse_moe.experts.94.w2", "model.layers.4.block_sparse_moe.experts.95.w2", "model.layers.4.block_sparse_moe.experts.96.w2", "model.layers.4.block_sparse_moe.experts.97.w2", "model.layers.4.block_sparse_moe.experts.98.w2", "model.layers.4.block_sparse_moe.experts.99.w2", "model.layers.4.block_sparse_moe.experts.100.w2", "model.layers.4.block_sparse_moe.experts.101.w2", "model.layers.4.block_sparse_moe.experts.102.w2", "model.layers.4.block_sparse_moe.experts.103.w2", "model.layers.4.block_sparse_moe.experts.104.w2", "model.layers.4.block_sparse_moe.experts.105.w2", "model.layers.4.block_sparse_moe.experts.106.w2", "model.layers.4.block_sparse_moe.experts.107.w2", "model.layers.4.block_sparse_moe.experts.108.w2", "model.layers.4.block_sparse_moe.experts.109.w2", "model.layers.4.block_sparse_moe.experts.110.w2", "model.layers.4.block_sparse_moe.experts.111.w2", "model.layers.4.block_sparse_moe.experts.112.w2", "model.layers.4.block_sparse_moe.experts.113.w2", "model.layers.4.block_sparse_moe.experts.114.w2", "model.layers.4.block_sparse_moe.experts.115.w2", "model.layers.4.block_sparse_moe.experts.116.w2", "model.layers.4.block_sparse_moe.experts.117.w2", "model.layers.4.block_sparse_moe.experts.118.w2", "model.layers.4.block_sparse_moe.experts.119.w2", "model.layers.4.block_sparse_moe.experts.120.w2", "model.layers.4.block_sparse_moe.experts.121.w2", "model.layers.4.block_sparse_moe.experts.122.w2", "model.layers.4.block_sparse_moe.experts.123.w2", "model.layers.4.block_sparse_moe.experts.124.w2", "model.layers.4.block_sparse_moe.experts.125.w2", "model.layers.4.block_sparse_moe.experts.126.w2", "model.layers.4.block_sparse_moe.experts.127.w2", "model.layers.4.block_sparse_moe.experts.128.w2", "model.layers.4.block_sparse_moe.experts.129.w2", "model.layers.4.block_sparse_moe.experts.130.w2", "model.layers.4.block_sparse_moe.experts.131.w2", "model.layers.4.block_sparse_moe.experts.132.w2", "model.layers.4.block_sparse_moe.experts.133.w2", "model.layers.4.block_sparse_moe.experts.134.w2", "model.layers.4.block_sparse_moe.experts.135.w2", "model.layers.4.block_sparse_moe.experts.136.w2", "model.layers.4.block_sparse_moe.experts.137.w2", "model.layers.4.block_sparse_moe.experts.138.w2", "model.layers.4.block_sparse_moe.experts.139.w2", "model.layers.4.block_sparse_moe.experts.140.w2", "model.layers.4.block_sparse_moe.experts.141.w2", "model.layers.4.block_sparse_moe.experts.142.w2", "model.layers.4.block_sparse_moe.experts.143.w2", "model.layers.4.block_sparse_moe.experts.144.w2", "model.layers.4.block_sparse_moe.experts.145.w2", "model.layers.4.block_sparse_moe.experts.146.w2", "model.layers.4.block_sparse_moe.experts.147.w2", "model.layers.4.block_sparse_moe.experts.148.w2", "model.layers.4.block_sparse_moe.experts.149.w2", "model.layers.4.block_sparse_moe.experts.150.w2", "model.layers.4.block_sparse_moe.experts.151.w2", "model.layers.4.block_sparse_moe.experts.152.w2", "model.layers.4.block_sparse_moe.experts.153.w2", "model.layers.4.block_sparse_moe.experts.154.w2", "model.layers.4.block_sparse_moe.experts.155.w2", "model.layers.4.block_sparse_moe.experts.156.w2", "model.layers.4.block_sparse_moe.experts.157.w2", "model.layers.4.block_sparse_moe.experts.158.w2", "model.layers.4.block_sparse_moe.experts.159.w2", "model.layers.4.block_sparse_moe.experts.160.w2", "model.layers.4.block_sparse_moe.experts.161.w2", "model.layers.4.block_sparse_moe.experts.162.w2", "model.layers.4.block_sparse_moe.experts.163.w2", "model.layers.4.block_sparse_moe.experts.164.w2", "model.layers.4.block_sparse_moe.experts.165.w2", "model.layers.4.block_sparse_moe.experts.166.w2", "model.layers.4.block_sparse_moe.experts.167.w2", "model.layers.4.block_sparse_moe.experts.168.w2", "model.layers.4.block_sparse_moe.experts.169.w2", "model.layers.4.block_sparse_moe.experts.170.w2", "model.layers.4.block_sparse_moe.experts.171.w2", "model.layers.4.block_sparse_moe.experts.172.w2", "model.layers.4.block_sparse_moe.experts.173.w2", "model.layers.4.block_sparse_moe.experts.174.w2", "model.layers.4.block_sparse_moe.experts.175.w2", "model.layers.4.block_sparse_moe.experts.176.w2", "model.layers.4.block_sparse_moe.experts.177.w2", "model.layers.4.block_sparse_moe.experts.178.w2", "model.layers.4.block_sparse_moe.experts.179.w2", "model.layers.4.block_sparse_moe.experts.180.w2", "model.layers.4.block_sparse_moe.experts.181.w2", "model.layers.4.block_sparse_moe.experts.182.w2", "model.layers.4.block_sparse_moe.experts.183.w2", "model.layers.4.block_sparse_moe.experts.184.w2", "model.layers.4.block_sparse_moe.experts.185.w2", "model.layers.4.block_sparse_moe.experts.186.w2", "model.layers.4.block_sparse_moe.experts.187.w2", "model.layers.4.block_sparse_moe.experts.188.w2", "model.layers.4.block_sparse_moe.experts.189.w2", "model.layers.4.block_sparse_moe.experts.190.w2", "model.layers.4.block_sparse_moe.experts.191.w2", "model.layers.4.block_sparse_moe.experts.192.w2", "model.layers.4.block_sparse_moe.experts.193.w2", "model.layers.4.block_sparse_moe.experts.194.w2", "model.layers.4.block_sparse_moe.experts.195.w2", "model.layers.4.block_sparse_moe.experts.196.w2", "model.layers.4.block_sparse_moe.experts.197.w2", "model.layers.4.block_sparse_moe.experts.198.w2", "model.layers.4.block_sparse_moe.experts.199.w2", "model.layers.4.block_sparse_moe.experts.200.w2", "model.layers.4.block_sparse_moe.experts.201.w2", "model.layers.4.block_sparse_moe.experts.202.w2", "model.layers.4.block_sparse_moe.experts.203.w2", "model.layers.4.block_sparse_moe.experts.204.w2", "model.layers.4.block_sparse_moe.experts.205.w2", "model.layers.4.block_sparse_moe.experts.206.w2", "model.layers.4.block_sparse_moe.experts.207.w2", "model.layers.4.block_sparse_moe.experts.208.w2", "model.layers.4.block_sparse_moe.experts.209.w2", "model.layers.4.block_sparse_moe.experts.210.w2", "model.layers.4.block_sparse_moe.experts.211.w2", "model.layers.4.block_sparse_moe.experts.212.w2", "model.layers.4.block_sparse_moe.experts.213.w2", "model.layers.4.block_sparse_moe.experts.214.w2", "model.layers.4.block_sparse_moe.experts.215.w2", "model.layers.4.block_sparse_moe.experts.216.w2", "model.layers.4.block_sparse_moe.experts.217.w2", "model.layers.4.block_sparse_moe.experts.218.w2", "model.layers.4.block_sparse_moe.experts.219.w2", "model.layers.4.block_sparse_moe.experts.220.w2", "model.layers.4.block_sparse_moe.experts.221.w2", "model.layers.4.block_sparse_moe.experts.222.w2", "model.layers.4.block_sparse_moe.experts.223.w2", "model.layers.4.block_sparse_moe.experts.224.w2", "model.layers.4.block_sparse_moe.experts.225.w2", "model.layers.4.block_sparse_moe.experts.226.w2", "model.layers.4.block_sparse_moe.experts.227.w2", "model.layers.4.block_sparse_moe.experts.228.w2", "model.layers.4.block_sparse_moe.experts.229.w2", "model.layers.4.block_sparse_moe.experts.230.w2", "model.layers.4.block_sparse_moe.experts.231.w2", "model.layers.4.block_sparse_moe.experts.232.w2", "model.layers.4.block_sparse_moe.experts.233.w2", "model.layers.4.block_sparse_moe.experts.234.w2", "model.layers.4.block_sparse_moe.experts.235.w2", "model.layers.4.block_sparse_moe.experts.236.w2", "model.layers.4.block_sparse_moe.experts.237.w2", "model.layers.4.block_sparse_moe.experts.238.w2", "model.layers.4.block_sparse_moe.experts.239.w2", "model.layers.4.block_sparse_moe.experts.240.w2", "model.layers.4.block_sparse_moe.experts.241.w2", "model.layers.4.block_sparse_moe.experts.242.w2", "model.layers.4.block_sparse_moe.experts.243.w2", "model.layers.4.block_sparse_moe.experts.244.w2", "model.layers.4.block_sparse_moe.experts.245.w2", "model.layers.4.block_sparse_moe.experts.246.w2", "model.layers.4.block_sparse_moe.experts.247.w2", "model.layers.4.block_sparse_moe.experts.248.w2", "model.layers.4.block_sparse_moe.experts.249.w2", "model.layers.4.block_sparse_moe.experts.250.w2", "model.layers.4.block_sparse_moe.experts.251.w2", "model.layers.4.block_sparse_moe.experts.252.w2", "model.layers.4.block_sparse_moe.experts.253.w2", "model.layers.4.block_sparse_moe.experts.254.w2", "model.layers.4.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.001665741205215454, "dbits": 1207959552 } ] }, { "idx": 25, "layers": [ "model.layers.5.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0006949275732039961, "dbits": 18874368 } ] }, { "idx": 26, "layers": [ "model.layers.5.self_attn.k_proj", "model.layers.5.self_attn.v_proj" ], "candidates": [ { "dkld": -0.013756248354911738, "dbits": 6291456 } ] }, { "idx": 27, "layers": [ "model.layers.5.self_attn.o_proj" ], "candidates": [ { "dkld": -0.002620352804660775, "dbits": 18874368 } ] }, { "idx": 28, "layers": [ "model.layers.5.block_sparse_moe.experts.0.w1", "model.layers.5.block_sparse_moe.experts.1.w1", "model.layers.5.block_sparse_moe.experts.2.w1", "model.layers.5.block_sparse_moe.experts.3.w1", "model.layers.5.block_sparse_moe.experts.4.w1", "model.layers.5.block_sparse_moe.experts.5.w1", "model.layers.5.block_sparse_moe.experts.6.w1", "model.layers.5.block_sparse_moe.experts.7.w1", "model.layers.5.block_sparse_moe.experts.8.w1", "model.layers.5.block_sparse_moe.experts.9.w1", "model.layers.5.block_sparse_moe.experts.10.w1", "model.layers.5.block_sparse_moe.experts.11.w1", "model.layers.5.block_sparse_moe.experts.12.w1", "model.layers.5.block_sparse_moe.experts.13.w1", "model.layers.5.block_sparse_moe.experts.14.w1", "model.layers.5.block_sparse_moe.experts.15.w1", "model.layers.5.block_sparse_moe.experts.16.w1", "model.layers.5.block_sparse_moe.experts.17.w1", "model.layers.5.block_sparse_moe.experts.18.w1", "model.layers.5.block_sparse_moe.experts.19.w1", "model.layers.5.block_sparse_moe.experts.20.w1", "model.layers.5.block_sparse_moe.experts.21.w1", "model.layers.5.block_sparse_moe.experts.22.w1", "model.layers.5.block_sparse_moe.experts.23.w1", "model.layers.5.block_sparse_moe.experts.24.w1", "model.layers.5.block_sparse_moe.experts.25.w1", "model.layers.5.block_sparse_moe.experts.26.w1", "model.layers.5.block_sparse_moe.experts.27.w1", "model.layers.5.block_sparse_moe.experts.28.w1", "model.layers.5.block_sparse_moe.experts.29.w1", "model.layers.5.block_sparse_moe.experts.30.w1", "model.layers.5.block_sparse_moe.experts.31.w1", "model.layers.5.block_sparse_moe.experts.32.w1", "model.layers.5.block_sparse_moe.experts.33.w1", "model.layers.5.block_sparse_moe.experts.34.w1", "model.layers.5.block_sparse_moe.experts.35.w1", "model.layers.5.block_sparse_moe.experts.36.w1", "model.layers.5.block_sparse_moe.experts.37.w1", "model.layers.5.block_sparse_moe.experts.38.w1", "model.layers.5.block_sparse_moe.experts.39.w1", "model.layers.5.block_sparse_moe.experts.40.w1", "model.layers.5.block_sparse_moe.experts.41.w1", "model.layers.5.block_sparse_moe.experts.42.w1", "model.layers.5.block_sparse_moe.experts.43.w1", "model.layers.5.block_sparse_moe.experts.44.w1", "model.layers.5.block_sparse_moe.experts.45.w1", "model.layers.5.block_sparse_moe.experts.46.w1", "model.layers.5.block_sparse_moe.experts.47.w1", "model.layers.5.block_sparse_moe.experts.48.w1", "model.layers.5.block_sparse_moe.experts.49.w1", "model.layers.5.block_sparse_moe.experts.50.w1", "model.layers.5.block_sparse_moe.experts.51.w1", "model.layers.5.block_sparse_moe.experts.52.w1", "model.layers.5.block_sparse_moe.experts.53.w1", "model.layers.5.block_sparse_moe.experts.54.w1", "model.layers.5.block_sparse_moe.experts.55.w1", "model.layers.5.block_sparse_moe.experts.56.w1", "model.layers.5.block_sparse_moe.experts.57.w1", "model.layers.5.block_sparse_moe.experts.58.w1", "model.layers.5.block_sparse_moe.experts.59.w1", "model.layers.5.block_sparse_moe.experts.60.w1", "model.layers.5.block_sparse_moe.experts.61.w1", "model.layers.5.block_sparse_moe.experts.62.w1", "model.layers.5.block_sparse_moe.experts.63.w1", "model.layers.5.block_sparse_moe.experts.64.w1", "model.layers.5.block_sparse_moe.experts.65.w1", "model.layers.5.block_sparse_moe.experts.66.w1", "model.layers.5.block_sparse_moe.experts.67.w1", "model.layers.5.block_sparse_moe.experts.68.w1", "model.layers.5.block_sparse_moe.experts.69.w1", "model.layers.5.block_sparse_moe.experts.70.w1", "model.layers.5.block_sparse_moe.experts.71.w1", "model.layers.5.block_sparse_moe.experts.72.w1", "model.layers.5.block_sparse_moe.experts.73.w1", "model.layers.5.block_sparse_moe.experts.74.w1", "model.layers.5.block_sparse_moe.experts.75.w1", "model.layers.5.block_sparse_moe.experts.76.w1", "model.layers.5.block_sparse_moe.experts.77.w1", "model.layers.5.block_sparse_moe.experts.78.w1", "model.layers.5.block_sparse_moe.experts.79.w1", "model.layers.5.block_sparse_moe.experts.80.w1", "model.layers.5.block_sparse_moe.experts.81.w1", "model.layers.5.block_sparse_moe.experts.82.w1", "model.layers.5.block_sparse_moe.experts.83.w1", "model.layers.5.block_sparse_moe.experts.84.w1", "model.layers.5.block_sparse_moe.experts.85.w1", "model.layers.5.block_sparse_moe.experts.86.w1", "model.layers.5.block_sparse_moe.experts.87.w1", "model.layers.5.block_sparse_moe.experts.88.w1", "model.layers.5.block_sparse_moe.experts.89.w1", "model.layers.5.block_sparse_moe.experts.90.w1", "model.layers.5.block_sparse_moe.experts.91.w1", "model.layers.5.block_sparse_moe.experts.92.w1", "model.layers.5.block_sparse_moe.experts.93.w1", "model.layers.5.block_sparse_moe.experts.94.w1", "model.layers.5.block_sparse_moe.experts.95.w1", "model.layers.5.block_sparse_moe.experts.96.w1", "model.layers.5.block_sparse_moe.experts.97.w1", "model.layers.5.block_sparse_moe.experts.98.w1", "model.layers.5.block_sparse_moe.experts.99.w1", "model.layers.5.block_sparse_moe.experts.100.w1", "model.layers.5.block_sparse_moe.experts.101.w1", "model.layers.5.block_sparse_moe.experts.102.w1", "model.layers.5.block_sparse_moe.experts.103.w1", "model.layers.5.block_sparse_moe.experts.104.w1", "model.layers.5.block_sparse_moe.experts.105.w1", "model.layers.5.block_sparse_moe.experts.106.w1", "model.layers.5.block_sparse_moe.experts.107.w1", "model.layers.5.block_sparse_moe.experts.108.w1", "model.layers.5.block_sparse_moe.experts.109.w1", "model.layers.5.block_sparse_moe.experts.110.w1", "model.layers.5.block_sparse_moe.experts.111.w1", "model.layers.5.block_sparse_moe.experts.112.w1", "model.layers.5.block_sparse_moe.experts.113.w1", "model.layers.5.block_sparse_moe.experts.114.w1", "model.layers.5.block_sparse_moe.experts.115.w1", "model.layers.5.block_sparse_moe.experts.116.w1", "model.layers.5.block_sparse_moe.experts.117.w1", "model.layers.5.block_sparse_moe.experts.118.w1", "model.layers.5.block_sparse_moe.experts.119.w1", "model.layers.5.block_sparse_moe.experts.120.w1", "model.layers.5.block_sparse_moe.experts.121.w1", "model.layers.5.block_sparse_moe.experts.122.w1", "model.layers.5.block_sparse_moe.experts.123.w1", "model.layers.5.block_sparse_moe.experts.124.w1", "model.layers.5.block_sparse_moe.experts.125.w1", "model.layers.5.block_sparse_moe.experts.126.w1", "model.layers.5.block_sparse_moe.experts.127.w1", "model.layers.5.block_sparse_moe.experts.128.w1", "model.layers.5.block_sparse_moe.experts.129.w1", "model.layers.5.block_sparse_moe.experts.130.w1", "model.layers.5.block_sparse_moe.experts.131.w1", "model.layers.5.block_sparse_moe.experts.132.w1", "model.layers.5.block_sparse_moe.experts.133.w1", "model.layers.5.block_sparse_moe.experts.134.w1", "model.layers.5.block_sparse_moe.experts.135.w1", "model.layers.5.block_sparse_moe.experts.136.w1", "model.layers.5.block_sparse_moe.experts.137.w1", "model.layers.5.block_sparse_moe.experts.138.w1", "model.layers.5.block_sparse_moe.experts.139.w1", "model.layers.5.block_sparse_moe.experts.140.w1", "model.layers.5.block_sparse_moe.experts.141.w1", "model.layers.5.block_sparse_moe.experts.142.w1", "model.layers.5.block_sparse_moe.experts.143.w1", "model.layers.5.block_sparse_moe.experts.144.w1", "model.layers.5.block_sparse_moe.experts.145.w1", "model.layers.5.block_sparse_moe.experts.146.w1", "model.layers.5.block_sparse_moe.experts.147.w1", "model.layers.5.block_sparse_moe.experts.148.w1", "model.layers.5.block_sparse_moe.experts.149.w1", "model.layers.5.block_sparse_moe.experts.150.w1", "model.layers.5.block_sparse_moe.experts.151.w1", "model.layers.5.block_sparse_moe.experts.152.w1", "model.layers.5.block_sparse_moe.experts.153.w1", "model.layers.5.block_sparse_moe.experts.154.w1", "model.layers.5.block_sparse_moe.experts.155.w1", "model.layers.5.block_sparse_moe.experts.156.w1", "model.layers.5.block_sparse_moe.experts.157.w1", "model.layers.5.block_sparse_moe.experts.158.w1", "model.layers.5.block_sparse_moe.experts.159.w1", "model.layers.5.block_sparse_moe.experts.160.w1", "model.layers.5.block_sparse_moe.experts.161.w1", "model.layers.5.block_sparse_moe.experts.162.w1", "model.layers.5.block_sparse_moe.experts.163.w1", "model.layers.5.block_sparse_moe.experts.164.w1", "model.layers.5.block_sparse_moe.experts.165.w1", "model.layers.5.block_sparse_moe.experts.166.w1", "model.layers.5.block_sparse_moe.experts.167.w1", "model.layers.5.block_sparse_moe.experts.168.w1", "model.layers.5.block_sparse_moe.experts.169.w1", "model.layers.5.block_sparse_moe.experts.170.w1", "model.layers.5.block_sparse_moe.experts.171.w1", "model.layers.5.block_sparse_moe.experts.172.w1", "model.layers.5.block_sparse_moe.experts.173.w1", "model.layers.5.block_sparse_moe.experts.174.w1", "model.layers.5.block_sparse_moe.experts.175.w1", "model.layers.5.block_sparse_moe.experts.176.w1", "model.layers.5.block_sparse_moe.experts.177.w1", "model.layers.5.block_sparse_moe.experts.178.w1", "model.layers.5.block_sparse_moe.experts.179.w1", "model.layers.5.block_sparse_moe.experts.180.w1", "model.layers.5.block_sparse_moe.experts.181.w1", "model.layers.5.block_sparse_moe.experts.182.w1", "model.layers.5.block_sparse_moe.experts.183.w1", "model.layers.5.block_sparse_moe.experts.184.w1", "model.layers.5.block_sparse_moe.experts.185.w1", "model.layers.5.block_sparse_moe.experts.186.w1", "model.layers.5.block_sparse_moe.experts.187.w1", "model.layers.5.block_sparse_moe.experts.188.w1", "model.layers.5.block_sparse_moe.experts.189.w1", "model.layers.5.block_sparse_moe.experts.190.w1", "model.layers.5.block_sparse_moe.experts.191.w1", "model.layers.5.block_sparse_moe.experts.192.w1", "model.layers.5.block_sparse_moe.experts.193.w1", "model.layers.5.block_sparse_moe.experts.194.w1", "model.layers.5.block_sparse_moe.experts.195.w1", "model.layers.5.block_sparse_moe.experts.196.w1", "model.layers.5.block_sparse_moe.experts.197.w1", "model.layers.5.block_sparse_moe.experts.198.w1", "model.layers.5.block_sparse_moe.experts.199.w1", "model.layers.5.block_sparse_moe.experts.200.w1", "model.layers.5.block_sparse_moe.experts.201.w1", "model.layers.5.block_sparse_moe.experts.202.w1", "model.layers.5.block_sparse_moe.experts.203.w1", "model.layers.5.block_sparse_moe.experts.204.w1", "model.layers.5.block_sparse_moe.experts.205.w1", "model.layers.5.block_sparse_moe.experts.206.w1", "model.layers.5.block_sparse_moe.experts.207.w1", "model.layers.5.block_sparse_moe.experts.208.w1", "model.layers.5.block_sparse_moe.experts.209.w1", "model.layers.5.block_sparse_moe.experts.210.w1", "model.layers.5.block_sparse_moe.experts.211.w1", "model.layers.5.block_sparse_moe.experts.212.w1", "model.layers.5.block_sparse_moe.experts.213.w1", "model.layers.5.block_sparse_moe.experts.214.w1", "model.layers.5.block_sparse_moe.experts.215.w1", "model.layers.5.block_sparse_moe.experts.216.w1", "model.layers.5.block_sparse_moe.experts.217.w1", "model.layers.5.block_sparse_moe.experts.218.w1", "model.layers.5.block_sparse_moe.experts.219.w1", "model.layers.5.block_sparse_moe.experts.220.w1", "model.layers.5.block_sparse_moe.experts.221.w1", "model.layers.5.block_sparse_moe.experts.222.w1", "model.layers.5.block_sparse_moe.experts.223.w1", "model.layers.5.block_sparse_moe.experts.224.w1", "model.layers.5.block_sparse_moe.experts.225.w1", "model.layers.5.block_sparse_moe.experts.226.w1", "model.layers.5.block_sparse_moe.experts.227.w1", "model.layers.5.block_sparse_moe.experts.228.w1", "model.layers.5.block_sparse_moe.experts.229.w1", "model.layers.5.block_sparse_moe.experts.230.w1", "model.layers.5.block_sparse_moe.experts.231.w1", "model.layers.5.block_sparse_moe.experts.232.w1", "model.layers.5.block_sparse_moe.experts.233.w1", "model.layers.5.block_sparse_moe.experts.234.w1", "model.layers.5.block_sparse_moe.experts.235.w1", "model.layers.5.block_sparse_moe.experts.236.w1", "model.layers.5.block_sparse_moe.experts.237.w1", "model.layers.5.block_sparse_moe.experts.238.w1", "model.layers.5.block_sparse_moe.experts.239.w1", "model.layers.5.block_sparse_moe.experts.240.w1", "model.layers.5.block_sparse_moe.experts.241.w1", "model.layers.5.block_sparse_moe.experts.242.w1", "model.layers.5.block_sparse_moe.experts.243.w1", "model.layers.5.block_sparse_moe.experts.244.w1", "model.layers.5.block_sparse_moe.experts.245.w1", "model.layers.5.block_sparse_moe.experts.246.w1", "model.layers.5.block_sparse_moe.experts.247.w1", "model.layers.5.block_sparse_moe.experts.248.w1", "model.layers.5.block_sparse_moe.experts.249.w1", "model.layers.5.block_sparse_moe.experts.250.w1", "model.layers.5.block_sparse_moe.experts.251.w1", "model.layers.5.block_sparse_moe.experts.252.w1", "model.layers.5.block_sparse_moe.experts.253.w1", "model.layers.5.block_sparse_moe.experts.254.w1", "model.layers.5.block_sparse_moe.experts.255.w1", "model.layers.5.block_sparse_moe.experts.0.w3", "model.layers.5.block_sparse_moe.experts.1.w3", "model.layers.5.block_sparse_moe.experts.2.w3", "model.layers.5.block_sparse_moe.experts.3.w3", "model.layers.5.block_sparse_moe.experts.4.w3", "model.layers.5.block_sparse_moe.experts.5.w3", "model.layers.5.block_sparse_moe.experts.6.w3", "model.layers.5.block_sparse_moe.experts.7.w3", "model.layers.5.block_sparse_moe.experts.8.w3", "model.layers.5.block_sparse_moe.experts.9.w3", "model.layers.5.block_sparse_moe.experts.10.w3", "model.layers.5.block_sparse_moe.experts.11.w3", "model.layers.5.block_sparse_moe.experts.12.w3", "model.layers.5.block_sparse_moe.experts.13.w3", "model.layers.5.block_sparse_moe.experts.14.w3", "model.layers.5.block_sparse_moe.experts.15.w3", "model.layers.5.block_sparse_moe.experts.16.w3", "model.layers.5.block_sparse_moe.experts.17.w3", "model.layers.5.block_sparse_moe.experts.18.w3", "model.layers.5.block_sparse_moe.experts.19.w3", "model.layers.5.block_sparse_moe.experts.20.w3", "model.layers.5.block_sparse_moe.experts.21.w3", "model.layers.5.block_sparse_moe.experts.22.w3", "model.layers.5.block_sparse_moe.experts.23.w3", "model.layers.5.block_sparse_moe.experts.24.w3", "model.layers.5.block_sparse_moe.experts.25.w3", "model.layers.5.block_sparse_moe.experts.26.w3", "model.layers.5.block_sparse_moe.experts.27.w3", "model.layers.5.block_sparse_moe.experts.28.w3", "model.layers.5.block_sparse_moe.experts.29.w3", "model.layers.5.block_sparse_moe.experts.30.w3", "model.layers.5.block_sparse_moe.experts.31.w3", "model.layers.5.block_sparse_moe.experts.32.w3", "model.layers.5.block_sparse_moe.experts.33.w3", "model.layers.5.block_sparse_moe.experts.34.w3", "model.layers.5.block_sparse_moe.experts.35.w3", "model.layers.5.block_sparse_moe.experts.36.w3", "model.layers.5.block_sparse_moe.experts.37.w3", "model.layers.5.block_sparse_moe.experts.38.w3", "model.layers.5.block_sparse_moe.experts.39.w3", "model.layers.5.block_sparse_moe.experts.40.w3", "model.layers.5.block_sparse_moe.experts.41.w3", "model.layers.5.block_sparse_moe.experts.42.w3", "model.layers.5.block_sparse_moe.experts.43.w3", "model.layers.5.block_sparse_moe.experts.44.w3", "model.layers.5.block_sparse_moe.experts.45.w3", "model.layers.5.block_sparse_moe.experts.46.w3", "model.layers.5.block_sparse_moe.experts.47.w3", "model.layers.5.block_sparse_moe.experts.48.w3", "model.layers.5.block_sparse_moe.experts.49.w3", "model.layers.5.block_sparse_moe.experts.50.w3", "model.layers.5.block_sparse_moe.experts.51.w3", "model.layers.5.block_sparse_moe.experts.52.w3", "model.layers.5.block_sparse_moe.experts.53.w3", "model.layers.5.block_sparse_moe.experts.54.w3", "model.layers.5.block_sparse_moe.experts.55.w3", "model.layers.5.block_sparse_moe.experts.56.w3", "model.layers.5.block_sparse_moe.experts.57.w3", "model.layers.5.block_sparse_moe.experts.58.w3", "model.layers.5.block_sparse_moe.experts.59.w3", "model.layers.5.block_sparse_moe.experts.60.w3", "model.layers.5.block_sparse_moe.experts.61.w3", "model.layers.5.block_sparse_moe.experts.62.w3", "model.layers.5.block_sparse_moe.experts.63.w3", "model.layers.5.block_sparse_moe.experts.64.w3", "model.layers.5.block_sparse_moe.experts.65.w3", "model.layers.5.block_sparse_moe.experts.66.w3", "model.layers.5.block_sparse_moe.experts.67.w3", "model.layers.5.block_sparse_moe.experts.68.w3", "model.layers.5.block_sparse_moe.experts.69.w3", "model.layers.5.block_sparse_moe.experts.70.w3", "model.layers.5.block_sparse_moe.experts.71.w3", "model.layers.5.block_sparse_moe.experts.72.w3", "model.layers.5.block_sparse_moe.experts.73.w3", "model.layers.5.block_sparse_moe.experts.74.w3", "model.layers.5.block_sparse_moe.experts.75.w3", "model.layers.5.block_sparse_moe.experts.76.w3", "model.layers.5.block_sparse_moe.experts.77.w3", "model.layers.5.block_sparse_moe.experts.78.w3", "model.layers.5.block_sparse_moe.experts.79.w3", "model.layers.5.block_sparse_moe.experts.80.w3", "model.layers.5.block_sparse_moe.experts.81.w3", "model.layers.5.block_sparse_moe.experts.82.w3", "model.layers.5.block_sparse_moe.experts.83.w3", "model.layers.5.block_sparse_moe.experts.84.w3", "model.layers.5.block_sparse_moe.experts.85.w3", "model.layers.5.block_sparse_moe.experts.86.w3", "model.layers.5.block_sparse_moe.experts.87.w3", "model.layers.5.block_sparse_moe.experts.88.w3", "model.layers.5.block_sparse_moe.experts.89.w3", "model.layers.5.block_sparse_moe.experts.90.w3", "model.layers.5.block_sparse_moe.experts.91.w3", "model.layers.5.block_sparse_moe.experts.92.w3", "model.layers.5.block_sparse_moe.experts.93.w3", "model.layers.5.block_sparse_moe.experts.94.w3", "model.layers.5.block_sparse_moe.experts.95.w3", "model.layers.5.block_sparse_moe.experts.96.w3", "model.layers.5.block_sparse_moe.experts.97.w3", "model.layers.5.block_sparse_moe.experts.98.w3", "model.layers.5.block_sparse_moe.experts.99.w3", "model.layers.5.block_sparse_moe.experts.100.w3", "model.layers.5.block_sparse_moe.experts.101.w3", "model.layers.5.block_sparse_moe.experts.102.w3", "model.layers.5.block_sparse_moe.experts.103.w3", "model.layers.5.block_sparse_moe.experts.104.w3", "model.layers.5.block_sparse_moe.experts.105.w3", "model.layers.5.block_sparse_moe.experts.106.w3", "model.layers.5.block_sparse_moe.experts.107.w3", "model.layers.5.block_sparse_moe.experts.108.w3", "model.layers.5.block_sparse_moe.experts.109.w3", "model.layers.5.block_sparse_moe.experts.110.w3", "model.layers.5.block_sparse_moe.experts.111.w3", "model.layers.5.block_sparse_moe.experts.112.w3", "model.layers.5.block_sparse_moe.experts.113.w3", "model.layers.5.block_sparse_moe.experts.114.w3", "model.layers.5.block_sparse_moe.experts.115.w3", "model.layers.5.block_sparse_moe.experts.116.w3", "model.layers.5.block_sparse_moe.experts.117.w3", "model.layers.5.block_sparse_moe.experts.118.w3", "model.layers.5.block_sparse_moe.experts.119.w3", "model.layers.5.block_sparse_moe.experts.120.w3", "model.layers.5.block_sparse_moe.experts.121.w3", "model.layers.5.block_sparse_moe.experts.122.w3", "model.layers.5.block_sparse_moe.experts.123.w3", "model.layers.5.block_sparse_moe.experts.124.w3", "model.layers.5.block_sparse_moe.experts.125.w3", "model.layers.5.block_sparse_moe.experts.126.w3", "model.layers.5.block_sparse_moe.experts.127.w3", "model.layers.5.block_sparse_moe.experts.128.w3", "model.layers.5.block_sparse_moe.experts.129.w3", "model.layers.5.block_sparse_moe.experts.130.w3", "model.layers.5.block_sparse_moe.experts.131.w3", "model.layers.5.block_sparse_moe.experts.132.w3", "model.layers.5.block_sparse_moe.experts.133.w3", "model.layers.5.block_sparse_moe.experts.134.w3", "model.layers.5.block_sparse_moe.experts.135.w3", "model.layers.5.block_sparse_moe.experts.136.w3", "model.layers.5.block_sparse_moe.experts.137.w3", "model.layers.5.block_sparse_moe.experts.138.w3", "model.layers.5.block_sparse_moe.experts.139.w3", "model.layers.5.block_sparse_moe.experts.140.w3", "model.layers.5.block_sparse_moe.experts.141.w3", "model.layers.5.block_sparse_moe.experts.142.w3", "model.layers.5.block_sparse_moe.experts.143.w3", "model.layers.5.block_sparse_moe.experts.144.w3", "model.layers.5.block_sparse_moe.experts.145.w3", "model.layers.5.block_sparse_moe.experts.146.w3", "model.layers.5.block_sparse_moe.experts.147.w3", "model.layers.5.block_sparse_moe.experts.148.w3", "model.layers.5.block_sparse_moe.experts.149.w3", "model.layers.5.block_sparse_moe.experts.150.w3", "model.layers.5.block_sparse_moe.experts.151.w3", "model.layers.5.block_sparse_moe.experts.152.w3", "model.layers.5.block_sparse_moe.experts.153.w3", "model.layers.5.block_sparse_moe.experts.154.w3", "model.layers.5.block_sparse_moe.experts.155.w3", "model.layers.5.block_sparse_moe.experts.156.w3", "model.layers.5.block_sparse_moe.experts.157.w3", "model.layers.5.block_sparse_moe.experts.158.w3", "model.layers.5.block_sparse_moe.experts.159.w3", "model.layers.5.block_sparse_moe.experts.160.w3", "model.layers.5.block_sparse_moe.experts.161.w3", "model.layers.5.block_sparse_moe.experts.162.w3", "model.layers.5.block_sparse_moe.experts.163.w3", "model.layers.5.block_sparse_moe.experts.164.w3", "model.layers.5.block_sparse_moe.experts.165.w3", "model.layers.5.block_sparse_moe.experts.166.w3", "model.layers.5.block_sparse_moe.experts.167.w3", "model.layers.5.block_sparse_moe.experts.168.w3", "model.layers.5.block_sparse_moe.experts.169.w3", "model.layers.5.block_sparse_moe.experts.170.w3", "model.layers.5.block_sparse_moe.experts.171.w3", "model.layers.5.block_sparse_moe.experts.172.w3", "model.layers.5.block_sparse_moe.experts.173.w3", "model.layers.5.block_sparse_moe.experts.174.w3", "model.layers.5.block_sparse_moe.experts.175.w3", "model.layers.5.block_sparse_moe.experts.176.w3", "model.layers.5.block_sparse_moe.experts.177.w3", "model.layers.5.block_sparse_moe.experts.178.w3", "model.layers.5.block_sparse_moe.experts.179.w3", "model.layers.5.block_sparse_moe.experts.180.w3", "model.layers.5.block_sparse_moe.experts.181.w3", "model.layers.5.block_sparse_moe.experts.182.w3", "model.layers.5.block_sparse_moe.experts.183.w3", "model.layers.5.block_sparse_moe.experts.184.w3", "model.layers.5.block_sparse_moe.experts.185.w3", "model.layers.5.block_sparse_moe.experts.186.w3", "model.layers.5.block_sparse_moe.experts.187.w3", "model.layers.5.block_sparse_moe.experts.188.w3", "model.layers.5.block_sparse_moe.experts.189.w3", "model.layers.5.block_sparse_moe.experts.190.w3", "model.layers.5.block_sparse_moe.experts.191.w3", "model.layers.5.block_sparse_moe.experts.192.w3", "model.layers.5.block_sparse_moe.experts.193.w3", "model.layers.5.block_sparse_moe.experts.194.w3", "model.layers.5.block_sparse_moe.experts.195.w3", "model.layers.5.block_sparse_moe.experts.196.w3", "model.layers.5.block_sparse_moe.experts.197.w3", "model.layers.5.block_sparse_moe.experts.198.w3", "model.layers.5.block_sparse_moe.experts.199.w3", "model.layers.5.block_sparse_moe.experts.200.w3", "model.layers.5.block_sparse_moe.experts.201.w3", "model.layers.5.block_sparse_moe.experts.202.w3", "model.layers.5.block_sparse_moe.experts.203.w3", "model.layers.5.block_sparse_moe.experts.204.w3", "model.layers.5.block_sparse_moe.experts.205.w3", "model.layers.5.block_sparse_moe.experts.206.w3", "model.layers.5.block_sparse_moe.experts.207.w3", "model.layers.5.block_sparse_moe.experts.208.w3", "model.layers.5.block_sparse_moe.experts.209.w3", "model.layers.5.block_sparse_moe.experts.210.w3", "model.layers.5.block_sparse_moe.experts.211.w3", "model.layers.5.block_sparse_moe.experts.212.w3", "model.layers.5.block_sparse_moe.experts.213.w3", "model.layers.5.block_sparse_moe.experts.214.w3", "model.layers.5.block_sparse_moe.experts.215.w3", "model.layers.5.block_sparse_moe.experts.216.w3", "model.layers.5.block_sparse_moe.experts.217.w3", "model.layers.5.block_sparse_moe.experts.218.w3", "model.layers.5.block_sparse_moe.experts.219.w3", "model.layers.5.block_sparse_moe.experts.220.w3", "model.layers.5.block_sparse_moe.experts.221.w3", "model.layers.5.block_sparse_moe.experts.222.w3", "model.layers.5.block_sparse_moe.experts.223.w3", "model.layers.5.block_sparse_moe.experts.224.w3", "model.layers.5.block_sparse_moe.experts.225.w3", "model.layers.5.block_sparse_moe.experts.226.w3", "model.layers.5.block_sparse_moe.experts.227.w3", "model.layers.5.block_sparse_moe.experts.228.w3", "model.layers.5.block_sparse_moe.experts.229.w3", "model.layers.5.block_sparse_moe.experts.230.w3", "model.layers.5.block_sparse_moe.experts.231.w3", "model.layers.5.block_sparse_moe.experts.232.w3", "model.layers.5.block_sparse_moe.experts.233.w3", "model.layers.5.block_sparse_moe.experts.234.w3", "model.layers.5.block_sparse_moe.experts.235.w3", "model.layers.5.block_sparse_moe.experts.236.w3", "model.layers.5.block_sparse_moe.experts.237.w3", "model.layers.5.block_sparse_moe.experts.238.w3", "model.layers.5.block_sparse_moe.experts.239.w3", "model.layers.5.block_sparse_moe.experts.240.w3", "model.layers.5.block_sparse_moe.experts.241.w3", "model.layers.5.block_sparse_moe.experts.242.w3", "model.layers.5.block_sparse_moe.experts.243.w3", "model.layers.5.block_sparse_moe.experts.244.w3", "model.layers.5.block_sparse_moe.experts.245.w3", "model.layers.5.block_sparse_moe.experts.246.w3", "model.layers.5.block_sparse_moe.experts.247.w3", "model.layers.5.block_sparse_moe.experts.248.w3", "model.layers.5.block_sparse_moe.experts.249.w3", "model.layers.5.block_sparse_moe.experts.250.w3", "model.layers.5.block_sparse_moe.experts.251.w3", "model.layers.5.block_sparse_moe.experts.252.w3", "model.layers.5.block_sparse_moe.experts.253.w3", "model.layers.5.block_sparse_moe.experts.254.w3", "model.layers.5.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0013270527124404907, "dbits": 2415919104 } ] }, { "idx": 29, "layers": [ "model.layers.5.block_sparse_moe.experts.0.w2", "model.layers.5.block_sparse_moe.experts.1.w2", "model.layers.5.block_sparse_moe.experts.2.w2", "model.layers.5.block_sparse_moe.experts.3.w2", "model.layers.5.block_sparse_moe.experts.4.w2", "model.layers.5.block_sparse_moe.experts.5.w2", "model.layers.5.block_sparse_moe.experts.6.w2", "model.layers.5.block_sparse_moe.experts.7.w2", "model.layers.5.block_sparse_moe.experts.8.w2", "model.layers.5.block_sparse_moe.experts.9.w2", "model.layers.5.block_sparse_moe.experts.10.w2", "model.layers.5.block_sparse_moe.experts.11.w2", "model.layers.5.block_sparse_moe.experts.12.w2", "model.layers.5.block_sparse_moe.experts.13.w2", "model.layers.5.block_sparse_moe.experts.14.w2", "model.layers.5.block_sparse_moe.experts.15.w2", "model.layers.5.block_sparse_moe.experts.16.w2", "model.layers.5.block_sparse_moe.experts.17.w2", "model.layers.5.block_sparse_moe.experts.18.w2", "model.layers.5.block_sparse_moe.experts.19.w2", "model.layers.5.block_sparse_moe.experts.20.w2", "model.layers.5.block_sparse_moe.experts.21.w2", "model.layers.5.block_sparse_moe.experts.22.w2", "model.layers.5.block_sparse_moe.experts.23.w2", "model.layers.5.block_sparse_moe.experts.24.w2", "model.layers.5.block_sparse_moe.experts.25.w2", "model.layers.5.block_sparse_moe.experts.26.w2", "model.layers.5.block_sparse_moe.experts.27.w2", "model.layers.5.block_sparse_moe.experts.28.w2", "model.layers.5.block_sparse_moe.experts.29.w2", "model.layers.5.block_sparse_moe.experts.30.w2", "model.layers.5.block_sparse_moe.experts.31.w2", "model.layers.5.block_sparse_moe.experts.32.w2", "model.layers.5.block_sparse_moe.experts.33.w2", "model.layers.5.block_sparse_moe.experts.34.w2", "model.layers.5.block_sparse_moe.experts.35.w2", "model.layers.5.block_sparse_moe.experts.36.w2", "model.layers.5.block_sparse_moe.experts.37.w2", "model.layers.5.block_sparse_moe.experts.38.w2", "model.layers.5.block_sparse_moe.experts.39.w2", "model.layers.5.block_sparse_moe.experts.40.w2", "model.layers.5.block_sparse_moe.experts.41.w2", "model.layers.5.block_sparse_moe.experts.42.w2", "model.layers.5.block_sparse_moe.experts.43.w2", "model.layers.5.block_sparse_moe.experts.44.w2", "model.layers.5.block_sparse_moe.experts.45.w2", "model.layers.5.block_sparse_moe.experts.46.w2", "model.layers.5.block_sparse_moe.experts.47.w2", "model.layers.5.block_sparse_moe.experts.48.w2", "model.layers.5.block_sparse_moe.experts.49.w2", "model.layers.5.block_sparse_moe.experts.50.w2", "model.layers.5.block_sparse_moe.experts.51.w2", "model.layers.5.block_sparse_moe.experts.52.w2", "model.layers.5.block_sparse_moe.experts.53.w2", "model.layers.5.block_sparse_moe.experts.54.w2", "model.layers.5.block_sparse_moe.experts.55.w2", "model.layers.5.block_sparse_moe.experts.56.w2", "model.layers.5.block_sparse_moe.experts.57.w2", "model.layers.5.block_sparse_moe.experts.58.w2", "model.layers.5.block_sparse_moe.experts.59.w2", "model.layers.5.block_sparse_moe.experts.60.w2", "model.layers.5.block_sparse_moe.experts.61.w2", "model.layers.5.block_sparse_moe.experts.62.w2", "model.layers.5.block_sparse_moe.experts.63.w2", "model.layers.5.block_sparse_moe.experts.64.w2", "model.layers.5.block_sparse_moe.experts.65.w2", "model.layers.5.block_sparse_moe.experts.66.w2", "model.layers.5.block_sparse_moe.experts.67.w2", "model.layers.5.block_sparse_moe.experts.68.w2", "model.layers.5.block_sparse_moe.experts.69.w2", "model.layers.5.block_sparse_moe.experts.70.w2", "model.layers.5.block_sparse_moe.experts.71.w2", "model.layers.5.block_sparse_moe.experts.72.w2", "model.layers.5.block_sparse_moe.experts.73.w2", "model.layers.5.block_sparse_moe.experts.74.w2", "model.layers.5.block_sparse_moe.experts.75.w2", "model.layers.5.block_sparse_moe.experts.76.w2", "model.layers.5.block_sparse_moe.experts.77.w2", "model.layers.5.block_sparse_moe.experts.78.w2", "model.layers.5.block_sparse_moe.experts.79.w2", "model.layers.5.block_sparse_moe.experts.80.w2", "model.layers.5.block_sparse_moe.experts.81.w2", "model.layers.5.block_sparse_moe.experts.82.w2", "model.layers.5.block_sparse_moe.experts.83.w2", "model.layers.5.block_sparse_moe.experts.84.w2", "model.layers.5.block_sparse_moe.experts.85.w2", "model.layers.5.block_sparse_moe.experts.86.w2", "model.layers.5.block_sparse_moe.experts.87.w2", "model.layers.5.block_sparse_moe.experts.88.w2", "model.layers.5.block_sparse_moe.experts.89.w2", "model.layers.5.block_sparse_moe.experts.90.w2", "model.layers.5.block_sparse_moe.experts.91.w2", "model.layers.5.block_sparse_moe.experts.92.w2", "model.layers.5.block_sparse_moe.experts.93.w2", "model.layers.5.block_sparse_moe.experts.94.w2", "model.layers.5.block_sparse_moe.experts.95.w2", "model.layers.5.block_sparse_moe.experts.96.w2", "model.layers.5.block_sparse_moe.experts.97.w2", "model.layers.5.block_sparse_moe.experts.98.w2", "model.layers.5.block_sparse_moe.experts.99.w2", "model.layers.5.block_sparse_moe.experts.100.w2", "model.layers.5.block_sparse_moe.experts.101.w2", "model.layers.5.block_sparse_moe.experts.102.w2", "model.layers.5.block_sparse_moe.experts.103.w2", "model.layers.5.block_sparse_moe.experts.104.w2", "model.layers.5.block_sparse_moe.experts.105.w2", "model.layers.5.block_sparse_moe.experts.106.w2", "model.layers.5.block_sparse_moe.experts.107.w2", "model.layers.5.block_sparse_moe.experts.108.w2", "model.layers.5.block_sparse_moe.experts.109.w2", "model.layers.5.block_sparse_moe.experts.110.w2", "model.layers.5.block_sparse_moe.experts.111.w2", "model.layers.5.block_sparse_moe.experts.112.w2", "model.layers.5.block_sparse_moe.experts.113.w2", "model.layers.5.block_sparse_moe.experts.114.w2", "model.layers.5.block_sparse_moe.experts.115.w2", "model.layers.5.block_sparse_moe.experts.116.w2", "model.layers.5.block_sparse_moe.experts.117.w2", "model.layers.5.block_sparse_moe.experts.118.w2", "model.layers.5.block_sparse_moe.experts.119.w2", "model.layers.5.block_sparse_moe.experts.120.w2", "model.layers.5.block_sparse_moe.experts.121.w2", "model.layers.5.block_sparse_moe.experts.122.w2", "model.layers.5.block_sparse_moe.experts.123.w2", "model.layers.5.block_sparse_moe.experts.124.w2", "model.layers.5.block_sparse_moe.experts.125.w2", "model.layers.5.block_sparse_moe.experts.126.w2", "model.layers.5.block_sparse_moe.experts.127.w2", "model.layers.5.block_sparse_moe.experts.128.w2", "model.layers.5.block_sparse_moe.experts.129.w2", "model.layers.5.block_sparse_moe.experts.130.w2", "model.layers.5.block_sparse_moe.experts.131.w2", "model.layers.5.block_sparse_moe.experts.132.w2", "model.layers.5.block_sparse_moe.experts.133.w2", "model.layers.5.block_sparse_moe.experts.134.w2", "model.layers.5.block_sparse_moe.experts.135.w2", "model.layers.5.block_sparse_moe.experts.136.w2", "model.layers.5.block_sparse_moe.experts.137.w2", "model.layers.5.block_sparse_moe.experts.138.w2", "model.layers.5.block_sparse_moe.experts.139.w2", "model.layers.5.block_sparse_moe.experts.140.w2", "model.layers.5.block_sparse_moe.experts.141.w2", "model.layers.5.block_sparse_moe.experts.142.w2", "model.layers.5.block_sparse_moe.experts.143.w2", "model.layers.5.block_sparse_moe.experts.144.w2", "model.layers.5.block_sparse_moe.experts.145.w2", "model.layers.5.block_sparse_moe.experts.146.w2", "model.layers.5.block_sparse_moe.experts.147.w2", "model.layers.5.block_sparse_moe.experts.148.w2", "model.layers.5.block_sparse_moe.experts.149.w2", "model.layers.5.block_sparse_moe.experts.150.w2", "model.layers.5.block_sparse_moe.experts.151.w2", "model.layers.5.block_sparse_moe.experts.152.w2", "model.layers.5.block_sparse_moe.experts.153.w2", "model.layers.5.block_sparse_moe.experts.154.w2", "model.layers.5.block_sparse_moe.experts.155.w2", "model.layers.5.block_sparse_moe.experts.156.w2", "model.layers.5.block_sparse_moe.experts.157.w2", "model.layers.5.block_sparse_moe.experts.158.w2", "model.layers.5.block_sparse_moe.experts.159.w2", "model.layers.5.block_sparse_moe.experts.160.w2", "model.layers.5.block_sparse_moe.experts.161.w2", "model.layers.5.block_sparse_moe.experts.162.w2", "model.layers.5.block_sparse_moe.experts.163.w2", "model.layers.5.block_sparse_moe.experts.164.w2", "model.layers.5.block_sparse_moe.experts.165.w2", "model.layers.5.block_sparse_moe.experts.166.w2", "model.layers.5.block_sparse_moe.experts.167.w2", "model.layers.5.block_sparse_moe.experts.168.w2", "model.layers.5.block_sparse_moe.experts.169.w2", "model.layers.5.block_sparse_moe.experts.170.w2", "model.layers.5.block_sparse_moe.experts.171.w2", "model.layers.5.block_sparse_moe.experts.172.w2", "model.layers.5.block_sparse_moe.experts.173.w2", "model.layers.5.block_sparse_moe.experts.174.w2", "model.layers.5.block_sparse_moe.experts.175.w2", "model.layers.5.block_sparse_moe.experts.176.w2", "model.layers.5.block_sparse_moe.experts.177.w2", "model.layers.5.block_sparse_moe.experts.178.w2", "model.layers.5.block_sparse_moe.experts.179.w2", "model.layers.5.block_sparse_moe.experts.180.w2", "model.layers.5.block_sparse_moe.experts.181.w2", "model.layers.5.block_sparse_moe.experts.182.w2", "model.layers.5.block_sparse_moe.experts.183.w2", "model.layers.5.block_sparse_moe.experts.184.w2", "model.layers.5.block_sparse_moe.experts.185.w2", "model.layers.5.block_sparse_moe.experts.186.w2", "model.layers.5.block_sparse_moe.experts.187.w2", "model.layers.5.block_sparse_moe.experts.188.w2", "model.layers.5.block_sparse_moe.experts.189.w2", "model.layers.5.block_sparse_moe.experts.190.w2", "model.layers.5.block_sparse_moe.experts.191.w2", "model.layers.5.block_sparse_moe.experts.192.w2", "model.layers.5.block_sparse_moe.experts.193.w2", "model.layers.5.block_sparse_moe.experts.194.w2", "model.layers.5.block_sparse_moe.experts.195.w2", "model.layers.5.block_sparse_moe.experts.196.w2", "model.layers.5.block_sparse_moe.experts.197.w2", "model.layers.5.block_sparse_moe.experts.198.w2", "model.layers.5.block_sparse_moe.experts.199.w2", "model.layers.5.block_sparse_moe.experts.200.w2", "model.layers.5.block_sparse_moe.experts.201.w2", "model.layers.5.block_sparse_moe.experts.202.w2", "model.layers.5.block_sparse_moe.experts.203.w2", "model.layers.5.block_sparse_moe.experts.204.w2", "model.layers.5.block_sparse_moe.experts.205.w2", "model.layers.5.block_sparse_moe.experts.206.w2", "model.layers.5.block_sparse_moe.experts.207.w2", "model.layers.5.block_sparse_moe.experts.208.w2", "model.layers.5.block_sparse_moe.experts.209.w2", "model.layers.5.block_sparse_moe.experts.210.w2", "model.layers.5.block_sparse_moe.experts.211.w2", "model.layers.5.block_sparse_moe.experts.212.w2", "model.layers.5.block_sparse_moe.experts.213.w2", "model.layers.5.block_sparse_moe.experts.214.w2", "model.layers.5.block_sparse_moe.experts.215.w2", "model.layers.5.block_sparse_moe.experts.216.w2", "model.layers.5.block_sparse_moe.experts.217.w2", "model.layers.5.block_sparse_moe.experts.218.w2", "model.layers.5.block_sparse_moe.experts.219.w2", "model.layers.5.block_sparse_moe.experts.220.w2", "model.layers.5.block_sparse_moe.experts.221.w2", "model.layers.5.block_sparse_moe.experts.222.w2", "model.layers.5.block_sparse_moe.experts.223.w2", "model.layers.5.block_sparse_moe.experts.224.w2", "model.layers.5.block_sparse_moe.experts.225.w2", "model.layers.5.block_sparse_moe.experts.226.w2", "model.layers.5.block_sparse_moe.experts.227.w2", "model.layers.5.block_sparse_moe.experts.228.w2", "model.layers.5.block_sparse_moe.experts.229.w2", "model.layers.5.block_sparse_moe.experts.230.w2", "model.layers.5.block_sparse_moe.experts.231.w2", "model.layers.5.block_sparse_moe.experts.232.w2", "model.layers.5.block_sparse_moe.experts.233.w2", "model.layers.5.block_sparse_moe.experts.234.w2", "model.layers.5.block_sparse_moe.experts.235.w2", "model.layers.5.block_sparse_moe.experts.236.w2", "model.layers.5.block_sparse_moe.experts.237.w2", "model.layers.5.block_sparse_moe.experts.238.w2", "model.layers.5.block_sparse_moe.experts.239.w2", "model.layers.5.block_sparse_moe.experts.240.w2", "model.layers.5.block_sparse_moe.experts.241.w2", "model.layers.5.block_sparse_moe.experts.242.w2", "model.layers.5.block_sparse_moe.experts.243.w2", "model.layers.5.block_sparse_moe.experts.244.w2", "model.layers.5.block_sparse_moe.experts.245.w2", "model.layers.5.block_sparse_moe.experts.246.w2", "model.layers.5.block_sparse_moe.experts.247.w2", "model.layers.5.block_sparse_moe.experts.248.w2", "model.layers.5.block_sparse_moe.experts.249.w2", "model.layers.5.block_sparse_moe.experts.250.w2", "model.layers.5.block_sparse_moe.experts.251.w2", "model.layers.5.block_sparse_moe.experts.252.w2", "model.layers.5.block_sparse_moe.experts.253.w2", "model.layers.5.block_sparse_moe.experts.254.w2", "model.layers.5.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.02497080415487285, "dbits": 1207959552 } ] }, { "idx": 30, "layers": [ "model.layers.6.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0012944936752319336, "dbits": 18874368 } ] }, { "idx": 31, "layers": [ "model.layers.6.self_attn.k_proj", "model.layers.6.self_attn.v_proj" ], "candidates": [ { "dkld": 0.002010241150856018, "dbits": 6291456 } ] }, { "idx": 32, "layers": [ "model.layers.6.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0009339243173599465, "dbits": 18874368 } ] }, { "idx": 33, "layers": [ "model.layers.6.block_sparse_moe.experts.0.w1", "model.layers.6.block_sparse_moe.experts.1.w1", "model.layers.6.block_sparse_moe.experts.2.w1", "model.layers.6.block_sparse_moe.experts.3.w1", "model.layers.6.block_sparse_moe.experts.4.w1", "model.layers.6.block_sparse_moe.experts.5.w1", "model.layers.6.block_sparse_moe.experts.6.w1", "model.layers.6.block_sparse_moe.experts.7.w1", "model.layers.6.block_sparse_moe.experts.8.w1", "model.layers.6.block_sparse_moe.experts.9.w1", "model.layers.6.block_sparse_moe.experts.10.w1", "model.layers.6.block_sparse_moe.experts.11.w1", "model.layers.6.block_sparse_moe.experts.12.w1", "model.layers.6.block_sparse_moe.experts.13.w1", "model.layers.6.block_sparse_moe.experts.14.w1", "model.layers.6.block_sparse_moe.experts.15.w1", "model.layers.6.block_sparse_moe.experts.16.w1", "model.layers.6.block_sparse_moe.experts.17.w1", "model.layers.6.block_sparse_moe.experts.18.w1", "model.layers.6.block_sparse_moe.experts.19.w1", "model.layers.6.block_sparse_moe.experts.20.w1", "model.layers.6.block_sparse_moe.experts.21.w1", "model.layers.6.block_sparse_moe.experts.22.w1", "model.layers.6.block_sparse_moe.experts.23.w1", "model.layers.6.block_sparse_moe.experts.24.w1", "model.layers.6.block_sparse_moe.experts.25.w1", "model.layers.6.block_sparse_moe.experts.26.w1", "model.layers.6.block_sparse_moe.experts.27.w1", "model.layers.6.block_sparse_moe.experts.28.w1", "model.layers.6.block_sparse_moe.experts.29.w1", "model.layers.6.block_sparse_moe.experts.30.w1", "model.layers.6.block_sparse_moe.experts.31.w1", "model.layers.6.block_sparse_moe.experts.32.w1", "model.layers.6.block_sparse_moe.experts.33.w1", "model.layers.6.block_sparse_moe.experts.34.w1", "model.layers.6.block_sparse_moe.experts.35.w1", "model.layers.6.block_sparse_moe.experts.36.w1", "model.layers.6.block_sparse_moe.experts.37.w1", "model.layers.6.block_sparse_moe.experts.38.w1", "model.layers.6.block_sparse_moe.experts.39.w1", "model.layers.6.block_sparse_moe.experts.40.w1", "model.layers.6.block_sparse_moe.experts.41.w1", "model.layers.6.block_sparse_moe.experts.42.w1", "model.layers.6.block_sparse_moe.experts.43.w1", "model.layers.6.block_sparse_moe.experts.44.w1", "model.layers.6.block_sparse_moe.experts.45.w1", "model.layers.6.block_sparse_moe.experts.46.w1", "model.layers.6.block_sparse_moe.experts.47.w1", "model.layers.6.block_sparse_moe.experts.48.w1", "model.layers.6.block_sparse_moe.experts.49.w1", "model.layers.6.block_sparse_moe.experts.50.w1", "model.layers.6.block_sparse_moe.experts.51.w1", "model.layers.6.block_sparse_moe.experts.52.w1", "model.layers.6.block_sparse_moe.experts.53.w1", "model.layers.6.block_sparse_moe.experts.54.w1", "model.layers.6.block_sparse_moe.experts.55.w1", "model.layers.6.block_sparse_moe.experts.56.w1", "model.layers.6.block_sparse_moe.experts.57.w1", "model.layers.6.block_sparse_moe.experts.58.w1", "model.layers.6.block_sparse_moe.experts.59.w1", "model.layers.6.block_sparse_moe.experts.60.w1", "model.layers.6.block_sparse_moe.experts.61.w1", "model.layers.6.block_sparse_moe.experts.62.w1", "model.layers.6.block_sparse_moe.experts.63.w1", "model.layers.6.block_sparse_moe.experts.64.w1", "model.layers.6.block_sparse_moe.experts.65.w1", "model.layers.6.block_sparse_moe.experts.66.w1", "model.layers.6.block_sparse_moe.experts.67.w1", "model.layers.6.block_sparse_moe.experts.68.w1", "model.layers.6.block_sparse_moe.experts.69.w1", "model.layers.6.block_sparse_moe.experts.70.w1", "model.layers.6.block_sparse_moe.experts.71.w1", "model.layers.6.block_sparse_moe.experts.72.w1", "model.layers.6.block_sparse_moe.experts.73.w1", "model.layers.6.block_sparse_moe.experts.74.w1", "model.layers.6.block_sparse_moe.experts.75.w1", "model.layers.6.block_sparse_moe.experts.76.w1", "model.layers.6.block_sparse_moe.experts.77.w1", "model.layers.6.block_sparse_moe.experts.78.w1", "model.layers.6.block_sparse_moe.experts.79.w1", "model.layers.6.block_sparse_moe.experts.80.w1", "model.layers.6.block_sparse_moe.experts.81.w1", "model.layers.6.block_sparse_moe.experts.82.w1", "model.layers.6.block_sparse_moe.experts.83.w1", "model.layers.6.block_sparse_moe.experts.84.w1", "model.layers.6.block_sparse_moe.experts.85.w1", "model.layers.6.block_sparse_moe.experts.86.w1", "model.layers.6.block_sparse_moe.experts.87.w1", "model.layers.6.block_sparse_moe.experts.88.w1", "model.layers.6.block_sparse_moe.experts.89.w1", "model.layers.6.block_sparse_moe.experts.90.w1", "model.layers.6.block_sparse_moe.experts.91.w1", "model.layers.6.block_sparse_moe.experts.92.w1", "model.layers.6.block_sparse_moe.experts.93.w1", "model.layers.6.block_sparse_moe.experts.94.w1", "model.layers.6.block_sparse_moe.experts.95.w1", "model.layers.6.block_sparse_moe.experts.96.w1", "model.layers.6.block_sparse_moe.experts.97.w1", "model.layers.6.block_sparse_moe.experts.98.w1", "model.layers.6.block_sparse_moe.experts.99.w1", "model.layers.6.block_sparse_moe.experts.100.w1", "model.layers.6.block_sparse_moe.experts.101.w1", "model.layers.6.block_sparse_moe.experts.102.w1", "model.layers.6.block_sparse_moe.experts.103.w1", "model.layers.6.block_sparse_moe.experts.104.w1", "model.layers.6.block_sparse_moe.experts.105.w1", "model.layers.6.block_sparse_moe.experts.106.w1", "model.layers.6.block_sparse_moe.experts.107.w1", "model.layers.6.block_sparse_moe.experts.108.w1", "model.layers.6.block_sparse_moe.experts.109.w1", "model.layers.6.block_sparse_moe.experts.110.w1", "model.layers.6.block_sparse_moe.experts.111.w1", "model.layers.6.block_sparse_moe.experts.112.w1", "model.layers.6.block_sparse_moe.experts.113.w1", "model.layers.6.block_sparse_moe.experts.114.w1", "model.layers.6.block_sparse_moe.experts.115.w1", "model.layers.6.block_sparse_moe.experts.116.w1", "model.layers.6.block_sparse_moe.experts.117.w1", "model.layers.6.block_sparse_moe.experts.118.w1", "model.layers.6.block_sparse_moe.experts.119.w1", "model.layers.6.block_sparse_moe.experts.120.w1", "model.layers.6.block_sparse_moe.experts.121.w1", "model.layers.6.block_sparse_moe.experts.122.w1", "model.layers.6.block_sparse_moe.experts.123.w1", "model.layers.6.block_sparse_moe.experts.124.w1", "model.layers.6.block_sparse_moe.experts.125.w1", "model.layers.6.block_sparse_moe.experts.126.w1", "model.layers.6.block_sparse_moe.experts.127.w1", "model.layers.6.block_sparse_moe.experts.128.w1", "model.layers.6.block_sparse_moe.experts.129.w1", "model.layers.6.block_sparse_moe.experts.130.w1", "model.layers.6.block_sparse_moe.experts.131.w1", "model.layers.6.block_sparse_moe.experts.132.w1", "model.layers.6.block_sparse_moe.experts.133.w1", "model.layers.6.block_sparse_moe.experts.134.w1", "model.layers.6.block_sparse_moe.experts.135.w1", "model.layers.6.block_sparse_moe.experts.136.w1", "model.layers.6.block_sparse_moe.experts.137.w1", "model.layers.6.block_sparse_moe.experts.138.w1", "model.layers.6.block_sparse_moe.experts.139.w1", "model.layers.6.block_sparse_moe.experts.140.w1", "model.layers.6.block_sparse_moe.experts.141.w1", "model.layers.6.block_sparse_moe.experts.142.w1", "model.layers.6.block_sparse_moe.experts.143.w1", "model.layers.6.block_sparse_moe.experts.144.w1", "model.layers.6.block_sparse_moe.experts.145.w1", "model.layers.6.block_sparse_moe.experts.146.w1", "model.layers.6.block_sparse_moe.experts.147.w1", "model.layers.6.block_sparse_moe.experts.148.w1", "model.layers.6.block_sparse_moe.experts.149.w1", "model.layers.6.block_sparse_moe.experts.150.w1", "model.layers.6.block_sparse_moe.experts.151.w1", "model.layers.6.block_sparse_moe.experts.152.w1", "model.layers.6.block_sparse_moe.experts.153.w1", "model.layers.6.block_sparse_moe.experts.154.w1", "model.layers.6.block_sparse_moe.experts.155.w1", "model.layers.6.block_sparse_moe.experts.156.w1", "model.layers.6.block_sparse_moe.experts.157.w1", "model.layers.6.block_sparse_moe.experts.158.w1", "model.layers.6.block_sparse_moe.experts.159.w1", "model.layers.6.block_sparse_moe.experts.160.w1", "model.layers.6.block_sparse_moe.experts.161.w1", "model.layers.6.block_sparse_moe.experts.162.w1", "model.layers.6.block_sparse_moe.experts.163.w1", "model.layers.6.block_sparse_moe.experts.164.w1", "model.layers.6.block_sparse_moe.experts.165.w1", "model.layers.6.block_sparse_moe.experts.166.w1", "model.layers.6.block_sparse_moe.experts.167.w1", "model.layers.6.block_sparse_moe.experts.168.w1", "model.layers.6.block_sparse_moe.experts.169.w1", "model.layers.6.block_sparse_moe.experts.170.w1", "model.layers.6.block_sparse_moe.experts.171.w1", "model.layers.6.block_sparse_moe.experts.172.w1", "model.layers.6.block_sparse_moe.experts.173.w1", "model.layers.6.block_sparse_moe.experts.174.w1", "model.layers.6.block_sparse_moe.experts.175.w1", "model.layers.6.block_sparse_moe.experts.176.w1", "model.layers.6.block_sparse_moe.experts.177.w1", "model.layers.6.block_sparse_moe.experts.178.w1", "model.layers.6.block_sparse_moe.experts.179.w1", "model.layers.6.block_sparse_moe.experts.180.w1", "model.layers.6.block_sparse_moe.experts.181.w1", "model.layers.6.block_sparse_moe.experts.182.w1", "model.layers.6.block_sparse_moe.experts.183.w1", "model.layers.6.block_sparse_moe.experts.184.w1", "model.layers.6.block_sparse_moe.experts.185.w1", "model.layers.6.block_sparse_moe.experts.186.w1", "model.layers.6.block_sparse_moe.experts.187.w1", "model.layers.6.block_sparse_moe.experts.188.w1", "model.layers.6.block_sparse_moe.experts.189.w1", "model.layers.6.block_sparse_moe.experts.190.w1", "model.layers.6.block_sparse_moe.experts.191.w1", "model.layers.6.block_sparse_moe.experts.192.w1", "model.layers.6.block_sparse_moe.experts.193.w1", "model.layers.6.block_sparse_moe.experts.194.w1", "model.layers.6.block_sparse_moe.experts.195.w1", "model.layers.6.block_sparse_moe.experts.196.w1", "model.layers.6.block_sparse_moe.experts.197.w1", "model.layers.6.block_sparse_moe.experts.198.w1", "model.layers.6.block_sparse_moe.experts.199.w1", "model.layers.6.block_sparse_moe.experts.200.w1", "model.layers.6.block_sparse_moe.experts.201.w1", "model.layers.6.block_sparse_moe.experts.202.w1", "model.layers.6.block_sparse_moe.experts.203.w1", "model.layers.6.block_sparse_moe.experts.204.w1", "model.layers.6.block_sparse_moe.experts.205.w1", "model.layers.6.block_sparse_moe.experts.206.w1", "model.layers.6.block_sparse_moe.experts.207.w1", "model.layers.6.block_sparse_moe.experts.208.w1", "model.layers.6.block_sparse_moe.experts.209.w1", "model.layers.6.block_sparse_moe.experts.210.w1", "model.layers.6.block_sparse_moe.experts.211.w1", "model.layers.6.block_sparse_moe.experts.212.w1", "model.layers.6.block_sparse_moe.experts.213.w1", "model.layers.6.block_sparse_moe.experts.214.w1", "model.layers.6.block_sparse_moe.experts.215.w1", "model.layers.6.block_sparse_moe.experts.216.w1", "model.layers.6.block_sparse_moe.experts.217.w1", "model.layers.6.block_sparse_moe.experts.218.w1", "model.layers.6.block_sparse_moe.experts.219.w1", "model.layers.6.block_sparse_moe.experts.220.w1", "model.layers.6.block_sparse_moe.experts.221.w1", "model.layers.6.block_sparse_moe.experts.222.w1", "model.layers.6.block_sparse_moe.experts.223.w1", "model.layers.6.block_sparse_moe.experts.224.w1", "model.layers.6.block_sparse_moe.experts.225.w1", "model.layers.6.block_sparse_moe.experts.226.w1", "model.layers.6.block_sparse_moe.experts.227.w1", "model.layers.6.block_sparse_moe.experts.228.w1", "model.layers.6.block_sparse_moe.experts.229.w1", "model.layers.6.block_sparse_moe.experts.230.w1", "model.layers.6.block_sparse_moe.experts.231.w1", "model.layers.6.block_sparse_moe.experts.232.w1", "model.layers.6.block_sparse_moe.experts.233.w1", "model.layers.6.block_sparse_moe.experts.234.w1", "model.layers.6.block_sparse_moe.experts.235.w1", "model.layers.6.block_sparse_moe.experts.236.w1", "model.layers.6.block_sparse_moe.experts.237.w1", "model.layers.6.block_sparse_moe.experts.238.w1", "model.layers.6.block_sparse_moe.experts.239.w1", "model.layers.6.block_sparse_moe.experts.240.w1", "model.layers.6.block_sparse_moe.experts.241.w1", "model.layers.6.block_sparse_moe.experts.242.w1", "model.layers.6.block_sparse_moe.experts.243.w1", "model.layers.6.block_sparse_moe.experts.244.w1", "model.layers.6.block_sparse_moe.experts.245.w1", "model.layers.6.block_sparse_moe.experts.246.w1", "model.layers.6.block_sparse_moe.experts.247.w1", "model.layers.6.block_sparse_moe.experts.248.w1", "model.layers.6.block_sparse_moe.experts.249.w1", "model.layers.6.block_sparse_moe.experts.250.w1", "model.layers.6.block_sparse_moe.experts.251.w1", "model.layers.6.block_sparse_moe.experts.252.w1", "model.layers.6.block_sparse_moe.experts.253.w1", "model.layers.6.block_sparse_moe.experts.254.w1", "model.layers.6.block_sparse_moe.experts.255.w1", "model.layers.6.block_sparse_moe.experts.0.w3", "model.layers.6.block_sparse_moe.experts.1.w3", "model.layers.6.block_sparse_moe.experts.2.w3", "model.layers.6.block_sparse_moe.experts.3.w3", "model.layers.6.block_sparse_moe.experts.4.w3", "model.layers.6.block_sparse_moe.experts.5.w3", "model.layers.6.block_sparse_moe.experts.6.w3", "model.layers.6.block_sparse_moe.experts.7.w3", "model.layers.6.block_sparse_moe.experts.8.w3", "model.layers.6.block_sparse_moe.experts.9.w3", "model.layers.6.block_sparse_moe.experts.10.w3", "model.layers.6.block_sparse_moe.experts.11.w3", "model.layers.6.block_sparse_moe.experts.12.w3", "model.layers.6.block_sparse_moe.experts.13.w3", "model.layers.6.block_sparse_moe.experts.14.w3", "model.layers.6.block_sparse_moe.experts.15.w3", "model.layers.6.block_sparse_moe.experts.16.w3", "model.layers.6.block_sparse_moe.experts.17.w3", "model.layers.6.block_sparse_moe.experts.18.w3", "model.layers.6.block_sparse_moe.experts.19.w3", "model.layers.6.block_sparse_moe.experts.20.w3", "model.layers.6.block_sparse_moe.experts.21.w3", "model.layers.6.block_sparse_moe.experts.22.w3", "model.layers.6.block_sparse_moe.experts.23.w3", "model.layers.6.block_sparse_moe.experts.24.w3", "model.layers.6.block_sparse_moe.experts.25.w3", "model.layers.6.block_sparse_moe.experts.26.w3", "model.layers.6.block_sparse_moe.experts.27.w3", "model.layers.6.block_sparse_moe.experts.28.w3", "model.layers.6.block_sparse_moe.experts.29.w3", "model.layers.6.block_sparse_moe.experts.30.w3", "model.layers.6.block_sparse_moe.experts.31.w3", "model.layers.6.block_sparse_moe.experts.32.w3", "model.layers.6.block_sparse_moe.experts.33.w3", "model.layers.6.block_sparse_moe.experts.34.w3", "model.layers.6.block_sparse_moe.experts.35.w3", "model.layers.6.block_sparse_moe.experts.36.w3", "model.layers.6.block_sparse_moe.experts.37.w3", "model.layers.6.block_sparse_moe.experts.38.w3", "model.layers.6.block_sparse_moe.experts.39.w3", "model.layers.6.block_sparse_moe.experts.40.w3", "model.layers.6.block_sparse_moe.experts.41.w3", "model.layers.6.block_sparse_moe.experts.42.w3", "model.layers.6.block_sparse_moe.experts.43.w3", "model.layers.6.block_sparse_moe.experts.44.w3", "model.layers.6.block_sparse_moe.experts.45.w3", "model.layers.6.block_sparse_moe.experts.46.w3", "model.layers.6.block_sparse_moe.experts.47.w3", "model.layers.6.block_sparse_moe.experts.48.w3", "model.layers.6.block_sparse_moe.experts.49.w3", "model.layers.6.block_sparse_moe.experts.50.w3", "model.layers.6.block_sparse_moe.experts.51.w3", "model.layers.6.block_sparse_moe.experts.52.w3", "model.layers.6.block_sparse_moe.experts.53.w3", "model.layers.6.block_sparse_moe.experts.54.w3", "model.layers.6.block_sparse_moe.experts.55.w3", "model.layers.6.block_sparse_moe.experts.56.w3", "model.layers.6.block_sparse_moe.experts.57.w3", "model.layers.6.block_sparse_moe.experts.58.w3", "model.layers.6.block_sparse_moe.experts.59.w3", "model.layers.6.block_sparse_moe.experts.60.w3", "model.layers.6.block_sparse_moe.experts.61.w3", "model.layers.6.block_sparse_moe.experts.62.w3", "model.layers.6.block_sparse_moe.experts.63.w3", "model.layers.6.block_sparse_moe.experts.64.w3", "model.layers.6.block_sparse_moe.experts.65.w3", "model.layers.6.block_sparse_moe.experts.66.w3", "model.layers.6.block_sparse_moe.experts.67.w3", "model.layers.6.block_sparse_moe.experts.68.w3", "model.layers.6.block_sparse_moe.experts.69.w3", "model.layers.6.block_sparse_moe.experts.70.w3", "model.layers.6.block_sparse_moe.experts.71.w3", "model.layers.6.block_sparse_moe.experts.72.w3", "model.layers.6.block_sparse_moe.experts.73.w3", "model.layers.6.block_sparse_moe.experts.74.w3", "model.layers.6.block_sparse_moe.experts.75.w3", "model.layers.6.block_sparse_moe.experts.76.w3", "model.layers.6.block_sparse_moe.experts.77.w3", "model.layers.6.block_sparse_moe.experts.78.w3", "model.layers.6.block_sparse_moe.experts.79.w3", "model.layers.6.block_sparse_moe.experts.80.w3", "model.layers.6.block_sparse_moe.experts.81.w3", "model.layers.6.block_sparse_moe.experts.82.w3", "model.layers.6.block_sparse_moe.experts.83.w3", "model.layers.6.block_sparse_moe.experts.84.w3", "model.layers.6.block_sparse_moe.experts.85.w3", "model.layers.6.block_sparse_moe.experts.86.w3", "model.layers.6.block_sparse_moe.experts.87.w3", "model.layers.6.block_sparse_moe.experts.88.w3", "model.layers.6.block_sparse_moe.experts.89.w3", "model.layers.6.block_sparse_moe.experts.90.w3", "model.layers.6.block_sparse_moe.experts.91.w3", "model.layers.6.block_sparse_moe.experts.92.w3", "model.layers.6.block_sparse_moe.experts.93.w3", "model.layers.6.block_sparse_moe.experts.94.w3", "model.layers.6.block_sparse_moe.experts.95.w3", "model.layers.6.block_sparse_moe.experts.96.w3", "model.layers.6.block_sparse_moe.experts.97.w3", "model.layers.6.block_sparse_moe.experts.98.w3", "model.layers.6.block_sparse_moe.experts.99.w3", "model.layers.6.block_sparse_moe.experts.100.w3", "model.layers.6.block_sparse_moe.experts.101.w3", "model.layers.6.block_sparse_moe.experts.102.w3", "model.layers.6.block_sparse_moe.experts.103.w3", "model.layers.6.block_sparse_moe.experts.104.w3", "model.layers.6.block_sparse_moe.experts.105.w3", "model.layers.6.block_sparse_moe.experts.106.w3", "model.layers.6.block_sparse_moe.experts.107.w3", "model.layers.6.block_sparse_moe.experts.108.w3", "model.layers.6.block_sparse_moe.experts.109.w3", "model.layers.6.block_sparse_moe.experts.110.w3", "model.layers.6.block_sparse_moe.experts.111.w3", "model.layers.6.block_sparse_moe.experts.112.w3", "model.layers.6.block_sparse_moe.experts.113.w3", "model.layers.6.block_sparse_moe.experts.114.w3", "model.layers.6.block_sparse_moe.experts.115.w3", "model.layers.6.block_sparse_moe.experts.116.w3", "model.layers.6.block_sparse_moe.experts.117.w3", "model.layers.6.block_sparse_moe.experts.118.w3", "model.layers.6.block_sparse_moe.experts.119.w3", "model.layers.6.block_sparse_moe.experts.120.w3", "model.layers.6.block_sparse_moe.experts.121.w3", "model.layers.6.block_sparse_moe.experts.122.w3", "model.layers.6.block_sparse_moe.experts.123.w3", "model.layers.6.block_sparse_moe.experts.124.w3", "model.layers.6.block_sparse_moe.experts.125.w3", "model.layers.6.block_sparse_moe.experts.126.w3", "model.layers.6.block_sparse_moe.experts.127.w3", "model.layers.6.block_sparse_moe.experts.128.w3", "model.layers.6.block_sparse_moe.experts.129.w3", "model.layers.6.block_sparse_moe.experts.130.w3", "model.layers.6.block_sparse_moe.experts.131.w3", "model.layers.6.block_sparse_moe.experts.132.w3", "model.layers.6.block_sparse_moe.experts.133.w3", "model.layers.6.block_sparse_moe.experts.134.w3", "model.layers.6.block_sparse_moe.experts.135.w3", "model.layers.6.block_sparse_moe.experts.136.w3", "model.layers.6.block_sparse_moe.experts.137.w3", "model.layers.6.block_sparse_moe.experts.138.w3", "model.layers.6.block_sparse_moe.experts.139.w3", "model.layers.6.block_sparse_moe.experts.140.w3", "model.layers.6.block_sparse_moe.experts.141.w3", "model.layers.6.block_sparse_moe.experts.142.w3", "model.layers.6.block_sparse_moe.experts.143.w3", "model.layers.6.block_sparse_moe.experts.144.w3", "model.layers.6.block_sparse_moe.experts.145.w3", "model.layers.6.block_sparse_moe.experts.146.w3", "model.layers.6.block_sparse_moe.experts.147.w3", "model.layers.6.block_sparse_moe.experts.148.w3", "model.layers.6.block_sparse_moe.experts.149.w3", "model.layers.6.block_sparse_moe.experts.150.w3", "model.layers.6.block_sparse_moe.experts.151.w3", "model.layers.6.block_sparse_moe.experts.152.w3", "model.layers.6.block_sparse_moe.experts.153.w3", "model.layers.6.block_sparse_moe.experts.154.w3", "model.layers.6.block_sparse_moe.experts.155.w3", "model.layers.6.block_sparse_moe.experts.156.w3", "model.layers.6.block_sparse_moe.experts.157.w3", "model.layers.6.block_sparse_moe.experts.158.w3", "model.layers.6.block_sparse_moe.experts.159.w3", "model.layers.6.block_sparse_moe.experts.160.w3", "model.layers.6.block_sparse_moe.experts.161.w3", "model.layers.6.block_sparse_moe.experts.162.w3", "model.layers.6.block_sparse_moe.experts.163.w3", "model.layers.6.block_sparse_moe.experts.164.w3", "model.layers.6.block_sparse_moe.experts.165.w3", "model.layers.6.block_sparse_moe.experts.166.w3", "model.layers.6.block_sparse_moe.experts.167.w3", "model.layers.6.block_sparse_moe.experts.168.w3", "model.layers.6.block_sparse_moe.experts.169.w3", "model.layers.6.block_sparse_moe.experts.170.w3", "model.layers.6.block_sparse_moe.experts.171.w3", "model.layers.6.block_sparse_moe.experts.172.w3", "model.layers.6.block_sparse_moe.experts.173.w3", "model.layers.6.block_sparse_moe.experts.174.w3", "model.layers.6.block_sparse_moe.experts.175.w3", "model.layers.6.block_sparse_moe.experts.176.w3", "model.layers.6.block_sparse_moe.experts.177.w3", "model.layers.6.block_sparse_moe.experts.178.w3", "model.layers.6.block_sparse_moe.experts.179.w3", "model.layers.6.block_sparse_moe.experts.180.w3", "model.layers.6.block_sparse_moe.experts.181.w3", "model.layers.6.block_sparse_moe.experts.182.w3", "model.layers.6.block_sparse_moe.experts.183.w3", "model.layers.6.block_sparse_moe.experts.184.w3", "model.layers.6.block_sparse_moe.experts.185.w3", "model.layers.6.block_sparse_moe.experts.186.w3", "model.layers.6.block_sparse_moe.experts.187.w3", "model.layers.6.block_sparse_moe.experts.188.w3", "model.layers.6.block_sparse_moe.experts.189.w3", "model.layers.6.block_sparse_moe.experts.190.w3", "model.layers.6.block_sparse_moe.experts.191.w3", "model.layers.6.block_sparse_moe.experts.192.w3", "model.layers.6.block_sparse_moe.experts.193.w3", "model.layers.6.block_sparse_moe.experts.194.w3", "model.layers.6.block_sparse_moe.experts.195.w3", "model.layers.6.block_sparse_moe.experts.196.w3", "model.layers.6.block_sparse_moe.experts.197.w3", "model.layers.6.block_sparse_moe.experts.198.w3", "model.layers.6.block_sparse_moe.experts.199.w3", "model.layers.6.block_sparse_moe.experts.200.w3", "model.layers.6.block_sparse_moe.experts.201.w3", "model.layers.6.block_sparse_moe.experts.202.w3", "model.layers.6.block_sparse_moe.experts.203.w3", "model.layers.6.block_sparse_moe.experts.204.w3", "model.layers.6.block_sparse_moe.experts.205.w3", "model.layers.6.block_sparse_moe.experts.206.w3", "model.layers.6.block_sparse_moe.experts.207.w3", "model.layers.6.block_sparse_moe.experts.208.w3", "model.layers.6.block_sparse_moe.experts.209.w3", "model.layers.6.block_sparse_moe.experts.210.w3", "model.layers.6.block_sparse_moe.experts.211.w3", "model.layers.6.block_sparse_moe.experts.212.w3", "model.layers.6.block_sparse_moe.experts.213.w3", "model.layers.6.block_sparse_moe.experts.214.w3", "model.layers.6.block_sparse_moe.experts.215.w3", "model.layers.6.block_sparse_moe.experts.216.w3", "model.layers.6.block_sparse_moe.experts.217.w3", "model.layers.6.block_sparse_moe.experts.218.w3", "model.layers.6.block_sparse_moe.experts.219.w3", "model.layers.6.block_sparse_moe.experts.220.w3", "model.layers.6.block_sparse_moe.experts.221.w3", "model.layers.6.block_sparse_moe.experts.222.w3", "model.layers.6.block_sparse_moe.experts.223.w3", "model.layers.6.block_sparse_moe.experts.224.w3", "model.layers.6.block_sparse_moe.experts.225.w3", "model.layers.6.block_sparse_moe.experts.226.w3", "model.layers.6.block_sparse_moe.experts.227.w3", "model.layers.6.block_sparse_moe.experts.228.w3", "model.layers.6.block_sparse_moe.experts.229.w3", "model.layers.6.block_sparse_moe.experts.230.w3", "model.layers.6.block_sparse_moe.experts.231.w3", "model.layers.6.block_sparse_moe.experts.232.w3", "model.layers.6.block_sparse_moe.experts.233.w3", "model.layers.6.block_sparse_moe.experts.234.w3", "model.layers.6.block_sparse_moe.experts.235.w3", "model.layers.6.block_sparse_moe.experts.236.w3", "model.layers.6.block_sparse_moe.experts.237.w3", "model.layers.6.block_sparse_moe.experts.238.w3", "model.layers.6.block_sparse_moe.experts.239.w3", "model.layers.6.block_sparse_moe.experts.240.w3", "model.layers.6.block_sparse_moe.experts.241.w3", "model.layers.6.block_sparse_moe.experts.242.w3", "model.layers.6.block_sparse_moe.experts.243.w3", "model.layers.6.block_sparse_moe.experts.244.w3", "model.layers.6.block_sparse_moe.experts.245.w3", "model.layers.6.block_sparse_moe.experts.246.w3", "model.layers.6.block_sparse_moe.experts.247.w3", "model.layers.6.block_sparse_moe.experts.248.w3", "model.layers.6.block_sparse_moe.experts.249.w3", "model.layers.6.block_sparse_moe.experts.250.w3", "model.layers.6.block_sparse_moe.experts.251.w3", "model.layers.6.block_sparse_moe.experts.252.w3", "model.layers.6.block_sparse_moe.experts.253.w3", "model.layers.6.block_sparse_moe.experts.254.w3", "model.layers.6.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0040266066789627075, "dbits": 2415919104 } ] }, { "idx": 34, "layers": [ "model.layers.6.block_sparse_moe.experts.0.w2", "model.layers.6.block_sparse_moe.experts.1.w2", "model.layers.6.block_sparse_moe.experts.2.w2", "model.layers.6.block_sparse_moe.experts.3.w2", "model.layers.6.block_sparse_moe.experts.4.w2", "model.layers.6.block_sparse_moe.experts.5.w2", "model.layers.6.block_sparse_moe.experts.6.w2", "model.layers.6.block_sparse_moe.experts.7.w2", "model.layers.6.block_sparse_moe.experts.8.w2", "model.layers.6.block_sparse_moe.experts.9.w2", "model.layers.6.block_sparse_moe.experts.10.w2", "model.layers.6.block_sparse_moe.experts.11.w2", "model.layers.6.block_sparse_moe.experts.12.w2", "model.layers.6.block_sparse_moe.experts.13.w2", "model.layers.6.block_sparse_moe.experts.14.w2", "model.layers.6.block_sparse_moe.experts.15.w2", "model.layers.6.block_sparse_moe.experts.16.w2", "model.layers.6.block_sparse_moe.experts.17.w2", "model.layers.6.block_sparse_moe.experts.18.w2", "model.layers.6.block_sparse_moe.experts.19.w2", "model.layers.6.block_sparse_moe.experts.20.w2", "model.layers.6.block_sparse_moe.experts.21.w2", "model.layers.6.block_sparse_moe.experts.22.w2", "model.layers.6.block_sparse_moe.experts.23.w2", "model.layers.6.block_sparse_moe.experts.24.w2", "model.layers.6.block_sparse_moe.experts.25.w2", "model.layers.6.block_sparse_moe.experts.26.w2", "model.layers.6.block_sparse_moe.experts.27.w2", "model.layers.6.block_sparse_moe.experts.28.w2", "model.layers.6.block_sparse_moe.experts.29.w2", "model.layers.6.block_sparse_moe.experts.30.w2", "model.layers.6.block_sparse_moe.experts.31.w2", "model.layers.6.block_sparse_moe.experts.32.w2", "model.layers.6.block_sparse_moe.experts.33.w2", "model.layers.6.block_sparse_moe.experts.34.w2", "model.layers.6.block_sparse_moe.experts.35.w2", "model.layers.6.block_sparse_moe.experts.36.w2", "model.layers.6.block_sparse_moe.experts.37.w2", "model.layers.6.block_sparse_moe.experts.38.w2", "model.layers.6.block_sparse_moe.experts.39.w2", "model.layers.6.block_sparse_moe.experts.40.w2", "model.layers.6.block_sparse_moe.experts.41.w2", "model.layers.6.block_sparse_moe.experts.42.w2", "model.layers.6.block_sparse_moe.experts.43.w2", "model.layers.6.block_sparse_moe.experts.44.w2", "model.layers.6.block_sparse_moe.experts.45.w2", "model.layers.6.block_sparse_moe.experts.46.w2", "model.layers.6.block_sparse_moe.experts.47.w2", "model.layers.6.block_sparse_moe.experts.48.w2", "model.layers.6.block_sparse_moe.experts.49.w2", "model.layers.6.block_sparse_moe.experts.50.w2", "model.layers.6.block_sparse_moe.experts.51.w2", "model.layers.6.block_sparse_moe.experts.52.w2", "model.layers.6.block_sparse_moe.experts.53.w2", "model.layers.6.block_sparse_moe.experts.54.w2", "model.layers.6.block_sparse_moe.experts.55.w2", "model.layers.6.block_sparse_moe.experts.56.w2", "model.layers.6.block_sparse_moe.experts.57.w2", "model.layers.6.block_sparse_moe.experts.58.w2", "model.layers.6.block_sparse_moe.experts.59.w2", "model.layers.6.block_sparse_moe.experts.60.w2", "model.layers.6.block_sparse_moe.experts.61.w2", "model.layers.6.block_sparse_moe.experts.62.w2", "model.layers.6.block_sparse_moe.experts.63.w2", "model.layers.6.block_sparse_moe.experts.64.w2", "model.layers.6.block_sparse_moe.experts.65.w2", "model.layers.6.block_sparse_moe.experts.66.w2", "model.layers.6.block_sparse_moe.experts.67.w2", "model.layers.6.block_sparse_moe.experts.68.w2", "model.layers.6.block_sparse_moe.experts.69.w2", "model.layers.6.block_sparse_moe.experts.70.w2", "model.layers.6.block_sparse_moe.experts.71.w2", "model.layers.6.block_sparse_moe.experts.72.w2", "model.layers.6.block_sparse_moe.experts.73.w2", "model.layers.6.block_sparse_moe.experts.74.w2", "model.layers.6.block_sparse_moe.experts.75.w2", "model.layers.6.block_sparse_moe.experts.76.w2", "model.layers.6.block_sparse_moe.experts.77.w2", "model.layers.6.block_sparse_moe.experts.78.w2", "model.layers.6.block_sparse_moe.experts.79.w2", "model.layers.6.block_sparse_moe.experts.80.w2", "model.layers.6.block_sparse_moe.experts.81.w2", "model.layers.6.block_sparse_moe.experts.82.w2", "model.layers.6.block_sparse_moe.experts.83.w2", "model.layers.6.block_sparse_moe.experts.84.w2", "model.layers.6.block_sparse_moe.experts.85.w2", "model.layers.6.block_sparse_moe.experts.86.w2", "model.layers.6.block_sparse_moe.experts.87.w2", "model.layers.6.block_sparse_moe.experts.88.w2", "model.layers.6.block_sparse_moe.experts.89.w2", "model.layers.6.block_sparse_moe.experts.90.w2", "model.layers.6.block_sparse_moe.experts.91.w2", "model.layers.6.block_sparse_moe.experts.92.w2", "model.layers.6.block_sparse_moe.experts.93.w2", "model.layers.6.block_sparse_moe.experts.94.w2", "model.layers.6.block_sparse_moe.experts.95.w2", "model.layers.6.block_sparse_moe.experts.96.w2", "model.layers.6.block_sparse_moe.experts.97.w2", "model.layers.6.block_sparse_moe.experts.98.w2", "model.layers.6.block_sparse_moe.experts.99.w2", "model.layers.6.block_sparse_moe.experts.100.w2", "model.layers.6.block_sparse_moe.experts.101.w2", "model.layers.6.block_sparse_moe.experts.102.w2", "model.layers.6.block_sparse_moe.experts.103.w2", "model.layers.6.block_sparse_moe.experts.104.w2", "model.layers.6.block_sparse_moe.experts.105.w2", "model.layers.6.block_sparse_moe.experts.106.w2", "model.layers.6.block_sparse_moe.experts.107.w2", "model.layers.6.block_sparse_moe.experts.108.w2", "model.layers.6.block_sparse_moe.experts.109.w2", "model.layers.6.block_sparse_moe.experts.110.w2", "model.layers.6.block_sparse_moe.experts.111.w2", "model.layers.6.block_sparse_moe.experts.112.w2", "model.layers.6.block_sparse_moe.experts.113.w2", "model.layers.6.block_sparse_moe.experts.114.w2", "model.layers.6.block_sparse_moe.experts.115.w2", "model.layers.6.block_sparse_moe.experts.116.w2", "model.layers.6.block_sparse_moe.experts.117.w2", "model.layers.6.block_sparse_moe.experts.118.w2", "model.layers.6.block_sparse_moe.experts.119.w2", "model.layers.6.block_sparse_moe.experts.120.w2", "model.layers.6.block_sparse_moe.experts.121.w2", "model.layers.6.block_sparse_moe.experts.122.w2", "model.layers.6.block_sparse_moe.experts.123.w2", "model.layers.6.block_sparse_moe.experts.124.w2", "model.layers.6.block_sparse_moe.experts.125.w2", "model.layers.6.block_sparse_moe.experts.126.w2", "model.layers.6.block_sparse_moe.experts.127.w2", "model.layers.6.block_sparse_moe.experts.128.w2", "model.layers.6.block_sparse_moe.experts.129.w2", "model.layers.6.block_sparse_moe.experts.130.w2", "model.layers.6.block_sparse_moe.experts.131.w2", "model.layers.6.block_sparse_moe.experts.132.w2", "model.layers.6.block_sparse_moe.experts.133.w2", "model.layers.6.block_sparse_moe.experts.134.w2", "model.layers.6.block_sparse_moe.experts.135.w2", "model.layers.6.block_sparse_moe.experts.136.w2", "model.layers.6.block_sparse_moe.experts.137.w2", "model.layers.6.block_sparse_moe.experts.138.w2", "model.layers.6.block_sparse_moe.experts.139.w2", "model.layers.6.block_sparse_moe.experts.140.w2", "model.layers.6.block_sparse_moe.experts.141.w2", "model.layers.6.block_sparse_moe.experts.142.w2", "model.layers.6.block_sparse_moe.experts.143.w2", "model.layers.6.block_sparse_moe.experts.144.w2", "model.layers.6.block_sparse_moe.experts.145.w2", "model.layers.6.block_sparse_moe.experts.146.w2", "model.layers.6.block_sparse_moe.experts.147.w2", "model.layers.6.block_sparse_moe.experts.148.w2", "model.layers.6.block_sparse_moe.experts.149.w2", "model.layers.6.block_sparse_moe.experts.150.w2", "model.layers.6.block_sparse_moe.experts.151.w2", "model.layers.6.block_sparse_moe.experts.152.w2", "model.layers.6.block_sparse_moe.experts.153.w2", "model.layers.6.block_sparse_moe.experts.154.w2", "model.layers.6.block_sparse_moe.experts.155.w2", "model.layers.6.block_sparse_moe.experts.156.w2", "model.layers.6.block_sparse_moe.experts.157.w2", "model.layers.6.block_sparse_moe.experts.158.w2", "model.layers.6.block_sparse_moe.experts.159.w2", "model.layers.6.block_sparse_moe.experts.160.w2", "model.layers.6.block_sparse_moe.experts.161.w2", "model.layers.6.block_sparse_moe.experts.162.w2", "model.layers.6.block_sparse_moe.experts.163.w2", "model.layers.6.block_sparse_moe.experts.164.w2", "model.layers.6.block_sparse_moe.experts.165.w2", "model.layers.6.block_sparse_moe.experts.166.w2", "model.layers.6.block_sparse_moe.experts.167.w2", "model.layers.6.block_sparse_moe.experts.168.w2", "model.layers.6.block_sparse_moe.experts.169.w2", "model.layers.6.block_sparse_moe.experts.170.w2", "model.layers.6.block_sparse_moe.experts.171.w2", "model.layers.6.block_sparse_moe.experts.172.w2", "model.layers.6.block_sparse_moe.experts.173.w2", "model.layers.6.block_sparse_moe.experts.174.w2", "model.layers.6.block_sparse_moe.experts.175.w2", "model.layers.6.block_sparse_moe.experts.176.w2", "model.layers.6.block_sparse_moe.experts.177.w2", "model.layers.6.block_sparse_moe.experts.178.w2", "model.layers.6.block_sparse_moe.experts.179.w2", "model.layers.6.block_sparse_moe.experts.180.w2", "model.layers.6.block_sparse_moe.experts.181.w2", "model.layers.6.block_sparse_moe.experts.182.w2", "model.layers.6.block_sparse_moe.experts.183.w2", "model.layers.6.block_sparse_moe.experts.184.w2", "model.layers.6.block_sparse_moe.experts.185.w2", "model.layers.6.block_sparse_moe.experts.186.w2", "model.layers.6.block_sparse_moe.experts.187.w2", "model.layers.6.block_sparse_moe.experts.188.w2", "model.layers.6.block_sparse_moe.experts.189.w2", "model.layers.6.block_sparse_moe.experts.190.w2", "model.layers.6.block_sparse_moe.experts.191.w2", "model.layers.6.block_sparse_moe.experts.192.w2", "model.layers.6.block_sparse_moe.experts.193.w2", "model.layers.6.block_sparse_moe.experts.194.w2", "model.layers.6.block_sparse_moe.experts.195.w2", "model.layers.6.block_sparse_moe.experts.196.w2", "model.layers.6.block_sparse_moe.experts.197.w2", "model.layers.6.block_sparse_moe.experts.198.w2", "model.layers.6.block_sparse_moe.experts.199.w2", "model.layers.6.block_sparse_moe.experts.200.w2", "model.layers.6.block_sparse_moe.experts.201.w2", "model.layers.6.block_sparse_moe.experts.202.w2", "model.layers.6.block_sparse_moe.experts.203.w2", "model.layers.6.block_sparse_moe.experts.204.w2", "model.layers.6.block_sparse_moe.experts.205.w2", "model.layers.6.block_sparse_moe.experts.206.w2", "model.layers.6.block_sparse_moe.experts.207.w2", "model.layers.6.block_sparse_moe.experts.208.w2", "model.layers.6.block_sparse_moe.experts.209.w2", "model.layers.6.block_sparse_moe.experts.210.w2", "model.layers.6.block_sparse_moe.experts.211.w2", "model.layers.6.block_sparse_moe.experts.212.w2", "model.layers.6.block_sparse_moe.experts.213.w2", "model.layers.6.block_sparse_moe.experts.214.w2", "model.layers.6.block_sparse_moe.experts.215.w2", "model.layers.6.block_sparse_moe.experts.216.w2", "model.layers.6.block_sparse_moe.experts.217.w2", "model.layers.6.block_sparse_moe.experts.218.w2", "model.layers.6.block_sparse_moe.experts.219.w2", "model.layers.6.block_sparse_moe.experts.220.w2", "model.layers.6.block_sparse_moe.experts.221.w2", "model.layers.6.block_sparse_moe.experts.222.w2", "model.layers.6.block_sparse_moe.experts.223.w2", "model.layers.6.block_sparse_moe.experts.224.w2", "model.layers.6.block_sparse_moe.experts.225.w2", "model.layers.6.block_sparse_moe.experts.226.w2", "model.layers.6.block_sparse_moe.experts.227.w2", "model.layers.6.block_sparse_moe.experts.228.w2", "model.layers.6.block_sparse_moe.experts.229.w2", "model.layers.6.block_sparse_moe.experts.230.w2", "model.layers.6.block_sparse_moe.experts.231.w2", "model.layers.6.block_sparse_moe.experts.232.w2", "model.layers.6.block_sparse_moe.experts.233.w2", "model.layers.6.block_sparse_moe.experts.234.w2", "model.layers.6.block_sparse_moe.experts.235.w2", "model.layers.6.block_sparse_moe.experts.236.w2", "model.layers.6.block_sparse_moe.experts.237.w2", "model.layers.6.block_sparse_moe.experts.238.w2", "model.layers.6.block_sparse_moe.experts.239.w2", "model.layers.6.block_sparse_moe.experts.240.w2", "model.layers.6.block_sparse_moe.experts.241.w2", "model.layers.6.block_sparse_moe.experts.242.w2", "model.layers.6.block_sparse_moe.experts.243.w2", "model.layers.6.block_sparse_moe.experts.244.w2", "model.layers.6.block_sparse_moe.experts.245.w2", "model.layers.6.block_sparse_moe.experts.246.w2", "model.layers.6.block_sparse_moe.experts.247.w2", "model.layers.6.block_sparse_moe.experts.248.w2", "model.layers.6.block_sparse_moe.experts.249.w2", "model.layers.6.block_sparse_moe.experts.250.w2", "model.layers.6.block_sparse_moe.experts.251.w2", "model.layers.6.block_sparse_moe.experts.252.w2", "model.layers.6.block_sparse_moe.experts.253.w2", "model.layers.6.block_sparse_moe.experts.254.w2", "model.layers.6.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0005289018154144287, "dbits": 1207959552 } ] }, { "idx": 35, "layers": [ "model.layers.7.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00019786655902853667, "dbits": 18874368 } ] }, { "idx": 36, "layers": [ "model.layers.7.self_attn.k_proj", "model.layers.7.self_attn.v_proj" ], "candidates": [ { "dkld": -0.01423711776733394, "dbits": 6291456 } ] }, { "idx": 37, "layers": [ "model.layers.7.self_attn.o_proj" ], "candidates": [ { "dkld": -0.004633364081382707, "dbits": 18874368 } ] }, { "idx": 38, "layers": [ "model.layers.7.block_sparse_moe.experts.0.w1", "model.layers.7.block_sparse_moe.experts.1.w1", "model.layers.7.block_sparse_moe.experts.2.w1", "model.layers.7.block_sparse_moe.experts.3.w1", "model.layers.7.block_sparse_moe.experts.4.w1", "model.layers.7.block_sparse_moe.experts.5.w1", "model.layers.7.block_sparse_moe.experts.6.w1", "model.layers.7.block_sparse_moe.experts.7.w1", "model.layers.7.block_sparse_moe.experts.8.w1", "model.layers.7.block_sparse_moe.experts.9.w1", "model.layers.7.block_sparse_moe.experts.10.w1", "model.layers.7.block_sparse_moe.experts.11.w1", "model.layers.7.block_sparse_moe.experts.12.w1", "model.layers.7.block_sparse_moe.experts.13.w1", "model.layers.7.block_sparse_moe.experts.14.w1", "model.layers.7.block_sparse_moe.experts.15.w1", "model.layers.7.block_sparse_moe.experts.16.w1", "model.layers.7.block_sparse_moe.experts.17.w1", "model.layers.7.block_sparse_moe.experts.18.w1", "model.layers.7.block_sparse_moe.experts.19.w1", "model.layers.7.block_sparse_moe.experts.20.w1", "model.layers.7.block_sparse_moe.experts.21.w1", "model.layers.7.block_sparse_moe.experts.22.w1", "model.layers.7.block_sparse_moe.experts.23.w1", "model.layers.7.block_sparse_moe.experts.24.w1", "model.layers.7.block_sparse_moe.experts.25.w1", "model.layers.7.block_sparse_moe.experts.26.w1", "model.layers.7.block_sparse_moe.experts.27.w1", "model.layers.7.block_sparse_moe.experts.28.w1", "model.layers.7.block_sparse_moe.experts.29.w1", "model.layers.7.block_sparse_moe.experts.30.w1", "model.layers.7.block_sparse_moe.experts.31.w1", "model.layers.7.block_sparse_moe.experts.32.w1", "model.layers.7.block_sparse_moe.experts.33.w1", "model.layers.7.block_sparse_moe.experts.34.w1", "model.layers.7.block_sparse_moe.experts.35.w1", "model.layers.7.block_sparse_moe.experts.36.w1", "model.layers.7.block_sparse_moe.experts.37.w1", "model.layers.7.block_sparse_moe.experts.38.w1", "model.layers.7.block_sparse_moe.experts.39.w1", "model.layers.7.block_sparse_moe.experts.40.w1", "model.layers.7.block_sparse_moe.experts.41.w1", "model.layers.7.block_sparse_moe.experts.42.w1", "model.layers.7.block_sparse_moe.experts.43.w1", "model.layers.7.block_sparse_moe.experts.44.w1", "model.layers.7.block_sparse_moe.experts.45.w1", "model.layers.7.block_sparse_moe.experts.46.w1", "model.layers.7.block_sparse_moe.experts.47.w1", "model.layers.7.block_sparse_moe.experts.48.w1", "model.layers.7.block_sparse_moe.experts.49.w1", "model.layers.7.block_sparse_moe.experts.50.w1", "model.layers.7.block_sparse_moe.experts.51.w1", "model.layers.7.block_sparse_moe.experts.52.w1", "model.layers.7.block_sparse_moe.experts.53.w1", "model.layers.7.block_sparse_moe.experts.54.w1", "model.layers.7.block_sparse_moe.experts.55.w1", "model.layers.7.block_sparse_moe.experts.56.w1", "model.layers.7.block_sparse_moe.experts.57.w1", "model.layers.7.block_sparse_moe.experts.58.w1", "model.layers.7.block_sparse_moe.experts.59.w1", "model.layers.7.block_sparse_moe.experts.60.w1", "model.layers.7.block_sparse_moe.experts.61.w1", "model.layers.7.block_sparse_moe.experts.62.w1", "model.layers.7.block_sparse_moe.experts.63.w1", "model.layers.7.block_sparse_moe.experts.64.w1", "model.layers.7.block_sparse_moe.experts.65.w1", "model.layers.7.block_sparse_moe.experts.66.w1", "model.layers.7.block_sparse_moe.experts.67.w1", "model.layers.7.block_sparse_moe.experts.68.w1", "model.layers.7.block_sparse_moe.experts.69.w1", "model.layers.7.block_sparse_moe.experts.70.w1", "model.layers.7.block_sparse_moe.experts.71.w1", "model.layers.7.block_sparse_moe.experts.72.w1", "model.layers.7.block_sparse_moe.experts.73.w1", "model.layers.7.block_sparse_moe.experts.74.w1", "model.layers.7.block_sparse_moe.experts.75.w1", "model.layers.7.block_sparse_moe.experts.76.w1", "model.layers.7.block_sparse_moe.experts.77.w1", "model.layers.7.block_sparse_moe.experts.78.w1", "model.layers.7.block_sparse_moe.experts.79.w1", "model.layers.7.block_sparse_moe.experts.80.w1", "model.layers.7.block_sparse_moe.experts.81.w1", "model.layers.7.block_sparse_moe.experts.82.w1", "model.layers.7.block_sparse_moe.experts.83.w1", "model.layers.7.block_sparse_moe.experts.84.w1", "model.layers.7.block_sparse_moe.experts.85.w1", "model.layers.7.block_sparse_moe.experts.86.w1", "model.layers.7.block_sparse_moe.experts.87.w1", "model.layers.7.block_sparse_moe.experts.88.w1", "model.layers.7.block_sparse_moe.experts.89.w1", "model.layers.7.block_sparse_moe.experts.90.w1", "model.layers.7.block_sparse_moe.experts.91.w1", "model.layers.7.block_sparse_moe.experts.92.w1", "model.layers.7.block_sparse_moe.experts.93.w1", "model.layers.7.block_sparse_moe.experts.94.w1", "model.layers.7.block_sparse_moe.experts.95.w1", "model.layers.7.block_sparse_moe.experts.96.w1", "model.layers.7.block_sparse_moe.experts.97.w1", "model.layers.7.block_sparse_moe.experts.98.w1", "model.layers.7.block_sparse_moe.experts.99.w1", "model.layers.7.block_sparse_moe.experts.100.w1", "model.layers.7.block_sparse_moe.experts.101.w1", "model.layers.7.block_sparse_moe.experts.102.w1", "model.layers.7.block_sparse_moe.experts.103.w1", "model.layers.7.block_sparse_moe.experts.104.w1", "model.layers.7.block_sparse_moe.experts.105.w1", "model.layers.7.block_sparse_moe.experts.106.w1", "model.layers.7.block_sparse_moe.experts.107.w1", "model.layers.7.block_sparse_moe.experts.108.w1", "model.layers.7.block_sparse_moe.experts.109.w1", "model.layers.7.block_sparse_moe.experts.110.w1", "model.layers.7.block_sparse_moe.experts.111.w1", "model.layers.7.block_sparse_moe.experts.112.w1", "model.layers.7.block_sparse_moe.experts.113.w1", "model.layers.7.block_sparse_moe.experts.114.w1", "model.layers.7.block_sparse_moe.experts.115.w1", "model.layers.7.block_sparse_moe.experts.116.w1", "model.layers.7.block_sparse_moe.experts.117.w1", "model.layers.7.block_sparse_moe.experts.118.w1", "model.layers.7.block_sparse_moe.experts.119.w1", "model.layers.7.block_sparse_moe.experts.120.w1", "model.layers.7.block_sparse_moe.experts.121.w1", "model.layers.7.block_sparse_moe.experts.122.w1", "model.layers.7.block_sparse_moe.experts.123.w1", "model.layers.7.block_sparse_moe.experts.124.w1", "model.layers.7.block_sparse_moe.experts.125.w1", "model.layers.7.block_sparse_moe.experts.126.w1", "model.layers.7.block_sparse_moe.experts.127.w1", "model.layers.7.block_sparse_moe.experts.128.w1", "model.layers.7.block_sparse_moe.experts.129.w1", "model.layers.7.block_sparse_moe.experts.130.w1", "model.layers.7.block_sparse_moe.experts.131.w1", "model.layers.7.block_sparse_moe.experts.132.w1", "model.layers.7.block_sparse_moe.experts.133.w1", "model.layers.7.block_sparse_moe.experts.134.w1", "model.layers.7.block_sparse_moe.experts.135.w1", "model.layers.7.block_sparse_moe.experts.136.w1", "model.layers.7.block_sparse_moe.experts.137.w1", "model.layers.7.block_sparse_moe.experts.138.w1", "model.layers.7.block_sparse_moe.experts.139.w1", "model.layers.7.block_sparse_moe.experts.140.w1", "model.layers.7.block_sparse_moe.experts.141.w1", "model.layers.7.block_sparse_moe.experts.142.w1", "model.layers.7.block_sparse_moe.experts.143.w1", "model.layers.7.block_sparse_moe.experts.144.w1", "model.layers.7.block_sparse_moe.experts.145.w1", "model.layers.7.block_sparse_moe.experts.146.w1", "model.layers.7.block_sparse_moe.experts.147.w1", "model.layers.7.block_sparse_moe.experts.148.w1", "model.layers.7.block_sparse_moe.experts.149.w1", "model.layers.7.block_sparse_moe.experts.150.w1", "model.layers.7.block_sparse_moe.experts.151.w1", "model.layers.7.block_sparse_moe.experts.152.w1", "model.layers.7.block_sparse_moe.experts.153.w1", "model.layers.7.block_sparse_moe.experts.154.w1", "model.layers.7.block_sparse_moe.experts.155.w1", "model.layers.7.block_sparse_moe.experts.156.w1", "model.layers.7.block_sparse_moe.experts.157.w1", "model.layers.7.block_sparse_moe.experts.158.w1", "model.layers.7.block_sparse_moe.experts.159.w1", "model.layers.7.block_sparse_moe.experts.160.w1", "model.layers.7.block_sparse_moe.experts.161.w1", "model.layers.7.block_sparse_moe.experts.162.w1", "model.layers.7.block_sparse_moe.experts.163.w1", "model.layers.7.block_sparse_moe.experts.164.w1", "model.layers.7.block_sparse_moe.experts.165.w1", "model.layers.7.block_sparse_moe.experts.166.w1", "model.layers.7.block_sparse_moe.experts.167.w1", "model.layers.7.block_sparse_moe.experts.168.w1", "model.layers.7.block_sparse_moe.experts.169.w1", "model.layers.7.block_sparse_moe.experts.170.w1", "model.layers.7.block_sparse_moe.experts.171.w1", "model.layers.7.block_sparse_moe.experts.172.w1", "model.layers.7.block_sparse_moe.experts.173.w1", "model.layers.7.block_sparse_moe.experts.174.w1", "model.layers.7.block_sparse_moe.experts.175.w1", "model.layers.7.block_sparse_moe.experts.176.w1", "model.layers.7.block_sparse_moe.experts.177.w1", "model.layers.7.block_sparse_moe.experts.178.w1", "model.layers.7.block_sparse_moe.experts.179.w1", "model.layers.7.block_sparse_moe.experts.180.w1", "model.layers.7.block_sparse_moe.experts.181.w1", "model.layers.7.block_sparse_moe.experts.182.w1", "model.layers.7.block_sparse_moe.experts.183.w1", "model.layers.7.block_sparse_moe.experts.184.w1", "model.layers.7.block_sparse_moe.experts.185.w1", "model.layers.7.block_sparse_moe.experts.186.w1", "model.layers.7.block_sparse_moe.experts.187.w1", "model.layers.7.block_sparse_moe.experts.188.w1", "model.layers.7.block_sparse_moe.experts.189.w1", "model.layers.7.block_sparse_moe.experts.190.w1", "model.layers.7.block_sparse_moe.experts.191.w1", "model.layers.7.block_sparse_moe.experts.192.w1", "model.layers.7.block_sparse_moe.experts.193.w1", "model.layers.7.block_sparse_moe.experts.194.w1", "model.layers.7.block_sparse_moe.experts.195.w1", "model.layers.7.block_sparse_moe.experts.196.w1", "model.layers.7.block_sparse_moe.experts.197.w1", "model.layers.7.block_sparse_moe.experts.198.w1", "model.layers.7.block_sparse_moe.experts.199.w1", "model.layers.7.block_sparse_moe.experts.200.w1", "model.layers.7.block_sparse_moe.experts.201.w1", "model.layers.7.block_sparse_moe.experts.202.w1", "model.layers.7.block_sparse_moe.experts.203.w1", "model.layers.7.block_sparse_moe.experts.204.w1", "model.layers.7.block_sparse_moe.experts.205.w1", "model.layers.7.block_sparse_moe.experts.206.w1", "model.layers.7.block_sparse_moe.experts.207.w1", "model.layers.7.block_sparse_moe.experts.208.w1", "model.layers.7.block_sparse_moe.experts.209.w1", "model.layers.7.block_sparse_moe.experts.210.w1", "model.layers.7.block_sparse_moe.experts.211.w1", "model.layers.7.block_sparse_moe.experts.212.w1", "model.layers.7.block_sparse_moe.experts.213.w1", "model.layers.7.block_sparse_moe.experts.214.w1", "model.layers.7.block_sparse_moe.experts.215.w1", "model.layers.7.block_sparse_moe.experts.216.w1", "model.layers.7.block_sparse_moe.experts.217.w1", "model.layers.7.block_sparse_moe.experts.218.w1", "model.layers.7.block_sparse_moe.experts.219.w1", "model.layers.7.block_sparse_moe.experts.220.w1", "model.layers.7.block_sparse_moe.experts.221.w1", "model.layers.7.block_sparse_moe.experts.222.w1", "model.layers.7.block_sparse_moe.experts.223.w1", "model.layers.7.block_sparse_moe.experts.224.w1", "model.layers.7.block_sparse_moe.experts.225.w1", "model.layers.7.block_sparse_moe.experts.226.w1", "model.layers.7.block_sparse_moe.experts.227.w1", "model.layers.7.block_sparse_moe.experts.228.w1", "model.layers.7.block_sparse_moe.experts.229.w1", "model.layers.7.block_sparse_moe.experts.230.w1", "model.layers.7.block_sparse_moe.experts.231.w1", "model.layers.7.block_sparse_moe.experts.232.w1", "model.layers.7.block_sparse_moe.experts.233.w1", "model.layers.7.block_sparse_moe.experts.234.w1", "model.layers.7.block_sparse_moe.experts.235.w1", "model.layers.7.block_sparse_moe.experts.236.w1", "model.layers.7.block_sparse_moe.experts.237.w1", "model.layers.7.block_sparse_moe.experts.238.w1", "model.layers.7.block_sparse_moe.experts.239.w1", "model.layers.7.block_sparse_moe.experts.240.w1", "model.layers.7.block_sparse_moe.experts.241.w1", "model.layers.7.block_sparse_moe.experts.242.w1", "model.layers.7.block_sparse_moe.experts.243.w1", "model.layers.7.block_sparse_moe.experts.244.w1", "model.layers.7.block_sparse_moe.experts.245.w1", "model.layers.7.block_sparse_moe.experts.246.w1", "model.layers.7.block_sparse_moe.experts.247.w1", "model.layers.7.block_sparse_moe.experts.248.w1", "model.layers.7.block_sparse_moe.experts.249.w1", "model.layers.7.block_sparse_moe.experts.250.w1", "model.layers.7.block_sparse_moe.experts.251.w1", "model.layers.7.block_sparse_moe.experts.252.w1", "model.layers.7.block_sparse_moe.experts.253.w1", "model.layers.7.block_sparse_moe.experts.254.w1", "model.layers.7.block_sparse_moe.experts.255.w1", "model.layers.7.block_sparse_moe.experts.0.w3", "model.layers.7.block_sparse_moe.experts.1.w3", "model.layers.7.block_sparse_moe.experts.2.w3", "model.layers.7.block_sparse_moe.experts.3.w3", "model.layers.7.block_sparse_moe.experts.4.w3", "model.layers.7.block_sparse_moe.experts.5.w3", "model.layers.7.block_sparse_moe.experts.6.w3", "model.layers.7.block_sparse_moe.experts.7.w3", "model.layers.7.block_sparse_moe.experts.8.w3", "model.layers.7.block_sparse_moe.experts.9.w3", "model.layers.7.block_sparse_moe.experts.10.w3", "model.layers.7.block_sparse_moe.experts.11.w3", "model.layers.7.block_sparse_moe.experts.12.w3", "model.layers.7.block_sparse_moe.experts.13.w3", "model.layers.7.block_sparse_moe.experts.14.w3", "model.layers.7.block_sparse_moe.experts.15.w3", "model.layers.7.block_sparse_moe.experts.16.w3", "model.layers.7.block_sparse_moe.experts.17.w3", "model.layers.7.block_sparse_moe.experts.18.w3", "model.layers.7.block_sparse_moe.experts.19.w3", "model.layers.7.block_sparse_moe.experts.20.w3", "model.layers.7.block_sparse_moe.experts.21.w3", "model.layers.7.block_sparse_moe.experts.22.w3", "model.layers.7.block_sparse_moe.experts.23.w3", "model.layers.7.block_sparse_moe.experts.24.w3", "model.layers.7.block_sparse_moe.experts.25.w3", "model.layers.7.block_sparse_moe.experts.26.w3", "model.layers.7.block_sparse_moe.experts.27.w3", "model.layers.7.block_sparse_moe.experts.28.w3", "model.layers.7.block_sparse_moe.experts.29.w3", "model.layers.7.block_sparse_moe.experts.30.w3", "model.layers.7.block_sparse_moe.experts.31.w3", "model.layers.7.block_sparse_moe.experts.32.w3", "model.layers.7.block_sparse_moe.experts.33.w3", "model.layers.7.block_sparse_moe.experts.34.w3", "model.layers.7.block_sparse_moe.experts.35.w3", "model.layers.7.block_sparse_moe.experts.36.w3", "model.layers.7.block_sparse_moe.experts.37.w3", "model.layers.7.block_sparse_moe.experts.38.w3", "model.layers.7.block_sparse_moe.experts.39.w3", "model.layers.7.block_sparse_moe.experts.40.w3", "model.layers.7.block_sparse_moe.experts.41.w3", "model.layers.7.block_sparse_moe.experts.42.w3", "model.layers.7.block_sparse_moe.experts.43.w3", "model.layers.7.block_sparse_moe.experts.44.w3", "model.layers.7.block_sparse_moe.experts.45.w3", "model.layers.7.block_sparse_moe.experts.46.w3", "model.layers.7.block_sparse_moe.experts.47.w3", "model.layers.7.block_sparse_moe.experts.48.w3", "model.layers.7.block_sparse_moe.experts.49.w3", "model.layers.7.block_sparse_moe.experts.50.w3", "model.layers.7.block_sparse_moe.experts.51.w3", "model.layers.7.block_sparse_moe.experts.52.w3", "model.layers.7.block_sparse_moe.experts.53.w3", "model.layers.7.block_sparse_moe.experts.54.w3", "model.layers.7.block_sparse_moe.experts.55.w3", "model.layers.7.block_sparse_moe.experts.56.w3", "model.layers.7.block_sparse_moe.experts.57.w3", "model.layers.7.block_sparse_moe.experts.58.w3", "model.layers.7.block_sparse_moe.experts.59.w3", "model.layers.7.block_sparse_moe.experts.60.w3", "model.layers.7.block_sparse_moe.experts.61.w3", "model.layers.7.block_sparse_moe.experts.62.w3", "model.layers.7.block_sparse_moe.experts.63.w3", "model.layers.7.block_sparse_moe.experts.64.w3", "model.layers.7.block_sparse_moe.experts.65.w3", "model.layers.7.block_sparse_moe.experts.66.w3", "model.layers.7.block_sparse_moe.experts.67.w3", "model.layers.7.block_sparse_moe.experts.68.w3", "model.layers.7.block_sparse_moe.experts.69.w3", "model.layers.7.block_sparse_moe.experts.70.w3", "model.layers.7.block_sparse_moe.experts.71.w3", "model.layers.7.block_sparse_moe.experts.72.w3", "model.layers.7.block_sparse_moe.experts.73.w3", "model.layers.7.block_sparse_moe.experts.74.w3", "model.layers.7.block_sparse_moe.experts.75.w3", "model.layers.7.block_sparse_moe.experts.76.w3", "model.layers.7.block_sparse_moe.experts.77.w3", "model.layers.7.block_sparse_moe.experts.78.w3", "model.layers.7.block_sparse_moe.experts.79.w3", "model.layers.7.block_sparse_moe.experts.80.w3", "model.layers.7.block_sparse_moe.experts.81.w3", "model.layers.7.block_sparse_moe.experts.82.w3", "model.layers.7.block_sparse_moe.experts.83.w3", "model.layers.7.block_sparse_moe.experts.84.w3", "model.layers.7.block_sparse_moe.experts.85.w3", "model.layers.7.block_sparse_moe.experts.86.w3", "model.layers.7.block_sparse_moe.experts.87.w3", "model.layers.7.block_sparse_moe.experts.88.w3", "model.layers.7.block_sparse_moe.experts.89.w3", "model.layers.7.block_sparse_moe.experts.90.w3", "model.layers.7.block_sparse_moe.experts.91.w3", "model.layers.7.block_sparse_moe.experts.92.w3", "model.layers.7.block_sparse_moe.experts.93.w3", "model.layers.7.block_sparse_moe.experts.94.w3", "model.layers.7.block_sparse_moe.experts.95.w3", "model.layers.7.block_sparse_moe.experts.96.w3", "model.layers.7.block_sparse_moe.experts.97.w3", "model.layers.7.block_sparse_moe.experts.98.w3", "model.layers.7.block_sparse_moe.experts.99.w3", "model.layers.7.block_sparse_moe.experts.100.w3", "model.layers.7.block_sparse_moe.experts.101.w3", "model.layers.7.block_sparse_moe.experts.102.w3", "model.layers.7.block_sparse_moe.experts.103.w3", "model.layers.7.block_sparse_moe.experts.104.w3", "model.layers.7.block_sparse_moe.experts.105.w3", "model.layers.7.block_sparse_moe.experts.106.w3", "model.layers.7.block_sparse_moe.experts.107.w3", "model.layers.7.block_sparse_moe.experts.108.w3", "model.layers.7.block_sparse_moe.experts.109.w3", "model.layers.7.block_sparse_moe.experts.110.w3", "model.layers.7.block_sparse_moe.experts.111.w3", "model.layers.7.block_sparse_moe.experts.112.w3", "model.layers.7.block_sparse_moe.experts.113.w3", "model.layers.7.block_sparse_moe.experts.114.w3", "model.layers.7.block_sparse_moe.experts.115.w3", "model.layers.7.block_sparse_moe.experts.116.w3", "model.layers.7.block_sparse_moe.experts.117.w3", "model.layers.7.block_sparse_moe.experts.118.w3", "model.layers.7.block_sparse_moe.experts.119.w3", "model.layers.7.block_sparse_moe.experts.120.w3", "model.layers.7.block_sparse_moe.experts.121.w3", "model.layers.7.block_sparse_moe.experts.122.w3", "model.layers.7.block_sparse_moe.experts.123.w3", "model.layers.7.block_sparse_moe.experts.124.w3", "model.layers.7.block_sparse_moe.experts.125.w3", "model.layers.7.block_sparse_moe.experts.126.w3", "model.layers.7.block_sparse_moe.experts.127.w3", "model.layers.7.block_sparse_moe.experts.128.w3", "model.layers.7.block_sparse_moe.experts.129.w3", "model.layers.7.block_sparse_moe.experts.130.w3", "model.layers.7.block_sparse_moe.experts.131.w3", "model.layers.7.block_sparse_moe.experts.132.w3", "model.layers.7.block_sparse_moe.experts.133.w3", "model.layers.7.block_sparse_moe.experts.134.w3", "model.layers.7.block_sparse_moe.experts.135.w3", "model.layers.7.block_sparse_moe.experts.136.w3", "model.layers.7.block_sparse_moe.experts.137.w3", "model.layers.7.block_sparse_moe.experts.138.w3", "model.layers.7.block_sparse_moe.experts.139.w3", "model.layers.7.block_sparse_moe.experts.140.w3", "model.layers.7.block_sparse_moe.experts.141.w3", "model.layers.7.block_sparse_moe.experts.142.w3", "model.layers.7.block_sparse_moe.experts.143.w3", "model.layers.7.block_sparse_moe.experts.144.w3", "model.layers.7.block_sparse_moe.experts.145.w3", "model.layers.7.block_sparse_moe.experts.146.w3", "model.layers.7.block_sparse_moe.experts.147.w3", "model.layers.7.block_sparse_moe.experts.148.w3", "model.layers.7.block_sparse_moe.experts.149.w3", "model.layers.7.block_sparse_moe.experts.150.w3", "model.layers.7.block_sparse_moe.experts.151.w3", "model.layers.7.block_sparse_moe.experts.152.w3", "model.layers.7.block_sparse_moe.experts.153.w3", "model.layers.7.block_sparse_moe.experts.154.w3", "model.layers.7.block_sparse_moe.experts.155.w3", "model.layers.7.block_sparse_moe.experts.156.w3", "model.layers.7.block_sparse_moe.experts.157.w3", "model.layers.7.block_sparse_moe.experts.158.w3", "model.layers.7.block_sparse_moe.experts.159.w3", "model.layers.7.block_sparse_moe.experts.160.w3", "model.layers.7.block_sparse_moe.experts.161.w3", "model.layers.7.block_sparse_moe.experts.162.w3", "model.layers.7.block_sparse_moe.experts.163.w3", "model.layers.7.block_sparse_moe.experts.164.w3", "model.layers.7.block_sparse_moe.experts.165.w3", "model.layers.7.block_sparse_moe.experts.166.w3", "model.layers.7.block_sparse_moe.experts.167.w3", "model.layers.7.block_sparse_moe.experts.168.w3", "model.layers.7.block_sparse_moe.experts.169.w3", "model.layers.7.block_sparse_moe.experts.170.w3", "model.layers.7.block_sparse_moe.experts.171.w3", "model.layers.7.block_sparse_moe.experts.172.w3", "model.layers.7.block_sparse_moe.experts.173.w3", "model.layers.7.block_sparse_moe.experts.174.w3", "model.layers.7.block_sparse_moe.experts.175.w3", "model.layers.7.block_sparse_moe.experts.176.w3", "model.layers.7.block_sparse_moe.experts.177.w3", "model.layers.7.block_sparse_moe.experts.178.w3", "model.layers.7.block_sparse_moe.experts.179.w3", "model.layers.7.block_sparse_moe.experts.180.w3", "model.layers.7.block_sparse_moe.experts.181.w3", "model.layers.7.block_sparse_moe.experts.182.w3", "model.layers.7.block_sparse_moe.experts.183.w3", "model.layers.7.block_sparse_moe.experts.184.w3", "model.layers.7.block_sparse_moe.experts.185.w3", "model.layers.7.block_sparse_moe.experts.186.w3", "model.layers.7.block_sparse_moe.experts.187.w3", "model.layers.7.block_sparse_moe.experts.188.w3", "model.layers.7.block_sparse_moe.experts.189.w3", "model.layers.7.block_sparse_moe.experts.190.w3", "model.layers.7.block_sparse_moe.experts.191.w3", "model.layers.7.block_sparse_moe.experts.192.w3", "model.layers.7.block_sparse_moe.experts.193.w3", "model.layers.7.block_sparse_moe.experts.194.w3", "model.layers.7.block_sparse_moe.experts.195.w3", "model.layers.7.block_sparse_moe.experts.196.w3", "model.layers.7.block_sparse_moe.experts.197.w3", "model.layers.7.block_sparse_moe.experts.198.w3", "model.layers.7.block_sparse_moe.experts.199.w3", "model.layers.7.block_sparse_moe.experts.200.w3", "model.layers.7.block_sparse_moe.experts.201.w3", "model.layers.7.block_sparse_moe.experts.202.w3", "model.layers.7.block_sparse_moe.experts.203.w3", "model.layers.7.block_sparse_moe.experts.204.w3", "model.layers.7.block_sparse_moe.experts.205.w3", "model.layers.7.block_sparse_moe.experts.206.w3", "model.layers.7.block_sparse_moe.experts.207.w3", "model.layers.7.block_sparse_moe.experts.208.w3", "model.layers.7.block_sparse_moe.experts.209.w3", "model.layers.7.block_sparse_moe.experts.210.w3", "model.layers.7.block_sparse_moe.experts.211.w3", "model.layers.7.block_sparse_moe.experts.212.w3", "model.layers.7.block_sparse_moe.experts.213.w3", "model.layers.7.block_sparse_moe.experts.214.w3", "model.layers.7.block_sparse_moe.experts.215.w3", "model.layers.7.block_sparse_moe.experts.216.w3", "model.layers.7.block_sparse_moe.experts.217.w3", "model.layers.7.block_sparse_moe.experts.218.w3", "model.layers.7.block_sparse_moe.experts.219.w3", "model.layers.7.block_sparse_moe.experts.220.w3", "model.layers.7.block_sparse_moe.experts.221.w3", "model.layers.7.block_sparse_moe.experts.222.w3", "model.layers.7.block_sparse_moe.experts.223.w3", "model.layers.7.block_sparse_moe.experts.224.w3", "model.layers.7.block_sparse_moe.experts.225.w3", "model.layers.7.block_sparse_moe.experts.226.w3", "model.layers.7.block_sparse_moe.experts.227.w3", "model.layers.7.block_sparse_moe.experts.228.w3", "model.layers.7.block_sparse_moe.experts.229.w3", "model.layers.7.block_sparse_moe.experts.230.w3", "model.layers.7.block_sparse_moe.experts.231.w3", "model.layers.7.block_sparse_moe.experts.232.w3", "model.layers.7.block_sparse_moe.experts.233.w3", "model.layers.7.block_sparse_moe.experts.234.w3", "model.layers.7.block_sparse_moe.experts.235.w3", "model.layers.7.block_sparse_moe.experts.236.w3", "model.layers.7.block_sparse_moe.experts.237.w3", "model.layers.7.block_sparse_moe.experts.238.w3", "model.layers.7.block_sparse_moe.experts.239.w3", "model.layers.7.block_sparse_moe.experts.240.w3", "model.layers.7.block_sparse_moe.experts.241.w3", "model.layers.7.block_sparse_moe.experts.242.w3", "model.layers.7.block_sparse_moe.experts.243.w3", "model.layers.7.block_sparse_moe.experts.244.w3", "model.layers.7.block_sparse_moe.experts.245.w3", "model.layers.7.block_sparse_moe.experts.246.w3", "model.layers.7.block_sparse_moe.experts.247.w3", "model.layers.7.block_sparse_moe.experts.248.w3", "model.layers.7.block_sparse_moe.experts.249.w3", "model.layers.7.block_sparse_moe.experts.250.w3", "model.layers.7.block_sparse_moe.experts.251.w3", "model.layers.7.block_sparse_moe.experts.252.w3", "model.layers.7.block_sparse_moe.experts.253.w3", "model.layers.7.block_sparse_moe.experts.254.w3", "model.layers.7.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0013945400714874268, "dbits": 2415919104 } ] }, { "idx": 39, "layers": [ "model.layers.7.block_sparse_moe.experts.0.w2", "model.layers.7.block_sparse_moe.experts.1.w2", "model.layers.7.block_sparse_moe.experts.2.w2", "model.layers.7.block_sparse_moe.experts.3.w2", "model.layers.7.block_sparse_moe.experts.4.w2", "model.layers.7.block_sparse_moe.experts.5.w2", "model.layers.7.block_sparse_moe.experts.6.w2", "model.layers.7.block_sparse_moe.experts.7.w2", "model.layers.7.block_sparse_moe.experts.8.w2", "model.layers.7.block_sparse_moe.experts.9.w2", "model.layers.7.block_sparse_moe.experts.10.w2", "model.layers.7.block_sparse_moe.experts.11.w2", "model.layers.7.block_sparse_moe.experts.12.w2", "model.layers.7.block_sparse_moe.experts.13.w2", "model.layers.7.block_sparse_moe.experts.14.w2", "model.layers.7.block_sparse_moe.experts.15.w2", "model.layers.7.block_sparse_moe.experts.16.w2", "model.layers.7.block_sparse_moe.experts.17.w2", "model.layers.7.block_sparse_moe.experts.18.w2", "model.layers.7.block_sparse_moe.experts.19.w2", "model.layers.7.block_sparse_moe.experts.20.w2", "model.layers.7.block_sparse_moe.experts.21.w2", "model.layers.7.block_sparse_moe.experts.22.w2", "model.layers.7.block_sparse_moe.experts.23.w2", "model.layers.7.block_sparse_moe.experts.24.w2", "model.layers.7.block_sparse_moe.experts.25.w2", "model.layers.7.block_sparse_moe.experts.26.w2", "model.layers.7.block_sparse_moe.experts.27.w2", "model.layers.7.block_sparse_moe.experts.28.w2", "model.layers.7.block_sparse_moe.experts.29.w2", "model.layers.7.block_sparse_moe.experts.30.w2", "model.layers.7.block_sparse_moe.experts.31.w2", "model.layers.7.block_sparse_moe.experts.32.w2", "model.layers.7.block_sparse_moe.experts.33.w2", "model.layers.7.block_sparse_moe.experts.34.w2", "model.layers.7.block_sparse_moe.experts.35.w2", "model.layers.7.block_sparse_moe.experts.36.w2", "model.layers.7.block_sparse_moe.experts.37.w2", "model.layers.7.block_sparse_moe.experts.38.w2", "model.layers.7.block_sparse_moe.experts.39.w2", "model.layers.7.block_sparse_moe.experts.40.w2", "model.layers.7.block_sparse_moe.experts.41.w2", "model.layers.7.block_sparse_moe.experts.42.w2", "model.layers.7.block_sparse_moe.experts.43.w2", "model.layers.7.block_sparse_moe.experts.44.w2", "model.layers.7.block_sparse_moe.experts.45.w2", "model.layers.7.block_sparse_moe.experts.46.w2", "model.layers.7.block_sparse_moe.experts.47.w2", "model.layers.7.block_sparse_moe.experts.48.w2", "model.layers.7.block_sparse_moe.experts.49.w2", "model.layers.7.block_sparse_moe.experts.50.w2", "model.layers.7.block_sparse_moe.experts.51.w2", "model.layers.7.block_sparse_moe.experts.52.w2", "model.layers.7.block_sparse_moe.experts.53.w2", "model.layers.7.block_sparse_moe.experts.54.w2", "model.layers.7.block_sparse_moe.experts.55.w2", "model.layers.7.block_sparse_moe.experts.56.w2", "model.layers.7.block_sparse_moe.experts.57.w2", "model.layers.7.block_sparse_moe.experts.58.w2", "model.layers.7.block_sparse_moe.experts.59.w2", "model.layers.7.block_sparse_moe.experts.60.w2", "model.layers.7.block_sparse_moe.experts.61.w2", "model.layers.7.block_sparse_moe.experts.62.w2", "model.layers.7.block_sparse_moe.experts.63.w2", "model.layers.7.block_sparse_moe.experts.64.w2", "model.layers.7.block_sparse_moe.experts.65.w2", "model.layers.7.block_sparse_moe.experts.66.w2", "model.layers.7.block_sparse_moe.experts.67.w2", "model.layers.7.block_sparse_moe.experts.68.w2", "model.layers.7.block_sparse_moe.experts.69.w2", "model.layers.7.block_sparse_moe.experts.70.w2", "model.layers.7.block_sparse_moe.experts.71.w2", "model.layers.7.block_sparse_moe.experts.72.w2", "model.layers.7.block_sparse_moe.experts.73.w2", "model.layers.7.block_sparse_moe.experts.74.w2", "model.layers.7.block_sparse_moe.experts.75.w2", "model.layers.7.block_sparse_moe.experts.76.w2", "model.layers.7.block_sparse_moe.experts.77.w2", "model.layers.7.block_sparse_moe.experts.78.w2", "model.layers.7.block_sparse_moe.experts.79.w2", "model.layers.7.block_sparse_moe.experts.80.w2", "model.layers.7.block_sparse_moe.experts.81.w2", "model.layers.7.block_sparse_moe.experts.82.w2", "model.layers.7.block_sparse_moe.experts.83.w2", "model.layers.7.block_sparse_moe.experts.84.w2", "model.layers.7.block_sparse_moe.experts.85.w2", "model.layers.7.block_sparse_moe.experts.86.w2", "model.layers.7.block_sparse_moe.experts.87.w2", "model.layers.7.block_sparse_moe.experts.88.w2", "model.layers.7.block_sparse_moe.experts.89.w2", "model.layers.7.block_sparse_moe.experts.90.w2", "model.layers.7.block_sparse_moe.experts.91.w2", "model.layers.7.block_sparse_moe.experts.92.w2", "model.layers.7.block_sparse_moe.experts.93.w2", "model.layers.7.block_sparse_moe.experts.94.w2", "model.layers.7.block_sparse_moe.experts.95.w2", "model.layers.7.block_sparse_moe.experts.96.w2", "model.layers.7.block_sparse_moe.experts.97.w2", "model.layers.7.block_sparse_moe.experts.98.w2", "model.layers.7.block_sparse_moe.experts.99.w2", "model.layers.7.block_sparse_moe.experts.100.w2", "model.layers.7.block_sparse_moe.experts.101.w2", "model.layers.7.block_sparse_moe.experts.102.w2", "model.layers.7.block_sparse_moe.experts.103.w2", "model.layers.7.block_sparse_moe.experts.104.w2", "model.layers.7.block_sparse_moe.experts.105.w2", "model.layers.7.block_sparse_moe.experts.106.w2", "model.layers.7.block_sparse_moe.experts.107.w2", "model.layers.7.block_sparse_moe.experts.108.w2", "model.layers.7.block_sparse_moe.experts.109.w2", "model.layers.7.block_sparse_moe.experts.110.w2", "model.layers.7.block_sparse_moe.experts.111.w2", "model.layers.7.block_sparse_moe.experts.112.w2", "model.layers.7.block_sparse_moe.experts.113.w2", "model.layers.7.block_sparse_moe.experts.114.w2", "model.layers.7.block_sparse_moe.experts.115.w2", "model.layers.7.block_sparse_moe.experts.116.w2", "model.layers.7.block_sparse_moe.experts.117.w2", "model.layers.7.block_sparse_moe.experts.118.w2", "model.layers.7.block_sparse_moe.experts.119.w2", "model.layers.7.block_sparse_moe.experts.120.w2", "model.layers.7.block_sparse_moe.experts.121.w2", "model.layers.7.block_sparse_moe.experts.122.w2", "model.layers.7.block_sparse_moe.experts.123.w2", "model.layers.7.block_sparse_moe.experts.124.w2", "model.layers.7.block_sparse_moe.experts.125.w2", "model.layers.7.block_sparse_moe.experts.126.w2", "model.layers.7.block_sparse_moe.experts.127.w2", "model.layers.7.block_sparse_moe.experts.128.w2", "model.layers.7.block_sparse_moe.experts.129.w2", "model.layers.7.block_sparse_moe.experts.130.w2", "model.layers.7.block_sparse_moe.experts.131.w2", "model.layers.7.block_sparse_moe.experts.132.w2", "model.layers.7.block_sparse_moe.experts.133.w2", "model.layers.7.block_sparse_moe.experts.134.w2", "model.layers.7.block_sparse_moe.experts.135.w2", "model.layers.7.block_sparse_moe.experts.136.w2", "model.layers.7.block_sparse_moe.experts.137.w2", "model.layers.7.block_sparse_moe.experts.138.w2", "model.layers.7.block_sparse_moe.experts.139.w2", "model.layers.7.block_sparse_moe.experts.140.w2", "model.layers.7.block_sparse_moe.experts.141.w2", "model.layers.7.block_sparse_moe.experts.142.w2", "model.layers.7.block_sparse_moe.experts.143.w2", "model.layers.7.block_sparse_moe.experts.144.w2", "model.layers.7.block_sparse_moe.experts.145.w2", "model.layers.7.block_sparse_moe.experts.146.w2", "model.layers.7.block_sparse_moe.experts.147.w2", "model.layers.7.block_sparse_moe.experts.148.w2", "model.layers.7.block_sparse_moe.experts.149.w2", "model.layers.7.block_sparse_moe.experts.150.w2", "model.layers.7.block_sparse_moe.experts.151.w2", "model.layers.7.block_sparse_moe.experts.152.w2", "model.layers.7.block_sparse_moe.experts.153.w2", "model.layers.7.block_sparse_moe.experts.154.w2", "model.layers.7.block_sparse_moe.experts.155.w2", "model.layers.7.block_sparse_moe.experts.156.w2", "model.layers.7.block_sparse_moe.experts.157.w2", "model.layers.7.block_sparse_moe.experts.158.w2", "model.layers.7.block_sparse_moe.experts.159.w2", "model.layers.7.block_sparse_moe.experts.160.w2", "model.layers.7.block_sparse_moe.experts.161.w2", "model.layers.7.block_sparse_moe.experts.162.w2", "model.layers.7.block_sparse_moe.experts.163.w2", "model.layers.7.block_sparse_moe.experts.164.w2", "model.layers.7.block_sparse_moe.experts.165.w2", "model.layers.7.block_sparse_moe.experts.166.w2", "model.layers.7.block_sparse_moe.experts.167.w2", "model.layers.7.block_sparse_moe.experts.168.w2", "model.layers.7.block_sparse_moe.experts.169.w2", "model.layers.7.block_sparse_moe.experts.170.w2", "model.layers.7.block_sparse_moe.experts.171.w2", "model.layers.7.block_sparse_moe.experts.172.w2", "model.layers.7.block_sparse_moe.experts.173.w2", "model.layers.7.block_sparse_moe.experts.174.w2", "model.layers.7.block_sparse_moe.experts.175.w2", "model.layers.7.block_sparse_moe.experts.176.w2", "model.layers.7.block_sparse_moe.experts.177.w2", "model.layers.7.block_sparse_moe.experts.178.w2", "model.layers.7.block_sparse_moe.experts.179.w2", "model.layers.7.block_sparse_moe.experts.180.w2", "model.layers.7.block_sparse_moe.experts.181.w2", "model.layers.7.block_sparse_moe.experts.182.w2", "model.layers.7.block_sparse_moe.experts.183.w2", "model.layers.7.block_sparse_moe.experts.184.w2", "model.layers.7.block_sparse_moe.experts.185.w2", "model.layers.7.block_sparse_moe.experts.186.w2", "model.layers.7.block_sparse_moe.experts.187.w2", "model.layers.7.block_sparse_moe.experts.188.w2", "model.layers.7.block_sparse_moe.experts.189.w2", "model.layers.7.block_sparse_moe.experts.190.w2", "model.layers.7.block_sparse_moe.experts.191.w2", "model.layers.7.block_sparse_moe.experts.192.w2", "model.layers.7.block_sparse_moe.experts.193.w2", "model.layers.7.block_sparse_moe.experts.194.w2", "model.layers.7.block_sparse_moe.experts.195.w2", "model.layers.7.block_sparse_moe.experts.196.w2", "model.layers.7.block_sparse_moe.experts.197.w2", "model.layers.7.block_sparse_moe.experts.198.w2", "model.layers.7.block_sparse_moe.experts.199.w2", "model.layers.7.block_sparse_moe.experts.200.w2", "model.layers.7.block_sparse_moe.experts.201.w2", "model.layers.7.block_sparse_moe.experts.202.w2", "model.layers.7.block_sparse_moe.experts.203.w2", "model.layers.7.block_sparse_moe.experts.204.w2", "model.layers.7.block_sparse_moe.experts.205.w2", "model.layers.7.block_sparse_moe.experts.206.w2", "model.layers.7.block_sparse_moe.experts.207.w2", "model.layers.7.block_sparse_moe.experts.208.w2", "model.layers.7.block_sparse_moe.experts.209.w2", "model.layers.7.block_sparse_moe.experts.210.w2", "model.layers.7.block_sparse_moe.experts.211.w2", "model.layers.7.block_sparse_moe.experts.212.w2", "model.layers.7.block_sparse_moe.experts.213.w2", "model.layers.7.block_sparse_moe.experts.214.w2", "model.layers.7.block_sparse_moe.experts.215.w2", "model.layers.7.block_sparse_moe.experts.216.w2", "model.layers.7.block_sparse_moe.experts.217.w2", "model.layers.7.block_sparse_moe.experts.218.w2", "model.layers.7.block_sparse_moe.experts.219.w2", "model.layers.7.block_sparse_moe.experts.220.w2", "model.layers.7.block_sparse_moe.experts.221.w2", "model.layers.7.block_sparse_moe.experts.222.w2", "model.layers.7.block_sparse_moe.experts.223.w2", "model.layers.7.block_sparse_moe.experts.224.w2", "model.layers.7.block_sparse_moe.experts.225.w2", "model.layers.7.block_sparse_moe.experts.226.w2", "model.layers.7.block_sparse_moe.experts.227.w2", "model.layers.7.block_sparse_moe.experts.228.w2", "model.layers.7.block_sparse_moe.experts.229.w2", "model.layers.7.block_sparse_moe.experts.230.w2", "model.layers.7.block_sparse_moe.experts.231.w2", "model.layers.7.block_sparse_moe.experts.232.w2", "model.layers.7.block_sparse_moe.experts.233.w2", "model.layers.7.block_sparse_moe.experts.234.w2", "model.layers.7.block_sparse_moe.experts.235.w2", "model.layers.7.block_sparse_moe.experts.236.w2", "model.layers.7.block_sparse_moe.experts.237.w2", "model.layers.7.block_sparse_moe.experts.238.w2", "model.layers.7.block_sparse_moe.experts.239.w2", "model.layers.7.block_sparse_moe.experts.240.w2", "model.layers.7.block_sparse_moe.experts.241.w2", "model.layers.7.block_sparse_moe.experts.242.w2", "model.layers.7.block_sparse_moe.experts.243.w2", "model.layers.7.block_sparse_moe.experts.244.w2", "model.layers.7.block_sparse_moe.experts.245.w2", "model.layers.7.block_sparse_moe.experts.246.w2", "model.layers.7.block_sparse_moe.experts.247.w2", "model.layers.7.block_sparse_moe.experts.248.w2", "model.layers.7.block_sparse_moe.experts.249.w2", "model.layers.7.block_sparse_moe.experts.250.w2", "model.layers.7.block_sparse_moe.experts.251.w2", "model.layers.7.block_sparse_moe.experts.252.w2", "model.layers.7.block_sparse_moe.experts.253.w2", "model.layers.7.block_sparse_moe.experts.254.w2", "model.layers.7.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0010280027985571927, "dbits": 1207959552 } ] }, { "idx": 40, "layers": [ "model.layers.8.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0002789884805678433, "dbits": 18874368 } ] }, { "idx": 41, "layers": [ "model.layers.8.self_attn.k_proj", "model.layers.8.self_attn.v_proj" ], "candidates": [ { "dkld": -0.002524924278259233, "dbits": 6291456 } ] }, { "idx": 42, "layers": [ "model.layers.8.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0065887302160262395, "dbits": 18874368 } ] }, { "idx": 43, "layers": [ "model.layers.8.block_sparse_moe.experts.0.w1", "model.layers.8.block_sparse_moe.experts.1.w1", "model.layers.8.block_sparse_moe.experts.2.w1", "model.layers.8.block_sparse_moe.experts.3.w1", "model.layers.8.block_sparse_moe.experts.4.w1", "model.layers.8.block_sparse_moe.experts.5.w1", "model.layers.8.block_sparse_moe.experts.6.w1", "model.layers.8.block_sparse_moe.experts.7.w1", "model.layers.8.block_sparse_moe.experts.8.w1", "model.layers.8.block_sparse_moe.experts.9.w1", "model.layers.8.block_sparse_moe.experts.10.w1", "model.layers.8.block_sparse_moe.experts.11.w1", "model.layers.8.block_sparse_moe.experts.12.w1", "model.layers.8.block_sparse_moe.experts.13.w1", "model.layers.8.block_sparse_moe.experts.14.w1", "model.layers.8.block_sparse_moe.experts.15.w1", "model.layers.8.block_sparse_moe.experts.16.w1", "model.layers.8.block_sparse_moe.experts.17.w1", "model.layers.8.block_sparse_moe.experts.18.w1", "model.layers.8.block_sparse_moe.experts.19.w1", "model.layers.8.block_sparse_moe.experts.20.w1", "model.layers.8.block_sparse_moe.experts.21.w1", "model.layers.8.block_sparse_moe.experts.22.w1", "model.layers.8.block_sparse_moe.experts.23.w1", "model.layers.8.block_sparse_moe.experts.24.w1", "model.layers.8.block_sparse_moe.experts.25.w1", "model.layers.8.block_sparse_moe.experts.26.w1", "model.layers.8.block_sparse_moe.experts.27.w1", "model.layers.8.block_sparse_moe.experts.28.w1", "model.layers.8.block_sparse_moe.experts.29.w1", "model.layers.8.block_sparse_moe.experts.30.w1", "model.layers.8.block_sparse_moe.experts.31.w1", "model.layers.8.block_sparse_moe.experts.32.w1", "model.layers.8.block_sparse_moe.experts.33.w1", "model.layers.8.block_sparse_moe.experts.34.w1", "model.layers.8.block_sparse_moe.experts.35.w1", "model.layers.8.block_sparse_moe.experts.36.w1", "model.layers.8.block_sparse_moe.experts.37.w1", "model.layers.8.block_sparse_moe.experts.38.w1", "model.layers.8.block_sparse_moe.experts.39.w1", "model.layers.8.block_sparse_moe.experts.40.w1", "model.layers.8.block_sparse_moe.experts.41.w1", "model.layers.8.block_sparse_moe.experts.42.w1", "model.layers.8.block_sparse_moe.experts.43.w1", "model.layers.8.block_sparse_moe.experts.44.w1", "model.layers.8.block_sparse_moe.experts.45.w1", "model.layers.8.block_sparse_moe.experts.46.w1", "model.layers.8.block_sparse_moe.experts.47.w1", "model.layers.8.block_sparse_moe.experts.48.w1", "model.layers.8.block_sparse_moe.experts.49.w1", "model.layers.8.block_sparse_moe.experts.50.w1", "model.layers.8.block_sparse_moe.experts.51.w1", "model.layers.8.block_sparse_moe.experts.52.w1", "model.layers.8.block_sparse_moe.experts.53.w1", "model.layers.8.block_sparse_moe.experts.54.w1", "model.layers.8.block_sparse_moe.experts.55.w1", "model.layers.8.block_sparse_moe.experts.56.w1", "model.layers.8.block_sparse_moe.experts.57.w1", "model.layers.8.block_sparse_moe.experts.58.w1", "model.layers.8.block_sparse_moe.experts.59.w1", "model.layers.8.block_sparse_moe.experts.60.w1", "model.layers.8.block_sparse_moe.experts.61.w1", "model.layers.8.block_sparse_moe.experts.62.w1", "model.layers.8.block_sparse_moe.experts.63.w1", "model.layers.8.block_sparse_moe.experts.64.w1", "model.layers.8.block_sparse_moe.experts.65.w1", "model.layers.8.block_sparse_moe.experts.66.w1", "model.layers.8.block_sparse_moe.experts.67.w1", "model.layers.8.block_sparse_moe.experts.68.w1", "model.layers.8.block_sparse_moe.experts.69.w1", "model.layers.8.block_sparse_moe.experts.70.w1", "model.layers.8.block_sparse_moe.experts.71.w1", "model.layers.8.block_sparse_moe.experts.72.w1", "model.layers.8.block_sparse_moe.experts.73.w1", "model.layers.8.block_sparse_moe.experts.74.w1", "model.layers.8.block_sparse_moe.experts.75.w1", "model.layers.8.block_sparse_moe.experts.76.w1", "model.layers.8.block_sparse_moe.experts.77.w1", "model.layers.8.block_sparse_moe.experts.78.w1", "model.layers.8.block_sparse_moe.experts.79.w1", "model.layers.8.block_sparse_moe.experts.80.w1", "model.layers.8.block_sparse_moe.experts.81.w1", "model.layers.8.block_sparse_moe.experts.82.w1", "model.layers.8.block_sparse_moe.experts.83.w1", "model.layers.8.block_sparse_moe.experts.84.w1", "model.layers.8.block_sparse_moe.experts.85.w1", "model.layers.8.block_sparse_moe.experts.86.w1", "model.layers.8.block_sparse_moe.experts.87.w1", "model.layers.8.block_sparse_moe.experts.88.w1", "model.layers.8.block_sparse_moe.experts.89.w1", "model.layers.8.block_sparse_moe.experts.90.w1", "model.layers.8.block_sparse_moe.experts.91.w1", "model.layers.8.block_sparse_moe.experts.92.w1", "model.layers.8.block_sparse_moe.experts.93.w1", "model.layers.8.block_sparse_moe.experts.94.w1", "model.layers.8.block_sparse_moe.experts.95.w1", "model.layers.8.block_sparse_moe.experts.96.w1", "model.layers.8.block_sparse_moe.experts.97.w1", "model.layers.8.block_sparse_moe.experts.98.w1", "model.layers.8.block_sparse_moe.experts.99.w1", "model.layers.8.block_sparse_moe.experts.100.w1", "model.layers.8.block_sparse_moe.experts.101.w1", "model.layers.8.block_sparse_moe.experts.102.w1", "model.layers.8.block_sparse_moe.experts.103.w1", "model.layers.8.block_sparse_moe.experts.104.w1", "model.layers.8.block_sparse_moe.experts.105.w1", "model.layers.8.block_sparse_moe.experts.106.w1", "model.layers.8.block_sparse_moe.experts.107.w1", "model.layers.8.block_sparse_moe.experts.108.w1", "model.layers.8.block_sparse_moe.experts.109.w1", "model.layers.8.block_sparse_moe.experts.110.w1", "model.layers.8.block_sparse_moe.experts.111.w1", "model.layers.8.block_sparse_moe.experts.112.w1", "model.layers.8.block_sparse_moe.experts.113.w1", "model.layers.8.block_sparse_moe.experts.114.w1", "model.layers.8.block_sparse_moe.experts.115.w1", "model.layers.8.block_sparse_moe.experts.116.w1", "model.layers.8.block_sparse_moe.experts.117.w1", "model.layers.8.block_sparse_moe.experts.118.w1", "model.layers.8.block_sparse_moe.experts.119.w1", "model.layers.8.block_sparse_moe.experts.120.w1", "model.layers.8.block_sparse_moe.experts.121.w1", "model.layers.8.block_sparse_moe.experts.122.w1", "model.layers.8.block_sparse_moe.experts.123.w1", "model.layers.8.block_sparse_moe.experts.124.w1", "model.layers.8.block_sparse_moe.experts.125.w1", "model.layers.8.block_sparse_moe.experts.126.w1", "model.layers.8.block_sparse_moe.experts.127.w1", "model.layers.8.block_sparse_moe.experts.128.w1", "model.layers.8.block_sparse_moe.experts.129.w1", "model.layers.8.block_sparse_moe.experts.130.w1", "model.layers.8.block_sparse_moe.experts.131.w1", "model.layers.8.block_sparse_moe.experts.132.w1", "model.layers.8.block_sparse_moe.experts.133.w1", "model.layers.8.block_sparse_moe.experts.134.w1", "model.layers.8.block_sparse_moe.experts.135.w1", "model.layers.8.block_sparse_moe.experts.136.w1", "model.layers.8.block_sparse_moe.experts.137.w1", "model.layers.8.block_sparse_moe.experts.138.w1", "model.layers.8.block_sparse_moe.experts.139.w1", "model.layers.8.block_sparse_moe.experts.140.w1", "model.layers.8.block_sparse_moe.experts.141.w1", "model.layers.8.block_sparse_moe.experts.142.w1", "model.layers.8.block_sparse_moe.experts.143.w1", "model.layers.8.block_sparse_moe.experts.144.w1", "model.layers.8.block_sparse_moe.experts.145.w1", "model.layers.8.block_sparse_moe.experts.146.w1", "model.layers.8.block_sparse_moe.experts.147.w1", "model.layers.8.block_sparse_moe.experts.148.w1", "model.layers.8.block_sparse_moe.experts.149.w1", "model.layers.8.block_sparse_moe.experts.150.w1", "model.layers.8.block_sparse_moe.experts.151.w1", "model.layers.8.block_sparse_moe.experts.152.w1", "model.layers.8.block_sparse_moe.experts.153.w1", "model.layers.8.block_sparse_moe.experts.154.w1", "model.layers.8.block_sparse_moe.experts.155.w1", "model.layers.8.block_sparse_moe.experts.156.w1", "model.layers.8.block_sparse_moe.experts.157.w1", "model.layers.8.block_sparse_moe.experts.158.w1", "model.layers.8.block_sparse_moe.experts.159.w1", "model.layers.8.block_sparse_moe.experts.160.w1", "model.layers.8.block_sparse_moe.experts.161.w1", "model.layers.8.block_sparse_moe.experts.162.w1", "model.layers.8.block_sparse_moe.experts.163.w1", "model.layers.8.block_sparse_moe.experts.164.w1", "model.layers.8.block_sparse_moe.experts.165.w1", "model.layers.8.block_sparse_moe.experts.166.w1", "model.layers.8.block_sparse_moe.experts.167.w1", "model.layers.8.block_sparse_moe.experts.168.w1", "model.layers.8.block_sparse_moe.experts.169.w1", "model.layers.8.block_sparse_moe.experts.170.w1", "model.layers.8.block_sparse_moe.experts.171.w1", "model.layers.8.block_sparse_moe.experts.172.w1", "model.layers.8.block_sparse_moe.experts.173.w1", "model.layers.8.block_sparse_moe.experts.174.w1", "model.layers.8.block_sparse_moe.experts.175.w1", "model.layers.8.block_sparse_moe.experts.176.w1", "model.layers.8.block_sparse_moe.experts.177.w1", "model.layers.8.block_sparse_moe.experts.178.w1", "model.layers.8.block_sparse_moe.experts.179.w1", "model.layers.8.block_sparse_moe.experts.180.w1", "model.layers.8.block_sparse_moe.experts.181.w1", "model.layers.8.block_sparse_moe.experts.182.w1", "model.layers.8.block_sparse_moe.experts.183.w1", "model.layers.8.block_sparse_moe.experts.184.w1", "model.layers.8.block_sparse_moe.experts.185.w1", "model.layers.8.block_sparse_moe.experts.186.w1", "model.layers.8.block_sparse_moe.experts.187.w1", "model.layers.8.block_sparse_moe.experts.188.w1", "model.layers.8.block_sparse_moe.experts.189.w1", "model.layers.8.block_sparse_moe.experts.190.w1", "model.layers.8.block_sparse_moe.experts.191.w1", "model.layers.8.block_sparse_moe.experts.192.w1", "model.layers.8.block_sparse_moe.experts.193.w1", "model.layers.8.block_sparse_moe.experts.194.w1", "model.layers.8.block_sparse_moe.experts.195.w1", "model.layers.8.block_sparse_moe.experts.196.w1", "model.layers.8.block_sparse_moe.experts.197.w1", "model.layers.8.block_sparse_moe.experts.198.w1", "model.layers.8.block_sparse_moe.experts.199.w1", "model.layers.8.block_sparse_moe.experts.200.w1", "model.layers.8.block_sparse_moe.experts.201.w1", "model.layers.8.block_sparse_moe.experts.202.w1", "model.layers.8.block_sparse_moe.experts.203.w1", "model.layers.8.block_sparse_moe.experts.204.w1", "model.layers.8.block_sparse_moe.experts.205.w1", "model.layers.8.block_sparse_moe.experts.206.w1", "model.layers.8.block_sparse_moe.experts.207.w1", "model.layers.8.block_sparse_moe.experts.208.w1", "model.layers.8.block_sparse_moe.experts.209.w1", "model.layers.8.block_sparse_moe.experts.210.w1", "model.layers.8.block_sparse_moe.experts.211.w1", "model.layers.8.block_sparse_moe.experts.212.w1", "model.layers.8.block_sparse_moe.experts.213.w1", "model.layers.8.block_sparse_moe.experts.214.w1", "model.layers.8.block_sparse_moe.experts.215.w1", "model.layers.8.block_sparse_moe.experts.216.w1", "model.layers.8.block_sparse_moe.experts.217.w1", "model.layers.8.block_sparse_moe.experts.218.w1", "model.layers.8.block_sparse_moe.experts.219.w1", "model.layers.8.block_sparse_moe.experts.220.w1", "model.layers.8.block_sparse_moe.experts.221.w1", "model.layers.8.block_sparse_moe.experts.222.w1", "model.layers.8.block_sparse_moe.experts.223.w1", "model.layers.8.block_sparse_moe.experts.224.w1", "model.layers.8.block_sparse_moe.experts.225.w1", "model.layers.8.block_sparse_moe.experts.226.w1", "model.layers.8.block_sparse_moe.experts.227.w1", "model.layers.8.block_sparse_moe.experts.228.w1", "model.layers.8.block_sparse_moe.experts.229.w1", "model.layers.8.block_sparse_moe.experts.230.w1", "model.layers.8.block_sparse_moe.experts.231.w1", "model.layers.8.block_sparse_moe.experts.232.w1", "model.layers.8.block_sparse_moe.experts.233.w1", "model.layers.8.block_sparse_moe.experts.234.w1", "model.layers.8.block_sparse_moe.experts.235.w1", "model.layers.8.block_sparse_moe.experts.236.w1", "model.layers.8.block_sparse_moe.experts.237.w1", "model.layers.8.block_sparse_moe.experts.238.w1", "model.layers.8.block_sparse_moe.experts.239.w1", "model.layers.8.block_sparse_moe.experts.240.w1", "model.layers.8.block_sparse_moe.experts.241.w1", "model.layers.8.block_sparse_moe.experts.242.w1", "model.layers.8.block_sparse_moe.experts.243.w1", "model.layers.8.block_sparse_moe.experts.244.w1", "model.layers.8.block_sparse_moe.experts.245.w1", "model.layers.8.block_sparse_moe.experts.246.w1", "model.layers.8.block_sparse_moe.experts.247.w1", "model.layers.8.block_sparse_moe.experts.248.w1", "model.layers.8.block_sparse_moe.experts.249.w1", "model.layers.8.block_sparse_moe.experts.250.w1", "model.layers.8.block_sparse_moe.experts.251.w1", "model.layers.8.block_sparse_moe.experts.252.w1", "model.layers.8.block_sparse_moe.experts.253.w1", "model.layers.8.block_sparse_moe.experts.254.w1", "model.layers.8.block_sparse_moe.experts.255.w1", "model.layers.8.block_sparse_moe.experts.0.w3", "model.layers.8.block_sparse_moe.experts.1.w3", "model.layers.8.block_sparse_moe.experts.2.w3", "model.layers.8.block_sparse_moe.experts.3.w3", "model.layers.8.block_sparse_moe.experts.4.w3", "model.layers.8.block_sparse_moe.experts.5.w3", "model.layers.8.block_sparse_moe.experts.6.w3", "model.layers.8.block_sparse_moe.experts.7.w3", "model.layers.8.block_sparse_moe.experts.8.w3", "model.layers.8.block_sparse_moe.experts.9.w3", "model.layers.8.block_sparse_moe.experts.10.w3", "model.layers.8.block_sparse_moe.experts.11.w3", "model.layers.8.block_sparse_moe.experts.12.w3", "model.layers.8.block_sparse_moe.experts.13.w3", "model.layers.8.block_sparse_moe.experts.14.w3", "model.layers.8.block_sparse_moe.experts.15.w3", "model.layers.8.block_sparse_moe.experts.16.w3", "model.layers.8.block_sparse_moe.experts.17.w3", "model.layers.8.block_sparse_moe.experts.18.w3", "model.layers.8.block_sparse_moe.experts.19.w3", "model.layers.8.block_sparse_moe.experts.20.w3", "model.layers.8.block_sparse_moe.experts.21.w3", "model.layers.8.block_sparse_moe.experts.22.w3", "model.layers.8.block_sparse_moe.experts.23.w3", "model.layers.8.block_sparse_moe.experts.24.w3", "model.layers.8.block_sparse_moe.experts.25.w3", "model.layers.8.block_sparse_moe.experts.26.w3", "model.layers.8.block_sparse_moe.experts.27.w3", "model.layers.8.block_sparse_moe.experts.28.w3", "model.layers.8.block_sparse_moe.experts.29.w3", "model.layers.8.block_sparse_moe.experts.30.w3", "model.layers.8.block_sparse_moe.experts.31.w3", "model.layers.8.block_sparse_moe.experts.32.w3", "model.layers.8.block_sparse_moe.experts.33.w3", "model.layers.8.block_sparse_moe.experts.34.w3", "model.layers.8.block_sparse_moe.experts.35.w3", "model.layers.8.block_sparse_moe.experts.36.w3", "model.layers.8.block_sparse_moe.experts.37.w3", "model.layers.8.block_sparse_moe.experts.38.w3", "model.layers.8.block_sparse_moe.experts.39.w3", "model.layers.8.block_sparse_moe.experts.40.w3", "model.layers.8.block_sparse_moe.experts.41.w3", "model.layers.8.block_sparse_moe.experts.42.w3", "model.layers.8.block_sparse_moe.experts.43.w3", "model.layers.8.block_sparse_moe.experts.44.w3", "model.layers.8.block_sparse_moe.experts.45.w3", "model.layers.8.block_sparse_moe.experts.46.w3", "model.layers.8.block_sparse_moe.experts.47.w3", "model.layers.8.block_sparse_moe.experts.48.w3", "model.layers.8.block_sparse_moe.experts.49.w3", "model.layers.8.block_sparse_moe.experts.50.w3", "model.layers.8.block_sparse_moe.experts.51.w3", "model.layers.8.block_sparse_moe.experts.52.w3", "model.layers.8.block_sparse_moe.experts.53.w3", "model.layers.8.block_sparse_moe.experts.54.w3", "model.layers.8.block_sparse_moe.experts.55.w3", "model.layers.8.block_sparse_moe.experts.56.w3", "model.layers.8.block_sparse_moe.experts.57.w3", "model.layers.8.block_sparse_moe.experts.58.w3", "model.layers.8.block_sparse_moe.experts.59.w3", "model.layers.8.block_sparse_moe.experts.60.w3", "model.layers.8.block_sparse_moe.experts.61.w3", "model.layers.8.block_sparse_moe.experts.62.w3", "model.layers.8.block_sparse_moe.experts.63.w3", "model.layers.8.block_sparse_moe.experts.64.w3", "model.layers.8.block_sparse_moe.experts.65.w3", "model.layers.8.block_sparse_moe.experts.66.w3", "model.layers.8.block_sparse_moe.experts.67.w3", "model.layers.8.block_sparse_moe.experts.68.w3", "model.layers.8.block_sparse_moe.experts.69.w3", "model.layers.8.block_sparse_moe.experts.70.w3", "model.layers.8.block_sparse_moe.experts.71.w3", "model.layers.8.block_sparse_moe.experts.72.w3", "model.layers.8.block_sparse_moe.experts.73.w3", "model.layers.8.block_sparse_moe.experts.74.w3", "model.layers.8.block_sparse_moe.experts.75.w3", "model.layers.8.block_sparse_moe.experts.76.w3", "model.layers.8.block_sparse_moe.experts.77.w3", "model.layers.8.block_sparse_moe.experts.78.w3", "model.layers.8.block_sparse_moe.experts.79.w3", "model.layers.8.block_sparse_moe.experts.80.w3", "model.layers.8.block_sparse_moe.experts.81.w3", "model.layers.8.block_sparse_moe.experts.82.w3", "model.layers.8.block_sparse_moe.experts.83.w3", "model.layers.8.block_sparse_moe.experts.84.w3", "model.layers.8.block_sparse_moe.experts.85.w3", "model.layers.8.block_sparse_moe.experts.86.w3", "model.layers.8.block_sparse_moe.experts.87.w3", "model.layers.8.block_sparse_moe.experts.88.w3", "model.layers.8.block_sparse_moe.experts.89.w3", "model.layers.8.block_sparse_moe.experts.90.w3", "model.layers.8.block_sparse_moe.experts.91.w3", "model.layers.8.block_sparse_moe.experts.92.w3", "model.layers.8.block_sparse_moe.experts.93.w3", "model.layers.8.block_sparse_moe.experts.94.w3", "model.layers.8.block_sparse_moe.experts.95.w3", "model.layers.8.block_sparse_moe.experts.96.w3", "model.layers.8.block_sparse_moe.experts.97.w3", "model.layers.8.block_sparse_moe.experts.98.w3", "model.layers.8.block_sparse_moe.experts.99.w3", "model.layers.8.block_sparse_moe.experts.100.w3", "model.layers.8.block_sparse_moe.experts.101.w3", "model.layers.8.block_sparse_moe.experts.102.w3", "model.layers.8.block_sparse_moe.experts.103.w3", "model.layers.8.block_sparse_moe.experts.104.w3", "model.layers.8.block_sparse_moe.experts.105.w3", "model.layers.8.block_sparse_moe.experts.106.w3", "model.layers.8.block_sparse_moe.experts.107.w3", "model.layers.8.block_sparse_moe.experts.108.w3", "model.layers.8.block_sparse_moe.experts.109.w3", "model.layers.8.block_sparse_moe.experts.110.w3", "model.layers.8.block_sparse_moe.experts.111.w3", "model.layers.8.block_sparse_moe.experts.112.w3", "model.layers.8.block_sparse_moe.experts.113.w3", "model.layers.8.block_sparse_moe.experts.114.w3", "model.layers.8.block_sparse_moe.experts.115.w3", "model.layers.8.block_sparse_moe.experts.116.w3", "model.layers.8.block_sparse_moe.experts.117.w3", "model.layers.8.block_sparse_moe.experts.118.w3", "model.layers.8.block_sparse_moe.experts.119.w3", "model.layers.8.block_sparse_moe.experts.120.w3", "model.layers.8.block_sparse_moe.experts.121.w3", "model.layers.8.block_sparse_moe.experts.122.w3", "model.layers.8.block_sparse_moe.experts.123.w3", "model.layers.8.block_sparse_moe.experts.124.w3", "model.layers.8.block_sparse_moe.experts.125.w3", "model.layers.8.block_sparse_moe.experts.126.w3", "model.layers.8.block_sparse_moe.experts.127.w3", "model.layers.8.block_sparse_moe.experts.128.w3", "model.layers.8.block_sparse_moe.experts.129.w3", "model.layers.8.block_sparse_moe.experts.130.w3", "model.layers.8.block_sparse_moe.experts.131.w3", "model.layers.8.block_sparse_moe.experts.132.w3", "model.layers.8.block_sparse_moe.experts.133.w3", "model.layers.8.block_sparse_moe.experts.134.w3", "model.layers.8.block_sparse_moe.experts.135.w3", "model.layers.8.block_sparse_moe.experts.136.w3", "model.layers.8.block_sparse_moe.experts.137.w3", "model.layers.8.block_sparse_moe.experts.138.w3", "model.layers.8.block_sparse_moe.experts.139.w3", "model.layers.8.block_sparse_moe.experts.140.w3", "model.layers.8.block_sparse_moe.experts.141.w3", "model.layers.8.block_sparse_moe.experts.142.w3", "model.layers.8.block_sparse_moe.experts.143.w3", "model.layers.8.block_sparse_moe.experts.144.w3", "model.layers.8.block_sparse_moe.experts.145.w3", "model.layers.8.block_sparse_moe.experts.146.w3", "model.layers.8.block_sparse_moe.experts.147.w3", "model.layers.8.block_sparse_moe.experts.148.w3", "model.layers.8.block_sparse_moe.experts.149.w3", "model.layers.8.block_sparse_moe.experts.150.w3", "model.layers.8.block_sparse_moe.experts.151.w3", "model.layers.8.block_sparse_moe.experts.152.w3", "model.layers.8.block_sparse_moe.experts.153.w3", "model.layers.8.block_sparse_moe.experts.154.w3", "model.layers.8.block_sparse_moe.experts.155.w3", "model.layers.8.block_sparse_moe.experts.156.w3", "model.layers.8.block_sparse_moe.experts.157.w3", "model.layers.8.block_sparse_moe.experts.158.w3", "model.layers.8.block_sparse_moe.experts.159.w3", "model.layers.8.block_sparse_moe.experts.160.w3", "model.layers.8.block_sparse_moe.experts.161.w3", "model.layers.8.block_sparse_moe.experts.162.w3", "model.layers.8.block_sparse_moe.experts.163.w3", "model.layers.8.block_sparse_moe.experts.164.w3", "model.layers.8.block_sparse_moe.experts.165.w3", "model.layers.8.block_sparse_moe.experts.166.w3", "model.layers.8.block_sparse_moe.experts.167.w3", "model.layers.8.block_sparse_moe.experts.168.w3", "model.layers.8.block_sparse_moe.experts.169.w3", "model.layers.8.block_sparse_moe.experts.170.w3", "model.layers.8.block_sparse_moe.experts.171.w3", "model.layers.8.block_sparse_moe.experts.172.w3", "model.layers.8.block_sparse_moe.experts.173.w3", "model.layers.8.block_sparse_moe.experts.174.w3", "model.layers.8.block_sparse_moe.experts.175.w3", "model.layers.8.block_sparse_moe.experts.176.w3", "model.layers.8.block_sparse_moe.experts.177.w3", "model.layers.8.block_sparse_moe.experts.178.w3", "model.layers.8.block_sparse_moe.experts.179.w3", "model.layers.8.block_sparse_moe.experts.180.w3", "model.layers.8.block_sparse_moe.experts.181.w3", "model.layers.8.block_sparse_moe.experts.182.w3", "model.layers.8.block_sparse_moe.experts.183.w3", "model.layers.8.block_sparse_moe.experts.184.w3", "model.layers.8.block_sparse_moe.experts.185.w3", "model.layers.8.block_sparse_moe.experts.186.w3", "model.layers.8.block_sparse_moe.experts.187.w3", "model.layers.8.block_sparse_moe.experts.188.w3", "model.layers.8.block_sparse_moe.experts.189.w3", "model.layers.8.block_sparse_moe.experts.190.w3", "model.layers.8.block_sparse_moe.experts.191.w3", "model.layers.8.block_sparse_moe.experts.192.w3", "model.layers.8.block_sparse_moe.experts.193.w3", "model.layers.8.block_sparse_moe.experts.194.w3", "model.layers.8.block_sparse_moe.experts.195.w3", "model.layers.8.block_sparse_moe.experts.196.w3", "model.layers.8.block_sparse_moe.experts.197.w3", "model.layers.8.block_sparse_moe.experts.198.w3", "model.layers.8.block_sparse_moe.experts.199.w3", "model.layers.8.block_sparse_moe.experts.200.w3", "model.layers.8.block_sparse_moe.experts.201.w3", "model.layers.8.block_sparse_moe.experts.202.w3", "model.layers.8.block_sparse_moe.experts.203.w3", "model.layers.8.block_sparse_moe.experts.204.w3", "model.layers.8.block_sparse_moe.experts.205.w3", "model.layers.8.block_sparse_moe.experts.206.w3", "model.layers.8.block_sparse_moe.experts.207.w3", "model.layers.8.block_sparse_moe.experts.208.w3", "model.layers.8.block_sparse_moe.experts.209.w3", "model.layers.8.block_sparse_moe.experts.210.w3", "model.layers.8.block_sparse_moe.experts.211.w3", "model.layers.8.block_sparse_moe.experts.212.w3", "model.layers.8.block_sparse_moe.experts.213.w3", "model.layers.8.block_sparse_moe.experts.214.w3", "model.layers.8.block_sparse_moe.experts.215.w3", "model.layers.8.block_sparse_moe.experts.216.w3", "model.layers.8.block_sparse_moe.experts.217.w3", "model.layers.8.block_sparse_moe.experts.218.w3", "model.layers.8.block_sparse_moe.experts.219.w3", "model.layers.8.block_sparse_moe.experts.220.w3", "model.layers.8.block_sparse_moe.experts.221.w3", "model.layers.8.block_sparse_moe.experts.222.w3", "model.layers.8.block_sparse_moe.experts.223.w3", "model.layers.8.block_sparse_moe.experts.224.w3", "model.layers.8.block_sparse_moe.experts.225.w3", "model.layers.8.block_sparse_moe.experts.226.w3", "model.layers.8.block_sparse_moe.experts.227.w3", "model.layers.8.block_sparse_moe.experts.228.w3", "model.layers.8.block_sparse_moe.experts.229.w3", "model.layers.8.block_sparse_moe.experts.230.w3", "model.layers.8.block_sparse_moe.experts.231.w3", "model.layers.8.block_sparse_moe.experts.232.w3", "model.layers.8.block_sparse_moe.experts.233.w3", "model.layers.8.block_sparse_moe.experts.234.w3", "model.layers.8.block_sparse_moe.experts.235.w3", "model.layers.8.block_sparse_moe.experts.236.w3", "model.layers.8.block_sparse_moe.experts.237.w3", "model.layers.8.block_sparse_moe.experts.238.w3", "model.layers.8.block_sparse_moe.experts.239.w3", "model.layers.8.block_sparse_moe.experts.240.w3", "model.layers.8.block_sparse_moe.experts.241.w3", "model.layers.8.block_sparse_moe.experts.242.w3", "model.layers.8.block_sparse_moe.experts.243.w3", "model.layers.8.block_sparse_moe.experts.244.w3", "model.layers.8.block_sparse_moe.experts.245.w3", "model.layers.8.block_sparse_moe.experts.246.w3", "model.layers.8.block_sparse_moe.experts.247.w3", "model.layers.8.block_sparse_moe.experts.248.w3", "model.layers.8.block_sparse_moe.experts.249.w3", "model.layers.8.block_sparse_moe.experts.250.w3", "model.layers.8.block_sparse_moe.experts.251.w3", "model.layers.8.block_sparse_moe.experts.252.w3", "model.layers.8.block_sparse_moe.experts.253.w3", "model.layers.8.block_sparse_moe.experts.254.w3", "model.layers.8.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.004924985766410761, "dbits": 2415919104 } ] }, { "idx": 44, "layers": [ "model.layers.8.block_sparse_moe.experts.0.w2", "model.layers.8.block_sparse_moe.experts.1.w2", "model.layers.8.block_sparse_moe.experts.2.w2", "model.layers.8.block_sparse_moe.experts.3.w2", "model.layers.8.block_sparse_moe.experts.4.w2", "model.layers.8.block_sparse_moe.experts.5.w2", "model.layers.8.block_sparse_moe.experts.6.w2", "model.layers.8.block_sparse_moe.experts.7.w2", "model.layers.8.block_sparse_moe.experts.8.w2", "model.layers.8.block_sparse_moe.experts.9.w2", "model.layers.8.block_sparse_moe.experts.10.w2", "model.layers.8.block_sparse_moe.experts.11.w2", "model.layers.8.block_sparse_moe.experts.12.w2", "model.layers.8.block_sparse_moe.experts.13.w2", "model.layers.8.block_sparse_moe.experts.14.w2", "model.layers.8.block_sparse_moe.experts.15.w2", "model.layers.8.block_sparse_moe.experts.16.w2", "model.layers.8.block_sparse_moe.experts.17.w2", "model.layers.8.block_sparse_moe.experts.18.w2", "model.layers.8.block_sparse_moe.experts.19.w2", "model.layers.8.block_sparse_moe.experts.20.w2", "model.layers.8.block_sparse_moe.experts.21.w2", "model.layers.8.block_sparse_moe.experts.22.w2", "model.layers.8.block_sparse_moe.experts.23.w2", "model.layers.8.block_sparse_moe.experts.24.w2", "model.layers.8.block_sparse_moe.experts.25.w2", "model.layers.8.block_sparse_moe.experts.26.w2", "model.layers.8.block_sparse_moe.experts.27.w2", "model.layers.8.block_sparse_moe.experts.28.w2", "model.layers.8.block_sparse_moe.experts.29.w2", "model.layers.8.block_sparse_moe.experts.30.w2", "model.layers.8.block_sparse_moe.experts.31.w2", "model.layers.8.block_sparse_moe.experts.32.w2", "model.layers.8.block_sparse_moe.experts.33.w2", "model.layers.8.block_sparse_moe.experts.34.w2", "model.layers.8.block_sparse_moe.experts.35.w2", "model.layers.8.block_sparse_moe.experts.36.w2", "model.layers.8.block_sparse_moe.experts.37.w2", "model.layers.8.block_sparse_moe.experts.38.w2", "model.layers.8.block_sparse_moe.experts.39.w2", "model.layers.8.block_sparse_moe.experts.40.w2", "model.layers.8.block_sparse_moe.experts.41.w2", "model.layers.8.block_sparse_moe.experts.42.w2", "model.layers.8.block_sparse_moe.experts.43.w2", "model.layers.8.block_sparse_moe.experts.44.w2", "model.layers.8.block_sparse_moe.experts.45.w2", "model.layers.8.block_sparse_moe.experts.46.w2", "model.layers.8.block_sparse_moe.experts.47.w2", "model.layers.8.block_sparse_moe.experts.48.w2", "model.layers.8.block_sparse_moe.experts.49.w2", "model.layers.8.block_sparse_moe.experts.50.w2", "model.layers.8.block_sparse_moe.experts.51.w2", "model.layers.8.block_sparse_moe.experts.52.w2", "model.layers.8.block_sparse_moe.experts.53.w2", "model.layers.8.block_sparse_moe.experts.54.w2", "model.layers.8.block_sparse_moe.experts.55.w2", "model.layers.8.block_sparse_moe.experts.56.w2", "model.layers.8.block_sparse_moe.experts.57.w2", "model.layers.8.block_sparse_moe.experts.58.w2", "model.layers.8.block_sparse_moe.experts.59.w2", "model.layers.8.block_sparse_moe.experts.60.w2", "model.layers.8.block_sparse_moe.experts.61.w2", "model.layers.8.block_sparse_moe.experts.62.w2", "model.layers.8.block_sparse_moe.experts.63.w2", "model.layers.8.block_sparse_moe.experts.64.w2", "model.layers.8.block_sparse_moe.experts.65.w2", "model.layers.8.block_sparse_moe.experts.66.w2", "model.layers.8.block_sparse_moe.experts.67.w2", "model.layers.8.block_sparse_moe.experts.68.w2", "model.layers.8.block_sparse_moe.experts.69.w2", "model.layers.8.block_sparse_moe.experts.70.w2", "model.layers.8.block_sparse_moe.experts.71.w2", "model.layers.8.block_sparse_moe.experts.72.w2", "model.layers.8.block_sparse_moe.experts.73.w2", "model.layers.8.block_sparse_moe.experts.74.w2", "model.layers.8.block_sparse_moe.experts.75.w2", "model.layers.8.block_sparse_moe.experts.76.w2", "model.layers.8.block_sparse_moe.experts.77.w2", "model.layers.8.block_sparse_moe.experts.78.w2", "model.layers.8.block_sparse_moe.experts.79.w2", "model.layers.8.block_sparse_moe.experts.80.w2", "model.layers.8.block_sparse_moe.experts.81.w2", "model.layers.8.block_sparse_moe.experts.82.w2", "model.layers.8.block_sparse_moe.experts.83.w2", "model.layers.8.block_sparse_moe.experts.84.w2", "model.layers.8.block_sparse_moe.experts.85.w2", "model.layers.8.block_sparse_moe.experts.86.w2", "model.layers.8.block_sparse_moe.experts.87.w2", "model.layers.8.block_sparse_moe.experts.88.w2", "model.layers.8.block_sparse_moe.experts.89.w2", "model.layers.8.block_sparse_moe.experts.90.w2", "model.layers.8.block_sparse_moe.experts.91.w2", "model.layers.8.block_sparse_moe.experts.92.w2", "model.layers.8.block_sparse_moe.experts.93.w2", "model.layers.8.block_sparse_moe.experts.94.w2", "model.layers.8.block_sparse_moe.experts.95.w2", "model.layers.8.block_sparse_moe.experts.96.w2", "model.layers.8.block_sparse_moe.experts.97.w2", "model.layers.8.block_sparse_moe.experts.98.w2", "model.layers.8.block_sparse_moe.experts.99.w2", "model.layers.8.block_sparse_moe.experts.100.w2", "model.layers.8.block_sparse_moe.experts.101.w2", "model.layers.8.block_sparse_moe.experts.102.w2", "model.layers.8.block_sparse_moe.experts.103.w2", "model.layers.8.block_sparse_moe.experts.104.w2", "model.layers.8.block_sparse_moe.experts.105.w2", "model.layers.8.block_sparse_moe.experts.106.w2", "model.layers.8.block_sparse_moe.experts.107.w2", "model.layers.8.block_sparse_moe.experts.108.w2", "model.layers.8.block_sparse_moe.experts.109.w2", "model.layers.8.block_sparse_moe.experts.110.w2", "model.layers.8.block_sparse_moe.experts.111.w2", "model.layers.8.block_sparse_moe.experts.112.w2", "model.layers.8.block_sparse_moe.experts.113.w2", "model.layers.8.block_sparse_moe.experts.114.w2", "model.layers.8.block_sparse_moe.experts.115.w2", "model.layers.8.block_sparse_moe.experts.116.w2", "model.layers.8.block_sparse_moe.experts.117.w2", "model.layers.8.block_sparse_moe.experts.118.w2", "model.layers.8.block_sparse_moe.experts.119.w2", "model.layers.8.block_sparse_moe.experts.120.w2", "model.layers.8.block_sparse_moe.experts.121.w2", "model.layers.8.block_sparse_moe.experts.122.w2", "model.layers.8.block_sparse_moe.experts.123.w2", "model.layers.8.block_sparse_moe.experts.124.w2", "model.layers.8.block_sparse_moe.experts.125.w2", "model.layers.8.block_sparse_moe.experts.126.w2", "model.layers.8.block_sparse_moe.experts.127.w2", "model.layers.8.block_sparse_moe.experts.128.w2", "model.layers.8.block_sparse_moe.experts.129.w2", "model.layers.8.block_sparse_moe.experts.130.w2", "model.layers.8.block_sparse_moe.experts.131.w2", "model.layers.8.block_sparse_moe.experts.132.w2", "model.layers.8.block_sparse_moe.experts.133.w2", "model.layers.8.block_sparse_moe.experts.134.w2", "model.layers.8.block_sparse_moe.experts.135.w2", "model.layers.8.block_sparse_moe.experts.136.w2", "model.layers.8.block_sparse_moe.experts.137.w2", "model.layers.8.block_sparse_moe.experts.138.w2", "model.layers.8.block_sparse_moe.experts.139.w2", "model.layers.8.block_sparse_moe.experts.140.w2", "model.layers.8.block_sparse_moe.experts.141.w2", "model.layers.8.block_sparse_moe.experts.142.w2", "model.layers.8.block_sparse_moe.experts.143.w2", "model.layers.8.block_sparse_moe.experts.144.w2", "model.layers.8.block_sparse_moe.experts.145.w2", "model.layers.8.block_sparse_moe.experts.146.w2", "model.layers.8.block_sparse_moe.experts.147.w2", "model.layers.8.block_sparse_moe.experts.148.w2", "model.layers.8.block_sparse_moe.experts.149.w2", "model.layers.8.block_sparse_moe.experts.150.w2", "model.layers.8.block_sparse_moe.experts.151.w2", "model.layers.8.block_sparse_moe.experts.152.w2", "model.layers.8.block_sparse_moe.experts.153.w2", "model.layers.8.block_sparse_moe.experts.154.w2", "model.layers.8.block_sparse_moe.experts.155.w2", "model.layers.8.block_sparse_moe.experts.156.w2", "model.layers.8.block_sparse_moe.experts.157.w2", "model.layers.8.block_sparse_moe.experts.158.w2", "model.layers.8.block_sparse_moe.experts.159.w2", "model.layers.8.block_sparse_moe.experts.160.w2", "model.layers.8.block_sparse_moe.experts.161.w2", "model.layers.8.block_sparse_moe.experts.162.w2", "model.layers.8.block_sparse_moe.experts.163.w2", "model.layers.8.block_sparse_moe.experts.164.w2", "model.layers.8.block_sparse_moe.experts.165.w2", "model.layers.8.block_sparse_moe.experts.166.w2", "model.layers.8.block_sparse_moe.experts.167.w2", "model.layers.8.block_sparse_moe.experts.168.w2", "model.layers.8.block_sparse_moe.experts.169.w2", "model.layers.8.block_sparse_moe.experts.170.w2", "model.layers.8.block_sparse_moe.experts.171.w2", "model.layers.8.block_sparse_moe.experts.172.w2", "model.layers.8.block_sparse_moe.experts.173.w2", "model.layers.8.block_sparse_moe.experts.174.w2", "model.layers.8.block_sparse_moe.experts.175.w2", "model.layers.8.block_sparse_moe.experts.176.w2", "model.layers.8.block_sparse_moe.experts.177.w2", "model.layers.8.block_sparse_moe.experts.178.w2", "model.layers.8.block_sparse_moe.experts.179.w2", "model.layers.8.block_sparse_moe.experts.180.w2", "model.layers.8.block_sparse_moe.experts.181.w2", "model.layers.8.block_sparse_moe.experts.182.w2", "model.layers.8.block_sparse_moe.experts.183.w2", "model.layers.8.block_sparse_moe.experts.184.w2", "model.layers.8.block_sparse_moe.experts.185.w2", "model.layers.8.block_sparse_moe.experts.186.w2", "model.layers.8.block_sparse_moe.experts.187.w2", "model.layers.8.block_sparse_moe.experts.188.w2", "model.layers.8.block_sparse_moe.experts.189.w2", "model.layers.8.block_sparse_moe.experts.190.w2", "model.layers.8.block_sparse_moe.experts.191.w2", "model.layers.8.block_sparse_moe.experts.192.w2", "model.layers.8.block_sparse_moe.experts.193.w2", "model.layers.8.block_sparse_moe.experts.194.w2", "model.layers.8.block_sparse_moe.experts.195.w2", "model.layers.8.block_sparse_moe.experts.196.w2", "model.layers.8.block_sparse_moe.experts.197.w2", "model.layers.8.block_sparse_moe.experts.198.w2", "model.layers.8.block_sparse_moe.experts.199.w2", "model.layers.8.block_sparse_moe.experts.200.w2", "model.layers.8.block_sparse_moe.experts.201.w2", "model.layers.8.block_sparse_moe.experts.202.w2", "model.layers.8.block_sparse_moe.experts.203.w2", "model.layers.8.block_sparse_moe.experts.204.w2", "model.layers.8.block_sparse_moe.experts.205.w2", "model.layers.8.block_sparse_moe.experts.206.w2", "model.layers.8.block_sparse_moe.experts.207.w2", "model.layers.8.block_sparse_moe.experts.208.w2", "model.layers.8.block_sparse_moe.experts.209.w2", "model.layers.8.block_sparse_moe.experts.210.w2", "model.layers.8.block_sparse_moe.experts.211.w2", "model.layers.8.block_sparse_moe.experts.212.w2", "model.layers.8.block_sparse_moe.experts.213.w2", "model.layers.8.block_sparse_moe.experts.214.w2", "model.layers.8.block_sparse_moe.experts.215.w2", "model.layers.8.block_sparse_moe.experts.216.w2", "model.layers.8.block_sparse_moe.experts.217.w2", "model.layers.8.block_sparse_moe.experts.218.w2", "model.layers.8.block_sparse_moe.experts.219.w2", "model.layers.8.block_sparse_moe.experts.220.w2", "model.layers.8.block_sparse_moe.experts.221.w2", "model.layers.8.block_sparse_moe.experts.222.w2", "model.layers.8.block_sparse_moe.experts.223.w2", "model.layers.8.block_sparse_moe.experts.224.w2", "model.layers.8.block_sparse_moe.experts.225.w2", "model.layers.8.block_sparse_moe.experts.226.w2", "model.layers.8.block_sparse_moe.experts.227.w2", "model.layers.8.block_sparse_moe.experts.228.w2", "model.layers.8.block_sparse_moe.experts.229.w2", "model.layers.8.block_sparse_moe.experts.230.w2", "model.layers.8.block_sparse_moe.experts.231.w2", "model.layers.8.block_sparse_moe.experts.232.w2", "model.layers.8.block_sparse_moe.experts.233.w2", "model.layers.8.block_sparse_moe.experts.234.w2", "model.layers.8.block_sparse_moe.experts.235.w2", "model.layers.8.block_sparse_moe.experts.236.w2", "model.layers.8.block_sparse_moe.experts.237.w2", "model.layers.8.block_sparse_moe.experts.238.w2", "model.layers.8.block_sparse_moe.experts.239.w2", "model.layers.8.block_sparse_moe.experts.240.w2", "model.layers.8.block_sparse_moe.experts.241.w2", "model.layers.8.block_sparse_moe.experts.242.w2", "model.layers.8.block_sparse_moe.experts.243.w2", "model.layers.8.block_sparse_moe.experts.244.w2", "model.layers.8.block_sparse_moe.experts.245.w2", "model.layers.8.block_sparse_moe.experts.246.w2", "model.layers.8.block_sparse_moe.experts.247.w2", "model.layers.8.block_sparse_moe.experts.248.w2", "model.layers.8.block_sparse_moe.experts.249.w2", "model.layers.8.block_sparse_moe.experts.250.w2", "model.layers.8.block_sparse_moe.experts.251.w2", "model.layers.8.block_sparse_moe.experts.252.w2", "model.layers.8.block_sparse_moe.experts.253.w2", "model.layers.8.block_sparse_moe.experts.254.w2", "model.layers.8.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0010992407798766646, "dbits": 1207959552 } ] }, { "idx": 45, "layers": [ "model.layers.9.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00031776726245880127, "dbits": 18874368 } ] }, { "idx": 46, "layers": [ "model.layers.9.self_attn.k_proj", "model.layers.9.self_attn.v_proj" ], "candidates": [ { "dkld": 0.003736823797225952, "dbits": 6291456 } ] }, { "idx": 47, "layers": [ "model.layers.9.self_attn.o_proj" ], "candidates": [ { "dkld": -0.006625273823738009, "dbits": 18874368 } ] }, { "idx": 48, "layers": [ "model.layers.9.block_sparse_moe.experts.0.w1", "model.layers.9.block_sparse_moe.experts.1.w1", "model.layers.9.block_sparse_moe.experts.2.w1", "model.layers.9.block_sparse_moe.experts.3.w1", "model.layers.9.block_sparse_moe.experts.4.w1", "model.layers.9.block_sparse_moe.experts.5.w1", "model.layers.9.block_sparse_moe.experts.6.w1", "model.layers.9.block_sparse_moe.experts.7.w1", "model.layers.9.block_sparse_moe.experts.8.w1", "model.layers.9.block_sparse_moe.experts.9.w1", "model.layers.9.block_sparse_moe.experts.10.w1", "model.layers.9.block_sparse_moe.experts.11.w1", "model.layers.9.block_sparse_moe.experts.12.w1", "model.layers.9.block_sparse_moe.experts.13.w1", "model.layers.9.block_sparse_moe.experts.14.w1", "model.layers.9.block_sparse_moe.experts.15.w1", "model.layers.9.block_sparse_moe.experts.16.w1", "model.layers.9.block_sparse_moe.experts.17.w1", "model.layers.9.block_sparse_moe.experts.18.w1", "model.layers.9.block_sparse_moe.experts.19.w1", "model.layers.9.block_sparse_moe.experts.20.w1", "model.layers.9.block_sparse_moe.experts.21.w1", "model.layers.9.block_sparse_moe.experts.22.w1", "model.layers.9.block_sparse_moe.experts.23.w1", "model.layers.9.block_sparse_moe.experts.24.w1", "model.layers.9.block_sparse_moe.experts.25.w1", "model.layers.9.block_sparse_moe.experts.26.w1", "model.layers.9.block_sparse_moe.experts.27.w1", "model.layers.9.block_sparse_moe.experts.28.w1", "model.layers.9.block_sparse_moe.experts.29.w1", "model.layers.9.block_sparse_moe.experts.30.w1", "model.layers.9.block_sparse_moe.experts.31.w1", "model.layers.9.block_sparse_moe.experts.32.w1", "model.layers.9.block_sparse_moe.experts.33.w1", "model.layers.9.block_sparse_moe.experts.34.w1", "model.layers.9.block_sparse_moe.experts.35.w1", "model.layers.9.block_sparse_moe.experts.36.w1", "model.layers.9.block_sparse_moe.experts.37.w1", "model.layers.9.block_sparse_moe.experts.38.w1", "model.layers.9.block_sparse_moe.experts.39.w1", "model.layers.9.block_sparse_moe.experts.40.w1", "model.layers.9.block_sparse_moe.experts.41.w1", "model.layers.9.block_sparse_moe.experts.42.w1", "model.layers.9.block_sparse_moe.experts.43.w1", "model.layers.9.block_sparse_moe.experts.44.w1", "model.layers.9.block_sparse_moe.experts.45.w1", "model.layers.9.block_sparse_moe.experts.46.w1", "model.layers.9.block_sparse_moe.experts.47.w1", "model.layers.9.block_sparse_moe.experts.48.w1", "model.layers.9.block_sparse_moe.experts.49.w1", "model.layers.9.block_sparse_moe.experts.50.w1", "model.layers.9.block_sparse_moe.experts.51.w1", "model.layers.9.block_sparse_moe.experts.52.w1", "model.layers.9.block_sparse_moe.experts.53.w1", "model.layers.9.block_sparse_moe.experts.54.w1", "model.layers.9.block_sparse_moe.experts.55.w1", "model.layers.9.block_sparse_moe.experts.56.w1", "model.layers.9.block_sparse_moe.experts.57.w1", "model.layers.9.block_sparse_moe.experts.58.w1", "model.layers.9.block_sparse_moe.experts.59.w1", "model.layers.9.block_sparse_moe.experts.60.w1", "model.layers.9.block_sparse_moe.experts.61.w1", "model.layers.9.block_sparse_moe.experts.62.w1", "model.layers.9.block_sparse_moe.experts.63.w1", "model.layers.9.block_sparse_moe.experts.64.w1", "model.layers.9.block_sparse_moe.experts.65.w1", "model.layers.9.block_sparse_moe.experts.66.w1", "model.layers.9.block_sparse_moe.experts.67.w1", "model.layers.9.block_sparse_moe.experts.68.w1", "model.layers.9.block_sparse_moe.experts.69.w1", "model.layers.9.block_sparse_moe.experts.70.w1", "model.layers.9.block_sparse_moe.experts.71.w1", "model.layers.9.block_sparse_moe.experts.72.w1", "model.layers.9.block_sparse_moe.experts.73.w1", "model.layers.9.block_sparse_moe.experts.74.w1", "model.layers.9.block_sparse_moe.experts.75.w1", "model.layers.9.block_sparse_moe.experts.76.w1", "model.layers.9.block_sparse_moe.experts.77.w1", "model.layers.9.block_sparse_moe.experts.78.w1", "model.layers.9.block_sparse_moe.experts.79.w1", "model.layers.9.block_sparse_moe.experts.80.w1", "model.layers.9.block_sparse_moe.experts.81.w1", "model.layers.9.block_sparse_moe.experts.82.w1", "model.layers.9.block_sparse_moe.experts.83.w1", "model.layers.9.block_sparse_moe.experts.84.w1", "model.layers.9.block_sparse_moe.experts.85.w1", "model.layers.9.block_sparse_moe.experts.86.w1", "model.layers.9.block_sparse_moe.experts.87.w1", "model.layers.9.block_sparse_moe.experts.88.w1", "model.layers.9.block_sparse_moe.experts.89.w1", "model.layers.9.block_sparse_moe.experts.90.w1", "model.layers.9.block_sparse_moe.experts.91.w1", "model.layers.9.block_sparse_moe.experts.92.w1", "model.layers.9.block_sparse_moe.experts.93.w1", "model.layers.9.block_sparse_moe.experts.94.w1", "model.layers.9.block_sparse_moe.experts.95.w1", "model.layers.9.block_sparse_moe.experts.96.w1", "model.layers.9.block_sparse_moe.experts.97.w1", "model.layers.9.block_sparse_moe.experts.98.w1", "model.layers.9.block_sparse_moe.experts.99.w1", "model.layers.9.block_sparse_moe.experts.100.w1", "model.layers.9.block_sparse_moe.experts.101.w1", "model.layers.9.block_sparse_moe.experts.102.w1", "model.layers.9.block_sparse_moe.experts.103.w1", "model.layers.9.block_sparse_moe.experts.104.w1", "model.layers.9.block_sparse_moe.experts.105.w1", "model.layers.9.block_sparse_moe.experts.106.w1", "model.layers.9.block_sparse_moe.experts.107.w1", "model.layers.9.block_sparse_moe.experts.108.w1", "model.layers.9.block_sparse_moe.experts.109.w1", "model.layers.9.block_sparse_moe.experts.110.w1", "model.layers.9.block_sparse_moe.experts.111.w1", "model.layers.9.block_sparse_moe.experts.112.w1", "model.layers.9.block_sparse_moe.experts.113.w1", "model.layers.9.block_sparse_moe.experts.114.w1", "model.layers.9.block_sparse_moe.experts.115.w1", "model.layers.9.block_sparse_moe.experts.116.w1", "model.layers.9.block_sparse_moe.experts.117.w1", "model.layers.9.block_sparse_moe.experts.118.w1", "model.layers.9.block_sparse_moe.experts.119.w1", "model.layers.9.block_sparse_moe.experts.120.w1", "model.layers.9.block_sparse_moe.experts.121.w1", "model.layers.9.block_sparse_moe.experts.122.w1", "model.layers.9.block_sparse_moe.experts.123.w1", "model.layers.9.block_sparse_moe.experts.124.w1", "model.layers.9.block_sparse_moe.experts.125.w1", "model.layers.9.block_sparse_moe.experts.126.w1", "model.layers.9.block_sparse_moe.experts.127.w1", "model.layers.9.block_sparse_moe.experts.128.w1", "model.layers.9.block_sparse_moe.experts.129.w1", "model.layers.9.block_sparse_moe.experts.130.w1", "model.layers.9.block_sparse_moe.experts.131.w1", "model.layers.9.block_sparse_moe.experts.132.w1", "model.layers.9.block_sparse_moe.experts.133.w1", "model.layers.9.block_sparse_moe.experts.134.w1", "model.layers.9.block_sparse_moe.experts.135.w1", "model.layers.9.block_sparse_moe.experts.136.w1", "model.layers.9.block_sparse_moe.experts.137.w1", "model.layers.9.block_sparse_moe.experts.138.w1", "model.layers.9.block_sparse_moe.experts.139.w1", "model.layers.9.block_sparse_moe.experts.140.w1", "model.layers.9.block_sparse_moe.experts.141.w1", "model.layers.9.block_sparse_moe.experts.142.w1", "model.layers.9.block_sparse_moe.experts.143.w1", "model.layers.9.block_sparse_moe.experts.144.w1", "model.layers.9.block_sparse_moe.experts.145.w1", "model.layers.9.block_sparse_moe.experts.146.w1", "model.layers.9.block_sparse_moe.experts.147.w1", "model.layers.9.block_sparse_moe.experts.148.w1", "model.layers.9.block_sparse_moe.experts.149.w1", "model.layers.9.block_sparse_moe.experts.150.w1", "model.layers.9.block_sparse_moe.experts.151.w1", "model.layers.9.block_sparse_moe.experts.152.w1", "model.layers.9.block_sparse_moe.experts.153.w1", "model.layers.9.block_sparse_moe.experts.154.w1", "model.layers.9.block_sparse_moe.experts.155.w1", "model.layers.9.block_sparse_moe.experts.156.w1", "model.layers.9.block_sparse_moe.experts.157.w1", "model.layers.9.block_sparse_moe.experts.158.w1", "model.layers.9.block_sparse_moe.experts.159.w1", "model.layers.9.block_sparse_moe.experts.160.w1", "model.layers.9.block_sparse_moe.experts.161.w1", "model.layers.9.block_sparse_moe.experts.162.w1", "model.layers.9.block_sparse_moe.experts.163.w1", "model.layers.9.block_sparse_moe.experts.164.w1", "model.layers.9.block_sparse_moe.experts.165.w1", "model.layers.9.block_sparse_moe.experts.166.w1", "model.layers.9.block_sparse_moe.experts.167.w1", "model.layers.9.block_sparse_moe.experts.168.w1", "model.layers.9.block_sparse_moe.experts.169.w1", "model.layers.9.block_sparse_moe.experts.170.w1", "model.layers.9.block_sparse_moe.experts.171.w1", "model.layers.9.block_sparse_moe.experts.172.w1", "model.layers.9.block_sparse_moe.experts.173.w1", "model.layers.9.block_sparse_moe.experts.174.w1", "model.layers.9.block_sparse_moe.experts.175.w1", "model.layers.9.block_sparse_moe.experts.176.w1", "model.layers.9.block_sparse_moe.experts.177.w1", "model.layers.9.block_sparse_moe.experts.178.w1", "model.layers.9.block_sparse_moe.experts.179.w1", "model.layers.9.block_sparse_moe.experts.180.w1", "model.layers.9.block_sparse_moe.experts.181.w1", "model.layers.9.block_sparse_moe.experts.182.w1", "model.layers.9.block_sparse_moe.experts.183.w1", "model.layers.9.block_sparse_moe.experts.184.w1", "model.layers.9.block_sparse_moe.experts.185.w1", "model.layers.9.block_sparse_moe.experts.186.w1", "model.layers.9.block_sparse_moe.experts.187.w1", "model.layers.9.block_sparse_moe.experts.188.w1", "model.layers.9.block_sparse_moe.experts.189.w1", "model.layers.9.block_sparse_moe.experts.190.w1", "model.layers.9.block_sparse_moe.experts.191.w1", "model.layers.9.block_sparse_moe.experts.192.w1", "model.layers.9.block_sparse_moe.experts.193.w1", "model.layers.9.block_sparse_moe.experts.194.w1", "model.layers.9.block_sparse_moe.experts.195.w1", "model.layers.9.block_sparse_moe.experts.196.w1", "model.layers.9.block_sparse_moe.experts.197.w1", "model.layers.9.block_sparse_moe.experts.198.w1", "model.layers.9.block_sparse_moe.experts.199.w1", "model.layers.9.block_sparse_moe.experts.200.w1", "model.layers.9.block_sparse_moe.experts.201.w1", "model.layers.9.block_sparse_moe.experts.202.w1", "model.layers.9.block_sparse_moe.experts.203.w1", "model.layers.9.block_sparse_moe.experts.204.w1", "model.layers.9.block_sparse_moe.experts.205.w1", "model.layers.9.block_sparse_moe.experts.206.w1", "model.layers.9.block_sparse_moe.experts.207.w1", "model.layers.9.block_sparse_moe.experts.208.w1", "model.layers.9.block_sparse_moe.experts.209.w1", "model.layers.9.block_sparse_moe.experts.210.w1", "model.layers.9.block_sparse_moe.experts.211.w1", "model.layers.9.block_sparse_moe.experts.212.w1", "model.layers.9.block_sparse_moe.experts.213.w1", "model.layers.9.block_sparse_moe.experts.214.w1", "model.layers.9.block_sparse_moe.experts.215.w1", "model.layers.9.block_sparse_moe.experts.216.w1", "model.layers.9.block_sparse_moe.experts.217.w1", "model.layers.9.block_sparse_moe.experts.218.w1", "model.layers.9.block_sparse_moe.experts.219.w1", "model.layers.9.block_sparse_moe.experts.220.w1", "model.layers.9.block_sparse_moe.experts.221.w1", "model.layers.9.block_sparse_moe.experts.222.w1", "model.layers.9.block_sparse_moe.experts.223.w1", "model.layers.9.block_sparse_moe.experts.224.w1", "model.layers.9.block_sparse_moe.experts.225.w1", "model.layers.9.block_sparse_moe.experts.226.w1", "model.layers.9.block_sparse_moe.experts.227.w1", "model.layers.9.block_sparse_moe.experts.228.w1", "model.layers.9.block_sparse_moe.experts.229.w1", "model.layers.9.block_sparse_moe.experts.230.w1", "model.layers.9.block_sparse_moe.experts.231.w1", "model.layers.9.block_sparse_moe.experts.232.w1", "model.layers.9.block_sparse_moe.experts.233.w1", "model.layers.9.block_sparse_moe.experts.234.w1", "model.layers.9.block_sparse_moe.experts.235.w1", "model.layers.9.block_sparse_moe.experts.236.w1", "model.layers.9.block_sparse_moe.experts.237.w1", "model.layers.9.block_sparse_moe.experts.238.w1", "model.layers.9.block_sparse_moe.experts.239.w1", "model.layers.9.block_sparse_moe.experts.240.w1", "model.layers.9.block_sparse_moe.experts.241.w1", "model.layers.9.block_sparse_moe.experts.242.w1", "model.layers.9.block_sparse_moe.experts.243.w1", "model.layers.9.block_sparse_moe.experts.244.w1", "model.layers.9.block_sparse_moe.experts.245.w1", "model.layers.9.block_sparse_moe.experts.246.w1", "model.layers.9.block_sparse_moe.experts.247.w1", "model.layers.9.block_sparse_moe.experts.248.w1", "model.layers.9.block_sparse_moe.experts.249.w1", "model.layers.9.block_sparse_moe.experts.250.w1", "model.layers.9.block_sparse_moe.experts.251.w1", "model.layers.9.block_sparse_moe.experts.252.w1", "model.layers.9.block_sparse_moe.experts.253.w1", "model.layers.9.block_sparse_moe.experts.254.w1", "model.layers.9.block_sparse_moe.experts.255.w1", "model.layers.9.block_sparse_moe.experts.0.w3", "model.layers.9.block_sparse_moe.experts.1.w3", "model.layers.9.block_sparse_moe.experts.2.w3", "model.layers.9.block_sparse_moe.experts.3.w3", "model.layers.9.block_sparse_moe.experts.4.w3", "model.layers.9.block_sparse_moe.experts.5.w3", "model.layers.9.block_sparse_moe.experts.6.w3", "model.layers.9.block_sparse_moe.experts.7.w3", "model.layers.9.block_sparse_moe.experts.8.w3", "model.layers.9.block_sparse_moe.experts.9.w3", "model.layers.9.block_sparse_moe.experts.10.w3", "model.layers.9.block_sparse_moe.experts.11.w3", "model.layers.9.block_sparse_moe.experts.12.w3", "model.layers.9.block_sparse_moe.experts.13.w3", "model.layers.9.block_sparse_moe.experts.14.w3", "model.layers.9.block_sparse_moe.experts.15.w3", "model.layers.9.block_sparse_moe.experts.16.w3", "model.layers.9.block_sparse_moe.experts.17.w3", "model.layers.9.block_sparse_moe.experts.18.w3", "model.layers.9.block_sparse_moe.experts.19.w3", "model.layers.9.block_sparse_moe.experts.20.w3", "model.layers.9.block_sparse_moe.experts.21.w3", "model.layers.9.block_sparse_moe.experts.22.w3", "model.layers.9.block_sparse_moe.experts.23.w3", "model.layers.9.block_sparse_moe.experts.24.w3", "model.layers.9.block_sparse_moe.experts.25.w3", "model.layers.9.block_sparse_moe.experts.26.w3", "model.layers.9.block_sparse_moe.experts.27.w3", "model.layers.9.block_sparse_moe.experts.28.w3", "model.layers.9.block_sparse_moe.experts.29.w3", "model.layers.9.block_sparse_moe.experts.30.w3", "model.layers.9.block_sparse_moe.experts.31.w3", "model.layers.9.block_sparse_moe.experts.32.w3", "model.layers.9.block_sparse_moe.experts.33.w3", "model.layers.9.block_sparse_moe.experts.34.w3", "model.layers.9.block_sparse_moe.experts.35.w3", "model.layers.9.block_sparse_moe.experts.36.w3", "model.layers.9.block_sparse_moe.experts.37.w3", "model.layers.9.block_sparse_moe.experts.38.w3", "model.layers.9.block_sparse_moe.experts.39.w3", "model.layers.9.block_sparse_moe.experts.40.w3", "model.layers.9.block_sparse_moe.experts.41.w3", "model.layers.9.block_sparse_moe.experts.42.w3", "model.layers.9.block_sparse_moe.experts.43.w3", "model.layers.9.block_sparse_moe.experts.44.w3", "model.layers.9.block_sparse_moe.experts.45.w3", "model.layers.9.block_sparse_moe.experts.46.w3", "model.layers.9.block_sparse_moe.experts.47.w3", "model.layers.9.block_sparse_moe.experts.48.w3", "model.layers.9.block_sparse_moe.experts.49.w3", "model.layers.9.block_sparse_moe.experts.50.w3", "model.layers.9.block_sparse_moe.experts.51.w3", "model.layers.9.block_sparse_moe.experts.52.w3", "model.layers.9.block_sparse_moe.experts.53.w3", "model.layers.9.block_sparse_moe.experts.54.w3", "model.layers.9.block_sparse_moe.experts.55.w3", "model.layers.9.block_sparse_moe.experts.56.w3", "model.layers.9.block_sparse_moe.experts.57.w3", "model.layers.9.block_sparse_moe.experts.58.w3", "model.layers.9.block_sparse_moe.experts.59.w3", "model.layers.9.block_sparse_moe.experts.60.w3", "model.layers.9.block_sparse_moe.experts.61.w3", "model.layers.9.block_sparse_moe.experts.62.w3", "model.layers.9.block_sparse_moe.experts.63.w3", "model.layers.9.block_sparse_moe.experts.64.w3", "model.layers.9.block_sparse_moe.experts.65.w3", "model.layers.9.block_sparse_moe.experts.66.w3", "model.layers.9.block_sparse_moe.experts.67.w3", "model.layers.9.block_sparse_moe.experts.68.w3", "model.layers.9.block_sparse_moe.experts.69.w3", "model.layers.9.block_sparse_moe.experts.70.w3", "model.layers.9.block_sparse_moe.experts.71.w3", "model.layers.9.block_sparse_moe.experts.72.w3", "model.layers.9.block_sparse_moe.experts.73.w3", "model.layers.9.block_sparse_moe.experts.74.w3", "model.layers.9.block_sparse_moe.experts.75.w3", "model.layers.9.block_sparse_moe.experts.76.w3", "model.layers.9.block_sparse_moe.experts.77.w3", "model.layers.9.block_sparse_moe.experts.78.w3", "model.layers.9.block_sparse_moe.experts.79.w3", "model.layers.9.block_sparse_moe.experts.80.w3", "model.layers.9.block_sparse_moe.experts.81.w3", "model.layers.9.block_sparse_moe.experts.82.w3", "model.layers.9.block_sparse_moe.experts.83.w3", "model.layers.9.block_sparse_moe.experts.84.w3", "model.layers.9.block_sparse_moe.experts.85.w3", "model.layers.9.block_sparse_moe.experts.86.w3", "model.layers.9.block_sparse_moe.experts.87.w3", "model.layers.9.block_sparse_moe.experts.88.w3", "model.layers.9.block_sparse_moe.experts.89.w3", "model.layers.9.block_sparse_moe.experts.90.w3", "model.layers.9.block_sparse_moe.experts.91.w3", "model.layers.9.block_sparse_moe.experts.92.w3", "model.layers.9.block_sparse_moe.experts.93.w3", "model.layers.9.block_sparse_moe.experts.94.w3", "model.layers.9.block_sparse_moe.experts.95.w3", "model.layers.9.block_sparse_moe.experts.96.w3", "model.layers.9.block_sparse_moe.experts.97.w3", "model.layers.9.block_sparse_moe.experts.98.w3", "model.layers.9.block_sparse_moe.experts.99.w3", "model.layers.9.block_sparse_moe.experts.100.w3", "model.layers.9.block_sparse_moe.experts.101.w3", "model.layers.9.block_sparse_moe.experts.102.w3", "model.layers.9.block_sparse_moe.experts.103.w3", "model.layers.9.block_sparse_moe.experts.104.w3", "model.layers.9.block_sparse_moe.experts.105.w3", "model.layers.9.block_sparse_moe.experts.106.w3", "model.layers.9.block_sparse_moe.experts.107.w3", "model.layers.9.block_sparse_moe.experts.108.w3", "model.layers.9.block_sparse_moe.experts.109.w3", "model.layers.9.block_sparse_moe.experts.110.w3", "model.layers.9.block_sparse_moe.experts.111.w3", "model.layers.9.block_sparse_moe.experts.112.w3", "model.layers.9.block_sparse_moe.experts.113.w3", "model.layers.9.block_sparse_moe.experts.114.w3", "model.layers.9.block_sparse_moe.experts.115.w3", "model.layers.9.block_sparse_moe.experts.116.w3", "model.layers.9.block_sparse_moe.experts.117.w3", "model.layers.9.block_sparse_moe.experts.118.w3", "model.layers.9.block_sparse_moe.experts.119.w3", "model.layers.9.block_sparse_moe.experts.120.w3", "model.layers.9.block_sparse_moe.experts.121.w3", "model.layers.9.block_sparse_moe.experts.122.w3", "model.layers.9.block_sparse_moe.experts.123.w3", "model.layers.9.block_sparse_moe.experts.124.w3", "model.layers.9.block_sparse_moe.experts.125.w3", "model.layers.9.block_sparse_moe.experts.126.w3", "model.layers.9.block_sparse_moe.experts.127.w3", "model.layers.9.block_sparse_moe.experts.128.w3", "model.layers.9.block_sparse_moe.experts.129.w3", "model.layers.9.block_sparse_moe.experts.130.w3", "model.layers.9.block_sparse_moe.experts.131.w3", "model.layers.9.block_sparse_moe.experts.132.w3", "model.layers.9.block_sparse_moe.experts.133.w3", "model.layers.9.block_sparse_moe.experts.134.w3", "model.layers.9.block_sparse_moe.experts.135.w3", "model.layers.9.block_sparse_moe.experts.136.w3", "model.layers.9.block_sparse_moe.experts.137.w3", "model.layers.9.block_sparse_moe.experts.138.w3", "model.layers.9.block_sparse_moe.experts.139.w3", "model.layers.9.block_sparse_moe.experts.140.w3", "model.layers.9.block_sparse_moe.experts.141.w3", "model.layers.9.block_sparse_moe.experts.142.w3", "model.layers.9.block_sparse_moe.experts.143.w3", "model.layers.9.block_sparse_moe.experts.144.w3", "model.layers.9.block_sparse_moe.experts.145.w3", "model.layers.9.block_sparse_moe.experts.146.w3", "model.layers.9.block_sparse_moe.experts.147.w3", "model.layers.9.block_sparse_moe.experts.148.w3", "model.layers.9.block_sparse_moe.experts.149.w3", "model.layers.9.block_sparse_moe.experts.150.w3", "model.layers.9.block_sparse_moe.experts.151.w3", "model.layers.9.block_sparse_moe.experts.152.w3", "model.layers.9.block_sparse_moe.experts.153.w3", "model.layers.9.block_sparse_moe.experts.154.w3", "model.layers.9.block_sparse_moe.experts.155.w3", "model.layers.9.block_sparse_moe.experts.156.w3", "model.layers.9.block_sparse_moe.experts.157.w3", "model.layers.9.block_sparse_moe.experts.158.w3", "model.layers.9.block_sparse_moe.experts.159.w3", "model.layers.9.block_sparse_moe.experts.160.w3", "model.layers.9.block_sparse_moe.experts.161.w3", "model.layers.9.block_sparse_moe.experts.162.w3", "model.layers.9.block_sparse_moe.experts.163.w3", "model.layers.9.block_sparse_moe.experts.164.w3", "model.layers.9.block_sparse_moe.experts.165.w3", "model.layers.9.block_sparse_moe.experts.166.w3", "model.layers.9.block_sparse_moe.experts.167.w3", "model.layers.9.block_sparse_moe.experts.168.w3", "model.layers.9.block_sparse_moe.experts.169.w3", "model.layers.9.block_sparse_moe.experts.170.w3", "model.layers.9.block_sparse_moe.experts.171.w3", "model.layers.9.block_sparse_moe.experts.172.w3", "model.layers.9.block_sparse_moe.experts.173.w3", "model.layers.9.block_sparse_moe.experts.174.w3", "model.layers.9.block_sparse_moe.experts.175.w3", "model.layers.9.block_sparse_moe.experts.176.w3", "model.layers.9.block_sparse_moe.experts.177.w3", "model.layers.9.block_sparse_moe.experts.178.w3", "model.layers.9.block_sparse_moe.experts.179.w3", "model.layers.9.block_sparse_moe.experts.180.w3", "model.layers.9.block_sparse_moe.experts.181.w3", "model.layers.9.block_sparse_moe.experts.182.w3", "model.layers.9.block_sparse_moe.experts.183.w3", "model.layers.9.block_sparse_moe.experts.184.w3", "model.layers.9.block_sparse_moe.experts.185.w3", "model.layers.9.block_sparse_moe.experts.186.w3", "model.layers.9.block_sparse_moe.experts.187.w3", "model.layers.9.block_sparse_moe.experts.188.w3", "model.layers.9.block_sparse_moe.experts.189.w3", "model.layers.9.block_sparse_moe.experts.190.w3", "model.layers.9.block_sparse_moe.experts.191.w3", "model.layers.9.block_sparse_moe.experts.192.w3", "model.layers.9.block_sparse_moe.experts.193.w3", "model.layers.9.block_sparse_moe.experts.194.w3", "model.layers.9.block_sparse_moe.experts.195.w3", "model.layers.9.block_sparse_moe.experts.196.w3", "model.layers.9.block_sparse_moe.experts.197.w3", "model.layers.9.block_sparse_moe.experts.198.w3", "model.layers.9.block_sparse_moe.experts.199.w3", "model.layers.9.block_sparse_moe.experts.200.w3", "model.layers.9.block_sparse_moe.experts.201.w3", "model.layers.9.block_sparse_moe.experts.202.w3", "model.layers.9.block_sparse_moe.experts.203.w3", "model.layers.9.block_sparse_moe.experts.204.w3", "model.layers.9.block_sparse_moe.experts.205.w3", "model.layers.9.block_sparse_moe.experts.206.w3", "model.layers.9.block_sparse_moe.experts.207.w3", "model.layers.9.block_sparse_moe.experts.208.w3", "model.layers.9.block_sparse_moe.experts.209.w3", "model.layers.9.block_sparse_moe.experts.210.w3", "model.layers.9.block_sparse_moe.experts.211.w3", "model.layers.9.block_sparse_moe.experts.212.w3", "model.layers.9.block_sparse_moe.experts.213.w3", "model.layers.9.block_sparse_moe.experts.214.w3", "model.layers.9.block_sparse_moe.experts.215.w3", "model.layers.9.block_sparse_moe.experts.216.w3", "model.layers.9.block_sparse_moe.experts.217.w3", "model.layers.9.block_sparse_moe.experts.218.w3", "model.layers.9.block_sparse_moe.experts.219.w3", "model.layers.9.block_sparse_moe.experts.220.w3", "model.layers.9.block_sparse_moe.experts.221.w3", "model.layers.9.block_sparse_moe.experts.222.w3", "model.layers.9.block_sparse_moe.experts.223.w3", "model.layers.9.block_sparse_moe.experts.224.w3", "model.layers.9.block_sparse_moe.experts.225.w3", "model.layers.9.block_sparse_moe.experts.226.w3", "model.layers.9.block_sparse_moe.experts.227.w3", "model.layers.9.block_sparse_moe.experts.228.w3", "model.layers.9.block_sparse_moe.experts.229.w3", "model.layers.9.block_sparse_moe.experts.230.w3", "model.layers.9.block_sparse_moe.experts.231.w3", "model.layers.9.block_sparse_moe.experts.232.w3", "model.layers.9.block_sparse_moe.experts.233.w3", "model.layers.9.block_sparse_moe.experts.234.w3", "model.layers.9.block_sparse_moe.experts.235.w3", "model.layers.9.block_sparse_moe.experts.236.w3", "model.layers.9.block_sparse_moe.experts.237.w3", "model.layers.9.block_sparse_moe.experts.238.w3", "model.layers.9.block_sparse_moe.experts.239.w3", "model.layers.9.block_sparse_moe.experts.240.w3", "model.layers.9.block_sparse_moe.experts.241.w3", "model.layers.9.block_sparse_moe.experts.242.w3", "model.layers.9.block_sparse_moe.experts.243.w3", "model.layers.9.block_sparse_moe.experts.244.w3", "model.layers.9.block_sparse_moe.experts.245.w3", "model.layers.9.block_sparse_moe.experts.246.w3", "model.layers.9.block_sparse_moe.experts.247.w3", "model.layers.9.block_sparse_moe.experts.248.w3", "model.layers.9.block_sparse_moe.experts.249.w3", "model.layers.9.block_sparse_moe.experts.250.w3", "model.layers.9.block_sparse_moe.experts.251.w3", "model.layers.9.block_sparse_moe.experts.252.w3", "model.layers.9.block_sparse_moe.experts.253.w3", "model.layers.9.block_sparse_moe.experts.254.w3", "model.layers.9.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.002514481544494629, "dbits": 2415919104 } ] }, { "idx": 49, "layers": [ "model.layers.9.block_sparse_moe.experts.0.w2", "model.layers.9.block_sparse_moe.experts.1.w2", "model.layers.9.block_sparse_moe.experts.2.w2", "model.layers.9.block_sparse_moe.experts.3.w2", "model.layers.9.block_sparse_moe.experts.4.w2", "model.layers.9.block_sparse_moe.experts.5.w2", "model.layers.9.block_sparse_moe.experts.6.w2", "model.layers.9.block_sparse_moe.experts.7.w2", "model.layers.9.block_sparse_moe.experts.8.w2", "model.layers.9.block_sparse_moe.experts.9.w2", "model.layers.9.block_sparse_moe.experts.10.w2", "model.layers.9.block_sparse_moe.experts.11.w2", "model.layers.9.block_sparse_moe.experts.12.w2", "model.layers.9.block_sparse_moe.experts.13.w2", "model.layers.9.block_sparse_moe.experts.14.w2", "model.layers.9.block_sparse_moe.experts.15.w2", "model.layers.9.block_sparse_moe.experts.16.w2", "model.layers.9.block_sparse_moe.experts.17.w2", "model.layers.9.block_sparse_moe.experts.18.w2", "model.layers.9.block_sparse_moe.experts.19.w2", "model.layers.9.block_sparse_moe.experts.20.w2", "model.layers.9.block_sparse_moe.experts.21.w2", "model.layers.9.block_sparse_moe.experts.22.w2", "model.layers.9.block_sparse_moe.experts.23.w2", "model.layers.9.block_sparse_moe.experts.24.w2", "model.layers.9.block_sparse_moe.experts.25.w2", "model.layers.9.block_sparse_moe.experts.26.w2", "model.layers.9.block_sparse_moe.experts.27.w2", "model.layers.9.block_sparse_moe.experts.28.w2", "model.layers.9.block_sparse_moe.experts.29.w2", "model.layers.9.block_sparse_moe.experts.30.w2", "model.layers.9.block_sparse_moe.experts.31.w2", "model.layers.9.block_sparse_moe.experts.32.w2", "model.layers.9.block_sparse_moe.experts.33.w2", "model.layers.9.block_sparse_moe.experts.34.w2", "model.layers.9.block_sparse_moe.experts.35.w2", "model.layers.9.block_sparse_moe.experts.36.w2", "model.layers.9.block_sparse_moe.experts.37.w2", "model.layers.9.block_sparse_moe.experts.38.w2", "model.layers.9.block_sparse_moe.experts.39.w2", "model.layers.9.block_sparse_moe.experts.40.w2", "model.layers.9.block_sparse_moe.experts.41.w2", "model.layers.9.block_sparse_moe.experts.42.w2", "model.layers.9.block_sparse_moe.experts.43.w2", "model.layers.9.block_sparse_moe.experts.44.w2", "model.layers.9.block_sparse_moe.experts.45.w2", "model.layers.9.block_sparse_moe.experts.46.w2", "model.layers.9.block_sparse_moe.experts.47.w2", "model.layers.9.block_sparse_moe.experts.48.w2", "model.layers.9.block_sparse_moe.experts.49.w2", "model.layers.9.block_sparse_moe.experts.50.w2", "model.layers.9.block_sparse_moe.experts.51.w2", "model.layers.9.block_sparse_moe.experts.52.w2", "model.layers.9.block_sparse_moe.experts.53.w2", "model.layers.9.block_sparse_moe.experts.54.w2", "model.layers.9.block_sparse_moe.experts.55.w2", "model.layers.9.block_sparse_moe.experts.56.w2", "model.layers.9.block_sparse_moe.experts.57.w2", "model.layers.9.block_sparse_moe.experts.58.w2", "model.layers.9.block_sparse_moe.experts.59.w2", "model.layers.9.block_sparse_moe.experts.60.w2", "model.layers.9.block_sparse_moe.experts.61.w2", "model.layers.9.block_sparse_moe.experts.62.w2", "model.layers.9.block_sparse_moe.experts.63.w2", "model.layers.9.block_sparse_moe.experts.64.w2", "model.layers.9.block_sparse_moe.experts.65.w2", "model.layers.9.block_sparse_moe.experts.66.w2", "model.layers.9.block_sparse_moe.experts.67.w2", "model.layers.9.block_sparse_moe.experts.68.w2", "model.layers.9.block_sparse_moe.experts.69.w2", "model.layers.9.block_sparse_moe.experts.70.w2", "model.layers.9.block_sparse_moe.experts.71.w2", "model.layers.9.block_sparse_moe.experts.72.w2", "model.layers.9.block_sparse_moe.experts.73.w2", "model.layers.9.block_sparse_moe.experts.74.w2", "model.layers.9.block_sparse_moe.experts.75.w2", "model.layers.9.block_sparse_moe.experts.76.w2", "model.layers.9.block_sparse_moe.experts.77.w2", "model.layers.9.block_sparse_moe.experts.78.w2", "model.layers.9.block_sparse_moe.experts.79.w2", "model.layers.9.block_sparse_moe.experts.80.w2", "model.layers.9.block_sparse_moe.experts.81.w2", "model.layers.9.block_sparse_moe.experts.82.w2", "model.layers.9.block_sparse_moe.experts.83.w2", "model.layers.9.block_sparse_moe.experts.84.w2", "model.layers.9.block_sparse_moe.experts.85.w2", "model.layers.9.block_sparse_moe.experts.86.w2", "model.layers.9.block_sparse_moe.experts.87.w2", "model.layers.9.block_sparse_moe.experts.88.w2", "model.layers.9.block_sparse_moe.experts.89.w2", "model.layers.9.block_sparse_moe.experts.90.w2", "model.layers.9.block_sparse_moe.experts.91.w2", "model.layers.9.block_sparse_moe.experts.92.w2", "model.layers.9.block_sparse_moe.experts.93.w2", "model.layers.9.block_sparse_moe.experts.94.w2", "model.layers.9.block_sparse_moe.experts.95.w2", "model.layers.9.block_sparse_moe.experts.96.w2", "model.layers.9.block_sparse_moe.experts.97.w2", "model.layers.9.block_sparse_moe.experts.98.w2", "model.layers.9.block_sparse_moe.experts.99.w2", "model.layers.9.block_sparse_moe.experts.100.w2", "model.layers.9.block_sparse_moe.experts.101.w2", "model.layers.9.block_sparse_moe.experts.102.w2", "model.layers.9.block_sparse_moe.experts.103.w2", "model.layers.9.block_sparse_moe.experts.104.w2", "model.layers.9.block_sparse_moe.experts.105.w2", "model.layers.9.block_sparse_moe.experts.106.w2", "model.layers.9.block_sparse_moe.experts.107.w2", "model.layers.9.block_sparse_moe.experts.108.w2", "model.layers.9.block_sparse_moe.experts.109.w2", "model.layers.9.block_sparse_moe.experts.110.w2", "model.layers.9.block_sparse_moe.experts.111.w2", "model.layers.9.block_sparse_moe.experts.112.w2", "model.layers.9.block_sparse_moe.experts.113.w2", "model.layers.9.block_sparse_moe.experts.114.w2", "model.layers.9.block_sparse_moe.experts.115.w2", "model.layers.9.block_sparse_moe.experts.116.w2", "model.layers.9.block_sparse_moe.experts.117.w2", "model.layers.9.block_sparse_moe.experts.118.w2", "model.layers.9.block_sparse_moe.experts.119.w2", "model.layers.9.block_sparse_moe.experts.120.w2", "model.layers.9.block_sparse_moe.experts.121.w2", "model.layers.9.block_sparse_moe.experts.122.w2", "model.layers.9.block_sparse_moe.experts.123.w2", "model.layers.9.block_sparse_moe.experts.124.w2", "model.layers.9.block_sparse_moe.experts.125.w2", "model.layers.9.block_sparse_moe.experts.126.w2", "model.layers.9.block_sparse_moe.experts.127.w2", "model.layers.9.block_sparse_moe.experts.128.w2", "model.layers.9.block_sparse_moe.experts.129.w2", "model.layers.9.block_sparse_moe.experts.130.w2", "model.layers.9.block_sparse_moe.experts.131.w2", "model.layers.9.block_sparse_moe.experts.132.w2", "model.layers.9.block_sparse_moe.experts.133.w2", "model.layers.9.block_sparse_moe.experts.134.w2", "model.layers.9.block_sparse_moe.experts.135.w2", "model.layers.9.block_sparse_moe.experts.136.w2", "model.layers.9.block_sparse_moe.experts.137.w2", "model.layers.9.block_sparse_moe.experts.138.w2", "model.layers.9.block_sparse_moe.experts.139.w2", "model.layers.9.block_sparse_moe.experts.140.w2", "model.layers.9.block_sparse_moe.experts.141.w2", "model.layers.9.block_sparse_moe.experts.142.w2", "model.layers.9.block_sparse_moe.experts.143.w2", "model.layers.9.block_sparse_moe.experts.144.w2", "model.layers.9.block_sparse_moe.experts.145.w2", "model.layers.9.block_sparse_moe.experts.146.w2", "model.layers.9.block_sparse_moe.experts.147.w2", "model.layers.9.block_sparse_moe.experts.148.w2", "model.layers.9.block_sparse_moe.experts.149.w2", "model.layers.9.block_sparse_moe.experts.150.w2", "model.layers.9.block_sparse_moe.experts.151.w2", "model.layers.9.block_sparse_moe.experts.152.w2", "model.layers.9.block_sparse_moe.experts.153.w2", "model.layers.9.block_sparse_moe.experts.154.w2", "model.layers.9.block_sparse_moe.experts.155.w2", "model.layers.9.block_sparse_moe.experts.156.w2", "model.layers.9.block_sparse_moe.experts.157.w2", "model.layers.9.block_sparse_moe.experts.158.w2", "model.layers.9.block_sparse_moe.experts.159.w2", "model.layers.9.block_sparse_moe.experts.160.w2", "model.layers.9.block_sparse_moe.experts.161.w2", "model.layers.9.block_sparse_moe.experts.162.w2", "model.layers.9.block_sparse_moe.experts.163.w2", "model.layers.9.block_sparse_moe.experts.164.w2", "model.layers.9.block_sparse_moe.experts.165.w2", "model.layers.9.block_sparse_moe.experts.166.w2", "model.layers.9.block_sparse_moe.experts.167.w2", "model.layers.9.block_sparse_moe.experts.168.w2", "model.layers.9.block_sparse_moe.experts.169.w2", "model.layers.9.block_sparse_moe.experts.170.w2", "model.layers.9.block_sparse_moe.experts.171.w2", "model.layers.9.block_sparse_moe.experts.172.w2", "model.layers.9.block_sparse_moe.experts.173.w2", "model.layers.9.block_sparse_moe.experts.174.w2", "model.layers.9.block_sparse_moe.experts.175.w2", "model.layers.9.block_sparse_moe.experts.176.w2", "model.layers.9.block_sparse_moe.experts.177.w2", "model.layers.9.block_sparse_moe.experts.178.w2", "model.layers.9.block_sparse_moe.experts.179.w2", "model.layers.9.block_sparse_moe.experts.180.w2", "model.layers.9.block_sparse_moe.experts.181.w2", "model.layers.9.block_sparse_moe.experts.182.w2", "model.layers.9.block_sparse_moe.experts.183.w2", "model.layers.9.block_sparse_moe.experts.184.w2", "model.layers.9.block_sparse_moe.experts.185.w2", "model.layers.9.block_sparse_moe.experts.186.w2", "model.layers.9.block_sparse_moe.experts.187.w2", "model.layers.9.block_sparse_moe.experts.188.w2", "model.layers.9.block_sparse_moe.experts.189.w2", "model.layers.9.block_sparse_moe.experts.190.w2", "model.layers.9.block_sparse_moe.experts.191.w2", "model.layers.9.block_sparse_moe.experts.192.w2", "model.layers.9.block_sparse_moe.experts.193.w2", "model.layers.9.block_sparse_moe.experts.194.w2", "model.layers.9.block_sparse_moe.experts.195.w2", "model.layers.9.block_sparse_moe.experts.196.w2", "model.layers.9.block_sparse_moe.experts.197.w2", "model.layers.9.block_sparse_moe.experts.198.w2", "model.layers.9.block_sparse_moe.experts.199.w2", "model.layers.9.block_sparse_moe.experts.200.w2", "model.layers.9.block_sparse_moe.experts.201.w2", "model.layers.9.block_sparse_moe.experts.202.w2", "model.layers.9.block_sparse_moe.experts.203.w2", "model.layers.9.block_sparse_moe.experts.204.w2", "model.layers.9.block_sparse_moe.experts.205.w2", "model.layers.9.block_sparse_moe.experts.206.w2", "model.layers.9.block_sparse_moe.experts.207.w2", "model.layers.9.block_sparse_moe.experts.208.w2", "model.layers.9.block_sparse_moe.experts.209.w2", "model.layers.9.block_sparse_moe.experts.210.w2", "model.layers.9.block_sparse_moe.experts.211.w2", "model.layers.9.block_sparse_moe.experts.212.w2", "model.layers.9.block_sparse_moe.experts.213.w2", "model.layers.9.block_sparse_moe.experts.214.w2", "model.layers.9.block_sparse_moe.experts.215.w2", "model.layers.9.block_sparse_moe.experts.216.w2", "model.layers.9.block_sparse_moe.experts.217.w2", "model.layers.9.block_sparse_moe.experts.218.w2", "model.layers.9.block_sparse_moe.experts.219.w2", "model.layers.9.block_sparse_moe.experts.220.w2", "model.layers.9.block_sparse_moe.experts.221.w2", "model.layers.9.block_sparse_moe.experts.222.w2", "model.layers.9.block_sparse_moe.experts.223.w2", "model.layers.9.block_sparse_moe.experts.224.w2", "model.layers.9.block_sparse_moe.experts.225.w2", "model.layers.9.block_sparse_moe.experts.226.w2", "model.layers.9.block_sparse_moe.experts.227.w2", "model.layers.9.block_sparse_moe.experts.228.w2", "model.layers.9.block_sparse_moe.experts.229.w2", "model.layers.9.block_sparse_moe.experts.230.w2", "model.layers.9.block_sparse_moe.experts.231.w2", "model.layers.9.block_sparse_moe.experts.232.w2", "model.layers.9.block_sparse_moe.experts.233.w2", "model.layers.9.block_sparse_moe.experts.234.w2", "model.layers.9.block_sparse_moe.experts.235.w2", "model.layers.9.block_sparse_moe.experts.236.w2", "model.layers.9.block_sparse_moe.experts.237.w2", "model.layers.9.block_sparse_moe.experts.238.w2", "model.layers.9.block_sparse_moe.experts.239.w2", "model.layers.9.block_sparse_moe.experts.240.w2", "model.layers.9.block_sparse_moe.experts.241.w2", "model.layers.9.block_sparse_moe.experts.242.w2", "model.layers.9.block_sparse_moe.experts.243.w2", "model.layers.9.block_sparse_moe.experts.244.w2", "model.layers.9.block_sparse_moe.experts.245.w2", "model.layers.9.block_sparse_moe.experts.246.w2", "model.layers.9.block_sparse_moe.experts.247.w2", "model.layers.9.block_sparse_moe.experts.248.w2", "model.layers.9.block_sparse_moe.experts.249.w2", "model.layers.9.block_sparse_moe.experts.250.w2", "model.layers.9.block_sparse_moe.experts.251.w2", "model.layers.9.block_sparse_moe.experts.252.w2", "model.layers.9.block_sparse_moe.experts.253.w2", "model.layers.9.block_sparse_moe.experts.254.w2", "model.layers.9.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0004411458969115767, "dbits": 1207959552 } ] }, { "idx": 50, "layers": [ "model.layers.10.self_attn.q_proj" ], "candidates": [ { "dkld": 0.003063240647316001, "dbits": 18874368 } ] }, { "idx": 51, "layers": [ "model.layers.10.self_attn.k_proj", "model.layers.10.self_attn.v_proj" ], "candidates": [ { "dkld": -0.003002090752124764, "dbits": 6291456 } ] }, { "idx": 52, "layers": [ "model.layers.10.self_attn.o_proj" ], "candidates": [ { "dkld": 0.005351707339286804, "dbits": 18874368 } ] }, { "idx": 53, "layers": [ "model.layers.10.block_sparse_moe.experts.0.w1", "model.layers.10.block_sparse_moe.experts.1.w1", "model.layers.10.block_sparse_moe.experts.2.w1", "model.layers.10.block_sparse_moe.experts.3.w1", "model.layers.10.block_sparse_moe.experts.4.w1", "model.layers.10.block_sparse_moe.experts.5.w1", "model.layers.10.block_sparse_moe.experts.6.w1", "model.layers.10.block_sparse_moe.experts.7.w1", "model.layers.10.block_sparse_moe.experts.8.w1", "model.layers.10.block_sparse_moe.experts.9.w1", "model.layers.10.block_sparse_moe.experts.10.w1", "model.layers.10.block_sparse_moe.experts.11.w1", "model.layers.10.block_sparse_moe.experts.12.w1", "model.layers.10.block_sparse_moe.experts.13.w1", "model.layers.10.block_sparse_moe.experts.14.w1", "model.layers.10.block_sparse_moe.experts.15.w1", "model.layers.10.block_sparse_moe.experts.16.w1", "model.layers.10.block_sparse_moe.experts.17.w1", "model.layers.10.block_sparse_moe.experts.18.w1", "model.layers.10.block_sparse_moe.experts.19.w1", "model.layers.10.block_sparse_moe.experts.20.w1", "model.layers.10.block_sparse_moe.experts.21.w1", "model.layers.10.block_sparse_moe.experts.22.w1", "model.layers.10.block_sparse_moe.experts.23.w1", "model.layers.10.block_sparse_moe.experts.24.w1", "model.layers.10.block_sparse_moe.experts.25.w1", "model.layers.10.block_sparse_moe.experts.26.w1", "model.layers.10.block_sparse_moe.experts.27.w1", "model.layers.10.block_sparse_moe.experts.28.w1", "model.layers.10.block_sparse_moe.experts.29.w1", "model.layers.10.block_sparse_moe.experts.30.w1", "model.layers.10.block_sparse_moe.experts.31.w1", "model.layers.10.block_sparse_moe.experts.32.w1", "model.layers.10.block_sparse_moe.experts.33.w1", "model.layers.10.block_sparse_moe.experts.34.w1", "model.layers.10.block_sparse_moe.experts.35.w1", "model.layers.10.block_sparse_moe.experts.36.w1", "model.layers.10.block_sparse_moe.experts.37.w1", "model.layers.10.block_sparse_moe.experts.38.w1", "model.layers.10.block_sparse_moe.experts.39.w1", "model.layers.10.block_sparse_moe.experts.40.w1", "model.layers.10.block_sparse_moe.experts.41.w1", "model.layers.10.block_sparse_moe.experts.42.w1", "model.layers.10.block_sparse_moe.experts.43.w1", "model.layers.10.block_sparse_moe.experts.44.w1", "model.layers.10.block_sparse_moe.experts.45.w1", "model.layers.10.block_sparse_moe.experts.46.w1", "model.layers.10.block_sparse_moe.experts.47.w1", "model.layers.10.block_sparse_moe.experts.48.w1", "model.layers.10.block_sparse_moe.experts.49.w1", "model.layers.10.block_sparse_moe.experts.50.w1", "model.layers.10.block_sparse_moe.experts.51.w1", "model.layers.10.block_sparse_moe.experts.52.w1", "model.layers.10.block_sparse_moe.experts.53.w1", "model.layers.10.block_sparse_moe.experts.54.w1", "model.layers.10.block_sparse_moe.experts.55.w1", "model.layers.10.block_sparse_moe.experts.56.w1", "model.layers.10.block_sparse_moe.experts.57.w1", "model.layers.10.block_sparse_moe.experts.58.w1", "model.layers.10.block_sparse_moe.experts.59.w1", "model.layers.10.block_sparse_moe.experts.60.w1", "model.layers.10.block_sparse_moe.experts.61.w1", "model.layers.10.block_sparse_moe.experts.62.w1", "model.layers.10.block_sparse_moe.experts.63.w1", "model.layers.10.block_sparse_moe.experts.64.w1", "model.layers.10.block_sparse_moe.experts.65.w1", "model.layers.10.block_sparse_moe.experts.66.w1", "model.layers.10.block_sparse_moe.experts.67.w1", "model.layers.10.block_sparse_moe.experts.68.w1", "model.layers.10.block_sparse_moe.experts.69.w1", "model.layers.10.block_sparse_moe.experts.70.w1", "model.layers.10.block_sparse_moe.experts.71.w1", "model.layers.10.block_sparse_moe.experts.72.w1", "model.layers.10.block_sparse_moe.experts.73.w1", "model.layers.10.block_sparse_moe.experts.74.w1", "model.layers.10.block_sparse_moe.experts.75.w1", "model.layers.10.block_sparse_moe.experts.76.w1", "model.layers.10.block_sparse_moe.experts.77.w1", "model.layers.10.block_sparse_moe.experts.78.w1", "model.layers.10.block_sparse_moe.experts.79.w1", "model.layers.10.block_sparse_moe.experts.80.w1", "model.layers.10.block_sparse_moe.experts.81.w1", "model.layers.10.block_sparse_moe.experts.82.w1", "model.layers.10.block_sparse_moe.experts.83.w1", "model.layers.10.block_sparse_moe.experts.84.w1", "model.layers.10.block_sparse_moe.experts.85.w1", "model.layers.10.block_sparse_moe.experts.86.w1", "model.layers.10.block_sparse_moe.experts.87.w1", "model.layers.10.block_sparse_moe.experts.88.w1", "model.layers.10.block_sparse_moe.experts.89.w1", "model.layers.10.block_sparse_moe.experts.90.w1", "model.layers.10.block_sparse_moe.experts.91.w1", "model.layers.10.block_sparse_moe.experts.92.w1", "model.layers.10.block_sparse_moe.experts.93.w1", "model.layers.10.block_sparse_moe.experts.94.w1", "model.layers.10.block_sparse_moe.experts.95.w1", "model.layers.10.block_sparse_moe.experts.96.w1", "model.layers.10.block_sparse_moe.experts.97.w1", "model.layers.10.block_sparse_moe.experts.98.w1", "model.layers.10.block_sparse_moe.experts.99.w1", "model.layers.10.block_sparse_moe.experts.100.w1", "model.layers.10.block_sparse_moe.experts.101.w1", "model.layers.10.block_sparse_moe.experts.102.w1", "model.layers.10.block_sparse_moe.experts.103.w1", "model.layers.10.block_sparse_moe.experts.104.w1", "model.layers.10.block_sparse_moe.experts.105.w1", "model.layers.10.block_sparse_moe.experts.106.w1", "model.layers.10.block_sparse_moe.experts.107.w1", "model.layers.10.block_sparse_moe.experts.108.w1", "model.layers.10.block_sparse_moe.experts.109.w1", "model.layers.10.block_sparse_moe.experts.110.w1", "model.layers.10.block_sparse_moe.experts.111.w1", "model.layers.10.block_sparse_moe.experts.112.w1", "model.layers.10.block_sparse_moe.experts.113.w1", "model.layers.10.block_sparse_moe.experts.114.w1", "model.layers.10.block_sparse_moe.experts.115.w1", "model.layers.10.block_sparse_moe.experts.116.w1", "model.layers.10.block_sparse_moe.experts.117.w1", "model.layers.10.block_sparse_moe.experts.118.w1", "model.layers.10.block_sparse_moe.experts.119.w1", "model.layers.10.block_sparse_moe.experts.120.w1", "model.layers.10.block_sparse_moe.experts.121.w1", "model.layers.10.block_sparse_moe.experts.122.w1", "model.layers.10.block_sparse_moe.experts.123.w1", "model.layers.10.block_sparse_moe.experts.124.w1", "model.layers.10.block_sparse_moe.experts.125.w1", "model.layers.10.block_sparse_moe.experts.126.w1", "model.layers.10.block_sparse_moe.experts.127.w1", "model.layers.10.block_sparse_moe.experts.128.w1", "model.layers.10.block_sparse_moe.experts.129.w1", "model.layers.10.block_sparse_moe.experts.130.w1", "model.layers.10.block_sparse_moe.experts.131.w1", "model.layers.10.block_sparse_moe.experts.132.w1", "model.layers.10.block_sparse_moe.experts.133.w1", "model.layers.10.block_sparse_moe.experts.134.w1", "model.layers.10.block_sparse_moe.experts.135.w1", "model.layers.10.block_sparse_moe.experts.136.w1", "model.layers.10.block_sparse_moe.experts.137.w1", "model.layers.10.block_sparse_moe.experts.138.w1", "model.layers.10.block_sparse_moe.experts.139.w1", "model.layers.10.block_sparse_moe.experts.140.w1", "model.layers.10.block_sparse_moe.experts.141.w1", "model.layers.10.block_sparse_moe.experts.142.w1", "model.layers.10.block_sparse_moe.experts.143.w1", "model.layers.10.block_sparse_moe.experts.144.w1", "model.layers.10.block_sparse_moe.experts.145.w1", "model.layers.10.block_sparse_moe.experts.146.w1", "model.layers.10.block_sparse_moe.experts.147.w1", "model.layers.10.block_sparse_moe.experts.148.w1", "model.layers.10.block_sparse_moe.experts.149.w1", "model.layers.10.block_sparse_moe.experts.150.w1", "model.layers.10.block_sparse_moe.experts.151.w1", "model.layers.10.block_sparse_moe.experts.152.w1", "model.layers.10.block_sparse_moe.experts.153.w1", "model.layers.10.block_sparse_moe.experts.154.w1", "model.layers.10.block_sparse_moe.experts.155.w1", "model.layers.10.block_sparse_moe.experts.156.w1", "model.layers.10.block_sparse_moe.experts.157.w1", "model.layers.10.block_sparse_moe.experts.158.w1", "model.layers.10.block_sparse_moe.experts.159.w1", "model.layers.10.block_sparse_moe.experts.160.w1", "model.layers.10.block_sparse_moe.experts.161.w1", "model.layers.10.block_sparse_moe.experts.162.w1", "model.layers.10.block_sparse_moe.experts.163.w1", "model.layers.10.block_sparse_moe.experts.164.w1", "model.layers.10.block_sparse_moe.experts.165.w1", "model.layers.10.block_sparse_moe.experts.166.w1", "model.layers.10.block_sparse_moe.experts.167.w1", "model.layers.10.block_sparse_moe.experts.168.w1", "model.layers.10.block_sparse_moe.experts.169.w1", "model.layers.10.block_sparse_moe.experts.170.w1", "model.layers.10.block_sparse_moe.experts.171.w1", "model.layers.10.block_sparse_moe.experts.172.w1", "model.layers.10.block_sparse_moe.experts.173.w1", "model.layers.10.block_sparse_moe.experts.174.w1", "model.layers.10.block_sparse_moe.experts.175.w1", "model.layers.10.block_sparse_moe.experts.176.w1", "model.layers.10.block_sparse_moe.experts.177.w1", "model.layers.10.block_sparse_moe.experts.178.w1", "model.layers.10.block_sparse_moe.experts.179.w1", "model.layers.10.block_sparse_moe.experts.180.w1", "model.layers.10.block_sparse_moe.experts.181.w1", "model.layers.10.block_sparse_moe.experts.182.w1", "model.layers.10.block_sparse_moe.experts.183.w1", "model.layers.10.block_sparse_moe.experts.184.w1", "model.layers.10.block_sparse_moe.experts.185.w1", "model.layers.10.block_sparse_moe.experts.186.w1", "model.layers.10.block_sparse_moe.experts.187.w1", "model.layers.10.block_sparse_moe.experts.188.w1", "model.layers.10.block_sparse_moe.experts.189.w1", "model.layers.10.block_sparse_moe.experts.190.w1", "model.layers.10.block_sparse_moe.experts.191.w1", "model.layers.10.block_sparse_moe.experts.192.w1", "model.layers.10.block_sparse_moe.experts.193.w1", "model.layers.10.block_sparse_moe.experts.194.w1", "model.layers.10.block_sparse_moe.experts.195.w1", "model.layers.10.block_sparse_moe.experts.196.w1", "model.layers.10.block_sparse_moe.experts.197.w1", "model.layers.10.block_sparse_moe.experts.198.w1", "model.layers.10.block_sparse_moe.experts.199.w1", "model.layers.10.block_sparse_moe.experts.200.w1", "model.layers.10.block_sparse_moe.experts.201.w1", "model.layers.10.block_sparse_moe.experts.202.w1", "model.layers.10.block_sparse_moe.experts.203.w1", "model.layers.10.block_sparse_moe.experts.204.w1", "model.layers.10.block_sparse_moe.experts.205.w1", "model.layers.10.block_sparse_moe.experts.206.w1", "model.layers.10.block_sparse_moe.experts.207.w1", "model.layers.10.block_sparse_moe.experts.208.w1", "model.layers.10.block_sparse_moe.experts.209.w1", "model.layers.10.block_sparse_moe.experts.210.w1", "model.layers.10.block_sparse_moe.experts.211.w1", "model.layers.10.block_sparse_moe.experts.212.w1", "model.layers.10.block_sparse_moe.experts.213.w1", "model.layers.10.block_sparse_moe.experts.214.w1", "model.layers.10.block_sparse_moe.experts.215.w1", "model.layers.10.block_sparse_moe.experts.216.w1", "model.layers.10.block_sparse_moe.experts.217.w1", "model.layers.10.block_sparse_moe.experts.218.w1", "model.layers.10.block_sparse_moe.experts.219.w1", "model.layers.10.block_sparse_moe.experts.220.w1", "model.layers.10.block_sparse_moe.experts.221.w1", "model.layers.10.block_sparse_moe.experts.222.w1", "model.layers.10.block_sparse_moe.experts.223.w1", "model.layers.10.block_sparse_moe.experts.224.w1", "model.layers.10.block_sparse_moe.experts.225.w1", "model.layers.10.block_sparse_moe.experts.226.w1", "model.layers.10.block_sparse_moe.experts.227.w1", "model.layers.10.block_sparse_moe.experts.228.w1", "model.layers.10.block_sparse_moe.experts.229.w1", "model.layers.10.block_sparse_moe.experts.230.w1", "model.layers.10.block_sparse_moe.experts.231.w1", "model.layers.10.block_sparse_moe.experts.232.w1", "model.layers.10.block_sparse_moe.experts.233.w1", "model.layers.10.block_sparse_moe.experts.234.w1", "model.layers.10.block_sparse_moe.experts.235.w1", "model.layers.10.block_sparse_moe.experts.236.w1", "model.layers.10.block_sparse_moe.experts.237.w1", "model.layers.10.block_sparse_moe.experts.238.w1", "model.layers.10.block_sparse_moe.experts.239.w1", "model.layers.10.block_sparse_moe.experts.240.w1", "model.layers.10.block_sparse_moe.experts.241.w1", "model.layers.10.block_sparse_moe.experts.242.w1", "model.layers.10.block_sparse_moe.experts.243.w1", "model.layers.10.block_sparse_moe.experts.244.w1", "model.layers.10.block_sparse_moe.experts.245.w1", "model.layers.10.block_sparse_moe.experts.246.w1", "model.layers.10.block_sparse_moe.experts.247.w1", "model.layers.10.block_sparse_moe.experts.248.w1", "model.layers.10.block_sparse_moe.experts.249.w1", "model.layers.10.block_sparse_moe.experts.250.w1", "model.layers.10.block_sparse_moe.experts.251.w1", "model.layers.10.block_sparse_moe.experts.252.w1", "model.layers.10.block_sparse_moe.experts.253.w1", "model.layers.10.block_sparse_moe.experts.254.w1", "model.layers.10.block_sparse_moe.experts.255.w1", "model.layers.10.block_sparse_moe.experts.0.w3", "model.layers.10.block_sparse_moe.experts.1.w3", "model.layers.10.block_sparse_moe.experts.2.w3", "model.layers.10.block_sparse_moe.experts.3.w3", "model.layers.10.block_sparse_moe.experts.4.w3", "model.layers.10.block_sparse_moe.experts.5.w3", "model.layers.10.block_sparse_moe.experts.6.w3", "model.layers.10.block_sparse_moe.experts.7.w3", "model.layers.10.block_sparse_moe.experts.8.w3", "model.layers.10.block_sparse_moe.experts.9.w3", "model.layers.10.block_sparse_moe.experts.10.w3", "model.layers.10.block_sparse_moe.experts.11.w3", "model.layers.10.block_sparse_moe.experts.12.w3", "model.layers.10.block_sparse_moe.experts.13.w3", "model.layers.10.block_sparse_moe.experts.14.w3", "model.layers.10.block_sparse_moe.experts.15.w3", "model.layers.10.block_sparse_moe.experts.16.w3", "model.layers.10.block_sparse_moe.experts.17.w3", "model.layers.10.block_sparse_moe.experts.18.w3", "model.layers.10.block_sparse_moe.experts.19.w3", "model.layers.10.block_sparse_moe.experts.20.w3", "model.layers.10.block_sparse_moe.experts.21.w3", "model.layers.10.block_sparse_moe.experts.22.w3", "model.layers.10.block_sparse_moe.experts.23.w3", "model.layers.10.block_sparse_moe.experts.24.w3", "model.layers.10.block_sparse_moe.experts.25.w3", "model.layers.10.block_sparse_moe.experts.26.w3", "model.layers.10.block_sparse_moe.experts.27.w3", "model.layers.10.block_sparse_moe.experts.28.w3", "model.layers.10.block_sparse_moe.experts.29.w3", "model.layers.10.block_sparse_moe.experts.30.w3", "model.layers.10.block_sparse_moe.experts.31.w3", "model.layers.10.block_sparse_moe.experts.32.w3", "model.layers.10.block_sparse_moe.experts.33.w3", "model.layers.10.block_sparse_moe.experts.34.w3", "model.layers.10.block_sparse_moe.experts.35.w3", "model.layers.10.block_sparse_moe.experts.36.w3", "model.layers.10.block_sparse_moe.experts.37.w3", "model.layers.10.block_sparse_moe.experts.38.w3", "model.layers.10.block_sparse_moe.experts.39.w3", "model.layers.10.block_sparse_moe.experts.40.w3", "model.layers.10.block_sparse_moe.experts.41.w3", "model.layers.10.block_sparse_moe.experts.42.w3", "model.layers.10.block_sparse_moe.experts.43.w3", "model.layers.10.block_sparse_moe.experts.44.w3", "model.layers.10.block_sparse_moe.experts.45.w3", "model.layers.10.block_sparse_moe.experts.46.w3", "model.layers.10.block_sparse_moe.experts.47.w3", "model.layers.10.block_sparse_moe.experts.48.w3", "model.layers.10.block_sparse_moe.experts.49.w3", "model.layers.10.block_sparse_moe.experts.50.w3", "model.layers.10.block_sparse_moe.experts.51.w3", "model.layers.10.block_sparse_moe.experts.52.w3", "model.layers.10.block_sparse_moe.experts.53.w3", "model.layers.10.block_sparse_moe.experts.54.w3", "model.layers.10.block_sparse_moe.experts.55.w3", "model.layers.10.block_sparse_moe.experts.56.w3", "model.layers.10.block_sparse_moe.experts.57.w3", "model.layers.10.block_sparse_moe.experts.58.w3", "model.layers.10.block_sparse_moe.experts.59.w3", "model.layers.10.block_sparse_moe.experts.60.w3", "model.layers.10.block_sparse_moe.experts.61.w3", "model.layers.10.block_sparse_moe.experts.62.w3", "model.layers.10.block_sparse_moe.experts.63.w3", "model.layers.10.block_sparse_moe.experts.64.w3", "model.layers.10.block_sparse_moe.experts.65.w3", "model.layers.10.block_sparse_moe.experts.66.w3", "model.layers.10.block_sparse_moe.experts.67.w3", "model.layers.10.block_sparse_moe.experts.68.w3", "model.layers.10.block_sparse_moe.experts.69.w3", "model.layers.10.block_sparse_moe.experts.70.w3", "model.layers.10.block_sparse_moe.experts.71.w3", "model.layers.10.block_sparse_moe.experts.72.w3", "model.layers.10.block_sparse_moe.experts.73.w3", "model.layers.10.block_sparse_moe.experts.74.w3", "model.layers.10.block_sparse_moe.experts.75.w3", "model.layers.10.block_sparse_moe.experts.76.w3", "model.layers.10.block_sparse_moe.experts.77.w3", "model.layers.10.block_sparse_moe.experts.78.w3", "model.layers.10.block_sparse_moe.experts.79.w3", "model.layers.10.block_sparse_moe.experts.80.w3", "model.layers.10.block_sparse_moe.experts.81.w3", "model.layers.10.block_sparse_moe.experts.82.w3", "model.layers.10.block_sparse_moe.experts.83.w3", "model.layers.10.block_sparse_moe.experts.84.w3", "model.layers.10.block_sparse_moe.experts.85.w3", "model.layers.10.block_sparse_moe.experts.86.w3", "model.layers.10.block_sparse_moe.experts.87.w3", "model.layers.10.block_sparse_moe.experts.88.w3", "model.layers.10.block_sparse_moe.experts.89.w3", "model.layers.10.block_sparse_moe.experts.90.w3", "model.layers.10.block_sparse_moe.experts.91.w3", "model.layers.10.block_sparse_moe.experts.92.w3", "model.layers.10.block_sparse_moe.experts.93.w3", "model.layers.10.block_sparse_moe.experts.94.w3", "model.layers.10.block_sparse_moe.experts.95.w3", "model.layers.10.block_sparse_moe.experts.96.w3", "model.layers.10.block_sparse_moe.experts.97.w3", "model.layers.10.block_sparse_moe.experts.98.w3", "model.layers.10.block_sparse_moe.experts.99.w3", "model.layers.10.block_sparse_moe.experts.100.w3", "model.layers.10.block_sparse_moe.experts.101.w3", "model.layers.10.block_sparse_moe.experts.102.w3", "model.layers.10.block_sparse_moe.experts.103.w3", "model.layers.10.block_sparse_moe.experts.104.w3", "model.layers.10.block_sparse_moe.experts.105.w3", "model.layers.10.block_sparse_moe.experts.106.w3", "model.layers.10.block_sparse_moe.experts.107.w3", "model.layers.10.block_sparse_moe.experts.108.w3", "model.layers.10.block_sparse_moe.experts.109.w3", "model.layers.10.block_sparse_moe.experts.110.w3", "model.layers.10.block_sparse_moe.experts.111.w3", "model.layers.10.block_sparse_moe.experts.112.w3", "model.layers.10.block_sparse_moe.experts.113.w3", "model.layers.10.block_sparse_moe.experts.114.w3", "model.layers.10.block_sparse_moe.experts.115.w3", "model.layers.10.block_sparse_moe.experts.116.w3", "model.layers.10.block_sparse_moe.experts.117.w3", "model.layers.10.block_sparse_moe.experts.118.w3", "model.layers.10.block_sparse_moe.experts.119.w3", "model.layers.10.block_sparse_moe.experts.120.w3", "model.layers.10.block_sparse_moe.experts.121.w3", "model.layers.10.block_sparse_moe.experts.122.w3", "model.layers.10.block_sparse_moe.experts.123.w3", "model.layers.10.block_sparse_moe.experts.124.w3", "model.layers.10.block_sparse_moe.experts.125.w3", "model.layers.10.block_sparse_moe.experts.126.w3", "model.layers.10.block_sparse_moe.experts.127.w3", "model.layers.10.block_sparse_moe.experts.128.w3", "model.layers.10.block_sparse_moe.experts.129.w3", "model.layers.10.block_sparse_moe.experts.130.w3", "model.layers.10.block_sparse_moe.experts.131.w3", "model.layers.10.block_sparse_moe.experts.132.w3", "model.layers.10.block_sparse_moe.experts.133.w3", "model.layers.10.block_sparse_moe.experts.134.w3", "model.layers.10.block_sparse_moe.experts.135.w3", "model.layers.10.block_sparse_moe.experts.136.w3", "model.layers.10.block_sparse_moe.experts.137.w3", "model.layers.10.block_sparse_moe.experts.138.w3", "model.layers.10.block_sparse_moe.experts.139.w3", "model.layers.10.block_sparse_moe.experts.140.w3", "model.layers.10.block_sparse_moe.experts.141.w3", "model.layers.10.block_sparse_moe.experts.142.w3", "model.layers.10.block_sparse_moe.experts.143.w3", "model.layers.10.block_sparse_moe.experts.144.w3", "model.layers.10.block_sparse_moe.experts.145.w3", "model.layers.10.block_sparse_moe.experts.146.w3", "model.layers.10.block_sparse_moe.experts.147.w3", "model.layers.10.block_sparse_moe.experts.148.w3", "model.layers.10.block_sparse_moe.experts.149.w3", "model.layers.10.block_sparse_moe.experts.150.w3", "model.layers.10.block_sparse_moe.experts.151.w3", "model.layers.10.block_sparse_moe.experts.152.w3", "model.layers.10.block_sparse_moe.experts.153.w3", "model.layers.10.block_sparse_moe.experts.154.w3", "model.layers.10.block_sparse_moe.experts.155.w3", "model.layers.10.block_sparse_moe.experts.156.w3", "model.layers.10.block_sparse_moe.experts.157.w3", "model.layers.10.block_sparse_moe.experts.158.w3", "model.layers.10.block_sparse_moe.experts.159.w3", "model.layers.10.block_sparse_moe.experts.160.w3", "model.layers.10.block_sparse_moe.experts.161.w3", "model.layers.10.block_sparse_moe.experts.162.w3", "model.layers.10.block_sparse_moe.experts.163.w3", "model.layers.10.block_sparse_moe.experts.164.w3", "model.layers.10.block_sparse_moe.experts.165.w3", "model.layers.10.block_sparse_moe.experts.166.w3", "model.layers.10.block_sparse_moe.experts.167.w3", "model.layers.10.block_sparse_moe.experts.168.w3", "model.layers.10.block_sparse_moe.experts.169.w3", "model.layers.10.block_sparse_moe.experts.170.w3", "model.layers.10.block_sparse_moe.experts.171.w3", "model.layers.10.block_sparse_moe.experts.172.w3", "model.layers.10.block_sparse_moe.experts.173.w3", "model.layers.10.block_sparse_moe.experts.174.w3", "model.layers.10.block_sparse_moe.experts.175.w3", "model.layers.10.block_sparse_moe.experts.176.w3", "model.layers.10.block_sparse_moe.experts.177.w3", "model.layers.10.block_sparse_moe.experts.178.w3", "model.layers.10.block_sparse_moe.experts.179.w3", "model.layers.10.block_sparse_moe.experts.180.w3", "model.layers.10.block_sparse_moe.experts.181.w3", "model.layers.10.block_sparse_moe.experts.182.w3", "model.layers.10.block_sparse_moe.experts.183.w3", "model.layers.10.block_sparse_moe.experts.184.w3", "model.layers.10.block_sparse_moe.experts.185.w3", "model.layers.10.block_sparse_moe.experts.186.w3", "model.layers.10.block_sparse_moe.experts.187.w3", "model.layers.10.block_sparse_moe.experts.188.w3", "model.layers.10.block_sparse_moe.experts.189.w3", "model.layers.10.block_sparse_moe.experts.190.w3", "model.layers.10.block_sparse_moe.experts.191.w3", "model.layers.10.block_sparse_moe.experts.192.w3", "model.layers.10.block_sparse_moe.experts.193.w3", "model.layers.10.block_sparse_moe.experts.194.w3", "model.layers.10.block_sparse_moe.experts.195.w3", "model.layers.10.block_sparse_moe.experts.196.w3", "model.layers.10.block_sparse_moe.experts.197.w3", "model.layers.10.block_sparse_moe.experts.198.w3", "model.layers.10.block_sparse_moe.experts.199.w3", "model.layers.10.block_sparse_moe.experts.200.w3", "model.layers.10.block_sparse_moe.experts.201.w3", "model.layers.10.block_sparse_moe.experts.202.w3", "model.layers.10.block_sparse_moe.experts.203.w3", "model.layers.10.block_sparse_moe.experts.204.w3", "model.layers.10.block_sparse_moe.experts.205.w3", "model.layers.10.block_sparse_moe.experts.206.w3", "model.layers.10.block_sparse_moe.experts.207.w3", "model.layers.10.block_sparse_moe.experts.208.w3", "model.layers.10.block_sparse_moe.experts.209.w3", "model.layers.10.block_sparse_moe.experts.210.w3", "model.layers.10.block_sparse_moe.experts.211.w3", "model.layers.10.block_sparse_moe.experts.212.w3", "model.layers.10.block_sparse_moe.experts.213.w3", "model.layers.10.block_sparse_moe.experts.214.w3", "model.layers.10.block_sparse_moe.experts.215.w3", "model.layers.10.block_sparse_moe.experts.216.w3", "model.layers.10.block_sparse_moe.experts.217.w3", "model.layers.10.block_sparse_moe.experts.218.w3", "model.layers.10.block_sparse_moe.experts.219.w3", "model.layers.10.block_sparse_moe.experts.220.w3", "model.layers.10.block_sparse_moe.experts.221.w3", "model.layers.10.block_sparse_moe.experts.222.w3", "model.layers.10.block_sparse_moe.experts.223.w3", "model.layers.10.block_sparse_moe.experts.224.w3", "model.layers.10.block_sparse_moe.experts.225.w3", "model.layers.10.block_sparse_moe.experts.226.w3", "model.layers.10.block_sparse_moe.experts.227.w3", "model.layers.10.block_sparse_moe.experts.228.w3", "model.layers.10.block_sparse_moe.experts.229.w3", "model.layers.10.block_sparse_moe.experts.230.w3", "model.layers.10.block_sparse_moe.experts.231.w3", "model.layers.10.block_sparse_moe.experts.232.w3", "model.layers.10.block_sparse_moe.experts.233.w3", "model.layers.10.block_sparse_moe.experts.234.w3", "model.layers.10.block_sparse_moe.experts.235.w3", "model.layers.10.block_sparse_moe.experts.236.w3", "model.layers.10.block_sparse_moe.experts.237.w3", "model.layers.10.block_sparse_moe.experts.238.w3", "model.layers.10.block_sparse_moe.experts.239.w3", "model.layers.10.block_sparse_moe.experts.240.w3", "model.layers.10.block_sparse_moe.experts.241.w3", "model.layers.10.block_sparse_moe.experts.242.w3", "model.layers.10.block_sparse_moe.experts.243.w3", "model.layers.10.block_sparse_moe.experts.244.w3", "model.layers.10.block_sparse_moe.experts.245.w3", "model.layers.10.block_sparse_moe.experts.246.w3", "model.layers.10.block_sparse_moe.experts.247.w3", "model.layers.10.block_sparse_moe.experts.248.w3", "model.layers.10.block_sparse_moe.experts.249.w3", "model.layers.10.block_sparse_moe.experts.250.w3", "model.layers.10.block_sparse_moe.experts.251.w3", "model.layers.10.block_sparse_moe.experts.252.w3", "model.layers.10.block_sparse_moe.experts.253.w3", "model.layers.10.block_sparse_moe.experts.254.w3", "model.layers.10.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 5.784630775496069e-06, "dbits": 2415919104 } ] }, { "idx": 54, "layers": [ "model.layers.10.block_sparse_moe.experts.0.w2", "model.layers.10.block_sparse_moe.experts.1.w2", "model.layers.10.block_sparse_moe.experts.2.w2", "model.layers.10.block_sparse_moe.experts.3.w2", "model.layers.10.block_sparse_moe.experts.4.w2", "model.layers.10.block_sparse_moe.experts.5.w2", "model.layers.10.block_sparse_moe.experts.6.w2", "model.layers.10.block_sparse_moe.experts.7.w2", "model.layers.10.block_sparse_moe.experts.8.w2", "model.layers.10.block_sparse_moe.experts.9.w2", "model.layers.10.block_sparse_moe.experts.10.w2", "model.layers.10.block_sparse_moe.experts.11.w2", "model.layers.10.block_sparse_moe.experts.12.w2", "model.layers.10.block_sparse_moe.experts.13.w2", "model.layers.10.block_sparse_moe.experts.14.w2", "model.layers.10.block_sparse_moe.experts.15.w2", "model.layers.10.block_sparse_moe.experts.16.w2", "model.layers.10.block_sparse_moe.experts.17.w2", "model.layers.10.block_sparse_moe.experts.18.w2", "model.layers.10.block_sparse_moe.experts.19.w2", "model.layers.10.block_sparse_moe.experts.20.w2", "model.layers.10.block_sparse_moe.experts.21.w2", "model.layers.10.block_sparse_moe.experts.22.w2", "model.layers.10.block_sparse_moe.experts.23.w2", "model.layers.10.block_sparse_moe.experts.24.w2", "model.layers.10.block_sparse_moe.experts.25.w2", "model.layers.10.block_sparse_moe.experts.26.w2", "model.layers.10.block_sparse_moe.experts.27.w2", "model.layers.10.block_sparse_moe.experts.28.w2", "model.layers.10.block_sparse_moe.experts.29.w2", "model.layers.10.block_sparse_moe.experts.30.w2", "model.layers.10.block_sparse_moe.experts.31.w2", "model.layers.10.block_sparse_moe.experts.32.w2", "model.layers.10.block_sparse_moe.experts.33.w2", "model.layers.10.block_sparse_moe.experts.34.w2", "model.layers.10.block_sparse_moe.experts.35.w2", "model.layers.10.block_sparse_moe.experts.36.w2", "model.layers.10.block_sparse_moe.experts.37.w2", "model.layers.10.block_sparse_moe.experts.38.w2", "model.layers.10.block_sparse_moe.experts.39.w2", "model.layers.10.block_sparse_moe.experts.40.w2", "model.layers.10.block_sparse_moe.experts.41.w2", "model.layers.10.block_sparse_moe.experts.42.w2", "model.layers.10.block_sparse_moe.experts.43.w2", "model.layers.10.block_sparse_moe.experts.44.w2", "model.layers.10.block_sparse_moe.experts.45.w2", "model.layers.10.block_sparse_moe.experts.46.w2", "model.layers.10.block_sparse_moe.experts.47.w2", "model.layers.10.block_sparse_moe.experts.48.w2", "model.layers.10.block_sparse_moe.experts.49.w2", "model.layers.10.block_sparse_moe.experts.50.w2", "model.layers.10.block_sparse_moe.experts.51.w2", "model.layers.10.block_sparse_moe.experts.52.w2", "model.layers.10.block_sparse_moe.experts.53.w2", "model.layers.10.block_sparse_moe.experts.54.w2", "model.layers.10.block_sparse_moe.experts.55.w2", "model.layers.10.block_sparse_moe.experts.56.w2", "model.layers.10.block_sparse_moe.experts.57.w2", "model.layers.10.block_sparse_moe.experts.58.w2", "model.layers.10.block_sparse_moe.experts.59.w2", "model.layers.10.block_sparse_moe.experts.60.w2", "model.layers.10.block_sparse_moe.experts.61.w2", "model.layers.10.block_sparse_moe.experts.62.w2", "model.layers.10.block_sparse_moe.experts.63.w2", "model.layers.10.block_sparse_moe.experts.64.w2", "model.layers.10.block_sparse_moe.experts.65.w2", "model.layers.10.block_sparse_moe.experts.66.w2", "model.layers.10.block_sparse_moe.experts.67.w2", "model.layers.10.block_sparse_moe.experts.68.w2", "model.layers.10.block_sparse_moe.experts.69.w2", "model.layers.10.block_sparse_moe.experts.70.w2", "model.layers.10.block_sparse_moe.experts.71.w2", "model.layers.10.block_sparse_moe.experts.72.w2", "model.layers.10.block_sparse_moe.experts.73.w2", "model.layers.10.block_sparse_moe.experts.74.w2", "model.layers.10.block_sparse_moe.experts.75.w2", "model.layers.10.block_sparse_moe.experts.76.w2", "model.layers.10.block_sparse_moe.experts.77.w2", "model.layers.10.block_sparse_moe.experts.78.w2", "model.layers.10.block_sparse_moe.experts.79.w2", "model.layers.10.block_sparse_moe.experts.80.w2", "model.layers.10.block_sparse_moe.experts.81.w2", "model.layers.10.block_sparse_moe.experts.82.w2", "model.layers.10.block_sparse_moe.experts.83.w2", "model.layers.10.block_sparse_moe.experts.84.w2", "model.layers.10.block_sparse_moe.experts.85.w2", "model.layers.10.block_sparse_moe.experts.86.w2", "model.layers.10.block_sparse_moe.experts.87.w2", "model.layers.10.block_sparse_moe.experts.88.w2", "model.layers.10.block_sparse_moe.experts.89.w2", "model.layers.10.block_sparse_moe.experts.90.w2", "model.layers.10.block_sparse_moe.experts.91.w2", "model.layers.10.block_sparse_moe.experts.92.w2", "model.layers.10.block_sparse_moe.experts.93.w2", "model.layers.10.block_sparse_moe.experts.94.w2", "model.layers.10.block_sparse_moe.experts.95.w2", "model.layers.10.block_sparse_moe.experts.96.w2", "model.layers.10.block_sparse_moe.experts.97.w2", "model.layers.10.block_sparse_moe.experts.98.w2", "model.layers.10.block_sparse_moe.experts.99.w2", "model.layers.10.block_sparse_moe.experts.100.w2", "model.layers.10.block_sparse_moe.experts.101.w2", "model.layers.10.block_sparse_moe.experts.102.w2", "model.layers.10.block_sparse_moe.experts.103.w2", "model.layers.10.block_sparse_moe.experts.104.w2", "model.layers.10.block_sparse_moe.experts.105.w2", "model.layers.10.block_sparse_moe.experts.106.w2", "model.layers.10.block_sparse_moe.experts.107.w2", "model.layers.10.block_sparse_moe.experts.108.w2", "model.layers.10.block_sparse_moe.experts.109.w2", "model.layers.10.block_sparse_moe.experts.110.w2", "model.layers.10.block_sparse_moe.experts.111.w2", "model.layers.10.block_sparse_moe.experts.112.w2", "model.layers.10.block_sparse_moe.experts.113.w2", "model.layers.10.block_sparse_moe.experts.114.w2", "model.layers.10.block_sparse_moe.experts.115.w2", "model.layers.10.block_sparse_moe.experts.116.w2", "model.layers.10.block_sparse_moe.experts.117.w2", "model.layers.10.block_sparse_moe.experts.118.w2", "model.layers.10.block_sparse_moe.experts.119.w2", "model.layers.10.block_sparse_moe.experts.120.w2", "model.layers.10.block_sparse_moe.experts.121.w2", "model.layers.10.block_sparse_moe.experts.122.w2", "model.layers.10.block_sparse_moe.experts.123.w2", "model.layers.10.block_sparse_moe.experts.124.w2", "model.layers.10.block_sparse_moe.experts.125.w2", "model.layers.10.block_sparse_moe.experts.126.w2", "model.layers.10.block_sparse_moe.experts.127.w2", "model.layers.10.block_sparse_moe.experts.128.w2", "model.layers.10.block_sparse_moe.experts.129.w2", "model.layers.10.block_sparse_moe.experts.130.w2", "model.layers.10.block_sparse_moe.experts.131.w2", "model.layers.10.block_sparse_moe.experts.132.w2", "model.layers.10.block_sparse_moe.experts.133.w2", "model.layers.10.block_sparse_moe.experts.134.w2", "model.layers.10.block_sparse_moe.experts.135.w2", "model.layers.10.block_sparse_moe.experts.136.w2", "model.layers.10.block_sparse_moe.experts.137.w2", "model.layers.10.block_sparse_moe.experts.138.w2", "model.layers.10.block_sparse_moe.experts.139.w2", "model.layers.10.block_sparse_moe.experts.140.w2", "model.layers.10.block_sparse_moe.experts.141.w2", "model.layers.10.block_sparse_moe.experts.142.w2", "model.layers.10.block_sparse_moe.experts.143.w2", "model.layers.10.block_sparse_moe.experts.144.w2", "model.layers.10.block_sparse_moe.experts.145.w2", "model.layers.10.block_sparse_moe.experts.146.w2", "model.layers.10.block_sparse_moe.experts.147.w2", "model.layers.10.block_sparse_moe.experts.148.w2", "model.layers.10.block_sparse_moe.experts.149.w2", "model.layers.10.block_sparse_moe.experts.150.w2", "model.layers.10.block_sparse_moe.experts.151.w2", "model.layers.10.block_sparse_moe.experts.152.w2", "model.layers.10.block_sparse_moe.experts.153.w2", "model.layers.10.block_sparse_moe.experts.154.w2", "model.layers.10.block_sparse_moe.experts.155.w2", "model.layers.10.block_sparse_moe.experts.156.w2", "model.layers.10.block_sparse_moe.experts.157.w2", "model.layers.10.block_sparse_moe.experts.158.w2", "model.layers.10.block_sparse_moe.experts.159.w2", "model.layers.10.block_sparse_moe.experts.160.w2", "model.layers.10.block_sparse_moe.experts.161.w2", "model.layers.10.block_sparse_moe.experts.162.w2", "model.layers.10.block_sparse_moe.experts.163.w2", "model.layers.10.block_sparse_moe.experts.164.w2", "model.layers.10.block_sparse_moe.experts.165.w2", "model.layers.10.block_sparse_moe.experts.166.w2", "model.layers.10.block_sparse_moe.experts.167.w2", "model.layers.10.block_sparse_moe.experts.168.w2", "model.layers.10.block_sparse_moe.experts.169.w2", "model.layers.10.block_sparse_moe.experts.170.w2", "model.layers.10.block_sparse_moe.experts.171.w2", "model.layers.10.block_sparse_moe.experts.172.w2", "model.layers.10.block_sparse_moe.experts.173.w2", "model.layers.10.block_sparse_moe.experts.174.w2", "model.layers.10.block_sparse_moe.experts.175.w2", "model.layers.10.block_sparse_moe.experts.176.w2", "model.layers.10.block_sparse_moe.experts.177.w2", "model.layers.10.block_sparse_moe.experts.178.w2", "model.layers.10.block_sparse_moe.experts.179.w2", "model.layers.10.block_sparse_moe.experts.180.w2", "model.layers.10.block_sparse_moe.experts.181.w2", "model.layers.10.block_sparse_moe.experts.182.w2", "model.layers.10.block_sparse_moe.experts.183.w2", "model.layers.10.block_sparse_moe.experts.184.w2", "model.layers.10.block_sparse_moe.experts.185.w2", "model.layers.10.block_sparse_moe.experts.186.w2", "model.layers.10.block_sparse_moe.experts.187.w2", "model.layers.10.block_sparse_moe.experts.188.w2", "model.layers.10.block_sparse_moe.experts.189.w2", "model.layers.10.block_sparse_moe.experts.190.w2", "model.layers.10.block_sparse_moe.experts.191.w2", "model.layers.10.block_sparse_moe.experts.192.w2", "model.layers.10.block_sparse_moe.experts.193.w2", "model.layers.10.block_sparse_moe.experts.194.w2", "model.layers.10.block_sparse_moe.experts.195.w2", "model.layers.10.block_sparse_moe.experts.196.w2", "model.layers.10.block_sparse_moe.experts.197.w2", "model.layers.10.block_sparse_moe.experts.198.w2", "model.layers.10.block_sparse_moe.experts.199.w2", "model.layers.10.block_sparse_moe.experts.200.w2", "model.layers.10.block_sparse_moe.experts.201.w2", "model.layers.10.block_sparse_moe.experts.202.w2", "model.layers.10.block_sparse_moe.experts.203.w2", "model.layers.10.block_sparse_moe.experts.204.w2", "model.layers.10.block_sparse_moe.experts.205.w2", "model.layers.10.block_sparse_moe.experts.206.w2", "model.layers.10.block_sparse_moe.experts.207.w2", "model.layers.10.block_sparse_moe.experts.208.w2", "model.layers.10.block_sparse_moe.experts.209.w2", "model.layers.10.block_sparse_moe.experts.210.w2", "model.layers.10.block_sparse_moe.experts.211.w2", "model.layers.10.block_sparse_moe.experts.212.w2", "model.layers.10.block_sparse_moe.experts.213.w2", "model.layers.10.block_sparse_moe.experts.214.w2", "model.layers.10.block_sparse_moe.experts.215.w2", "model.layers.10.block_sparse_moe.experts.216.w2", "model.layers.10.block_sparse_moe.experts.217.w2", "model.layers.10.block_sparse_moe.experts.218.w2", "model.layers.10.block_sparse_moe.experts.219.w2", "model.layers.10.block_sparse_moe.experts.220.w2", "model.layers.10.block_sparse_moe.experts.221.w2", "model.layers.10.block_sparse_moe.experts.222.w2", "model.layers.10.block_sparse_moe.experts.223.w2", "model.layers.10.block_sparse_moe.experts.224.w2", "model.layers.10.block_sparse_moe.experts.225.w2", "model.layers.10.block_sparse_moe.experts.226.w2", "model.layers.10.block_sparse_moe.experts.227.w2", "model.layers.10.block_sparse_moe.experts.228.w2", "model.layers.10.block_sparse_moe.experts.229.w2", "model.layers.10.block_sparse_moe.experts.230.w2", "model.layers.10.block_sparse_moe.experts.231.w2", "model.layers.10.block_sparse_moe.experts.232.w2", "model.layers.10.block_sparse_moe.experts.233.w2", "model.layers.10.block_sparse_moe.experts.234.w2", "model.layers.10.block_sparse_moe.experts.235.w2", "model.layers.10.block_sparse_moe.experts.236.w2", "model.layers.10.block_sparse_moe.experts.237.w2", "model.layers.10.block_sparse_moe.experts.238.w2", "model.layers.10.block_sparse_moe.experts.239.w2", "model.layers.10.block_sparse_moe.experts.240.w2", "model.layers.10.block_sparse_moe.experts.241.w2", "model.layers.10.block_sparse_moe.experts.242.w2", "model.layers.10.block_sparse_moe.experts.243.w2", "model.layers.10.block_sparse_moe.experts.244.w2", "model.layers.10.block_sparse_moe.experts.245.w2", "model.layers.10.block_sparse_moe.experts.246.w2", "model.layers.10.block_sparse_moe.experts.247.w2", "model.layers.10.block_sparse_moe.experts.248.w2", "model.layers.10.block_sparse_moe.experts.249.w2", "model.layers.10.block_sparse_moe.experts.250.w2", "model.layers.10.block_sparse_moe.experts.251.w2", "model.layers.10.block_sparse_moe.experts.252.w2", "model.layers.10.block_sparse_moe.experts.253.w2", "model.layers.10.block_sparse_moe.experts.254.w2", "model.layers.10.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0002877175807953769, "dbits": 1207959552 } ] }, { "idx": 55, "layers": [ "model.layers.11.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0003385394811630249, "dbits": 18874368 } ] }, { "idx": 56, "layers": [ "model.layers.11.self_attn.k_proj", "model.layers.11.self_attn.v_proj" ], "candidates": [ { "dkld": -0.007313624024391174, "dbits": 6291456 } ] }, { "idx": 57, "layers": [ "model.layers.11.self_attn.o_proj" ], "candidates": [ { "dkld": 0.007383871078491255, "dbits": 18874368 } ] }, { "idx": 58, "layers": [ "model.layers.11.block_sparse_moe.experts.0.w1", "model.layers.11.block_sparse_moe.experts.1.w1", "model.layers.11.block_sparse_moe.experts.2.w1", "model.layers.11.block_sparse_moe.experts.3.w1", "model.layers.11.block_sparse_moe.experts.4.w1", "model.layers.11.block_sparse_moe.experts.5.w1", "model.layers.11.block_sparse_moe.experts.6.w1", "model.layers.11.block_sparse_moe.experts.7.w1", "model.layers.11.block_sparse_moe.experts.8.w1", "model.layers.11.block_sparse_moe.experts.9.w1", "model.layers.11.block_sparse_moe.experts.10.w1", "model.layers.11.block_sparse_moe.experts.11.w1", "model.layers.11.block_sparse_moe.experts.12.w1", "model.layers.11.block_sparse_moe.experts.13.w1", "model.layers.11.block_sparse_moe.experts.14.w1", "model.layers.11.block_sparse_moe.experts.15.w1", "model.layers.11.block_sparse_moe.experts.16.w1", "model.layers.11.block_sparse_moe.experts.17.w1", "model.layers.11.block_sparse_moe.experts.18.w1", "model.layers.11.block_sparse_moe.experts.19.w1", "model.layers.11.block_sparse_moe.experts.20.w1", "model.layers.11.block_sparse_moe.experts.21.w1", "model.layers.11.block_sparse_moe.experts.22.w1", "model.layers.11.block_sparse_moe.experts.23.w1", "model.layers.11.block_sparse_moe.experts.24.w1", "model.layers.11.block_sparse_moe.experts.25.w1", "model.layers.11.block_sparse_moe.experts.26.w1", "model.layers.11.block_sparse_moe.experts.27.w1", "model.layers.11.block_sparse_moe.experts.28.w1", "model.layers.11.block_sparse_moe.experts.29.w1", "model.layers.11.block_sparse_moe.experts.30.w1", "model.layers.11.block_sparse_moe.experts.31.w1", "model.layers.11.block_sparse_moe.experts.32.w1", "model.layers.11.block_sparse_moe.experts.33.w1", "model.layers.11.block_sparse_moe.experts.34.w1", "model.layers.11.block_sparse_moe.experts.35.w1", "model.layers.11.block_sparse_moe.experts.36.w1", "model.layers.11.block_sparse_moe.experts.37.w1", "model.layers.11.block_sparse_moe.experts.38.w1", "model.layers.11.block_sparse_moe.experts.39.w1", "model.layers.11.block_sparse_moe.experts.40.w1", "model.layers.11.block_sparse_moe.experts.41.w1", "model.layers.11.block_sparse_moe.experts.42.w1", "model.layers.11.block_sparse_moe.experts.43.w1", "model.layers.11.block_sparse_moe.experts.44.w1", "model.layers.11.block_sparse_moe.experts.45.w1", "model.layers.11.block_sparse_moe.experts.46.w1", "model.layers.11.block_sparse_moe.experts.47.w1", "model.layers.11.block_sparse_moe.experts.48.w1", "model.layers.11.block_sparse_moe.experts.49.w1", "model.layers.11.block_sparse_moe.experts.50.w1", "model.layers.11.block_sparse_moe.experts.51.w1", "model.layers.11.block_sparse_moe.experts.52.w1", "model.layers.11.block_sparse_moe.experts.53.w1", "model.layers.11.block_sparse_moe.experts.54.w1", "model.layers.11.block_sparse_moe.experts.55.w1", "model.layers.11.block_sparse_moe.experts.56.w1", "model.layers.11.block_sparse_moe.experts.57.w1", "model.layers.11.block_sparse_moe.experts.58.w1", "model.layers.11.block_sparse_moe.experts.59.w1", "model.layers.11.block_sparse_moe.experts.60.w1", "model.layers.11.block_sparse_moe.experts.61.w1", "model.layers.11.block_sparse_moe.experts.62.w1", "model.layers.11.block_sparse_moe.experts.63.w1", "model.layers.11.block_sparse_moe.experts.64.w1", "model.layers.11.block_sparse_moe.experts.65.w1", "model.layers.11.block_sparse_moe.experts.66.w1", "model.layers.11.block_sparse_moe.experts.67.w1", "model.layers.11.block_sparse_moe.experts.68.w1", "model.layers.11.block_sparse_moe.experts.69.w1", "model.layers.11.block_sparse_moe.experts.70.w1", "model.layers.11.block_sparse_moe.experts.71.w1", "model.layers.11.block_sparse_moe.experts.72.w1", "model.layers.11.block_sparse_moe.experts.73.w1", "model.layers.11.block_sparse_moe.experts.74.w1", "model.layers.11.block_sparse_moe.experts.75.w1", "model.layers.11.block_sparse_moe.experts.76.w1", "model.layers.11.block_sparse_moe.experts.77.w1", "model.layers.11.block_sparse_moe.experts.78.w1", "model.layers.11.block_sparse_moe.experts.79.w1", "model.layers.11.block_sparse_moe.experts.80.w1", "model.layers.11.block_sparse_moe.experts.81.w1", "model.layers.11.block_sparse_moe.experts.82.w1", "model.layers.11.block_sparse_moe.experts.83.w1", "model.layers.11.block_sparse_moe.experts.84.w1", "model.layers.11.block_sparse_moe.experts.85.w1", "model.layers.11.block_sparse_moe.experts.86.w1", "model.layers.11.block_sparse_moe.experts.87.w1", "model.layers.11.block_sparse_moe.experts.88.w1", "model.layers.11.block_sparse_moe.experts.89.w1", "model.layers.11.block_sparse_moe.experts.90.w1", "model.layers.11.block_sparse_moe.experts.91.w1", "model.layers.11.block_sparse_moe.experts.92.w1", "model.layers.11.block_sparse_moe.experts.93.w1", "model.layers.11.block_sparse_moe.experts.94.w1", "model.layers.11.block_sparse_moe.experts.95.w1", "model.layers.11.block_sparse_moe.experts.96.w1", "model.layers.11.block_sparse_moe.experts.97.w1", "model.layers.11.block_sparse_moe.experts.98.w1", "model.layers.11.block_sparse_moe.experts.99.w1", "model.layers.11.block_sparse_moe.experts.100.w1", "model.layers.11.block_sparse_moe.experts.101.w1", "model.layers.11.block_sparse_moe.experts.102.w1", "model.layers.11.block_sparse_moe.experts.103.w1", "model.layers.11.block_sparse_moe.experts.104.w1", "model.layers.11.block_sparse_moe.experts.105.w1", "model.layers.11.block_sparse_moe.experts.106.w1", "model.layers.11.block_sparse_moe.experts.107.w1", "model.layers.11.block_sparse_moe.experts.108.w1", "model.layers.11.block_sparse_moe.experts.109.w1", "model.layers.11.block_sparse_moe.experts.110.w1", "model.layers.11.block_sparse_moe.experts.111.w1", "model.layers.11.block_sparse_moe.experts.112.w1", "model.layers.11.block_sparse_moe.experts.113.w1", "model.layers.11.block_sparse_moe.experts.114.w1", "model.layers.11.block_sparse_moe.experts.115.w1", "model.layers.11.block_sparse_moe.experts.116.w1", "model.layers.11.block_sparse_moe.experts.117.w1", "model.layers.11.block_sparse_moe.experts.118.w1", "model.layers.11.block_sparse_moe.experts.119.w1", "model.layers.11.block_sparse_moe.experts.120.w1", "model.layers.11.block_sparse_moe.experts.121.w1", "model.layers.11.block_sparse_moe.experts.122.w1", "model.layers.11.block_sparse_moe.experts.123.w1", "model.layers.11.block_sparse_moe.experts.124.w1", "model.layers.11.block_sparse_moe.experts.125.w1", "model.layers.11.block_sparse_moe.experts.126.w1", "model.layers.11.block_sparse_moe.experts.127.w1", "model.layers.11.block_sparse_moe.experts.128.w1", "model.layers.11.block_sparse_moe.experts.129.w1", "model.layers.11.block_sparse_moe.experts.130.w1", "model.layers.11.block_sparse_moe.experts.131.w1", "model.layers.11.block_sparse_moe.experts.132.w1", "model.layers.11.block_sparse_moe.experts.133.w1", "model.layers.11.block_sparse_moe.experts.134.w1", "model.layers.11.block_sparse_moe.experts.135.w1", "model.layers.11.block_sparse_moe.experts.136.w1", "model.layers.11.block_sparse_moe.experts.137.w1", "model.layers.11.block_sparse_moe.experts.138.w1", "model.layers.11.block_sparse_moe.experts.139.w1", "model.layers.11.block_sparse_moe.experts.140.w1", "model.layers.11.block_sparse_moe.experts.141.w1", "model.layers.11.block_sparse_moe.experts.142.w1", "model.layers.11.block_sparse_moe.experts.143.w1", "model.layers.11.block_sparse_moe.experts.144.w1", "model.layers.11.block_sparse_moe.experts.145.w1", "model.layers.11.block_sparse_moe.experts.146.w1", "model.layers.11.block_sparse_moe.experts.147.w1", "model.layers.11.block_sparse_moe.experts.148.w1", "model.layers.11.block_sparse_moe.experts.149.w1", "model.layers.11.block_sparse_moe.experts.150.w1", "model.layers.11.block_sparse_moe.experts.151.w1", "model.layers.11.block_sparse_moe.experts.152.w1", "model.layers.11.block_sparse_moe.experts.153.w1", "model.layers.11.block_sparse_moe.experts.154.w1", "model.layers.11.block_sparse_moe.experts.155.w1", "model.layers.11.block_sparse_moe.experts.156.w1", "model.layers.11.block_sparse_moe.experts.157.w1", "model.layers.11.block_sparse_moe.experts.158.w1", "model.layers.11.block_sparse_moe.experts.159.w1", "model.layers.11.block_sparse_moe.experts.160.w1", "model.layers.11.block_sparse_moe.experts.161.w1", "model.layers.11.block_sparse_moe.experts.162.w1", "model.layers.11.block_sparse_moe.experts.163.w1", "model.layers.11.block_sparse_moe.experts.164.w1", "model.layers.11.block_sparse_moe.experts.165.w1", "model.layers.11.block_sparse_moe.experts.166.w1", "model.layers.11.block_sparse_moe.experts.167.w1", "model.layers.11.block_sparse_moe.experts.168.w1", "model.layers.11.block_sparse_moe.experts.169.w1", "model.layers.11.block_sparse_moe.experts.170.w1", "model.layers.11.block_sparse_moe.experts.171.w1", "model.layers.11.block_sparse_moe.experts.172.w1", "model.layers.11.block_sparse_moe.experts.173.w1", "model.layers.11.block_sparse_moe.experts.174.w1", "model.layers.11.block_sparse_moe.experts.175.w1", "model.layers.11.block_sparse_moe.experts.176.w1", "model.layers.11.block_sparse_moe.experts.177.w1", "model.layers.11.block_sparse_moe.experts.178.w1", "model.layers.11.block_sparse_moe.experts.179.w1", "model.layers.11.block_sparse_moe.experts.180.w1", "model.layers.11.block_sparse_moe.experts.181.w1", "model.layers.11.block_sparse_moe.experts.182.w1", "model.layers.11.block_sparse_moe.experts.183.w1", "model.layers.11.block_sparse_moe.experts.184.w1", "model.layers.11.block_sparse_moe.experts.185.w1", "model.layers.11.block_sparse_moe.experts.186.w1", "model.layers.11.block_sparse_moe.experts.187.w1", "model.layers.11.block_sparse_moe.experts.188.w1", "model.layers.11.block_sparse_moe.experts.189.w1", "model.layers.11.block_sparse_moe.experts.190.w1", "model.layers.11.block_sparse_moe.experts.191.w1", "model.layers.11.block_sparse_moe.experts.192.w1", "model.layers.11.block_sparse_moe.experts.193.w1", "model.layers.11.block_sparse_moe.experts.194.w1", "model.layers.11.block_sparse_moe.experts.195.w1", "model.layers.11.block_sparse_moe.experts.196.w1", "model.layers.11.block_sparse_moe.experts.197.w1", "model.layers.11.block_sparse_moe.experts.198.w1", "model.layers.11.block_sparse_moe.experts.199.w1", "model.layers.11.block_sparse_moe.experts.200.w1", "model.layers.11.block_sparse_moe.experts.201.w1", "model.layers.11.block_sparse_moe.experts.202.w1", "model.layers.11.block_sparse_moe.experts.203.w1", "model.layers.11.block_sparse_moe.experts.204.w1", "model.layers.11.block_sparse_moe.experts.205.w1", "model.layers.11.block_sparse_moe.experts.206.w1", "model.layers.11.block_sparse_moe.experts.207.w1", "model.layers.11.block_sparse_moe.experts.208.w1", "model.layers.11.block_sparse_moe.experts.209.w1", "model.layers.11.block_sparse_moe.experts.210.w1", "model.layers.11.block_sparse_moe.experts.211.w1", "model.layers.11.block_sparse_moe.experts.212.w1", "model.layers.11.block_sparse_moe.experts.213.w1", "model.layers.11.block_sparse_moe.experts.214.w1", "model.layers.11.block_sparse_moe.experts.215.w1", "model.layers.11.block_sparse_moe.experts.216.w1", "model.layers.11.block_sparse_moe.experts.217.w1", "model.layers.11.block_sparse_moe.experts.218.w1", "model.layers.11.block_sparse_moe.experts.219.w1", "model.layers.11.block_sparse_moe.experts.220.w1", "model.layers.11.block_sparse_moe.experts.221.w1", "model.layers.11.block_sparse_moe.experts.222.w1", "model.layers.11.block_sparse_moe.experts.223.w1", "model.layers.11.block_sparse_moe.experts.224.w1", "model.layers.11.block_sparse_moe.experts.225.w1", "model.layers.11.block_sparse_moe.experts.226.w1", "model.layers.11.block_sparse_moe.experts.227.w1", "model.layers.11.block_sparse_moe.experts.228.w1", "model.layers.11.block_sparse_moe.experts.229.w1", "model.layers.11.block_sparse_moe.experts.230.w1", "model.layers.11.block_sparse_moe.experts.231.w1", "model.layers.11.block_sparse_moe.experts.232.w1", "model.layers.11.block_sparse_moe.experts.233.w1", "model.layers.11.block_sparse_moe.experts.234.w1", "model.layers.11.block_sparse_moe.experts.235.w1", "model.layers.11.block_sparse_moe.experts.236.w1", "model.layers.11.block_sparse_moe.experts.237.w1", "model.layers.11.block_sparse_moe.experts.238.w1", "model.layers.11.block_sparse_moe.experts.239.w1", "model.layers.11.block_sparse_moe.experts.240.w1", "model.layers.11.block_sparse_moe.experts.241.w1", "model.layers.11.block_sparse_moe.experts.242.w1", "model.layers.11.block_sparse_moe.experts.243.w1", "model.layers.11.block_sparse_moe.experts.244.w1", "model.layers.11.block_sparse_moe.experts.245.w1", "model.layers.11.block_sparse_moe.experts.246.w1", "model.layers.11.block_sparse_moe.experts.247.w1", "model.layers.11.block_sparse_moe.experts.248.w1", "model.layers.11.block_sparse_moe.experts.249.w1", "model.layers.11.block_sparse_moe.experts.250.w1", "model.layers.11.block_sparse_moe.experts.251.w1", "model.layers.11.block_sparse_moe.experts.252.w1", "model.layers.11.block_sparse_moe.experts.253.w1", "model.layers.11.block_sparse_moe.experts.254.w1", "model.layers.11.block_sparse_moe.experts.255.w1", "model.layers.11.block_sparse_moe.experts.0.w3", "model.layers.11.block_sparse_moe.experts.1.w3", "model.layers.11.block_sparse_moe.experts.2.w3", "model.layers.11.block_sparse_moe.experts.3.w3", "model.layers.11.block_sparse_moe.experts.4.w3", "model.layers.11.block_sparse_moe.experts.5.w3", "model.layers.11.block_sparse_moe.experts.6.w3", "model.layers.11.block_sparse_moe.experts.7.w3", "model.layers.11.block_sparse_moe.experts.8.w3", "model.layers.11.block_sparse_moe.experts.9.w3", "model.layers.11.block_sparse_moe.experts.10.w3", "model.layers.11.block_sparse_moe.experts.11.w3", "model.layers.11.block_sparse_moe.experts.12.w3", "model.layers.11.block_sparse_moe.experts.13.w3", "model.layers.11.block_sparse_moe.experts.14.w3", "model.layers.11.block_sparse_moe.experts.15.w3", "model.layers.11.block_sparse_moe.experts.16.w3", "model.layers.11.block_sparse_moe.experts.17.w3", "model.layers.11.block_sparse_moe.experts.18.w3", "model.layers.11.block_sparse_moe.experts.19.w3", "model.layers.11.block_sparse_moe.experts.20.w3", "model.layers.11.block_sparse_moe.experts.21.w3", "model.layers.11.block_sparse_moe.experts.22.w3", "model.layers.11.block_sparse_moe.experts.23.w3", "model.layers.11.block_sparse_moe.experts.24.w3", "model.layers.11.block_sparse_moe.experts.25.w3", "model.layers.11.block_sparse_moe.experts.26.w3", "model.layers.11.block_sparse_moe.experts.27.w3", "model.layers.11.block_sparse_moe.experts.28.w3", "model.layers.11.block_sparse_moe.experts.29.w3", "model.layers.11.block_sparse_moe.experts.30.w3", "model.layers.11.block_sparse_moe.experts.31.w3", "model.layers.11.block_sparse_moe.experts.32.w3", "model.layers.11.block_sparse_moe.experts.33.w3", "model.layers.11.block_sparse_moe.experts.34.w3", "model.layers.11.block_sparse_moe.experts.35.w3", "model.layers.11.block_sparse_moe.experts.36.w3", "model.layers.11.block_sparse_moe.experts.37.w3", "model.layers.11.block_sparse_moe.experts.38.w3", "model.layers.11.block_sparse_moe.experts.39.w3", "model.layers.11.block_sparse_moe.experts.40.w3", "model.layers.11.block_sparse_moe.experts.41.w3", "model.layers.11.block_sparse_moe.experts.42.w3", "model.layers.11.block_sparse_moe.experts.43.w3", "model.layers.11.block_sparse_moe.experts.44.w3", "model.layers.11.block_sparse_moe.experts.45.w3", "model.layers.11.block_sparse_moe.experts.46.w3", "model.layers.11.block_sparse_moe.experts.47.w3", "model.layers.11.block_sparse_moe.experts.48.w3", "model.layers.11.block_sparse_moe.experts.49.w3", "model.layers.11.block_sparse_moe.experts.50.w3", "model.layers.11.block_sparse_moe.experts.51.w3", "model.layers.11.block_sparse_moe.experts.52.w3", "model.layers.11.block_sparse_moe.experts.53.w3", "model.layers.11.block_sparse_moe.experts.54.w3", "model.layers.11.block_sparse_moe.experts.55.w3", "model.layers.11.block_sparse_moe.experts.56.w3", "model.layers.11.block_sparse_moe.experts.57.w3", "model.layers.11.block_sparse_moe.experts.58.w3", "model.layers.11.block_sparse_moe.experts.59.w3", "model.layers.11.block_sparse_moe.experts.60.w3", "model.layers.11.block_sparse_moe.experts.61.w3", "model.layers.11.block_sparse_moe.experts.62.w3", "model.layers.11.block_sparse_moe.experts.63.w3", "model.layers.11.block_sparse_moe.experts.64.w3", "model.layers.11.block_sparse_moe.experts.65.w3", "model.layers.11.block_sparse_moe.experts.66.w3", "model.layers.11.block_sparse_moe.experts.67.w3", "model.layers.11.block_sparse_moe.experts.68.w3", "model.layers.11.block_sparse_moe.experts.69.w3", "model.layers.11.block_sparse_moe.experts.70.w3", "model.layers.11.block_sparse_moe.experts.71.w3", "model.layers.11.block_sparse_moe.experts.72.w3", "model.layers.11.block_sparse_moe.experts.73.w3", "model.layers.11.block_sparse_moe.experts.74.w3", "model.layers.11.block_sparse_moe.experts.75.w3", "model.layers.11.block_sparse_moe.experts.76.w3", "model.layers.11.block_sparse_moe.experts.77.w3", "model.layers.11.block_sparse_moe.experts.78.w3", "model.layers.11.block_sparse_moe.experts.79.w3", "model.layers.11.block_sparse_moe.experts.80.w3", "model.layers.11.block_sparse_moe.experts.81.w3", "model.layers.11.block_sparse_moe.experts.82.w3", "model.layers.11.block_sparse_moe.experts.83.w3", "model.layers.11.block_sparse_moe.experts.84.w3", "model.layers.11.block_sparse_moe.experts.85.w3", "model.layers.11.block_sparse_moe.experts.86.w3", "model.layers.11.block_sparse_moe.experts.87.w3", "model.layers.11.block_sparse_moe.experts.88.w3", "model.layers.11.block_sparse_moe.experts.89.w3", "model.layers.11.block_sparse_moe.experts.90.w3", "model.layers.11.block_sparse_moe.experts.91.w3", "model.layers.11.block_sparse_moe.experts.92.w3", "model.layers.11.block_sparse_moe.experts.93.w3", "model.layers.11.block_sparse_moe.experts.94.w3", "model.layers.11.block_sparse_moe.experts.95.w3", "model.layers.11.block_sparse_moe.experts.96.w3", "model.layers.11.block_sparse_moe.experts.97.w3", "model.layers.11.block_sparse_moe.experts.98.w3", "model.layers.11.block_sparse_moe.experts.99.w3", "model.layers.11.block_sparse_moe.experts.100.w3", "model.layers.11.block_sparse_moe.experts.101.w3", "model.layers.11.block_sparse_moe.experts.102.w3", "model.layers.11.block_sparse_moe.experts.103.w3", "model.layers.11.block_sparse_moe.experts.104.w3", "model.layers.11.block_sparse_moe.experts.105.w3", "model.layers.11.block_sparse_moe.experts.106.w3", "model.layers.11.block_sparse_moe.experts.107.w3", "model.layers.11.block_sparse_moe.experts.108.w3", "model.layers.11.block_sparse_moe.experts.109.w3", "model.layers.11.block_sparse_moe.experts.110.w3", "model.layers.11.block_sparse_moe.experts.111.w3", "model.layers.11.block_sparse_moe.experts.112.w3", "model.layers.11.block_sparse_moe.experts.113.w3", "model.layers.11.block_sparse_moe.experts.114.w3", "model.layers.11.block_sparse_moe.experts.115.w3", "model.layers.11.block_sparse_moe.experts.116.w3", "model.layers.11.block_sparse_moe.experts.117.w3", "model.layers.11.block_sparse_moe.experts.118.w3", "model.layers.11.block_sparse_moe.experts.119.w3", "model.layers.11.block_sparse_moe.experts.120.w3", "model.layers.11.block_sparse_moe.experts.121.w3", "model.layers.11.block_sparse_moe.experts.122.w3", "model.layers.11.block_sparse_moe.experts.123.w3", "model.layers.11.block_sparse_moe.experts.124.w3", "model.layers.11.block_sparse_moe.experts.125.w3", "model.layers.11.block_sparse_moe.experts.126.w3", "model.layers.11.block_sparse_moe.experts.127.w3", "model.layers.11.block_sparse_moe.experts.128.w3", "model.layers.11.block_sparse_moe.experts.129.w3", "model.layers.11.block_sparse_moe.experts.130.w3", "model.layers.11.block_sparse_moe.experts.131.w3", "model.layers.11.block_sparse_moe.experts.132.w3", "model.layers.11.block_sparse_moe.experts.133.w3", "model.layers.11.block_sparse_moe.experts.134.w3", "model.layers.11.block_sparse_moe.experts.135.w3", "model.layers.11.block_sparse_moe.experts.136.w3", "model.layers.11.block_sparse_moe.experts.137.w3", "model.layers.11.block_sparse_moe.experts.138.w3", "model.layers.11.block_sparse_moe.experts.139.w3", "model.layers.11.block_sparse_moe.experts.140.w3", "model.layers.11.block_sparse_moe.experts.141.w3", "model.layers.11.block_sparse_moe.experts.142.w3", "model.layers.11.block_sparse_moe.experts.143.w3", "model.layers.11.block_sparse_moe.experts.144.w3", "model.layers.11.block_sparse_moe.experts.145.w3", "model.layers.11.block_sparse_moe.experts.146.w3", "model.layers.11.block_sparse_moe.experts.147.w3", "model.layers.11.block_sparse_moe.experts.148.w3", "model.layers.11.block_sparse_moe.experts.149.w3", "model.layers.11.block_sparse_moe.experts.150.w3", "model.layers.11.block_sparse_moe.experts.151.w3", "model.layers.11.block_sparse_moe.experts.152.w3", "model.layers.11.block_sparse_moe.experts.153.w3", "model.layers.11.block_sparse_moe.experts.154.w3", "model.layers.11.block_sparse_moe.experts.155.w3", "model.layers.11.block_sparse_moe.experts.156.w3", "model.layers.11.block_sparse_moe.experts.157.w3", "model.layers.11.block_sparse_moe.experts.158.w3", "model.layers.11.block_sparse_moe.experts.159.w3", "model.layers.11.block_sparse_moe.experts.160.w3", "model.layers.11.block_sparse_moe.experts.161.w3", "model.layers.11.block_sparse_moe.experts.162.w3", "model.layers.11.block_sparse_moe.experts.163.w3", "model.layers.11.block_sparse_moe.experts.164.w3", "model.layers.11.block_sparse_moe.experts.165.w3", "model.layers.11.block_sparse_moe.experts.166.w3", "model.layers.11.block_sparse_moe.experts.167.w3", "model.layers.11.block_sparse_moe.experts.168.w3", "model.layers.11.block_sparse_moe.experts.169.w3", "model.layers.11.block_sparse_moe.experts.170.w3", "model.layers.11.block_sparse_moe.experts.171.w3", "model.layers.11.block_sparse_moe.experts.172.w3", "model.layers.11.block_sparse_moe.experts.173.w3", "model.layers.11.block_sparse_moe.experts.174.w3", "model.layers.11.block_sparse_moe.experts.175.w3", "model.layers.11.block_sparse_moe.experts.176.w3", "model.layers.11.block_sparse_moe.experts.177.w3", "model.layers.11.block_sparse_moe.experts.178.w3", "model.layers.11.block_sparse_moe.experts.179.w3", "model.layers.11.block_sparse_moe.experts.180.w3", "model.layers.11.block_sparse_moe.experts.181.w3", "model.layers.11.block_sparse_moe.experts.182.w3", "model.layers.11.block_sparse_moe.experts.183.w3", "model.layers.11.block_sparse_moe.experts.184.w3", "model.layers.11.block_sparse_moe.experts.185.w3", "model.layers.11.block_sparse_moe.experts.186.w3", "model.layers.11.block_sparse_moe.experts.187.w3", "model.layers.11.block_sparse_moe.experts.188.w3", "model.layers.11.block_sparse_moe.experts.189.w3", "model.layers.11.block_sparse_moe.experts.190.w3", "model.layers.11.block_sparse_moe.experts.191.w3", "model.layers.11.block_sparse_moe.experts.192.w3", "model.layers.11.block_sparse_moe.experts.193.w3", "model.layers.11.block_sparse_moe.experts.194.w3", "model.layers.11.block_sparse_moe.experts.195.w3", "model.layers.11.block_sparse_moe.experts.196.w3", "model.layers.11.block_sparse_moe.experts.197.w3", "model.layers.11.block_sparse_moe.experts.198.w3", "model.layers.11.block_sparse_moe.experts.199.w3", "model.layers.11.block_sparse_moe.experts.200.w3", "model.layers.11.block_sparse_moe.experts.201.w3", "model.layers.11.block_sparse_moe.experts.202.w3", "model.layers.11.block_sparse_moe.experts.203.w3", "model.layers.11.block_sparse_moe.experts.204.w3", "model.layers.11.block_sparse_moe.experts.205.w3", "model.layers.11.block_sparse_moe.experts.206.w3", "model.layers.11.block_sparse_moe.experts.207.w3", "model.layers.11.block_sparse_moe.experts.208.w3", "model.layers.11.block_sparse_moe.experts.209.w3", "model.layers.11.block_sparse_moe.experts.210.w3", "model.layers.11.block_sparse_moe.experts.211.w3", "model.layers.11.block_sparse_moe.experts.212.w3", "model.layers.11.block_sparse_moe.experts.213.w3", "model.layers.11.block_sparse_moe.experts.214.w3", "model.layers.11.block_sparse_moe.experts.215.w3", "model.layers.11.block_sparse_moe.experts.216.w3", "model.layers.11.block_sparse_moe.experts.217.w3", "model.layers.11.block_sparse_moe.experts.218.w3", "model.layers.11.block_sparse_moe.experts.219.w3", "model.layers.11.block_sparse_moe.experts.220.w3", "model.layers.11.block_sparse_moe.experts.221.w3", "model.layers.11.block_sparse_moe.experts.222.w3", "model.layers.11.block_sparse_moe.experts.223.w3", "model.layers.11.block_sparse_moe.experts.224.w3", "model.layers.11.block_sparse_moe.experts.225.w3", "model.layers.11.block_sparse_moe.experts.226.w3", "model.layers.11.block_sparse_moe.experts.227.w3", "model.layers.11.block_sparse_moe.experts.228.w3", "model.layers.11.block_sparse_moe.experts.229.w3", "model.layers.11.block_sparse_moe.experts.230.w3", "model.layers.11.block_sparse_moe.experts.231.w3", "model.layers.11.block_sparse_moe.experts.232.w3", "model.layers.11.block_sparse_moe.experts.233.w3", "model.layers.11.block_sparse_moe.experts.234.w3", "model.layers.11.block_sparse_moe.experts.235.w3", "model.layers.11.block_sparse_moe.experts.236.w3", "model.layers.11.block_sparse_moe.experts.237.w3", "model.layers.11.block_sparse_moe.experts.238.w3", "model.layers.11.block_sparse_moe.experts.239.w3", "model.layers.11.block_sparse_moe.experts.240.w3", "model.layers.11.block_sparse_moe.experts.241.w3", "model.layers.11.block_sparse_moe.experts.242.w3", "model.layers.11.block_sparse_moe.experts.243.w3", "model.layers.11.block_sparse_moe.experts.244.w3", "model.layers.11.block_sparse_moe.experts.245.w3", "model.layers.11.block_sparse_moe.experts.246.w3", "model.layers.11.block_sparse_moe.experts.247.w3", "model.layers.11.block_sparse_moe.experts.248.w3", "model.layers.11.block_sparse_moe.experts.249.w3", "model.layers.11.block_sparse_moe.experts.250.w3", "model.layers.11.block_sparse_moe.experts.251.w3", "model.layers.11.block_sparse_moe.experts.252.w3", "model.layers.11.block_sparse_moe.experts.253.w3", "model.layers.11.block_sparse_moe.experts.254.w3", "model.layers.11.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0025528371334075928, "dbits": 2415919104 } ] }, { "idx": 59, "layers": [ "model.layers.11.block_sparse_moe.experts.0.w2", "model.layers.11.block_sparse_moe.experts.1.w2", "model.layers.11.block_sparse_moe.experts.2.w2", "model.layers.11.block_sparse_moe.experts.3.w2", "model.layers.11.block_sparse_moe.experts.4.w2", "model.layers.11.block_sparse_moe.experts.5.w2", "model.layers.11.block_sparse_moe.experts.6.w2", "model.layers.11.block_sparse_moe.experts.7.w2", "model.layers.11.block_sparse_moe.experts.8.w2", "model.layers.11.block_sparse_moe.experts.9.w2", "model.layers.11.block_sparse_moe.experts.10.w2", "model.layers.11.block_sparse_moe.experts.11.w2", "model.layers.11.block_sparse_moe.experts.12.w2", "model.layers.11.block_sparse_moe.experts.13.w2", "model.layers.11.block_sparse_moe.experts.14.w2", "model.layers.11.block_sparse_moe.experts.15.w2", "model.layers.11.block_sparse_moe.experts.16.w2", "model.layers.11.block_sparse_moe.experts.17.w2", "model.layers.11.block_sparse_moe.experts.18.w2", "model.layers.11.block_sparse_moe.experts.19.w2", "model.layers.11.block_sparse_moe.experts.20.w2", "model.layers.11.block_sparse_moe.experts.21.w2", "model.layers.11.block_sparse_moe.experts.22.w2", "model.layers.11.block_sparse_moe.experts.23.w2", "model.layers.11.block_sparse_moe.experts.24.w2", "model.layers.11.block_sparse_moe.experts.25.w2", "model.layers.11.block_sparse_moe.experts.26.w2", "model.layers.11.block_sparse_moe.experts.27.w2", "model.layers.11.block_sparse_moe.experts.28.w2", "model.layers.11.block_sparse_moe.experts.29.w2", "model.layers.11.block_sparse_moe.experts.30.w2", "model.layers.11.block_sparse_moe.experts.31.w2", "model.layers.11.block_sparse_moe.experts.32.w2", "model.layers.11.block_sparse_moe.experts.33.w2", "model.layers.11.block_sparse_moe.experts.34.w2", "model.layers.11.block_sparse_moe.experts.35.w2", "model.layers.11.block_sparse_moe.experts.36.w2", "model.layers.11.block_sparse_moe.experts.37.w2", "model.layers.11.block_sparse_moe.experts.38.w2", "model.layers.11.block_sparse_moe.experts.39.w2", "model.layers.11.block_sparse_moe.experts.40.w2", "model.layers.11.block_sparse_moe.experts.41.w2", "model.layers.11.block_sparse_moe.experts.42.w2", "model.layers.11.block_sparse_moe.experts.43.w2", "model.layers.11.block_sparse_moe.experts.44.w2", "model.layers.11.block_sparse_moe.experts.45.w2", "model.layers.11.block_sparse_moe.experts.46.w2", "model.layers.11.block_sparse_moe.experts.47.w2", "model.layers.11.block_sparse_moe.experts.48.w2", "model.layers.11.block_sparse_moe.experts.49.w2", "model.layers.11.block_sparse_moe.experts.50.w2", "model.layers.11.block_sparse_moe.experts.51.w2", "model.layers.11.block_sparse_moe.experts.52.w2", "model.layers.11.block_sparse_moe.experts.53.w2", "model.layers.11.block_sparse_moe.experts.54.w2", "model.layers.11.block_sparse_moe.experts.55.w2", "model.layers.11.block_sparse_moe.experts.56.w2", "model.layers.11.block_sparse_moe.experts.57.w2", "model.layers.11.block_sparse_moe.experts.58.w2", "model.layers.11.block_sparse_moe.experts.59.w2", "model.layers.11.block_sparse_moe.experts.60.w2", "model.layers.11.block_sparse_moe.experts.61.w2", "model.layers.11.block_sparse_moe.experts.62.w2", "model.layers.11.block_sparse_moe.experts.63.w2", "model.layers.11.block_sparse_moe.experts.64.w2", "model.layers.11.block_sparse_moe.experts.65.w2", "model.layers.11.block_sparse_moe.experts.66.w2", "model.layers.11.block_sparse_moe.experts.67.w2", "model.layers.11.block_sparse_moe.experts.68.w2", "model.layers.11.block_sparse_moe.experts.69.w2", "model.layers.11.block_sparse_moe.experts.70.w2", "model.layers.11.block_sparse_moe.experts.71.w2", "model.layers.11.block_sparse_moe.experts.72.w2", "model.layers.11.block_sparse_moe.experts.73.w2", "model.layers.11.block_sparse_moe.experts.74.w2", "model.layers.11.block_sparse_moe.experts.75.w2", "model.layers.11.block_sparse_moe.experts.76.w2", "model.layers.11.block_sparse_moe.experts.77.w2", "model.layers.11.block_sparse_moe.experts.78.w2", "model.layers.11.block_sparse_moe.experts.79.w2", "model.layers.11.block_sparse_moe.experts.80.w2", "model.layers.11.block_sparse_moe.experts.81.w2", "model.layers.11.block_sparse_moe.experts.82.w2", "model.layers.11.block_sparse_moe.experts.83.w2", "model.layers.11.block_sparse_moe.experts.84.w2", "model.layers.11.block_sparse_moe.experts.85.w2", "model.layers.11.block_sparse_moe.experts.86.w2", "model.layers.11.block_sparse_moe.experts.87.w2", "model.layers.11.block_sparse_moe.experts.88.w2", "model.layers.11.block_sparse_moe.experts.89.w2", "model.layers.11.block_sparse_moe.experts.90.w2", "model.layers.11.block_sparse_moe.experts.91.w2", "model.layers.11.block_sparse_moe.experts.92.w2", "model.layers.11.block_sparse_moe.experts.93.w2", "model.layers.11.block_sparse_moe.experts.94.w2", "model.layers.11.block_sparse_moe.experts.95.w2", "model.layers.11.block_sparse_moe.experts.96.w2", "model.layers.11.block_sparse_moe.experts.97.w2", "model.layers.11.block_sparse_moe.experts.98.w2", "model.layers.11.block_sparse_moe.experts.99.w2", "model.layers.11.block_sparse_moe.experts.100.w2", "model.layers.11.block_sparse_moe.experts.101.w2", "model.layers.11.block_sparse_moe.experts.102.w2", "model.layers.11.block_sparse_moe.experts.103.w2", "model.layers.11.block_sparse_moe.experts.104.w2", "model.layers.11.block_sparse_moe.experts.105.w2", "model.layers.11.block_sparse_moe.experts.106.w2", "model.layers.11.block_sparse_moe.experts.107.w2", "model.layers.11.block_sparse_moe.experts.108.w2", "model.layers.11.block_sparse_moe.experts.109.w2", "model.layers.11.block_sparse_moe.experts.110.w2", "model.layers.11.block_sparse_moe.experts.111.w2", "model.layers.11.block_sparse_moe.experts.112.w2", "model.layers.11.block_sparse_moe.experts.113.w2", "model.layers.11.block_sparse_moe.experts.114.w2", "model.layers.11.block_sparse_moe.experts.115.w2", "model.layers.11.block_sparse_moe.experts.116.w2", "model.layers.11.block_sparse_moe.experts.117.w2", "model.layers.11.block_sparse_moe.experts.118.w2", "model.layers.11.block_sparse_moe.experts.119.w2", "model.layers.11.block_sparse_moe.experts.120.w2", "model.layers.11.block_sparse_moe.experts.121.w2", "model.layers.11.block_sparse_moe.experts.122.w2", "model.layers.11.block_sparse_moe.experts.123.w2", "model.layers.11.block_sparse_moe.experts.124.w2", "model.layers.11.block_sparse_moe.experts.125.w2", "model.layers.11.block_sparse_moe.experts.126.w2", "model.layers.11.block_sparse_moe.experts.127.w2", "model.layers.11.block_sparse_moe.experts.128.w2", "model.layers.11.block_sparse_moe.experts.129.w2", "model.layers.11.block_sparse_moe.experts.130.w2", "model.layers.11.block_sparse_moe.experts.131.w2", "model.layers.11.block_sparse_moe.experts.132.w2", "model.layers.11.block_sparse_moe.experts.133.w2", "model.layers.11.block_sparse_moe.experts.134.w2", "model.layers.11.block_sparse_moe.experts.135.w2", "model.layers.11.block_sparse_moe.experts.136.w2", "model.layers.11.block_sparse_moe.experts.137.w2", "model.layers.11.block_sparse_moe.experts.138.w2", "model.layers.11.block_sparse_moe.experts.139.w2", "model.layers.11.block_sparse_moe.experts.140.w2", "model.layers.11.block_sparse_moe.experts.141.w2", "model.layers.11.block_sparse_moe.experts.142.w2", "model.layers.11.block_sparse_moe.experts.143.w2", "model.layers.11.block_sparse_moe.experts.144.w2", "model.layers.11.block_sparse_moe.experts.145.w2", "model.layers.11.block_sparse_moe.experts.146.w2", "model.layers.11.block_sparse_moe.experts.147.w2", "model.layers.11.block_sparse_moe.experts.148.w2", "model.layers.11.block_sparse_moe.experts.149.w2", "model.layers.11.block_sparse_moe.experts.150.w2", "model.layers.11.block_sparse_moe.experts.151.w2", "model.layers.11.block_sparse_moe.experts.152.w2", "model.layers.11.block_sparse_moe.experts.153.w2", "model.layers.11.block_sparse_moe.experts.154.w2", "model.layers.11.block_sparse_moe.experts.155.w2", "model.layers.11.block_sparse_moe.experts.156.w2", "model.layers.11.block_sparse_moe.experts.157.w2", "model.layers.11.block_sparse_moe.experts.158.w2", "model.layers.11.block_sparse_moe.experts.159.w2", "model.layers.11.block_sparse_moe.experts.160.w2", "model.layers.11.block_sparse_moe.experts.161.w2", "model.layers.11.block_sparse_moe.experts.162.w2", "model.layers.11.block_sparse_moe.experts.163.w2", "model.layers.11.block_sparse_moe.experts.164.w2", "model.layers.11.block_sparse_moe.experts.165.w2", "model.layers.11.block_sparse_moe.experts.166.w2", "model.layers.11.block_sparse_moe.experts.167.w2", "model.layers.11.block_sparse_moe.experts.168.w2", "model.layers.11.block_sparse_moe.experts.169.w2", "model.layers.11.block_sparse_moe.experts.170.w2", "model.layers.11.block_sparse_moe.experts.171.w2", "model.layers.11.block_sparse_moe.experts.172.w2", "model.layers.11.block_sparse_moe.experts.173.w2", "model.layers.11.block_sparse_moe.experts.174.w2", "model.layers.11.block_sparse_moe.experts.175.w2", "model.layers.11.block_sparse_moe.experts.176.w2", "model.layers.11.block_sparse_moe.experts.177.w2", "model.layers.11.block_sparse_moe.experts.178.w2", "model.layers.11.block_sparse_moe.experts.179.w2", "model.layers.11.block_sparse_moe.experts.180.w2", "model.layers.11.block_sparse_moe.experts.181.w2", "model.layers.11.block_sparse_moe.experts.182.w2", "model.layers.11.block_sparse_moe.experts.183.w2", "model.layers.11.block_sparse_moe.experts.184.w2", "model.layers.11.block_sparse_moe.experts.185.w2", "model.layers.11.block_sparse_moe.experts.186.w2", "model.layers.11.block_sparse_moe.experts.187.w2", "model.layers.11.block_sparse_moe.experts.188.w2", "model.layers.11.block_sparse_moe.experts.189.w2", "model.layers.11.block_sparse_moe.experts.190.w2", "model.layers.11.block_sparse_moe.experts.191.w2", "model.layers.11.block_sparse_moe.experts.192.w2", "model.layers.11.block_sparse_moe.experts.193.w2", "model.layers.11.block_sparse_moe.experts.194.w2", "model.layers.11.block_sparse_moe.experts.195.w2", "model.layers.11.block_sparse_moe.experts.196.w2", "model.layers.11.block_sparse_moe.experts.197.w2", "model.layers.11.block_sparse_moe.experts.198.w2", "model.layers.11.block_sparse_moe.experts.199.w2", "model.layers.11.block_sparse_moe.experts.200.w2", "model.layers.11.block_sparse_moe.experts.201.w2", "model.layers.11.block_sparse_moe.experts.202.w2", "model.layers.11.block_sparse_moe.experts.203.w2", "model.layers.11.block_sparse_moe.experts.204.w2", "model.layers.11.block_sparse_moe.experts.205.w2", "model.layers.11.block_sparse_moe.experts.206.w2", "model.layers.11.block_sparse_moe.experts.207.w2", "model.layers.11.block_sparse_moe.experts.208.w2", "model.layers.11.block_sparse_moe.experts.209.w2", "model.layers.11.block_sparse_moe.experts.210.w2", "model.layers.11.block_sparse_moe.experts.211.w2", "model.layers.11.block_sparse_moe.experts.212.w2", "model.layers.11.block_sparse_moe.experts.213.w2", "model.layers.11.block_sparse_moe.experts.214.w2", "model.layers.11.block_sparse_moe.experts.215.w2", "model.layers.11.block_sparse_moe.experts.216.w2", "model.layers.11.block_sparse_moe.experts.217.w2", "model.layers.11.block_sparse_moe.experts.218.w2", "model.layers.11.block_sparse_moe.experts.219.w2", "model.layers.11.block_sparse_moe.experts.220.w2", "model.layers.11.block_sparse_moe.experts.221.w2", "model.layers.11.block_sparse_moe.experts.222.w2", "model.layers.11.block_sparse_moe.experts.223.w2", "model.layers.11.block_sparse_moe.experts.224.w2", "model.layers.11.block_sparse_moe.experts.225.w2", "model.layers.11.block_sparse_moe.experts.226.w2", "model.layers.11.block_sparse_moe.experts.227.w2", "model.layers.11.block_sparse_moe.experts.228.w2", "model.layers.11.block_sparse_moe.experts.229.w2", "model.layers.11.block_sparse_moe.experts.230.w2", "model.layers.11.block_sparse_moe.experts.231.w2", "model.layers.11.block_sparse_moe.experts.232.w2", "model.layers.11.block_sparse_moe.experts.233.w2", "model.layers.11.block_sparse_moe.experts.234.w2", "model.layers.11.block_sparse_moe.experts.235.w2", "model.layers.11.block_sparse_moe.experts.236.w2", "model.layers.11.block_sparse_moe.experts.237.w2", "model.layers.11.block_sparse_moe.experts.238.w2", "model.layers.11.block_sparse_moe.experts.239.w2", "model.layers.11.block_sparse_moe.experts.240.w2", "model.layers.11.block_sparse_moe.experts.241.w2", "model.layers.11.block_sparse_moe.experts.242.w2", "model.layers.11.block_sparse_moe.experts.243.w2", "model.layers.11.block_sparse_moe.experts.244.w2", "model.layers.11.block_sparse_moe.experts.245.w2", "model.layers.11.block_sparse_moe.experts.246.w2", "model.layers.11.block_sparse_moe.experts.247.w2", "model.layers.11.block_sparse_moe.experts.248.w2", "model.layers.11.block_sparse_moe.experts.249.w2", "model.layers.11.block_sparse_moe.experts.250.w2", "model.layers.11.block_sparse_moe.experts.251.w2", "model.layers.11.block_sparse_moe.experts.252.w2", "model.layers.11.block_sparse_moe.experts.253.w2", "model.layers.11.block_sparse_moe.experts.254.w2", "model.layers.11.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0008301228284834927, "dbits": 1207959552 } ] }, { "idx": 60, "layers": [ "model.layers.12.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0009636193513870017, "dbits": 18874368 } ] }, { "idx": 61, "layers": [ "model.layers.12.self_attn.k_proj", "model.layers.12.self_attn.v_proj" ], "candidates": [ { "dkld": -0.007598887383937769, "dbits": 6291456 } ] }, { "idx": 62, "layers": [ "model.layers.12.self_attn.o_proj" ], "candidates": [ { "dkld": -0.001286187767982394, "dbits": 18874368 } ] }, { "idx": 63, "layers": [ "model.layers.12.block_sparse_moe.experts.0.w1", "model.layers.12.block_sparse_moe.experts.1.w1", "model.layers.12.block_sparse_moe.experts.2.w1", "model.layers.12.block_sparse_moe.experts.3.w1", "model.layers.12.block_sparse_moe.experts.4.w1", "model.layers.12.block_sparse_moe.experts.5.w1", "model.layers.12.block_sparse_moe.experts.6.w1", "model.layers.12.block_sparse_moe.experts.7.w1", "model.layers.12.block_sparse_moe.experts.8.w1", "model.layers.12.block_sparse_moe.experts.9.w1", "model.layers.12.block_sparse_moe.experts.10.w1", "model.layers.12.block_sparse_moe.experts.11.w1", "model.layers.12.block_sparse_moe.experts.12.w1", "model.layers.12.block_sparse_moe.experts.13.w1", "model.layers.12.block_sparse_moe.experts.14.w1", "model.layers.12.block_sparse_moe.experts.15.w1", "model.layers.12.block_sparse_moe.experts.16.w1", "model.layers.12.block_sparse_moe.experts.17.w1", "model.layers.12.block_sparse_moe.experts.18.w1", "model.layers.12.block_sparse_moe.experts.19.w1", "model.layers.12.block_sparse_moe.experts.20.w1", "model.layers.12.block_sparse_moe.experts.21.w1", "model.layers.12.block_sparse_moe.experts.22.w1", "model.layers.12.block_sparse_moe.experts.23.w1", "model.layers.12.block_sparse_moe.experts.24.w1", "model.layers.12.block_sparse_moe.experts.25.w1", "model.layers.12.block_sparse_moe.experts.26.w1", "model.layers.12.block_sparse_moe.experts.27.w1", "model.layers.12.block_sparse_moe.experts.28.w1", "model.layers.12.block_sparse_moe.experts.29.w1", "model.layers.12.block_sparse_moe.experts.30.w1", "model.layers.12.block_sparse_moe.experts.31.w1", "model.layers.12.block_sparse_moe.experts.32.w1", "model.layers.12.block_sparse_moe.experts.33.w1", "model.layers.12.block_sparse_moe.experts.34.w1", "model.layers.12.block_sparse_moe.experts.35.w1", "model.layers.12.block_sparse_moe.experts.36.w1", "model.layers.12.block_sparse_moe.experts.37.w1", "model.layers.12.block_sparse_moe.experts.38.w1", "model.layers.12.block_sparse_moe.experts.39.w1", "model.layers.12.block_sparse_moe.experts.40.w1", "model.layers.12.block_sparse_moe.experts.41.w1", "model.layers.12.block_sparse_moe.experts.42.w1", "model.layers.12.block_sparse_moe.experts.43.w1", "model.layers.12.block_sparse_moe.experts.44.w1", "model.layers.12.block_sparse_moe.experts.45.w1", "model.layers.12.block_sparse_moe.experts.46.w1", "model.layers.12.block_sparse_moe.experts.47.w1", "model.layers.12.block_sparse_moe.experts.48.w1", "model.layers.12.block_sparse_moe.experts.49.w1", "model.layers.12.block_sparse_moe.experts.50.w1", "model.layers.12.block_sparse_moe.experts.51.w1", "model.layers.12.block_sparse_moe.experts.52.w1", "model.layers.12.block_sparse_moe.experts.53.w1", "model.layers.12.block_sparse_moe.experts.54.w1", "model.layers.12.block_sparse_moe.experts.55.w1", "model.layers.12.block_sparse_moe.experts.56.w1", "model.layers.12.block_sparse_moe.experts.57.w1", "model.layers.12.block_sparse_moe.experts.58.w1", "model.layers.12.block_sparse_moe.experts.59.w1", "model.layers.12.block_sparse_moe.experts.60.w1", "model.layers.12.block_sparse_moe.experts.61.w1", "model.layers.12.block_sparse_moe.experts.62.w1", "model.layers.12.block_sparse_moe.experts.63.w1", "model.layers.12.block_sparse_moe.experts.64.w1", "model.layers.12.block_sparse_moe.experts.65.w1", "model.layers.12.block_sparse_moe.experts.66.w1", "model.layers.12.block_sparse_moe.experts.67.w1", "model.layers.12.block_sparse_moe.experts.68.w1", "model.layers.12.block_sparse_moe.experts.69.w1", "model.layers.12.block_sparse_moe.experts.70.w1", "model.layers.12.block_sparse_moe.experts.71.w1", "model.layers.12.block_sparse_moe.experts.72.w1", "model.layers.12.block_sparse_moe.experts.73.w1", "model.layers.12.block_sparse_moe.experts.74.w1", "model.layers.12.block_sparse_moe.experts.75.w1", "model.layers.12.block_sparse_moe.experts.76.w1", "model.layers.12.block_sparse_moe.experts.77.w1", "model.layers.12.block_sparse_moe.experts.78.w1", "model.layers.12.block_sparse_moe.experts.79.w1", "model.layers.12.block_sparse_moe.experts.80.w1", "model.layers.12.block_sparse_moe.experts.81.w1", "model.layers.12.block_sparse_moe.experts.82.w1", "model.layers.12.block_sparse_moe.experts.83.w1", "model.layers.12.block_sparse_moe.experts.84.w1", "model.layers.12.block_sparse_moe.experts.85.w1", "model.layers.12.block_sparse_moe.experts.86.w1", "model.layers.12.block_sparse_moe.experts.87.w1", "model.layers.12.block_sparse_moe.experts.88.w1", "model.layers.12.block_sparse_moe.experts.89.w1", "model.layers.12.block_sparse_moe.experts.90.w1", "model.layers.12.block_sparse_moe.experts.91.w1", "model.layers.12.block_sparse_moe.experts.92.w1", "model.layers.12.block_sparse_moe.experts.93.w1", "model.layers.12.block_sparse_moe.experts.94.w1", "model.layers.12.block_sparse_moe.experts.95.w1", "model.layers.12.block_sparse_moe.experts.96.w1", "model.layers.12.block_sparse_moe.experts.97.w1", "model.layers.12.block_sparse_moe.experts.98.w1", "model.layers.12.block_sparse_moe.experts.99.w1", "model.layers.12.block_sparse_moe.experts.100.w1", "model.layers.12.block_sparse_moe.experts.101.w1", "model.layers.12.block_sparse_moe.experts.102.w1", "model.layers.12.block_sparse_moe.experts.103.w1", "model.layers.12.block_sparse_moe.experts.104.w1", "model.layers.12.block_sparse_moe.experts.105.w1", "model.layers.12.block_sparse_moe.experts.106.w1", "model.layers.12.block_sparse_moe.experts.107.w1", "model.layers.12.block_sparse_moe.experts.108.w1", "model.layers.12.block_sparse_moe.experts.109.w1", "model.layers.12.block_sparse_moe.experts.110.w1", "model.layers.12.block_sparse_moe.experts.111.w1", "model.layers.12.block_sparse_moe.experts.112.w1", "model.layers.12.block_sparse_moe.experts.113.w1", "model.layers.12.block_sparse_moe.experts.114.w1", "model.layers.12.block_sparse_moe.experts.115.w1", "model.layers.12.block_sparse_moe.experts.116.w1", "model.layers.12.block_sparse_moe.experts.117.w1", "model.layers.12.block_sparse_moe.experts.118.w1", "model.layers.12.block_sparse_moe.experts.119.w1", "model.layers.12.block_sparse_moe.experts.120.w1", "model.layers.12.block_sparse_moe.experts.121.w1", "model.layers.12.block_sparse_moe.experts.122.w1", "model.layers.12.block_sparse_moe.experts.123.w1", "model.layers.12.block_sparse_moe.experts.124.w1", "model.layers.12.block_sparse_moe.experts.125.w1", "model.layers.12.block_sparse_moe.experts.126.w1", "model.layers.12.block_sparse_moe.experts.127.w1", "model.layers.12.block_sparse_moe.experts.128.w1", "model.layers.12.block_sparse_moe.experts.129.w1", "model.layers.12.block_sparse_moe.experts.130.w1", "model.layers.12.block_sparse_moe.experts.131.w1", "model.layers.12.block_sparse_moe.experts.132.w1", "model.layers.12.block_sparse_moe.experts.133.w1", "model.layers.12.block_sparse_moe.experts.134.w1", "model.layers.12.block_sparse_moe.experts.135.w1", "model.layers.12.block_sparse_moe.experts.136.w1", "model.layers.12.block_sparse_moe.experts.137.w1", "model.layers.12.block_sparse_moe.experts.138.w1", "model.layers.12.block_sparse_moe.experts.139.w1", "model.layers.12.block_sparse_moe.experts.140.w1", "model.layers.12.block_sparse_moe.experts.141.w1", "model.layers.12.block_sparse_moe.experts.142.w1", "model.layers.12.block_sparse_moe.experts.143.w1", "model.layers.12.block_sparse_moe.experts.144.w1", "model.layers.12.block_sparse_moe.experts.145.w1", "model.layers.12.block_sparse_moe.experts.146.w1", "model.layers.12.block_sparse_moe.experts.147.w1", "model.layers.12.block_sparse_moe.experts.148.w1", "model.layers.12.block_sparse_moe.experts.149.w1", "model.layers.12.block_sparse_moe.experts.150.w1", "model.layers.12.block_sparse_moe.experts.151.w1", "model.layers.12.block_sparse_moe.experts.152.w1", "model.layers.12.block_sparse_moe.experts.153.w1", "model.layers.12.block_sparse_moe.experts.154.w1", "model.layers.12.block_sparse_moe.experts.155.w1", "model.layers.12.block_sparse_moe.experts.156.w1", "model.layers.12.block_sparse_moe.experts.157.w1", "model.layers.12.block_sparse_moe.experts.158.w1", "model.layers.12.block_sparse_moe.experts.159.w1", "model.layers.12.block_sparse_moe.experts.160.w1", "model.layers.12.block_sparse_moe.experts.161.w1", "model.layers.12.block_sparse_moe.experts.162.w1", "model.layers.12.block_sparse_moe.experts.163.w1", "model.layers.12.block_sparse_moe.experts.164.w1", "model.layers.12.block_sparse_moe.experts.165.w1", "model.layers.12.block_sparse_moe.experts.166.w1", "model.layers.12.block_sparse_moe.experts.167.w1", "model.layers.12.block_sparse_moe.experts.168.w1", "model.layers.12.block_sparse_moe.experts.169.w1", "model.layers.12.block_sparse_moe.experts.170.w1", "model.layers.12.block_sparse_moe.experts.171.w1", "model.layers.12.block_sparse_moe.experts.172.w1", "model.layers.12.block_sparse_moe.experts.173.w1", "model.layers.12.block_sparse_moe.experts.174.w1", "model.layers.12.block_sparse_moe.experts.175.w1", "model.layers.12.block_sparse_moe.experts.176.w1", "model.layers.12.block_sparse_moe.experts.177.w1", "model.layers.12.block_sparse_moe.experts.178.w1", "model.layers.12.block_sparse_moe.experts.179.w1", "model.layers.12.block_sparse_moe.experts.180.w1", "model.layers.12.block_sparse_moe.experts.181.w1", "model.layers.12.block_sparse_moe.experts.182.w1", "model.layers.12.block_sparse_moe.experts.183.w1", "model.layers.12.block_sparse_moe.experts.184.w1", "model.layers.12.block_sparse_moe.experts.185.w1", "model.layers.12.block_sparse_moe.experts.186.w1", "model.layers.12.block_sparse_moe.experts.187.w1", "model.layers.12.block_sparse_moe.experts.188.w1", "model.layers.12.block_sparse_moe.experts.189.w1", "model.layers.12.block_sparse_moe.experts.190.w1", "model.layers.12.block_sparse_moe.experts.191.w1", "model.layers.12.block_sparse_moe.experts.192.w1", "model.layers.12.block_sparse_moe.experts.193.w1", "model.layers.12.block_sparse_moe.experts.194.w1", "model.layers.12.block_sparse_moe.experts.195.w1", "model.layers.12.block_sparse_moe.experts.196.w1", "model.layers.12.block_sparse_moe.experts.197.w1", "model.layers.12.block_sparse_moe.experts.198.w1", "model.layers.12.block_sparse_moe.experts.199.w1", "model.layers.12.block_sparse_moe.experts.200.w1", "model.layers.12.block_sparse_moe.experts.201.w1", "model.layers.12.block_sparse_moe.experts.202.w1", "model.layers.12.block_sparse_moe.experts.203.w1", "model.layers.12.block_sparse_moe.experts.204.w1", "model.layers.12.block_sparse_moe.experts.205.w1", "model.layers.12.block_sparse_moe.experts.206.w1", "model.layers.12.block_sparse_moe.experts.207.w1", "model.layers.12.block_sparse_moe.experts.208.w1", "model.layers.12.block_sparse_moe.experts.209.w1", "model.layers.12.block_sparse_moe.experts.210.w1", "model.layers.12.block_sparse_moe.experts.211.w1", "model.layers.12.block_sparse_moe.experts.212.w1", "model.layers.12.block_sparse_moe.experts.213.w1", "model.layers.12.block_sparse_moe.experts.214.w1", "model.layers.12.block_sparse_moe.experts.215.w1", "model.layers.12.block_sparse_moe.experts.216.w1", "model.layers.12.block_sparse_moe.experts.217.w1", "model.layers.12.block_sparse_moe.experts.218.w1", "model.layers.12.block_sparse_moe.experts.219.w1", "model.layers.12.block_sparse_moe.experts.220.w1", "model.layers.12.block_sparse_moe.experts.221.w1", "model.layers.12.block_sparse_moe.experts.222.w1", "model.layers.12.block_sparse_moe.experts.223.w1", "model.layers.12.block_sparse_moe.experts.224.w1", "model.layers.12.block_sparse_moe.experts.225.w1", "model.layers.12.block_sparse_moe.experts.226.w1", "model.layers.12.block_sparse_moe.experts.227.w1", "model.layers.12.block_sparse_moe.experts.228.w1", "model.layers.12.block_sparse_moe.experts.229.w1", "model.layers.12.block_sparse_moe.experts.230.w1", "model.layers.12.block_sparse_moe.experts.231.w1", "model.layers.12.block_sparse_moe.experts.232.w1", "model.layers.12.block_sparse_moe.experts.233.w1", "model.layers.12.block_sparse_moe.experts.234.w1", "model.layers.12.block_sparse_moe.experts.235.w1", "model.layers.12.block_sparse_moe.experts.236.w1", "model.layers.12.block_sparse_moe.experts.237.w1", "model.layers.12.block_sparse_moe.experts.238.w1", "model.layers.12.block_sparse_moe.experts.239.w1", "model.layers.12.block_sparse_moe.experts.240.w1", "model.layers.12.block_sparse_moe.experts.241.w1", "model.layers.12.block_sparse_moe.experts.242.w1", "model.layers.12.block_sparse_moe.experts.243.w1", "model.layers.12.block_sparse_moe.experts.244.w1", "model.layers.12.block_sparse_moe.experts.245.w1", "model.layers.12.block_sparse_moe.experts.246.w1", "model.layers.12.block_sparse_moe.experts.247.w1", "model.layers.12.block_sparse_moe.experts.248.w1", "model.layers.12.block_sparse_moe.experts.249.w1", "model.layers.12.block_sparse_moe.experts.250.w1", "model.layers.12.block_sparse_moe.experts.251.w1", "model.layers.12.block_sparse_moe.experts.252.w1", "model.layers.12.block_sparse_moe.experts.253.w1", "model.layers.12.block_sparse_moe.experts.254.w1", "model.layers.12.block_sparse_moe.experts.255.w1", "model.layers.12.block_sparse_moe.experts.0.w3", "model.layers.12.block_sparse_moe.experts.1.w3", "model.layers.12.block_sparse_moe.experts.2.w3", "model.layers.12.block_sparse_moe.experts.3.w3", "model.layers.12.block_sparse_moe.experts.4.w3", "model.layers.12.block_sparse_moe.experts.5.w3", "model.layers.12.block_sparse_moe.experts.6.w3", "model.layers.12.block_sparse_moe.experts.7.w3", "model.layers.12.block_sparse_moe.experts.8.w3", "model.layers.12.block_sparse_moe.experts.9.w3", "model.layers.12.block_sparse_moe.experts.10.w3", "model.layers.12.block_sparse_moe.experts.11.w3", "model.layers.12.block_sparse_moe.experts.12.w3", "model.layers.12.block_sparse_moe.experts.13.w3", "model.layers.12.block_sparse_moe.experts.14.w3", "model.layers.12.block_sparse_moe.experts.15.w3", "model.layers.12.block_sparse_moe.experts.16.w3", "model.layers.12.block_sparse_moe.experts.17.w3", "model.layers.12.block_sparse_moe.experts.18.w3", "model.layers.12.block_sparse_moe.experts.19.w3", "model.layers.12.block_sparse_moe.experts.20.w3", "model.layers.12.block_sparse_moe.experts.21.w3", "model.layers.12.block_sparse_moe.experts.22.w3", "model.layers.12.block_sparse_moe.experts.23.w3", "model.layers.12.block_sparse_moe.experts.24.w3", "model.layers.12.block_sparse_moe.experts.25.w3", "model.layers.12.block_sparse_moe.experts.26.w3", "model.layers.12.block_sparse_moe.experts.27.w3", "model.layers.12.block_sparse_moe.experts.28.w3", "model.layers.12.block_sparse_moe.experts.29.w3", "model.layers.12.block_sparse_moe.experts.30.w3", "model.layers.12.block_sparse_moe.experts.31.w3", "model.layers.12.block_sparse_moe.experts.32.w3", "model.layers.12.block_sparse_moe.experts.33.w3", "model.layers.12.block_sparse_moe.experts.34.w3", "model.layers.12.block_sparse_moe.experts.35.w3", "model.layers.12.block_sparse_moe.experts.36.w3", "model.layers.12.block_sparse_moe.experts.37.w3", "model.layers.12.block_sparse_moe.experts.38.w3", "model.layers.12.block_sparse_moe.experts.39.w3", "model.layers.12.block_sparse_moe.experts.40.w3", "model.layers.12.block_sparse_moe.experts.41.w3", "model.layers.12.block_sparse_moe.experts.42.w3", "model.layers.12.block_sparse_moe.experts.43.w3", "model.layers.12.block_sparse_moe.experts.44.w3", "model.layers.12.block_sparse_moe.experts.45.w3", "model.layers.12.block_sparse_moe.experts.46.w3", "model.layers.12.block_sparse_moe.experts.47.w3", "model.layers.12.block_sparse_moe.experts.48.w3", "model.layers.12.block_sparse_moe.experts.49.w3", "model.layers.12.block_sparse_moe.experts.50.w3", "model.layers.12.block_sparse_moe.experts.51.w3", "model.layers.12.block_sparse_moe.experts.52.w3", "model.layers.12.block_sparse_moe.experts.53.w3", "model.layers.12.block_sparse_moe.experts.54.w3", "model.layers.12.block_sparse_moe.experts.55.w3", "model.layers.12.block_sparse_moe.experts.56.w3", "model.layers.12.block_sparse_moe.experts.57.w3", "model.layers.12.block_sparse_moe.experts.58.w3", "model.layers.12.block_sparse_moe.experts.59.w3", "model.layers.12.block_sparse_moe.experts.60.w3", "model.layers.12.block_sparse_moe.experts.61.w3", "model.layers.12.block_sparse_moe.experts.62.w3", "model.layers.12.block_sparse_moe.experts.63.w3", "model.layers.12.block_sparse_moe.experts.64.w3", "model.layers.12.block_sparse_moe.experts.65.w3", "model.layers.12.block_sparse_moe.experts.66.w3", "model.layers.12.block_sparse_moe.experts.67.w3", "model.layers.12.block_sparse_moe.experts.68.w3", "model.layers.12.block_sparse_moe.experts.69.w3", "model.layers.12.block_sparse_moe.experts.70.w3", "model.layers.12.block_sparse_moe.experts.71.w3", "model.layers.12.block_sparse_moe.experts.72.w3", "model.layers.12.block_sparse_moe.experts.73.w3", "model.layers.12.block_sparse_moe.experts.74.w3", "model.layers.12.block_sparse_moe.experts.75.w3", "model.layers.12.block_sparse_moe.experts.76.w3", "model.layers.12.block_sparse_moe.experts.77.w3", "model.layers.12.block_sparse_moe.experts.78.w3", "model.layers.12.block_sparse_moe.experts.79.w3", "model.layers.12.block_sparse_moe.experts.80.w3", "model.layers.12.block_sparse_moe.experts.81.w3", "model.layers.12.block_sparse_moe.experts.82.w3", "model.layers.12.block_sparse_moe.experts.83.w3", "model.layers.12.block_sparse_moe.experts.84.w3", "model.layers.12.block_sparse_moe.experts.85.w3", "model.layers.12.block_sparse_moe.experts.86.w3", "model.layers.12.block_sparse_moe.experts.87.w3", "model.layers.12.block_sparse_moe.experts.88.w3", "model.layers.12.block_sparse_moe.experts.89.w3", "model.layers.12.block_sparse_moe.experts.90.w3", "model.layers.12.block_sparse_moe.experts.91.w3", "model.layers.12.block_sparse_moe.experts.92.w3", "model.layers.12.block_sparse_moe.experts.93.w3", "model.layers.12.block_sparse_moe.experts.94.w3", "model.layers.12.block_sparse_moe.experts.95.w3", "model.layers.12.block_sparse_moe.experts.96.w3", "model.layers.12.block_sparse_moe.experts.97.w3", "model.layers.12.block_sparse_moe.experts.98.w3", "model.layers.12.block_sparse_moe.experts.99.w3", "model.layers.12.block_sparse_moe.experts.100.w3", "model.layers.12.block_sparse_moe.experts.101.w3", "model.layers.12.block_sparse_moe.experts.102.w3", "model.layers.12.block_sparse_moe.experts.103.w3", "model.layers.12.block_sparse_moe.experts.104.w3", "model.layers.12.block_sparse_moe.experts.105.w3", "model.layers.12.block_sparse_moe.experts.106.w3", "model.layers.12.block_sparse_moe.experts.107.w3", "model.layers.12.block_sparse_moe.experts.108.w3", "model.layers.12.block_sparse_moe.experts.109.w3", "model.layers.12.block_sparse_moe.experts.110.w3", "model.layers.12.block_sparse_moe.experts.111.w3", "model.layers.12.block_sparse_moe.experts.112.w3", "model.layers.12.block_sparse_moe.experts.113.w3", "model.layers.12.block_sparse_moe.experts.114.w3", "model.layers.12.block_sparse_moe.experts.115.w3", "model.layers.12.block_sparse_moe.experts.116.w3", "model.layers.12.block_sparse_moe.experts.117.w3", "model.layers.12.block_sparse_moe.experts.118.w3", "model.layers.12.block_sparse_moe.experts.119.w3", "model.layers.12.block_sparse_moe.experts.120.w3", "model.layers.12.block_sparse_moe.experts.121.w3", "model.layers.12.block_sparse_moe.experts.122.w3", "model.layers.12.block_sparse_moe.experts.123.w3", "model.layers.12.block_sparse_moe.experts.124.w3", "model.layers.12.block_sparse_moe.experts.125.w3", "model.layers.12.block_sparse_moe.experts.126.w3", "model.layers.12.block_sparse_moe.experts.127.w3", "model.layers.12.block_sparse_moe.experts.128.w3", "model.layers.12.block_sparse_moe.experts.129.w3", "model.layers.12.block_sparse_moe.experts.130.w3", "model.layers.12.block_sparse_moe.experts.131.w3", "model.layers.12.block_sparse_moe.experts.132.w3", "model.layers.12.block_sparse_moe.experts.133.w3", "model.layers.12.block_sparse_moe.experts.134.w3", "model.layers.12.block_sparse_moe.experts.135.w3", "model.layers.12.block_sparse_moe.experts.136.w3", "model.layers.12.block_sparse_moe.experts.137.w3", "model.layers.12.block_sparse_moe.experts.138.w3", "model.layers.12.block_sparse_moe.experts.139.w3", "model.layers.12.block_sparse_moe.experts.140.w3", "model.layers.12.block_sparse_moe.experts.141.w3", "model.layers.12.block_sparse_moe.experts.142.w3", "model.layers.12.block_sparse_moe.experts.143.w3", "model.layers.12.block_sparse_moe.experts.144.w3", "model.layers.12.block_sparse_moe.experts.145.w3", "model.layers.12.block_sparse_moe.experts.146.w3", "model.layers.12.block_sparse_moe.experts.147.w3", "model.layers.12.block_sparse_moe.experts.148.w3", "model.layers.12.block_sparse_moe.experts.149.w3", "model.layers.12.block_sparse_moe.experts.150.w3", "model.layers.12.block_sparse_moe.experts.151.w3", "model.layers.12.block_sparse_moe.experts.152.w3", "model.layers.12.block_sparse_moe.experts.153.w3", "model.layers.12.block_sparse_moe.experts.154.w3", "model.layers.12.block_sparse_moe.experts.155.w3", "model.layers.12.block_sparse_moe.experts.156.w3", "model.layers.12.block_sparse_moe.experts.157.w3", "model.layers.12.block_sparse_moe.experts.158.w3", "model.layers.12.block_sparse_moe.experts.159.w3", "model.layers.12.block_sparse_moe.experts.160.w3", "model.layers.12.block_sparse_moe.experts.161.w3", "model.layers.12.block_sparse_moe.experts.162.w3", "model.layers.12.block_sparse_moe.experts.163.w3", "model.layers.12.block_sparse_moe.experts.164.w3", "model.layers.12.block_sparse_moe.experts.165.w3", "model.layers.12.block_sparse_moe.experts.166.w3", "model.layers.12.block_sparse_moe.experts.167.w3", "model.layers.12.block_sparse_moe.experts.168.w3", "model.layers.12.block_sparse_moe.experts.169.w3", "model.layers.12.block_sparse_moe.experts.170.w3", "model.layers.12.block_sparse_moe.experts.171.w3", "model.layers.12.block_sparse_moe.experts.172.w3", "model.layers.12.block_sparse_moe.experts.173.w3", "model.layers.12.block_sparse_moe.experts.174.w3", "model.layers.12.block_sparse_moe.experts.175.w3", "model.layers.12.block_sparse_moe.experts.176.w3", "model.layers.12.block_sparse_moe.experts.177.w3", "model.layers.12.block_sparse_moe.experts.178.w3", "model.layers.12.block_sparse_moe.experts.179.w3", "model.layers.12.block_sparse_moe.experts.180.w3", "model.layers.12.block_sparse_moe.experts.181.w3", "model.layers.12.block_sparse_moe.experts.182.w3", "model.layers.12.block_sparse_moe.experts.183.w3", "model.layers.12.block_sparse_moe.experts.184.w3", "model.layers.12.block_sparse_moe.experts.185.w3", "model.layers.12.block_sparse_moe.experts.186.w3", "model.layers.12.block_sparse_moe.experts.187.w3", "model.layers.12.block_sparse_moe.experts.188.w3", "model.layers.12.block_sparse_moe.experts.189.w3", "model.layers.12.block_sparse_moe.experts.190.w3", "model.layers.12.block_sparse_moe.experts.191.w3", "model.layers.12.block_sparse_moe.experts.192.w3", "model.layers.12.block_sparse_moe.experts.193.w3", "model.layers.12.block_sparse_moe.experts.194.w3", "model.layers.12.block_sparse_moe.experts.195.w3", "model.layers.12.block_sparse_moe.experts.196.w3", "model.layers.12.block_sparse_moe.experts.197.w3", "model.layers.12.block_sparse_moe.experts.198.w3", "model.layers.12.block_sparse_moe.experts.199.w3", "model.layers.12.block_sparse_moe.experts.200.w3", "model.layers.12.block_sparse_moe.experts.201.w3", "model.layers.12.block_sparse_moe.experts.202.w3", "model.layers.12.block_sparse_moe.experts.203.w3", "model.layers.12.block_sparse_moe.experts.204.w3", "model.layers.12.block_sparse_moe.experts.205.w3", "model.layers.12.block_sparse_moe.experts.206.w3", "model.layers.12.block_sparse_moe.experts.207.w3", "model.layers.12.block_sparse_moe.experts.208.w3", "model.layers.12.block_sparse_moe.experts.209.w3", "model.layers.12.block_sparse_moe.experts.210.w3", "model.layers.12.block_sparse_moe.experts.211.w3", "model.layers.12.block_sparse_moe.experts.212.w3", "model.layers.12.block_sparse_moe.experts.213.w3", "model.layers.12.block_sparse_moe.experts.214.w3", "model.layers.12.block_sparse_moe.experts.215.w3", "model.layers.12.block_sparse_moe.experts.216.w3", "model.layers.12.block_sparse_moe.experts.217.w3", "model.layers.12.block_sparse_moe.experts.218.w3", "model.layers.12.block_sparse_moe.experts.219.w3", "model.layers.12.block_sparse_moe.experts.220.w3", "model.layers.12.block_sparse_moe.experts.221.w3", "model.layers.12.block_sparse_moe.experts.222.w3", "model.layers.12.block_sparse_moe.experts.223.w3", "model.layers.12.block_sparse_moe.experts.224.w3", "model.layers.12.block_sparse_moe.experts.225.w3", "model.layers.12.block_sparse_moe.experts.226.w3", "model.layers.12.block_sparse_moe.experts.227.w3", "model.layers.12.block_sparse_moe.experts.228.w3", "model.layers.12.block_sparse_moe.experts.229.w3", "model.layers.12.block_sparse_moe.experts.230.w3", "model.layers.12.block_sparse_moe.experts.231.w3", "model.layers.12.block_sparse_moe.experts.232.w3", "model.layers.12.block_sparse_moe.experts.233.w3", "model.layers.12.block_sparse_moe.experts.234.w3", "model.layers.12.block_sparse_moe.experts.235.w3", "model.layers.12.block_sparse_moe.experts.236.w3", "model.layers.12.block_sparse_moe.experts.237.w3", "model.layers.12.block_sparse_moe.experts.238.w3", "model.layers.12.block_sparse_moe.experts.239.w3", "model.layers.12.block_sparse_moe.experts.240.w3", "model.layers.12.block_sparse_moe.experts.241.w3", "model.layers.12.block_sparse_moe.experts.242.w3", "model.layers.12.block_sparse_moe.experts.243.w3", "model.layers.12.block_sparse_moe.experts.244.w3", "model.layers.12.block_sparse_moe.experts.245.w3", "model.layers.12.block_sparse_moe.experts.246.w3", "model.layers.12.block_sparse_moe.experts.247.w3", "model.layers.12.block_sparse_moe.experts.248.w3", "model.layers.12.block_sparse_moe.experts.249.w3", "model.layers.12.block_sparse_moe.experts.250.w3", "model.layers.12.block_sparse_moe.experts.251.w3", "model.layers.12.block_sparse_moe.experts.252.w3", "model.layers.12.block_sparse_moe.experts.253.w3", "model.layers.12.block_sparse_moe.experts.254.w3", "model.layers.12.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0011446148157119307, "dbits": 2415919104 } ] }, { "idx": 64, "layers": [ "model.layers.12.block_sparse_moe.experts.0.w2", "model.layers.12.block_sparse_moe.experts.1.w2", "model.layers.12.block_sparse_moe.experts.2.w2", "model.layers.12.block_sparse_moe.experts.3.w2", "model.layers.12.block_sparse_moe.experts.4.w2", "model.layers.12.block_sparse_moe.experts.5.w2", "model.layers.12.block_sparse_moe.experts.6.w2", "model.layers.12.block_sparse_moe.experts.7.w2", "model.layers.12.block_sparse_moe.experts.8.w2", "model.layers.12.block_sparse_moe.experts.9.w2", "model.layers.12.block_sparse_moe.experts.10.w2", "model.layers.12.block_sparse_moe.experts.11.w2", "model.layers.12.block_sparse_moe.experts.12.w2", "model.layers.12.block_sparse_moe.experts.13.w2", "model.layers.12.block_sparse_moe.experts.14.w2", "model.layers.12.block_sparse_moe.experts.15.w2", "model.layers.12.block_sparse_moe.experts.16.w2", "model.layers.12.block_sparse_moe.experts.17.w2", "model.layers.12.block_sparse_moe.experts.18.w2", "model.layers.12.block_sparse_moe.experts.19.w2", "model.layers.12.block_sparse_moe.experts.20.w2", "model.layers.12.block_sparse_moe.experts.21.w2", "model.layers.12.block_sparse_moe.experts.22.w2", "model.layers.12.block_sparse_moe.experts.23.w2", "model.layers.12.block_sparse_moe.experts.24.w2", "model.layers.12.block_sparse_moe.experts.25.w2", "model.layers.12.block_sparse_moe.experts.26.w2", "model.layers.12.block_sparse_moe.experts.27.w2", "model.layers.12.block_sparse_moe.experts.28.w2", "model.layers.12.block_sparse_moe.experts.29.w2", "model.layers.12.block_sparse_moe.experts.30.w2", "model.layers.12.block_sparse_moe.experts.31.w2", "model.layers.12.block_sparse_moe.experts.32.w2", "model.layers.12.block_sparse_moe.experts.33.w2", "model.layers.12.block_sparse_moe.experts.34.w2", "model.layers.12.block_sparse_moe.experts.35.w2", "model.layers.12.block_sparse_moe.experts.36.w2", "model.layers.12.block_sparse_moe.experts.37.w2", "model.layers.12.block_sparse_moe.experts.38.w2", "model.layers.12.block_sparse_moe.experts.39.w2", "model.layers.12.block_sparse_moe.experts.40.w2", "model.layers.12.block_sparse_moe.experts.41.w2", "model.layers.12.block_sparse_moe.experts.42.w2", "model.layers.12.block_sparse_moe.experts.43.w2", "model.layers.12.block_sparse_moe.experts.44.w2", "model.layers.12.block_sparse_moe.experts.45.w2", "model.layers.12.block_sparse_moe.experts.46.w2", "model.layers.12.block_sparse_moe.experts.47.w2", "model.layers.12.block_sparse_moe.experts.48.w2", "model.layers.12.block_sparse_moe.experts.49.w2", "model.layers.12.block_sparse_moe.experts.50.w2", "model.layers.12.block_sparse_moe.experts.51.w2", "model.layers.12.block_sparse_moe.experts.52.w2", "model.layers.12.block_sparse_moe.experts.53.w2", "model.layers.12.block_sparse_moe.experts.54.w2", "model.layers.12.block_sparse_moe.experts.55.w2", "model.layers.12.block_sparse_moe.experts.56.w2", "model.layers.12.block_sparse_moe.experts.57.w2", "model.layers.12.block_sparse_moe.experts.58.w2", "model.layers.12.block_sparse_moe.experts.59.w2", "model.layers.12.block_sparse_moe.experts.60.w2", "model.layers.12.block_sparse_moe.experts.61.w2", "model.layers.12.block_sparse_moe.experts.62.w2", "model.layers.12.block_sparse_moe.experts.63.w2", "model.layers.12.block_sparse_moe.experts.64.w2", "model.layers.12.block_sparse_moe.experts.65.w2", "model.layers.12.block_sparse_moe.experts.66.w2", "model.layers.12.block_sparse_moe.experts.67.w2", "model.layers.12.block_sparse_moe.experts.68.w2", "model.layers.12.block_sparse_moe.experts.69.w2", "model.layers.12.block_sparse_moe.experts.70.w2", "model.layers.12.block_sparse_moe.experts.71.w2", "model.layers.12.block_sparse_moe.experts.72.w2", "model.layers.12.block_sparse_moe.experts.73.w2", "model.layers.12.block_sparse_moe.experts.74.w2", "model.layers.12.block_sparse_moe.experts.75.w2", "model.layers.12.block_sparse_moe.experts.76.w2", "model.layers.12.block_sparse_moe.experts.77.w2", "model.layers.12.block_sparse_moe.experts.78.w2", "model.layers.12.block_sparse_moe.experts.79.w2", "model.layers.12.block_sparse_moe.experts.80.w2", "model.layers.12.block_sparse_moe.experts.81.w2", "model.layers.12.block_sparse_moe.experts.82.w2", "model.layers.12.block_sparse_moe.experts.83.w2", "model.layers.12.block_sparse_moe.experts.84.w2", "model.layers.12.block_sparse_moe.experts.85.w2", "model.layers.12.block_sparse_moe.experts.86.w2", "model.layers.12.block_sparse_moe.experts.87.w2", "model.layers.12.block_sparse_moe.experts.88.w2", "model.layers.12.block_sparse_moe.experts.89.w2", "model.layers.12.block_sparse_moe.experts.90.w2", "model.layers.12.block_sparse_moe.experts.91.w2", "model.layers.12.block_sparse_moe.experts.92.w2", "model.layers.12.block_sparse_moe.experts.93.w2", "model.layers.12.block_sparse_moe.experts.94.w2", "model.layers.12.block_sparse_moe.experts.95.w2", "model.layers.12.block_sparse_moe.experts.96.w2", "model.layers.12.block_sparse_moe.experts.97.w2", "model.layers.12.block_sparse_moe.experts.98.w2", "model.layers.12.block_sparse_moe.experts.99.w2", "model.layers.12.block_sparse_moe.experts.100.w2", "model.layers.12.block_sparse_moe.experts.101.w2", "model.layers.12.block_sparse_moe.experts.102.w2", "model.layers.12.block_sparse_moe.experts.103.w2", "model.layers.12.block_sparse_moe.experts.104.w2", "model.layers.12.block_sparse_moe.experts.105.w2", "model.layers.12.block_sparse_moe.experts.106.w2", "model.layers.12.block_sparse_moe.experts.107.w2", "model.layers.12.block_sparse_moe.experts.108.w2", "model.layers.12.block_sparse_moe.experts.109.w2", "model.layers.12.block_sparse_moe.experts.110.w2", "model.layers.12.block_sparse_moe.experts.111.w2", "model.layers.12.block_sparse_moe.experts.112.w2", "model.layers.12.block_sparse_moe.experts.113.w2", "model.layers.12.block_sparse_moe.experts.114.w2", "model.layers.12.block_sparse_moe.experts.115.w2", "model.layers.12.block_sparse_moe.experts.116.w2", "model.layers.12.block_sparse_moe.experts.117.w2", "model.layers.12.block_sparse_moe.experts.118.w2", "model.layers.12.block_sparse_moe.experts.119.w2", "model.layers.12.block_sparse_moe.experts.120.w2", "model.layers.12.block_sparse_moe.experts.121.w2", "model.layers.12.block_sparse_moe.experts.122.w2", "model.layers.12.block_sparse_moe.experts.123.w2", "model.layers.12.block_sparse_moe.experts.124.w2", "model.layers.12.block_sparse_moe.experts.125.w2", "model.layers.12.block_sparse_moe.experts.126.w2", "model.layers.12.block_sparse_moe.experts.127.w2", "model.layers.12.block_sparse_moe.experts.128.w2", "model.layers.12.block_sparse_moe.experts.129.w2", "model.layers.12.block_sparse_moe.experts.130.w2", "model.layers.12.block_sparse_moe.experts.131.w2", "model.layers.12.block_sparse_moe.experts.132.w2", "model.layers.12.block_sparse_moe.experts.133.w2", "model.layers.12.block_sparse_moe.experts.134.w2", "model.layers.12.block_sparse_moe.experts.135.w2", "model.layers.12.block_sparse_moe.experts.136.w2", "model.layers.12.block_sparse_moe.experts.137.w2", "model.layers.12.block_sparse_moe.experts.138.w2", "model.layers.12.block_sparse_moe.experts.139.w2", "model.layers.12.block_sparse_moe.experts.140.w2", "model.layers.12.block_sparse_moe.experts.141.w2", "model.layers.12.block_sparse_moe.experts.142.w2", "model.layers.12.block_sparse_moe.experts.143.w2", "model.layers.12.block_sparse_moe.experts.144.w2", "model.layers.12.block_sparse_moe.experts.145.w2", "model.layers.12.block_sparse_moe.experts.146.w2", "model.layers.12.block_sparse_moe.experts.147.w2", "model.layers.12.block_sparse_moe.experts.148.w2", "model.layers.12.block_sparse_moe.experts.149.w2", "model.layers.12.block_sparse_moe.experts.150.w2", "model.layers.12.block_sparse_moe.experts.151.w2", "model.layers.12.block_sparse_moe.experts.152.w2", "model.layers.12.block_sparse_moe.experts.153.w2", "model.layers.12.block_sparse_moe.experts.154.w2", "model.layers.12.block_sparse_moe.experts.155.w2", "model.layers.12.block_sparse_moe.experts.156.w2", "model.layers.12.block_sparse_moe.experts.157.w2", "model.layers.12.block_sparse_moe.experts.158.w2", "model.layers.12.block_sparse_moe.experts.159.w2", "model.layers.12.block_sparse_moe.experts.160.w2", "model.layers.12.block_sparse_moe.experts.161.w2", "model.layers.12.block_sparse_moe.experts.162.w2", "model.layers.12.block_sparse_moe.experts.163.w2", "model.layers.12.block_sparse_moe.experts.164.w2", "model.layers.12.block_sparse_moe.experts.165.w2", "model.layers.12.block_sparse_moe.experts.166.w2", "model.layers.12.block_sparse_moe.experts.167.w2", "model.layers.12.block_sparse_moe.experts.168.w2", "model.layers.12.block_sparse_moe.experts.169.w2", "model.layers.12.block_sparse_moe.experts.170.w2", "model.layers.12.block_sparse_moe.experts.171.w2", "model.layers.12.block_sparse_moe.experts.172.w2", "model.layers.12.block_sparse_moe.experts.173.w2", "model.layers.12.block_sparse_moe.experts.174.w2", "model.layers.12.block_sparse_moe.experts.175.w2", "model.layers.12.block_sparse_moe.experts.176.w2", "model.layers.12.block_sparse_moe.experts.177.w2", "model.layers.12.block_sparse_moe.experts.178.w2", "model.layers.12.block_sparse_moe.experts.179.w2", "model.layers.12.block_sparse_moe.experts.180.w2", "model.layers.12.block_sparse_moe.experts.181.w2", "model.layers.12.block_sparse_moe.experts.182.w2", "model.layers.12.block_sparse_moe.experts.183.w2", "model.layers.12.block_sparse_moe.experts.184.w2", "model.layers.12.block_sparse_moe.experts.185.w2", "model.layers.12.block_sparse_moe.experts.186.w2", "model.layers.12.block_sparse_moe.experts.187.w2", "model.layers.12.block_sparse_moe.experts.188.w2", "model.layers.12.block_sparse_moe.experts.189.w2", "model.layers.12.block_sparse_moe.experts.190.w2", "model.layers.12.block_sparse_moe.experts.191.w2", "model.layers.12.block_sparse_moe.experts.192.w2", "model.layers.12.block_sparse_moe.experts.193.w2", "model.layers.12.block_sparse_moe.experts.194.w2", "model.layers.12.block_sparse_moe.experts.195.w2", "model.layers.12.block_sparse_moe.experts.196.w2", "model.layers.12.block_sparse_moe.experts.197.w2", "model.layers.12.block_sparse_moe.experts.198.w2", "model.layers.12.block_sparse_moe.experts.199.w2", "model.layers.12.block_sparse_moe.experts.200.w2", "model.layers.12.block_sparse_moe.experts.201.w2", "model.layers.12.block_sparse_moe.experts.202.w2", "model.layers.12.block_sparse_moe.experts.203.w2", "model.layers.12.block_sparse_moe.experts.204.w2", "model.layers.12.block_sparse_moe.experts.205.w2", "model.layers.12.block_sparse_moe.experts.206.w2", "model.layers.12.block_sparse_moe.experts.207.w2", "model.layers.12.block_sparse_moe.experts.208.w2", "model.layers.12.block_sparse_moe.experts.209.w2", "model.layers.12.block_sparse_moe.experts.210.w2", "model.layers.12.block_sparse_moe.experts.211.w2", "model.layers.12.block_sparse_moe.experts.212.w2", "model.layers.12.block_sparse_moe.experts.213.w2", "model.layers.12.block_sparse_moe.experts.214.w2", "model.layers.12.block_sparse_moe.experts.215.w2", "model.layers.12.block_sparse_moe.experts.216.w2", "model.layers.12.block_sparse_moe.experts.217.w2", "model.layers.12.block_sparse_moe.experts.218.w2", "model.layers.12.block_sparse_moe.experts.219.w2", "model.layers.12.block_sparse_moe.experts.220.w2", "model.layers.12.block_sparse_moe.experts.221.w2", "model.layers.12.block_sparse_moe.experts.222.w2", "model.layers.12.block_sparse_moe.experts.223.w2", "model.layers.12.block_sparse_moe.experts.224.w2", "model.layers.12.block_sparse_moe.experts.225.w2", "model.layers.12.block_sparse_moe.experts.226.w2", "model.layers.12.block_sparse_moe.experts.227.w2", "model.layers.12.block_sparse_moe.experts.228.w2", "model.layers.12.block_sparse_moe.experts.229.w2", "model.layers.12.block_sparse_moe.experts.230.w2", "model.layers.12.block_sparse_moe.experts.231.w2", "model.layers.12.block_sparse_moe.experts.232.w2", "model.layers.12.block_sparse_moe.experts.233.w2", "model.layers.12.block_sparse_moe.experts.234.w2", "model.layers.12.block_sparse_moe.experts.235.w2", "model.layers.12.block_sparse_moe.experts.236.w2", "model.layers.12.block_sparse_moe.experts.237.w2", "model.layers.12.block_sparse_moe.experts.238.w2", "model.layers.12.block_sparse_moe.experts.239.w2", "model.layers.12.block_sparse_moe.experts.240.w2", "model.layers.12.block_sparse_moe.experts.241.w2", "model.layers.12.block_sparse_moe.experts.242.w2", "model.layers.12.block_sparse_moe.experts.243.w2", "model.layers.12.block_sparse_moe.experts.244.w2", "model.layers.12.block_sparse_moe.experts.245.w2", "model.layers.12.block_sparse_moe.experts.246.w2", "model.layers.12.block_sparse_moe.experts.247.w2", "model.layers.12.block_sparse_moe.experts.248.w2", "model.layers.12.block_sparse_moe.experts.249.w2", "model.layers.12.block_sparse_moe.experts.250.w2", "model.layers.12.block_sparse_moe.experts.251.w2", "model.layers.12.block_sparse_moe.experts.252.w2", "model.layers.12.block_sparse_moe.experts.253.w2", "model.layers.12.block_sparse_moe.experts.254.w2", "model.layers.12.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0008361488580703735, "dbits": 1207959552 } ] }, { "idx": 65, "layers": [ "model.layers.13.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0016392350196837713, "dbits": 18874368 } ] }, { "idx": 66, "layers": [ "model.layers.13.self_attn.k_proj", "model.layers.13.self_attn.v_proj" ], "candidates": [ { "dkld": -0.011320936679840021, "dbits": 6291456 } ] }, { "idx": 67, "layers": [ "model.layers.13.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0017509162425994873, "dbits": 18874368 } ] }, { "idx": 68, "layers": [ "model.layers.13.block_sparse_moe.experts.0.w1", "model.layers.13.block_sparse_moe.experts.1.w1", "model.layers.13.block_sparse_moe.experts.2.w1", "model.layers.13.block_sparse_moe.experts.3.w1", "model.layers.13.block_sparse_moe.experts.4.w1", "model.layers.13.block_sparse_moe.experts.5.w1", "model.layers.13.block_sparse_moe.experts.6.w1", "model.layers.13.block_sparse_moe.experts.7.w1", "model.layers.13.block_sparse_moe.experts.8.w1", "model.layers.13.block_sparse_moe.experts.9.w1", "model.layers.13.block_sparse_moe.experts.10.w1", "model.layers.13.block_sparse_moe.experts.11.w1", "model.layers.13.block_sparse_moe.experts.12.w1", "model.layers.13.block_sparse_moe.experts.13.w1", "model.layers.13.block_sparse_moe.experts.14.w1", "model.layers.13.block_sparse_moe.experts.15.w1", "model.layers.13.block_sparse_moe.experts.16.w1", "model.layers.13.block_sparse_moe.experts.17.w1", "model.layers.13.block_sparse_moe.experts.18.w1", "model.layers.13.block_sparse_moe.experts.19.w1", "model.layers.13.block_sparse_moe.experts.20.w1", "model.layers.13.block_sparse_moe.experts.21.w1", "model.layers.13.block_sparse_moe.experts.22.w1", "model.layers.13.block_sparse_moe.experts.23.w1", "model.layers.13.block_sparse_moe.experts.24.w1", "model.layers.13.block_sparse_moe.experts.25.w1", "model.layers.13.block_sparse_moe.experts.26.w1", "model.layers.13.block_sparse_moe.experts.27.w1", "model.layers.13.block_sparse_moe.experts.28.w1", "model.layers.13.block_sparse_moe.experts.29.w1", "model.layers.13.block_sparse_moe.experts.30.w1", "model.layers.13.block_sparse_moe.experts.31.w1", "model.layers.13.block_sparse_moe.experts.32.w1", "model.layers.13.block_sparse_moe.experts.33.w1", "model.layers.13.block_sparse_moe.experts.34.w1", "model.layers.13.block_sparse_moe.experts.35.w1", "model.layers.13.block_sparse_moe.experts.36.w1", "model.layers.13.block_sparse_moe.experts.37.w1", "model.layers.13.block_sparse_moe.experts.38.w1", "model.layers.13.block_sparse_moe.experts.39.w1", "model.layers.13.block_sparse_moe.experts.40.w1", "model.layers.13.block_sparse_moe.experts.41.w1", "model.layers.13.block_sparse_moe.experts.42.w1", "model.layers.13.block_sparse_moe.experts.43.w1", "model.layers.13.block_sparse_moe.experts.44.w1", "model.layers.13.block_sparse_moe.experts.45.w1", "model.layers.13.block_sparse_moe.experts.46.w1", "model.layers.13.block_sparse_moe.experts.47.w1", "model.layers.13.block_sparse_moe.experts.48.w1", "model.layers.13.block_sparse_moe.experts.49.w1", "model.layers.13.block_sparse_moe.experts.50.w1", "model.layers.13.block_sparse_moe.experts.51.w1", "model.layers.13.block_sparse_moe.experts.52.w1", "model.layers.13.block_sparse_moe.experts.53.w1", "model.layers.13.block_sparse_moe.experts.54.w1", "model.layers.13.block_sparse_moe.experts.55.w1", "model.layers.13.block_sparse_moe.experts.56.w1", "model.layers.13.block_sparse_moe.experts.57.w1", "model.layers.13.block_sparse_moe.experts.58.w1", "model.layers.13.block_sparse_moe.experts.59.w1", "model.layers.13.block_sparse_moe.experts.60.w1", "model.layers.13.block_sparse_moe.experts.61.w1", "model.layers.13.block_sparse_moe.experts.62.w1", "model.layers.13.block_sparse_moe.experts.63.w1", "model.layers.13.block_sparse_moe.experts.64.w1", "model.layers.13.block_sparse_moe.experts.65.w1", "model.layers.13.block_sparse_moe.experts.66.w1", "model.layers.13.block_sparse_moe.experts.67.w1", "model.layers.13.block_sparse_moe.experts.68.w1", "model.layers.13.block_sparse_moe.experts.69.w1", "model.layers.13.block_sparse_moe.experts.70.w1", "model.layers.13.block_sparse_moe.experts.71.w1", "model.layers.13.block_sparse_moe.experts.72.w1", "model.layers.13.block_sparse_moe.experts.73.w1", "model.layers.13.block_sparse_moe.experts.74.w1", "model.layers.13.block_sparse_moe.experts.75.w1", "model.layers.13.block_sparse_moe.experts.76.w1", "model.layers.13.block_sparse_moe.experts.77.w1", "model.layers.13.block_sparse_moe.experts.78.w1", "model.layers.13.block_sparse_moe.experts.79.w1", "model.layers.13.block_sparse_moe.experts.80.w1", "model.layers.13.block_sparse_moe.experts.81.w1", "model.layers.13.block_sparse_moe.experts.82.w1", "model.layers.13.block_sparse_moe.experts.83.w1", "model.layers.13.block_sparse_moe.experts.84.w1", "model.layers.13.block_sparse_moe.experts.85.w1", "model.layers.13.block_sparse_moe.experts.86.w1", "model.layers.13.block_sparse_moe.experts.87.w1", "model.layers.13.block_sparse_moe.experts.88.w1", "model.layers.13.block_sparse_moe.experts.89.w1", "model.layers.13.block_sparse_moe.experts.90.w1", "model.layers.13.block_sparse_moe.experts.91.w1", "model.layers.13.block_sparse_moe.experts.92.w1", "model.layers.13.block_sparse_moe.experts.93.w1", "model.layers.13.block_sparse_moe.experts.94.w1", "model.layers.13.block_sparse_moe.experts.95.w1", "model.layers.13.block_sparse_moe.experts.96.w1", "model.layers.13.block_sparse_moe.experts.97.w1", "model.layers.13.block_sparse_moe.experts.98.w1", "model.layers.13.block_sparse_moe.experts.99.w1", "model.layers.13.block_sparse_moe.experts.100.w1", "model.layers.13.block_sparse_moe.experts.101.w1", "model.layers.13.block_sparse_moe.experts.102.w1", "model.layers.13.block_sparse_moe.experts.103.w1", "model.layers.13.block_sparse_moe.experts.104.w1", "model.layers.13.block_sparse_moe.experts.105.w1", "model.layers.13.block_sparse_moe.experts.106.w1", "model.layers.13.block_sparse_moe.experts.107.w1", "model.layers.13.block_sparse_moe.experts.108.w1", "model.layers.13.block_sparse_moe.experts.109.w1", "model.layers.13.block_sparse_moe.experts.110.w1", "model.layers.13.block_sparse_moe.experts.111.w1", "model.layers.13.block_sparse_moe.experts.112.w1", "model.layers.13.block_sparse_moe.experts.113.w1", "model.layers.13.block_sparse_moe.experts.114.w1", "model.layers.13.block_sparse_moe.experts.115.w1", "model.layers.13.block_sparse_moe.experts.116.w1", "model.layers.13.block_sparse_moe.experts.117.w1", "model.layers.13.block_sparse_moe.experts.118.w1", "model.layers.13.block_sparse_moe.experts.119.w1", "model.layers.13.block_sparse_moe.experts.120.w1", "model.layers.13.block_sparse_moe.experts.121.w1", "model.layers.13.block_sparse_moe.experts.122.w1", "model.layers.13.block_sparse_moe.experts.123.w1", "model.layers.13.block_sparse_moe.experts.124.w1", "model.layers.13.block_sparse_moe.experts.125.w1", "model.layers.13.block_sparse_moe.experts.126.w1", "model.layers.13.block_sparse_moe.experts.127.w1", "model.layers.13.block_sparse_moe.experts.128.w1", "model.layers.13.block_sparse_moe.experts.129.w1", "model.layers.13.block_sparse_moe.experts.130.w1", "model.layers.13.block_sparse_moe.experts.131.w1", "model.layers.13.block_sparse_moe.experts.132.w1", "model.layers.13.block_sparse_moe.experts.133.w1", "model.layers.13.block_sparse_moe.experts.134.w1", "model.layers.13.block_sparse_moe.experts.135.w1", "model.layers.13.block_sparse_moe.experts.136.w1", "model.layers.13.block_sparse_moe.experts.137.w1", "model.layers.13.block_sparse_moe.experts.138.w1", "model.layers.13.block_sparse_moe.experts.139.w1", "model.layers.13.block_sparse_moe.experts.140.w1", "model.layers.13.block_sparse_moe.experts.141.w1", "model.layers.13.block_sparse_moe.experts.142.w1", "model.layers.13.block_sparse_moe.experts.143.w1", "model.layers.13.block_sparse_moe.experts.144.w1", "model.layers.13.block_sparse_moe.experts.145.w1", "model.layers.13.block_sparse_moe.experts.146.w1", "model.layers.13.block_sparse_moe.experts.147.w1", "model.layers.13.block_sparse_moe.experts.148.w1", "model.layers.13.block_sparse_moe.experts.149.w1", "model.layers.13.block_sparse_moe.experts.150.w1", "model.layers.13.block_sparse_moe.experts.151.w1", "model.layers.13.block_sparse_moe.experts.152.w1", "model.layers.13.block_sparse_moe.experts.153.w1", "model.layers.13.block_sparse_moe.experts.154.w1", "model.layers.13.block_sparse_moe.experts.155.w1", "model.layers.13.block_sparse_moe.experts.156.w1", "model.layers.13.block_sparse_moe.experts.157.w1", "model.layers.13.block_sparse_moe.experts.158.w1", "model.layers.13.block_sparse_moe.experts.159.w1", "model.layers.13.block_sparse_moe.experts.160.w1", "model.layers.13.block_sparse_moe.experts.161.w1", "model.layers.13.block_sparse_moe.experts.162.w1", "model.layers.13.block_sparse_moe.experts.163.w1", "model.layers.13.block_sparse_moe.experts.164.w1", "model.layers.13.block_sparse_moe.experts.165.w1", "model.layers.13.block_sparse_moe.experts.166.w1", "model.layers.13.block_sparse_moe.experts.167.w1", "model.layers.13.block_sparse_moe.experts.168.w1", "model.layers.13.block_sparse_moe.experts.169.w1", "model.layers.13.block_sparse_moe.experts.170.w1", "model.layers.13.block_sparse_moe.experts.171.w1", "model.layers.13.block_sparse_moe.experts.172.w1", "model.layers.13.block_sparse_moe.experts.173.w1", "model.layers.13.block_sparse_moe.experts.174.w1", "model.layers.13.block_sparse_moe.experts.175.w1", "model.layers.13.block_sparse_moe.experts.176.w1", "model.layers.13.block_sparse_moe.experts.177.w1", "model.layers.13.block_sparse_moe.experts.178.w1", "model.layers.13.block_sparse_moe.experts.179.w1", "model.layers.13.block_sparse_moe.experts.180.w1", "model.layers.13.block_sparse_moe.experts.181.w1", "model.layers.13.block_sparse_moe.experts.182.w1", "model.layers.13.block_sparse_moe.experts.183.w1", "model.layers.13.block_sparse_moe.experts.184.w1", "model.layers.13.block_sparse_moe.experts.185.w1", "model.layers.13.block_sparse_moe.experts.186.w1", "model.layers.13.block_sparse_moe.experts.187.w1", "model.layers.13.block_sparse_moe.experts.188.w1", "model.layers.13.block_sparse_moe.experts.189.w1", "model.layers.13.block_sparse_moe.experts.190.w1", "model.layers.13.block_sparse_moe.experts.191.w1", "model.layers.13.block_sparse_moe.experts.192.w1", "model.layers.13.block_sparse_moe.experts.193.w1", "model.layers.13.block_sparse_moe.experts.194.w1", "model.layers.13.block_sparse_moe.experts.195.w1", "model.layers.13.block_sparse_moe.experts.196.w1", "model.layers.13.block_sparse_moe.experts.197.w1", "model.layers.13.block_sparse_moe.experts.198.w1", "model.layers.13.block_sparse_moe.experts.199.w1", "model.layers.13.block_sparse_moe.experts.200.w1", "model.layers.13.block_sparse_moe.experts.201.w1", "model.layers.13.block_sparse_moe.experts.202.w1", "model.layers.13.block_sparse_moe.experts.203.w1", "model.layers.13.block_sparse_moe.experts.204.w1", "model.layers.13.block_sparse_moe.experts.205.w1", "model.layers.13.block_sparse_moe.experts.206.w1", "model.layers.13.block_sparse_moe.experts.207.w1", "model.layers.13.block_sparse_moe.experts.208.w1", "model.layers.13.block_sparse_moe.experts.209.w1", "model.layers.13.block_sparse_moe.experts.210.w1", "model.layers.13.block_sparse_moe.experts.211.w1", "model.layers.13.block_sparse_moe.experts.212.w1", "model.layers.13.block_sparse_moe.experts.213.w1", "model.layers.13.block_sparse_moe.experts.214.w1", "model.layers.13.block_sparse_moe.experts.215.w1", "model.layers.13.block_sparse_moe.experts.216.w1", "model.layers.13.block_sparse_moe.experts.217.w1", "model.layers.13.block_sparse_moe.experts.218.w1", "model.layers.13.block_sparse_moe.experts.219.w1", "model.layers.13.block_sparse_moe.experts.220.w1", "model.layers.13.block_sparse_moe.experts.221.w1", "model.layers.13.block_sparse_moe.experts.222.w1", "model.layers.13.block_sparse_moe.experts.223.w1", "model.layers.13.block_sparse_moe.experts.224.w1", "model.layers.13.block_sparse_moe.experts.225.w1", "model.layers.13.block_sparse_moe.experts.226.w1", "model.layers.13.block_sparse_moe.experts.227.w1", "model.layers.13.block_sparse_moe.experts.228.w1", "model.layers.13.block_sparse_moe.experts.229.w1", "model.layers.13.block_sparse_moe.experts.230.w1", "model.layers.13.block_sparse_moe.experts.231.w1", "model.layers.13.block_sparse_moe.experts.232.w1", "model.layers.13.block_sparse_moe.experts.233.w1", "model.layers.13.block_sparse_moe.experts.234.w1", "model.layers.13.block_sparse_moe.experts.235.w1", "model.layers.13.block_sparse_moe.experts.236.w1", "model.layers.13.block_sparse_moe.experts.237.w1", "model.layers.13.block_sparse_moe.experts.238.w1", "model.layers.13.block_sparse_moe.experts.239.w1", "model.layers.13.block_sparse_moe.experts.240.w1", "model.layers.13.block_sparse_moe.experts.241.w1", "model.layers.13.block_sparse_moe.experts.242.w1", "model.layers.13.block_sparse_moe.experts.243.w1", "model.layers.13.block_sparse_moe.experts.244.w1", "model.layers.13.block_sparse_moe.experts.245.w1", "model.layers.13.block_sparse_moe.experts.246.w1", "model.layers.13.block_sparse_moe.experts.247.w1", "model.layers.13.block_sparse_moe.experts.248.w1", "model.layers.13.block_sparse_moe.experts.249.w1", "model.layers.13.block_sparse_moe.experts.250.w1", "model.layers.13.block_sparse_moe.experts.251.w1", "model.layers.13.block_sparse_moe.experts.252.w1", "model.layers.13.block_sparse_moe.experts.253.w1", "model.layers.13.block_sparse_moe.experts.254.w1", "model.layers.13.block_sparse_moe.experts.255.w1", "model.layers.13.block_sparse_moe.experts.0.w3", "model.layers.13.block_sparse_moe.experts.1.w3", "model.layers.13.block_sparse_moe.experts.2.w3", "model.layers.13.block_sparse_moe.experts.3.w3", "model.layers.13.block_sparse_moe.experts.4.w3", "model.layers.13.block_sparse_moe.experts.5.w3", "model.layers.13.block_sparse_moe.experts.6.w3", "model.layers.13.block_sparse_moe.experts.7.w3", "model.layers.13.block_sparse_moe.experts.8.w3", "model.layers.13.block_sparse_moe.experts.9.w3", "model.layers.13.block_sparse_moe.experts.10.w3", "model.layers.13.block_sparse_moe.experts.11.w3", "model.layers.13.block_sparse_moe.experts.12.w3", "model.layers.13.block_sparse_moe.experts.13.w3", "model.layers.13.block_sparse_moe.experts.14.w3", "model.layers.13.block_sparse_moe.experts.15.w3", "model.layers.13.block_sparse_moe.experts.16.w3", "model.layers.13.block_sparse_moe.experts.17.w3", "model.layers.13.block_sparse_moe.experts.18.w3", "model.layers.13.block_sparse_moe.experts.19.w3", "model.layers.13.block_sparse_moe.experts.20.w3", "model.layers.13.block_sparse_moe.experts.21.w3", "model.layers.13.block_sparse_moe.experts.22.w3", "model.layers.13.block_sparse_moe.experts.23.w3", "model.layers.13.block_sparse_moe.experts.24.w3", "model.layers.13.block_sparse_moe.experts.25.w3", "model.layers.13.block_sparse_moe.experts.26.w3", "model.layers.13.block_sparse_moe.experts.27.w3", "model.layers.13.block_sparse_moe.experts.28.w3", "model.layers.13.block_sparse_moe.experts.29.w3", "model.layers.13.block_sparse_moe.experts.30.w3", "model.layers.13.block_sparse_moe.experts.31.w3", "model.layers.13.block_sparse_moe.experts.32.w3", "model.layers.13.block_sparse_moe.experts.33.w3", "model.layers.13.block_sparse_moe.experts.34.w3", "model.layers.13.block_sparse_moe.experts.35.w3", "model.layers.13.block_sparse_moe.experts.36.w3", "model.layers.13.block_sparse_moe.experts.37.w3", "model.layers.13.block_sparse_moe.experts.38.w3", "model.layers.13.block_sparse_moe.experts.39.w3", "model.layers.13.block_sparse_moe.experts.40.w3", "model.layers.13.block_sparse_moe.experts.41.w3", "model.layers.13.block_sparse_moe.experts.42.w3", "model.layers.13.block_sparse_moe.experts.43.w3", "model.layers.13.block_sparse_moe.experts.44.w3", "model.layers.13.block_sparse_moe.experts.45.w3", "model.layers.13.block_sparse_moe.experts.46.w3", "model.layers.13.block_sparse_moe.experts.47.w3", "model.layers.13.block_sparse_moe.experts.48.w3", "model.layers.13.block_sparse_moe.experts.49.w3", "model.layers.13.block_sparse_moe.experts.50.w3", "model.layers.13.block_sparse_moe.experts.51.w3", "model.layers.13.block_sparse_moe.experts.52.w3", "model.layers.13.block_sparse_moe.experts.53.w3", "model.layers.13.block_sparse_moe.experts.54.w3", "model.layers.13.block_sparse_moe.experts.55.w3", "model.layers.13.block_sparse_moe.experts.56.w3", "model.layers.13.block_sparse_moe.experts.57.w3", "model.layers.13.block_sparse_moe.experts.58.w3", "model.layers.13.block_sparse_moe.experts.59.w3", "model.layers.13.block_sparse_moe.experts.60.w3", "model.layers.13.block_sparse_moe.experts.61.w3", "model.layers.13.block_sparse_moe.experts.62.w3", "model.layers.13.block_sparse_moe.experts.63.w3", "model.layers.13.block_sparse_moe.experts.64.w3", "model.layers.13.block_sparse_moe.experts.65.w3", "model.layers.13.block_sparse_moe.experts.66.w3", "model.layers.13.block_sparse_moe.experts.67.w3", "model.layers.13.block_sparse_moe.experts.68.w3", "model.layers.13.block_sparse_moe.experts.69.w3", "model.layers.13.block_sparse_moe.experts.70.w3", "model.layers.13.block_sparse_moe.experts.71.w3", "model.layers.13.block_sparse_moe.experts.72.w3", "model.layers.13.block_sparse_moe.experts.73.w3", "model.layers.13.block_sparse_moe.experts.74.w3", "model.layers.13.block_sparse_moe.experts.75.w3", "model.layers.13.block_sparse_moe.experts.76.w3", "model.layers.13.block_sparse_moe.experts.77.w3", "model.layers.13.block_sparse_moe.experts.78.w3", "model.layers.13.block_sparse_moe.experts.79.w3", "model.layers.13.block_sparse_moe.experts.80.w3", "model.layers.13.block_sparse_moe.experts.81.w3", "model.layers.13.block_sparse_moe.experts.82.w3", "model.layers.13.block_sparse_moe.experts.83.w3", "model.layers.13.block_sparse_moe.experts.84.w3", "model.layers.13.block_sparse_moe.experts.85.w3", "model.layers.13.block_sparse_moe.experts.86.w3", "model.layers.13.block_sparse_moe.experts.87.w3", "model.layers.13.block_sparse_moe.experts.88.w3", "model.layers.13.block_sparse_moe.experts.89.w3", "model.layers.13.block_sparse_moe.experts.90.w3", "model.layers.13.block_sparse_moe.experts.91.w3", "model.layers.13.block_sparse_moe.experts.92.w3", "model.layers.13.block_sparse_moe.experts.93.w3", "model.layers.13.block_sparse_moe.experts.94.w3", "model.layers.13.block_sparse_moe.experts.95.w3", "model.layers.13.block_sparse_moe.experts.96.w3", "model.layers.13.block_sparse_moe.experts.97.w3", "model.layers.13.block_sparse_moe.experts.98.w3", "model.layers.13.block_sparse_moe.experts.99.w3", "model.layers.13.block_sparse_moe.experts.100.w3", "model.layers.13.block_sparse_moe.experts.101.w3", "model.layers.13.block_sparse_moe.experts.102.w3", "model.layers.13.block_sparse_moe.experts.103.w3", "model.layers.13.block_sparse_moe.experts.104.w3", "model.layers.13.block_sparse_moe.experts.105.w3", "model.layers.13.block_sparse_moe.experts.106.w3", "model.layers.13.block_sparse_moe.experts.107.w3", "model.layers.13.block_sparse_moe.experts.108.w3", "model.layers.13.block_sparse_moe.experts.109.w3", "model.layers.13.block_sparse_moe.experts.110.w3", "model.layers.13.block_sparse_moe.experts.111.w3", "model.layers.13.block_sparse_moe.experts.112.w3", "model.layers.13.block_sparse_moe.experts.113.w3", "model.layers.13.block_sparse_moe.experts.114.w3", "model.layers.13.block_sparse_moe.experts.115.w3", "model.layers.13.block_sparse_moe.experts.116.w3", "model.layers.13.block_sparse_moe.experts.117.w3", "model.layers.13.block_sparse_moe.experts.118.w3", "model.layers.13.block_sparse_moe.experts.119.w3", "model.layers.13.block_sparse_moe.experts.120.w3", "model.layers.13.block_sparse_moe.experts.121.w3", "model.layers.13.block_sparse_moe.experts.122.w3", "model.layers.13.block_sparse_moe.experts.123.w3", "model.layers.13.block_sparse_moe.experts.124.w3", "model.layers.13.block_sparse_moe.experts.125.w3", "model.layers.13.block_sparse_moe.experts.126.w3", "model.layers.13.block_sparse_moe.experts.127.w3", "model.layers.13.block_sparse_moe.experts.128.w3", "model.layers.13.block_sparse_moe.experts.129.w3", "model.layers.13.block_sparse_moe.experts.130.w3", "model.layers.13.block_sparse_moe.experts.131.w3", "model.layers.13.block_sparse_moe.experts.132.w3", "model.layers.13.block_sparse_moe.experts.133.w3", "model.layers.13.block_sparse_moe.experts.134.w3", "model.layers.13.block_sparse_moe.experts.135.w3", "model.layers.13.block_sparse_moe.experts.136.w3", "model.layers.13.block_sparse_moe.experts.137.w3", "model.layers.13.block_sparse_moe.experts.138.w3", "model.layers.13.block_sparse_moe.experts.139.w3", "model.layers.13.block_sparse_moe.experts.140.w3", "model.layers.13.block_sparse_moe.experts.141.w3", "model.layers.13.block_sparse_moe.experts.142.w3", "model.layers.13.block_sparse_moe.experts.143.w3", "model.layers.13.block_sparse_moe.experts.144.w3", "model.layers.13.block_sparse_moe.experts.145.w3", "model.layers.13.block_sparse_moe.experts.146.w3", "model.layers.13.block_sparse_moe.experts.147.w3", "model.layers.13.block_sparse_moe.experts.148.w3", "model.layers.13.block_sparse_moe.experts.149.w3", "model.layers.13.block_sparse_moe.experts.150.w3", "model.layers.13.block_sparse_moe.experts.151.w3", "model.layers.13.block_sparse_moe.experts.152.w3", "model.layers.13.block_sparse_moe.experts.153.w3", "model.layers.13.block_sparse_moe.experts.154.w3", "model.layers.13.block_sparse_moe.experts.155.w3", "model.layers.13.block_sparse_moe.experts.156.w3", "model.layers.13.block_sparse_moe.experts.157.w3", "model.layers.13.block_sparse_moe.experts.158.w3", "model.layers.13.block_sparse_moe.experts.159.w3", "model.layers.13.block_sparse_moe.experts.160.w3", "model.layers.13.block_sparse_moe.experts.161.w3", "model.layers.13.block_sparse_moe.experts.162.w3", "model.layers.13.block_sparse_moe.experts.163.w3", "model.layers.13.block_sparse_moe.experts.164.w3", "model.layers.13.block_sparse_moe.experts.165.w3", "model.layers.13.block_sparse_moe.experts.166.w3", "model.layers.13.block_sparse_moe.experts.167.w3", "model.layers.13.block_sparse_moe.experts.168.w3", "model.layers.13.block_sparse_moe.experts.169.w3", "model.layers.13.block_sparse_moe.experts.170.w3", "model.layers.13.block_sparse_moe.experts.171.w3", "model.layers.13.block_sparse_moe.experts.172.w3", "model.layers.13.block_sparse_moe.experts.173.w3", "model.layers.13.block_sparse_moe.experts.174.w3", "model.layers.13.block_sparse_moe.experts.175.w3", "model.layers.13.block_sparse_moe.experts.176.w3", "model.layers.13.block_sparse_moe.experts.177.w3", "model.layers.13.block_sparse_moe.experts.178.w3", "model.layers.13.block_sparse_moe.experts.179.w3", "model.layers.13.block_sparse_moe.experts.180.w3", "model.layers.13.block_sparse_moe.experts.181.w3", "model.layers.13.block_sparse_moe.experts.182.w3", "model.layers.13.block_sparse_moe.experts.183.w3", "model.layers.13.block_sparse_moe.experts.184.w3", "model.layers.13.block_sparse_moe.experts.185.w3", "model.layers.13.block_sparse_moe.experts.186.w3", "model.layers.13.block_sparse_moe.experts.187.w3", "model.layers.13.block_sparse_moe.experts.188.w3", "model.layers.13.block_sparse_moe.experts.189.w3", "model.layers.13.block_sparse_moe.experts.190.w3", "model.layers.13.block_sparse_moe.experts.191.w3", "model.layers.13.block_sparse_moe.experts.192.w3", "model.layers.13.block_sparse_moe.experts.193.w3", "model.layers.13.block_sparse_moe.experts.194.w3", "model.layers.13.block_sparse_moe.experts.195.w3", "model.layers.13.block_sparse_moe.experts.196.w3", "model.layers.13.block_sparse_moe.experts.197.w3", "model.layers.13.block_sparse_moe.experts.198.w3", "model.layers.13.block_sparse_moe.experts.199.w3", "model.layers.13.block_sparse_moe.experts.200.w3", "model.layers.13.block_sparse_moe.experts.201.w3", "model.layers.13.block_sparse_moe.experts.202.w3", "model.layers.13.block_sparse_moe.experts.203.w3", "model.layers.13.block_sparse_moe.experts.204.w3", "model.layers.13.block_sparse_moe.experts.205.w3", "model.layers.13.block_sparse_moe.experts.206.w3", "model.layers.13.block_sparse_moe.experts.207.w3", "model.layers.13.block_sparse_moe.experts.208.w3", "model.layers.13.block_sparse_moe.experts.209.w3", "model.layers.13.block_sparse_moe.experts.210.w3", "model.layers.13.block_sparse_moe.experts.211.w3", "model.layers.13.block_sparse_moe.experts.212.w3", "model.layers.13.block_sparse_moe.experts.213.w3", "model.layers.13.block_sparse_moe.experts.214.w3", "model.layers.13.block_sparse_moe.experts.215.w3", "model.layers.13.block_sparse_moe.experts.216.w3", "model.layers.13.block_sparse_moe.experts.217.w3", "model.layers.13.block_sparse_moe.experts.218.w3", "model.layers.13.block_sparse_moe.experts.219.w3", "model.layers.13.block_sparse_moe.experts.220.w3", "model.layers.13.block_sparse_moe.experts.221.w3", "model.layers.13.block_sparse_moe.experts.222.w3", "model.layers.13.block_sparse_moe.experts.223.w3", "model.layers.13.block_sparse_moe.experts.224.w3", "model.layers.13.block_sparse_moe.experts.225.w3", "model.layers.13.block_sparse_moe.experts.226.w3", "model.layers.13.block_sparse_moe.experts.227.w3", "model.layers.13.block_sparse_moe.experts.228.w3", "model.layers.13.block_sparse_moe.experts.229.w3", "model.layers.13.block_sparse_moe.experts.230.w3", "model.layers.13.block_sparse_moe.experts.231.w3", "model.layers.13.block_sparse_moe.experts.232.w3", "model.layers.13.block_sparse_moe.experts.233.w3", "model.layers.13.block_sparse_moe.experts.234.w3", "model.layers.13.block_sparse_moe.experts.235.w3", "model.layers.13.block_sparse_moe.experts.236.w3", "model.layers.13.block_sparse_moe.experts.237.w3", "model.layers.13.block_sparse_moe.experts.238.w3", "model.layers.13.block_sparse_moe.experts.239.w3", "model.layers.13.block_sparse_moe.experts.240.w3", "model.layers.13.block_sparse_moe.experts.241.w3", "model.layers.13.block_sparse_moe.experts.242.w3", "model.layers.13.block_sparse_moe.experts.243.w3", "model.layers.13.block_sparse_moe.experts.244.w3", "model.layers.13.block_sparse_moe.experts.245.w3", "model.layers.13.block_sparse_moe.experts.246.w3", "model.layers.13.block_sparse_moe.experts.247.w3", "model.layers.13.block_sparse_moe.experts.248.w3", "model.layers.13.block_sparse_moe.experts.249.w3", "model.layers.13.block_sparse_moe.experts.250.w3", "model.layers.13.block_sparse_moe.experts.251.w3", "model.layers.13.block_sparse_moe.experts.252.w3", "model.layers.13.block_sparse_moe.experts.253.w3", "model.layers.13.block_sparse_moe.experts.254.w3", "model.layers.13.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.002545368671417192, "dbits": 2415919104 } ] }, { "idx": 69, "layers": [ "model.layers.13.block_sparse_moe.experts.0.w2", "model.layers.13.block_sparse_moe.experts.1.w2", "model.layers.13.block_sparse_moe.experts.2.w2", "model.layers.13.block_sparse_moe.experts.3.w2", "model.layers.13.block_sparse_moe.experts.4.w2", "model.layers.13.block_sparse_moe.experts.5.w2", "model.layers.13.block_sparse_moe.experts.6.w2", "model.layers.13.block_sparse_moe.experts.7.w2", "model.layers.13.block_sparse_moe.experts.8.w2", "model.layers.13.block_sparse_moe.experts.9.w2", "model.layers.13.block_sparse_moe.experts.10.w2", "model.layers.13.block_sparse_moe.experts.11.w2", "model.layers.13.block_sparse_moe.experts.12.w2", "model.layers.13.block_sparse_moe.experts.13.w2", "model.layers.13.block_sparse_moe.experts.14.w2", "model.layers.13.block_sparse_moe.experts.15.w2", "model.layers.13.block_sparse_moe.experts.16.w2", "model.layers.13.block_sparse_moe.experts.17.w2", "model.layers.13.block_sparse_moe.experts.18.w2", "model.layers.13.block_sparse_moe.experts.19.w2", "model.layers.13.block_sparse_moe.experts.20.w2", "model.layers.13.block_sparse_moe.experts.21.w2", "model.layers.13.block_sparse_moe.experts.22.w2", "model.layers.13.block_sparse_moe.experts.23.w2", "model.layers.13.block_sparse_moe.experts.24.w2", "model.layers.13.block_sparse_moe.experts.25.w2", "model.layers.13.block_sparse_moe.experts.26.w2", "model.layers.13.block_sparse_moe.experts.27.w2", "model.layers.13.block_sparse_moe.experts.28.w2", "model.layers.13.block_sparse_moe.experts.29.w2", "model.layers.13.block_sparse_moe.experts.30.w2", "model.layers.13.block_sparse_moe.experts.31.w2", "model.layers.13.block_sparse_moe.experts.32.w2", "model.layers.13.block_sparse_moe.experts.33.w2", "model.layers.13.block_sparse_moe.experts.34.w2", "model.layers.13.block_sparse_moe.experts.35.w2", "model.layers.13.block_sparse_moe.experts.36.w2", "model.layers.13.block_sparse_moe.experts.37.w2", "model.layers.13.block_sparse_moe.experts.38.w2", "model.layers.13.block_sparse_moe.experts.39.w2", "model.layers.13.block_sparse_moe.experts.40.w2", "model.layers.13.block_sparse_moe.experts.41.w2", "model.layers.13.block_sparse_moe.experts.42.w2", "model.layers.13.block_sparse_moe.experts.43.w2", "model.layers.13.block_sparse_moe.experts.44.w2", "model.layers.13.block_sparse_moe.experts.45.w2", "model.layers.13.block_sparse_moe.experts.46.w2", "model.layers.13.block_sparse_moe.experts.47.w2", "model.layers.13.block_sparse_moe.experts.48.w2", "model.layers.13.block_sparse_moe.experts.49.w2", "model.layers.13.block_sparse_moe.experts.50.w2", "model.layers.13.block_sparse_moe.experts.51.w2", "model.layers.13.block_sparse_moe.experts.52.w2", "model.layers.13.block_sparse_moe.experts.53.w2", "model.layers.13.block_sparse_moe.experts.54.w2", "model.layers.13.block_sparse_moe.experts.55.w2", "model.layers.13.block_sparse_moe.experts.56.w2", "model.layers.13.block_sparse_moe.experts.57.w2", "model.layers.13.block_sparse_moe.experts.58.w2", "model.layers.13.block_sparse_moe.experts.59.w2", "model.layers.13.block_sparse_moe.experts.60.w2", "model.layers.13.block_sparse_moe.experts.61.w2", "model.layers.13.block_sparse_moe.experts.62.w2", "model.layers.13.block_sparse_moe.experts.63.w2", "model.layers.13.block_sparse_moe.experts.64.w2", "model.layers.13.block_sparse_moe.experts.65.w2", "model.layers.13.block_sparse_moe.experts.66.w2", "model.layers.13.block_sparse_moe.experts.67.w2", "model.layers.13.block_sparse_moe.experts.68.w2", "model.layers.13.block_sparse_moe.experts.69.w2", "model.layers.13.block_sparse_moe.experts.70.w2", "model.layers.13.block_sparse_moe.experts.71.w2", "model.layers.13.block_sparse_moe.experts.72.w2", "model.layers.13.block_sparse_moe.experts.73.w2", "model.layers.13.block_sparse_moe.experts.74.w2", "model.layers.13.block_sparse_moe.experts.75.w2", "model.layers.13.block_sparse_moe.experts.76.w2", "model.layers.13.block_sparse_moe.experts.77.w2", "model.layers.13.block_sparse_moe.experts.78.w2", "model.layers.13.block_sparse_moe.experts.79.w2", "model.layers.13.block_sparse_moe.experts.80.w2", "model.layers.13.block_sparse_moe.experts.81.w2", "model.layers.13.block_sparse_moe.experts.82.w2", "model.layers.13.block_sparse_moe.experts.83.w2", "model.layers.13.block_sparse_moe.experts.84.w2", "model.layers.13.block_sparse_moe.experts.85.w2", "model.layers.13.block_sparse_moe.experts.86.w2", "model.layers.13.block_sparse_moe.experts.87.w2", "model.layers.13.block_sparse_moe.experts.88.w2", "model.layers.13.block_sparse_moe.experts.89.w2", "model.layers.13.block_sparse_moe.experts.90.w2", "model.layers.13.block_sparse_moe.experts.91.w2", "model.layers.13.block_sparse_moe.experts.92.w2", "model.layers.13.block_sparse_moe.experts.93.w2", "model.layers.13.block_sparse_moe.experts.94.w2", "model.layers.13.block_sparse_moe.experts.95.w2", "model.layers.13.block_sparse_moe.experts.96.w2", "model.layers.13.block_sparse_moe.experts.97.w2", "model.layers.13.block_sparse_moe.experts.98.w2", "model.layers.13.block_sparse_moe.experts.99.w2", "model.layers.13.block_sparse_moe.experts.100.w2", "model.layers.13.block_sparse_moe.experts.101.w2", "model.layers.13.block_sparse_moe.experts.102.w2", "model.layers.13.block_sparse_moe.experts.103.w2", "model.layers.13.block_sparse_moe.experts.104.w2", "model.layers.13.block_sparse_moe.experts.105.w2", "model.layers.13.block_sparse_moe.experts.106.w2", "model.layers.13.block_sparse_moe.experts.107.w2", "model.layers.13.block_sparse_moe.experts.108.w2", "model.layers.13.block_sparse_moe.experts.109.w2", "model.layers.13.block_sparse_moe.experts.110.w2", "model.layers.13.block_sparse_moe.experts.111.w2", "model.layers.13.block_sparse_moe.experts.112.w2", "model.layers.13.block_sparse_moe.experts.113.w2", "model.layers.13.block_sparse_moe.experts.114.w2", "model.layers.13.block_sparse_moe.experts.115.w2", "model.layers.13.block_sparse_moe.experts.116.w2", "model.layers.13.block_sparse_moe.experts.117.w2", "model.layers.13.block_sparse_moe.experts.118.w2", "model.layers.13.block_sparse_moe.experts.119.w2", "model.layers.13.block_sparse_moe.experts.120.w2", "model.layers.13.block_sparse_moe.experts.121.w2", "model.layers.13.block_sparse_moe.experts.122.w2", "model.layers.13.block_sparse_moe.experts.123.w2", "model.layers.13.block_sparse_moe.experts.124.w2", "model.layers.13.block_sparse_moe.experts.125.w2", "model.layers.13.block_sparse_moe.experts.126.w2", "model.layers.13.block_sparse_moe.experts.127.w2", "model.layers.13.block_sparse_moe.experts.128.w2", "model.layers.13.block_sparse_moe.experts.129.w2", "model.layers.13.block_sparse_moe.experts.130.w2", "model.layers.13.block_sparse_moe.experts.131.w2", "model.layers.13.block_sparse_moe.experts.132.w2", "model.layers.13.block_sparse_moe.experts.133.w2", "model.layers.13.block_sparse_moe.experts.134.w2", "model.layers.13.block_sparse_moe.experts.135.w2", "model.layers.13.block_sparse_moe.experts.136.w2", "model.layers.13.block_sparse_moe.experts.137.w2", "model.layers.13.block_sparse_moe.experts.138.w2", "model.layers.13.block_sparse_moe.experts.139.w2", "model.layers.13.block_sparse_moe.experts.140.w2", "model.layers.13.block_sparse_moe.experts.141.w2", "model.layers.13.block_sparse_moe.experts.142.w2", "model.layers.13.block_sparse_moe.experts.143.w2", "model.layers.13.block_sparse_moe.experts.144.w2", "model.layers.13.block_sparse_moe.experts.145.w2", "model.layers.13.block_sparse_moe.experts.146.w2", "model.layers.13.block_sparse_moe.experts.147.w2", "model.layers.13.block_sparse_moe.experts.148.w2", "model.layers.13.block_sparse_moe.experts.149.w2", "model.layers.13.block_sparse_moe.experts.150.w2", "model.layers.13.block_sparse_moe.experts.151.w2", "model.layers.13.block_sparse_moe.experts.152.w2", "model.layers.13.block_sparse_moe.experts.153.w2", "model.layers.13.block_sparse_moe.experts.154.w2", "model.layers.13.block_sparse_moe.experts.155.w2", "model.layers.13.block_sparse_moe.experts.156.w2", "model.layers.13.block_sparse_moe.experts.157.w2", "model.layers.13.block_sparse_moe.experts.158.w2", "model.layers.13.block_sparse_moe.experts.159.w2", "model.layers.13.block_sparse_moe.experts.160.w2", "model.layers.13.block_sparse_moe.experts.161.w2", "model.layers.13.block_sparse_moe.experts.162.w2", "model.layers.13.block_sparse_moe.experts.163.w2", "model.layers.13.block_sparse_moe.experts.164.w2", "model.layers.13.block_sparse_moe.experts.165.w2", "model.layers.13.block_sparse_moe.experts.166.w2", "model.layers.13.block_sparse_moe.experts.167.w2", "model.layers.13.block_sparse_moe.experts.168.w2", "model.layers.13.block_sparse_moe.experts.169.w2", "model.layers.13.block_sparse_moe.experts.170.w2", "model.layers.13.block_sparse_moe.experts.171.w2", "model.layers.13.block_sparse_moe.experts.172.w2", "model.layers.13.block_sparse_moe.experts.173.w2", "model.layers.13.block_sparse_moe.experts.174.w2", "model.layers.13.block_sparse_moe.experts.175.w2", "model.layers.13.block_sparse_moe.experts.176.w2", "model.layers.13.block_sparse_moe.experts.177.w2", "model.layers.13.block_sparse_moe.experts.178.w2", "model.layers.13.block_sparse_moe.experts.179.w2", "model.layers.13.block_sparse_moe.experts.180.w2", "model.layers.13.block_sparse_moe.experts.181.w2", "model.layers.13.block_sparse_moe.experts.182.w2", "model.layers.13.block_sparse_moe.experts.183.w2", "model.layers.13.block_sparse_moe.experts.184.w2", "model.layers.13.block_sparse_moe.experts.185.w2", "model.layers.13.block_sparse_moe.experts.186.w2", "model.layers.13.block_sparse_moe.experts.187.w2", "model.layers.13.block_sparse_moe.experts.188.w2", "model.layers.13.block_sparse_moe.experts.189.w2", "model.layers.13.block_sparse_moe.experts.190.w2", "model.layers.13.block_sparse_moe.experts.191.w2", "model.layers.13.block_sparse_moe.experts.192.w2", "model.layers.13.block_sparse_moe.experts.193.w2", "model.layers.13.block_sparse_moe.experts.194.w2", "model.layers.13.block_sparse_moe.experts.195.w2", "model.layers.13.block_sparse_moe.experts.196.w2", "model.layers.13.block_sparse_moe.experts.197.w2", "model.layers.13.block_sparse_moe.experts.198.w2", "model.layers.13.block_sparse_moe.experts.199.w2", "model.layers.13.block_sparse_moe.experts.200.w2", "model.layers.13.block_sparse_moe.experts.201.w2", "model.layers.13.block_sparse_moe.experts.202.w2", "model.layers.13.block_sparse_moe.experts.203.w2", "model.layers.13.block_sparse_moe.experts.204.w2", "model.layers.13.block_sparse_moe.experts.205.w2", "model.layers.13.block_sparse_moe.experts.206.w2", "model.layers.13.block_sparse_moe.experts.207.w2", "model.layers.13.block_sparse_moe.experts.208.w2", "model.layers.13.block_sparse_moe.experts.209.w2", "model.layers.13.block_sparse_moe.experts.210.w2", "model.layers.13.block_sparse_moe.experts.211.w2", "model.layers.13.block_sparse_moe.experts.212.w2", "model.layers.13.block_sparse_moe.experts.213.w2", "model.layers.13.block_sparse_moe.experts.214.w2", "model.layers.13.block_sparse_moe.experts.215.w2", "model.layers.13.block_sparse_moe.experts.216.w2", "model.layers.13.block_sparse_moe.experts.217.w2", "model.layers.13.block_sparse_moe.experts.218.w2", "model.layers.13.block_sparse_moe.experts.219.w2", "model.layers.13.block_sparse_moe.experts.220.w2", "model.layers.13.block_sparse_moe.experts.221.w2", "model.layers.13.block_sparse_moe.experts.222.w2", "model.layers.13.block_sparse_moe.experts.223.w2", "model.layers.13.block_sparse_moe.experts.224.w2", "model.layers.13.block_sparse_moe.experts.225.w2", "model.layers.13.block_sparse_moe.experts.226.w2", "model.layers.13.block_sparse_moe.experts.227.w2", "model.layers.13.block_sparse_moe.experts.228.w2", "model.layers.13.block_sparse_moe.experts.229.w2", "model.layers.13.block_sparse_moe.experts.230.w2", "model.layers.13.block_sparse_moe.experts.231.w2", "model.layers.13.block_sparse_moe.experts.232.w2", "model.layers.13.block_sparse_moe.experts.233.w2", "model.layers.13.block_sparse_moe.experts.234.w2", "model.layers.13.block_sparse_moe.experts.235.w2", "model.layers.13.block_sparse_moe.experts.236.w2", "model.layers.13.block_sparse_moe.experts.237.w2", "model.layers.13.block_sparse_moe.experts.238.w2", "model.layers.13.block_sparse_moe.experts.239.w2", "model.layers.13.block_sparse_moe.experts.240.w2", "model.layers.13.block_sparse_moe.experts.241.w2", "model.layers.13.block_sparse_moe.experts.242.w2", "model.layers.13.block_sparse_moe.experts.243.w2", "model.layers.13.block_sparse_moe.experts.244.w2", "model.layers.13.block_sparse_moe.experts.245.w2", "model.layers.13.block_sparse_moe.experts.246.w2", "model.layers.13.block_sparse_moe.experts.247.w2", "model.layers.13.block_sparse_moe.experts.248.w2", "model.layers.13.block_sparse_moe.experts.249.w2", "model.layers.13.block_sparse_moe.experts.250.w2", "model.layers.13.block_sparse_moe.experts.251.w2", "model.layers.13.block_sparse_moe.experts.252.w2", "model.layers.13.block_sparse_moe.experts.253.w2", "model.layers.13.block_sparse_moe.experts.254.w2", "model.layers.13.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006778687238693237, "dbits": 1207959552 } ] }, { "idx": 70, "layers": [ "model.layers.14.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00018255412578582764, "dbits": 18874368 } ] }, { "idx": 71, "layers": [ "model.layers.14.self_attn.k_proj", "model.layers.14.self_attn.v_proj" ], "candidates": [ { "dkld": -0.003880874812602908, "dbits": 6291456 } ] }, { "idx": 72, "layers": [ "model.layers.14.self_attn.o_proj" ], "candidates": [ { "dkld": 0.009240156412124723, "dbits": 18874368 } ] }, { "idx": 73, "layers": [ "model.layers.14.block_sparse_moe.experts.0.w1", "model.layers.14.block_sparse_moe.experts.1.w1", "model.layers.14.block_sparse_moe.experts.2.w1", "model.layers.14.block_sparse_moe.experts.3.w1", "model.layers.14.block_sparse_moe.experts.4.w1", "model.layers.14.block_sparse_moe.experts.5.w1", "model.layers.14.block_sparse_moe.experts.6.w1", "model.layers.14.block_sparse_moe.experts.7.w1", "model.layers.14.block_sparse_moe.experts.8.w1", "model.layers.14.block_sparse_moe.experts.9.w1", "model.layers.14.block_sparse_moe.experts.10.w1", "model.layers.14.block_sparse_moe.experts.11.w1", "model.layers.14.block_sparse_moe.experts.12.w1", "model.layers.14.block_sparse_moe.experts.13.w1", "model.layers.14.block_sparse_moe.experts.14.w1", "model.layers.14.block_sparse_moe.experts.15.w1", "model.layers.14.block_sparse_moe.experts.16.w1", "model.layers.14.block_sparse_moe.experts.17.w1", "model.layers.14.block_sparse_moe.experts.18.w1", "model.layers.14.block_sparse_moe.experts.19.w1", "model.layers.14.block_sparse_moe.experts.20.w1", "model.layers.14.block_sparse_moe.experts.21.w1", "model.layers.14.block_sparse_moe.experts.22.w1", "model.layers.14.block_sparse_moe.experts.23.w1", "model.layers.14.block_sparse_moe.experts.24.w1", "model.layers.14.block_sparse_moe.experts.25.w1", "model.layers.14.block_sparse_moe.experts.26.w1", "model.layers.14.block_sparse_moe.experts.27.w1", "model.layers.14.block_sparse_moe.experts.28.w1", "model.layers.14.block_sparse_moe.experts.29.w1", "model.layers.14.block_sparse_moe.experts.30.w1", "model.layers.14.block_sparse_moe.experts.31.w1", "model.layers.14.block_sparse_moe.experts.32.w1", "model.layers.14.block_sparse_moe.experts.33.w1", "model.layers.14.block_sparse_moe.experts.34.w1", "model.layers.14.block_sparse_moe.experts.35.w1", "model.layers.14.block_sparse_moe.experts.36.w1", "model.layers.14.block_sparse_moe.experts.37.w1", "model.layers.14.block_sparse_moe.experts.38.w1", "model.layers.14.block_sparse_moe.experts.39.w1", "model.layers.14.block_sparse_moe.experts.40.w1", "model.layers.14.block_sparse_moe.experts.41.w1", "model.layers.14.block_sparse_moe.experts.42.w1", "model.layers.14.block_sparse_moe.experts.43.w1", "model.layers.14.block_sparse_moe.experts.44.w1", "model.layers.14.block_sparse_moe.experts.45.w1", "model.layers.14.block_sparse_moe.experts.46.w1", "model.layers.14.block_sparse_moe.experts.47.w1", "model.layers.14.block_sparse_moe.experts.48.w1", "model.layers.14.block_sparse_moe.experts.49.w1", "model.layers.14.block_sparse_moe.experts.50.w1", "model.layers.14.block_sparse_moe.experts.51.w1", "model.layers.14.block_sparse_moe.experts.52.w1", "model.layers.14.block_sparse_moe.experts.53.w1", "model.layers.14.block_sparse_moe.experts.54.w1", "model.layers.14.block_sparse_moe.experts.55.w1", "model.layers.14.block_sparse_moe.experts.56.w1", "model.layers.14.block_sparse_moe.experts.57.w1", "model.layers.14.block_sparse_moe.experts.58.w1", "model.layers.14.block_sparse_moe.experts.59.w1", "model.layers.14.block_sparse_moe.experts.60.w1", "model.layers.14.block_sparse_moe.experts.61.w1", "model.layers.14.block_sparse_moe.experts.62.w1", "model.layers.14.block_sparse_moe.experts.63.w1", "model.layers.14.block_sparse_moe.experts.64.w1", "model.layers.14.block_sparse_moe.experts.65.w1", "model.layers.14.block_sparse_moe.experts.66.w1", "model.layers.14.block_sparse_moe.experts.67.w1", "model.layers.14.block_sparse_moe.experts.68.w1", "model.layers.14.block_sparse_moe.experts.69.w1", "model.layers.14.block_sparse_moe.experts.70.w1", "model.layers.14.block_sparse_moe.experts.71.w1", "model.layers.14.block_sparse_moe.experts.72.w1", "model.layers.14.block_sparse_moe.experts.73.w1", "model.layers.14.block_sparse_moe.experts.74.w1", "model.layers.14.block_sparse_moe.experts.75.w1", "model.layers.14.block_sparse_moe.experts.76.w1", "model.layers.14.block_sparse_moe.experts.77.w1", "model.layers.14.block_sparse_moe.experts.78.w1", "model.layers.14.block_sparse_moe.experts.79.w1", "model.layers.14.block_sparse_moe.experts.80.w1", "model.layers.14.block_sparse_moe.experts.81.w1", "model.layers.14.block_sparse_moe.experts.82.w1", "model.layers.14.block_sparse_moe.experts.83.w1", "model.layers.14.block_sparse_moe.experts.84.w1", "model.layers.14.block_sparse_moe.experts.85.w1", "model.layers.14.block_sparse_moe.experts.86.w1", "model.layers.14.block_sparse_moe.experts.87.w1", "model.layers.14.block_sparse_moe.experts.88.w1", "model.layers.14.block_sparse_moe.experts.89.w1", "model.layers.14.block_sparse_moe.experts.90.w1", "model.layers.14.block_sparse_moe.experts.91.w1", "model.layers.14.block_sparse_moe.experts.92.w1", "model.layers.14.block_sparse_moe.experts.93.w1", "model.layers.14.block_sparse_moe.experts.94.w1", "model.layers.14.block_sparse_moe.experts.95.w1", "model.layers.14.block_sparse_moe.experts.96.w1", "model.layers.14.block_sparse_moe.experts.97.w1", "model.layers.14.block_sparse_moe.experts.98.w1", "model.layers.14.block_sparse_moe.experts.99.w1", "model.layers.14.block_sparse_moe.experts.100.w1", "model.layers.14.block_sparse_moe.experts.101.w1", "model.layers.14.block_sparse_moe.experts.102.w1", "model.layers.14.block_sparse_moe.experts.103.w1", "model.layers.14.block_sparse_moe.experts.104.w1", "model.layers.14.block_sparse_moe.experts.105.w1", "model.layers.14.block_sparse_moe.experts.106.w1", "model.layers.14.block_sparse_moe.experts.107.w1", "model.layers.14.block_sparse_moe.experts.108.w1", "model.layers.14.block_sparse_moe.experts.109.w1", "model.layers.14.block_sparse_moe.experts.110.w1", "model.layers.14.block_sparse_moe.experts.111.w1", "model.layers.14.block_sparse_moe.experts.112.w1", "model.layers.14.block_sparse_moe.experts.113.w1", "model.layers.14.block_sparse_moe.experts.114.w1", "model.layers.14.block_sparse_moe.experts.115.w1", "model.layers.14.block_sparse_moe.experts.116.w1", "model.layers.14.block_sparse_moe.experts.117.w1", "model.layers.14.block_sparse_moe.experts.118.w1", "model.layers.14.block_sparse_moe.experts.119.w1", "model.layers.14.block_sparse_moe.experts.120.w1", "model.layers.14.block_sparse_moe.experts.121.w1", "model.layers.14.block_sparse_moe.experts.122.w1", "model.layers.14.block_sparse_moe.experts.123.w1", "model.layers.14.block_sparse_moe.experts.124.w1", "model.layers.14.block_sparse_moe.experts.125.w1", "model.layers.14.block_sparse_moe.experts.126.w1", "model.layers.14.block_sparse_moe.experts.127.w1", "model.layers.14.block_sparse_moe.experts.128.w1", "model.layers.14.block_sparse_moe.experts.129.w1", "model.layers.14.block_sparse_moe.experts.130.w1", "model.layers.14.block_sparse_moe.experts.131.w1", "model.layers.14.block_sparse_moe.experts.132.w1", "model.layers.14.block_sparse_moe.experts.133.w1", "model.layers.14.block_sparse_moe.experts.134.w1", "model.layers.14.block_sparse_moe.experts.135.w1", "model.layers.14.block_sparse_moe.experts.136.w1", "model.layers.14.block_sparse_moe.experts.137.w1", "model.layers.14.block_sparse_moe.experts.138.w1", "model.layers.14.block_sparse_moe.experts.139.w1", "model.layers.14.block_sparse_moe.experts.140.w1", "model.layers.14.block_sparse_moe.experts.141.w1", "model.layers.14.block_sparse_moe.experts.142.w1", "model.layers.14.block_sparse_moe.experts.143.w1", "model.layers.14.block_sparse_moe.experts.144.w1", "model.layers.14.block_sparse_moe.experts.145.w1", "model.layers.14.block_sparse_moe.experts.146.w1", "model.layers.14.block_sparse_moe.experts.147.w1", "model.layers.14.block_sparse_moe.experts.148.w1", "model.layers.14.block_sparse_moe.experts.149.w1", "model.layers.14.block_sparse_moe.experts.150.w1", "model.layers.14.block_sparse_moe.experts.151.w1", "model.layers.14.block_sparse_moe.experts.152.w1", "model.layers.14.block_sparse_moe.experts.153.w1", "model.layers.14.block_sparse_moe.experts.154.w1", "model.layers.14.block_sparse_moe.experts.155.w1", "model.layers.14.block_sparse_moe.experts.156.w1", "model.layers.14.block_sparse_moe.experts.157.w1", "model.layers.14.block_sparse_moe.experts.158.w1", "model.layers.14.block_sparse_moe.experts.159.w1", "model.layers.14.block_sparse_moe.experts.160.w1", "model.layers.14.block_sparse_moe.experts.161.w1", "model.layers.14.block_sparse_moe.experts.162.w1", "model.layers.14.block_sparse_moe.experts.163.w1", "model.layers.14.block_sparse_moe.experts.164.w1", "model.layers.14.block_sparse_moe.experts.165.w1", "model.layers.14.block_sparse_moe.experts.166.w1", "model.layers.14.block_sparse_moe.experts.167.w1", "model.layers.14.block_sparse_moe.experts.168.w1", "model.layers.14.block_sparse_moe.experts.169.w1", "model.layers.14.block_sparse_moe.experts.170.w1", "model.layers.14.block_sparse_moe.experts.171.w1", "model.layers.14.block_sparse_moe.experts.172.w1", "model.layers.14.block_sparse_moe.experts.173.w1", "model.layers.14.block_sparse_moe.experts.174.w1", "model.layers.14.block_sparse_moe.experts.175.w1", "model.layers.14.block_sparse_moe.experts.176.w1", "model.layers.14.block_sparse_moe.experts.177.w1", "model.layers.14.block_sparse_moe.experts.178.w1", "model.layers.14.block_sparse_moe.experts.179.w1", "model.layers.14.block_sparse_moe.experts.180.w1", "model.layers.14.block_sparse_moe.experts.181.w1", "model.layers.14.block_sparse_moe.experts.182.w1", "model.layers.14.block_sparse_moe.experts.183.w1", "model.layers.14.block_sparse_moe.experts.184.w1", "model.layers.14.block_sparse_moe.experts.185.w1", "model.layers.14.block_sparse_moe.experts.186.w1", "model.layers.14.block_sparse_moe.experts.187.w1", "model.layers.14.block_sparse_moe.experts.188.w1", "model.layers.14.block_sparse_moe.experts.189.w1", "model.layers.14.block_sparse_moe.experts.190.w1", "model.layers.14.block_sparse_moe.experts.191.w1", "model.layers.14.block_sparse_moe.experts.192.w1", "model.layers.14.block_sparse_moe.experts.193.w1", "model.layers.14.block_sparse_moe.experts.194.w1", "model.layers.14.block_sparse_moe.experts.195.w1", "model.layers.14.block_sparse_moe.experts.196.w1", "model.layers.14.block_sparse_moe.experts.197.w1", "model.layers.14.block_sparse_moe.experts.198.w1", "model.layers.14.block_sparse_moe.experts.199.w1", "model.layers.14.block_sparse_moe.experts.200.w1", "model.layers.14.block_sparse_moe.experts.201.w1", "model.layers.14.block_sparse_moe.experts.202.w1", "model.layers.14.block_sparse_moe.experts.203.w1", "model.layers.14.block_sparse_moe.experts.204.w1", "model.layers.14.block_sparse_moe.experts.205.w1", "model.layers.14.block_sparse_moe.experts.206.w1", "model.layers.14.block_sparse_moe.experts.207.w1", "model.layers.14.block_sparse_moe.experts.208.w1", "model.layers.14.block_sparse_moe.experts.209.w1", "model.layers.14.block_sparse_moe.experts.210.w1", "model.layers.14.block_sparse_moe.experts.211.w1", "model.layers.14.block_sparse_moe.experts.212.w1", "model.layers.14.block_sparse_moe.experts.213.w1", "model.layers.14.block_sparse_moe.experts.214.w1", "model.layers.14.block_sparse_moe.experts.215.w1", "model.layers.14.block_sparse_moe.experts.216.w1", "model.layers.14.block_sparse_moe.experts.217.w1", "model.layers.14.block_sparse_moe.experts.218.w1", "model.layers.14.block_sparse_moe.experts.219.w1", "model.layers.14.block_sparse_moe.experts.220.w1", "model.layers.14.block_sparse_moe.experts.221.w1", "model.layers.14.block_sparse_moe.experts.222.w1", "model.layers.14.block_sparse_moe.experts.223.w1", "model.layers.14.block_sparse_moe.experts.224.w1", "model.layers.14.block_sparse_moe.experts.225.w1", "model.layers.14.block_sparse_moe.experts.226.w1", "model.layers.14.block_sparse_moe.experts.227.w1", "model.layers.14.block_sparse_moe.experts.228.w1", "model.layers.14.block_sparse_moe.experts.229.w1", "model.layers.14.block_sparse_moe.experts.230.w1", "model.layers.14.block_sparse_moe.experts.231.w1", "model.layers.14.block_sparse_moe.experts.232.w1", "model.layers.14.block_sparse_moe.experts.233.w1", "model.layers.14.block_sparse_moe.experts.234.w1", "model.layers.14.block_sparse_moe.experts.235.w1", "model.layers.14.block_sparse_moe.experts.236.w1", "model.layers.14.block_sparse_moe.experts.237.w1", "model.layers.14.block_sparse_moe.experts.238.w1", "model.layers.14.block_sparse_moe.experts.239.w1", "model.layers.14.block_sparse_moe.experts.240.w1", "model.layers.14.block_sparse_moe.experts.241.w1", "model.layers.14.block_sparse_moe.experts.242.w1", "model.layers.14.block_sparse_moe.experts.243.w1", "model.layers.14.block_sparse_moe.experts.244.w1", "model.layers.14.block_sparse_moe.experts.245.w1", "model.layers.14.block_sparse_moe.experts.246.w1", "model.layers.14.block_sparse_moe.experts.247.w1", "model.layers.14.block_sparse_moe.experts.248.w1", "model.layers.14.block_sparse_moe.experts.249.w1", "model.layers.14.block_sparse_moe.experts.250.w1", "model.layers.14.block_sparse_moe.experts.251.w1", "model.layers.14.block_sparse_moe.experts.252.w1", "model.layers.14.block_sparse_moe.experts.253.w1", "model.layers.14.block_sparse_moe.experts.254.w1", "model.layers.14.block_sparse_moe.experts.255.w1", "model.layers.14.block_sparse_moe.experts.0.w3", "model.layers.14.block_sparse_moe.experts.1.w3", "model.layers.14.block_sparse_moe.experts.2.w3", "model.layers.14.block_sparse_moe.experts.3.w3", "model.layers.14.block_sparse_moe.experts.4.w3", "model.layers.14.block_sparse_moe.experts.5.w3", "model.layers.14.block_sparse_moe.experts.6.w3", "model.layers.14.block_sparse_moe.experts.7.w3", "model.layers.14.block_sparse_moe.experts.8.w3", "model.layers.14.block_sparse_moe.experts.9.w3", "model.layers.14.block_sparse_moe.experts.10.w3", "model.layers.14.block_sparse_moe.experts.11.w3", "model.layers.14.block_sparse_moe.experts.12.w3", "model.layers.14.block_sparse_moe.experts.13.w3", "model.layers.14.block_sparse_moe.experts.14.w3", "model.layers.14.block_sparse_moe.experts.15.w3", "model.layers.14.block_sparse_moe.experts.16.w3", "model.layers.14.block_sparse_moe.experts.17.w3", "model.layers.14.block_sparse_moe.experts.18.w3", "model.layers.14.block_sparse_moe.experts.19.w3", "model.layers.14.block_sparse_moe.experts.20.w3", "model.layers.14.block_sparse_moe.experts.21.w3", "model.layers.14.block_sparse_moe.experts.22.w3", "model.layers.14.block_sparse_moe.experts.23.w3", "model.layers.14.block_sparse_moe.experts.24.w3", "model.layers.14.block_sparse_moe.experts.25.w3", "model.layers.14.block_sparse_moe.experts.26.w3", "model.layers.14.block_sparse_moe.experts.27.w3", "model.layers.14.block_sparse_moe.experts.28.w3", "model.layers.14.block_sparse_moe.experts.29.w3", "model.layers.14.block_sparse_moe.experts.30.w3", "model.layers.14.block_sparse_moe.experts.31.w3", "model.layers.14.block_sparse_moe.experts.32.w3", "model.layers.14.block_sparse_moe.experts.33.w3", "model.layers.14.block_sparse_moe.experts.34.w3", "model.layers.14.block_sparse_moe.experts.35.w3", "model.layers.14.block_sparse_moe.experts.36.w3", "model.layers.14.block_sparse_moe.experts.37.w3", "model.layers.14.block_sparse_moe.experts.38.w3", "model.layers.14.block_sparse_moe.experts.39.w3", "model.layers.14.block_sparse_moe.experts.40.w3", "model.layers.14.block_sparse_moe.experts.41.w3", "model.layers.14.block_sparse_moe.experts.42.w3", "model.layers.14.block_sparse_moe.experts.43.w3", "model.layers.14.block_sparse_moe.experts.44.w3", "model.layers.14.block_sparse_moe.experts.45.w3", "model.layers.14.block_sparse_moe.experts.46.w3", "model.layers.14.block_sparse_moe.experts.47.w3", "model.layers.14.block_sparse_moe.experts.48.w3", "model.layers.14.block_sparse_moe.experts.49.w3", "model.layers.14.block_sparse_moe.experts.50.w3", "model.layers.14.block_sparse_moe.experts.51.w3", "model.layers.14.block_sparse_moe.experts.52.w3", "model.layers.14.block_sparse_moe.experts.53.w3", "model.layers.14.block_sparse_moe.experts.54.w3", "model.layers.14.block_sparse_moe.experts.55.w3", "model.layers.14.block_sparse_moe.experts.56.w3", "model.layers.14.block_sparse_moe.experts.57.w3", "model.layers.14.block_sparse_moe.experts.58.w3", "model.layers.14.block_sparse_moe.experts.59.w3", "model.layers.14.block_sparse_moe.experts.60.w3", "model.layers.14.block_sparse_moe.experts.61.w3", "model.layers.14.block_sparse_moe.experts.62.w3", "model.layers.14.block_sparse_moe.experts.63.w3", "model.layers.14.block_sparse_moe.experts.64.w3", "model.layers.14.block_sparse_moe.experts.65.w3", "model.layers.14.block_sparse_moe.experts.66.w3", "model.layers.14.block_sparse_moe.experts.67.w3", "model.layers.14.block_sparse_moe.experts.68.w3", "model.layers.14.block_sparse_moe.experts.69.w3", "model.layers.14.block_sparse_moe.experts.70.w3", "model.layers.14.block_sparse_moe.experts.71.w3", "model.layers.14.block_sparse_moe.experts.72.w3", "model.layers.14.block_sparse_moe.experts.73.w3", "model.layers.14.block_sparse_moe.experts.74.w3", "model.layers.14.block_sparse_moe.experts.75.w3", "model.layers.14.block_sparse_moe.experts.76.w3", "model.layers.14.block_sparse_moe.experts.77.w3", "model.layers.14.block_sparse_moe.experts.78.w3", "model.layers.14.block_sparse_moe.experts.79.w3", "model.layers.14.block_sparse_moe.experts.80.w3", "model.layers.14.block_sparse_moe.experts.81.w3", "model.layers.14.block_sparse_moe.experts.82.w3", "model.layers.14.block_sparse_moe.experts.83.w3", "model.layers.14.block_sparse_moe.experts.84.w3", "model.layers.14.block_sparse_moe.experts.85.w3", "model.layers.14.block_sparse_moe.experts.86.w3", "model.layers.14.block_sparse_moe.experts.87.w3", "model.layers.14.block_sparse_moe.experts.88.w3", "model.layers.14.block_sparse_moe.experts.89.w3", "model.layers.14.block_sparse_moe.experts.90.w3", "model.layers.14.block_sparse_moe.experts.91.w3", "model.layers.14.block_sparse_moe.experts.92.w3", "model.layers.14.block_sparse_moe.experts.93.w3", "model.layers.14.block_sparse_moe.experts.94.w3", "model.layers.14.block_sparse_moe.experts.95.w3", "model.layers.14.block_sparse_moe.experts.96.w3", "model.layers.14.block_sparse_moe.experts.97.w3", "model.layers.14.block_sparse_moe.experts.98.w3", "model.layers.14.block_sparse_moe.experts.99.w3", "model.layers.14.block_sparse_moe.experts.100.w3", "model.layers.14.block_sparse_moe.experts.101.w3", "model.layers.14.block_sparse_moe.experts.102.w3", "model.layers.14.block_sparse_moe.experts.103.w3", "model.layers.14.block_sparse_moe.experts.104.w3", "model.layers.14.block_sparse_moe.experts.105.w3", "model.layers.14.block_sparse_moe.experts.106.w3", "model.layers.14.block_sparse_moe.experts.107.w3", "model.layers.14.block_sparse_moe.experts.108.w3", "model.layers.14.block_sparse_moe.experts.109.w3", "model.layers.14.block_sparse_moe.experts.110.w3", "model.layers.14.block_sparse_moe.experts.111.w3", "model.layers.14.block_sparse_moe.experts.112.w3", "model.layers.14.block_sparse_moe.experts.113.w3", "model.layers.14.block_sparse_moe.experts.114.w3", "model.layers.14.block_sparse_moe.experts.115.w3", "model.layers.14.block_sparse_moe.experts.116.w3", "model.layers.14.block_sparse_moe.experts.117.w3", "model.layers.14.block_sparse_moe.experts.118.w3", "model.layers.14.block_sparse_moe.experts.119.w3", "model.layers.14.block_sparse_moe.experts.120.w3", "model.layers.14.block_sparse_moe.experts.121.w3", "model.layers.14.block_sparse_moe.experts.122.w3", "model.layers.14.block_sparse_moe.experts.123.w3", "model.layers.14.block_sparse_moe.experts.124.w3", "model.layers.14.block_sparse_moe.experts.125.w3", "model.layers.14.block_sparse_moe.experts.126.w3", "model.layers.14.block_sparse_moe.experts.127.w3", "model.layers.14.block_sparse_moe.experts.128.w3", "model.layers.14.block_sparse_moe.experts.129.w3", "model.layers.14.block_sparse_moe.experts.130.w3", "model.layers.14.block_sparse_moe.experts.131.w3", "model.layers.14.block_sparse_moe.experts.132.w3", "model.layers.14.block_sparse_moe.experts.133.w3", "model.layers.14.block_sparse_moe.experts.134.w3", "model.layers.14.block_sparse_moe.experts.135.w3", "model.layers.14.block_sparse_moe.experts.136.w3", "model.layers.14.block_sparse_moe.experts.137.w3", "model.layers.14.block_sparse_moe.experts.138.w3", "model.layers.14.block_sparse_moe.experts.139.w3", "model.layers.14.block_sparse_moe.experts.140.w3", "model.layers.14.block_sparse_moe.experts.141.w3", "model.layers.14.block_sparse_moe.experts.142.w3", "model.layers.14.block_sparse_moe.experts.143.w3", "model.layers.14.block_sparse_moe.experts.144.w3", "model.layers.14.block_sparse_moe.experts.145.w3", "model.layers.14.block_sparse_moe.experts.146.w3", "model.layers.14.block_sparse_moe.experts.147.w3", "model.layers.14.block_sparse_moe.experts.148.w3", "model.layers.14.block_sparse_moe.experts.149.w3", "model.layers.14.block_sparse_moe.experts.150.w3", "model.layers.14.block_sparse_moe.experts.151.w3", "model.layers.14.block_sparse_moe.experts.152.w3", "model.layers.14.block_sparse_moe.experts.153.w3", "model.layers.14.block_sparse_moe.experts.154.w3", "model.layers.14.block_sparse_moe.experts.155.w3", "model.layers.14.block_sparse_moe.experts.156.w3", "model.layers.14.block_sparse_moe.experts.157.w3", "model.layers.14.block_sparse_moe.experts.158.w3", "model.layers.14.block_sparse_moe.experts.159.w3", "model.layers.14.block_sparse_moe.experts.160.w3", "model.layers.14.block_sparse_moe.experts.161.w3", "model.layers.14.block_sparse_moe.experts.162.w3", "model.layers.14.block_sparse_moe.experts.163.w3", "model.layers.14.block_sparse_moe.experts.164.w3", "model.layers.14.block_sparse_moe.experts.165.w3", "model.layers.14.block_sparse_moe.experts.166.w3", "model.layers.14.block_sparse_moe.experts.167.w3", "model.layers.14.block_sparse_moe.experts.168.w3", "model.layers.14.block_sparse_moe.experts.169.w3", "model.layers.14.block_sparse_moe.experts.170.w3", "model.layers.14.block_sparse_moe.experts.171.w3", "model.layers.14.block_sparse_moe.experts.172.w3", "model.layers.14.block_sparse_moe.experts.173.w3", "model.layers.14.block_sparse_moe.experts.174.w3", "model.layers.14.block_sparse_moe.experts.175.w3", "model.layers.14.block_sparse_moe.experts.176.w3", "model.layers.14.block_sparse_moe.experts.177.w3", "model.layers.14.block_sparse_moe.experts.178.w3", "model.layers.14.block_sparse_moe.experts.179.w3", "model.layers.14.block_sparse_moe.experts.180.w3", "model.layers.14.block_sparse_moe.experts.181.w3", "model.layers.14.block_sparse_moe.experts.182.w3", "model.layers.14.block_sparse_moe.experts.183.w3", "model.layers.14.block_sparse_moe.experts.184.w3", "model.layers.14.block_sparse_moe.experts.185.w3", "model.layers.14.block_sparse_moe.experts.186.w3", "model.layers.14.block_sparse_moe.experts.187.w3", "model.layers.14.block_sparse_moe.experts.188.w3", "model.layers.14.block_sparse_moe.experts.189.w3", "model.layers.14.block_sparse_moe.experts.190.w3", "model.layers.14.block_sparse_moe.experts.191.w3", "model.layers.14.block_sparse_moe.experts.192.w3", "model.layers.14.block_sparse_moe.experts.193.w3", "model.layers.14.block_sparse_moe.experts.194.w3", "model.layers.14.block_sparse_moe.experts.195.w3", "model.layers.14.block_sparse_moe.experts.196.w3", "model.layers.14.block_sparse_moe.experts.197.w3", "model.layers.14.block_sparse_moe.experts.198.w3", "model.layers.14.block_sparse_moe.experts.199.w3", "model.layers.14.block_sparse_moe.experts.200.w3", "model.layers.14.block_sparse_moe.experts.201.w3", "model.layers.14.block_sparse_moe.experts.202.w3", "model.layers.14.block_sparse_moe.experts.203.w3", "model.layers.14.block_sparse_moe.experts.204.w3", "model.layers.14.block_sparse_moe.experts.205.w3", "model.layers.14.block_sparse_moe.experts.206.w3", "model.layers.14.block_sparse_moe.experts.207.w3", "model.layers.14.block_sparse_moe.experts.208.w3", "model.layers.14.block_sparse_moe.experts.209.w3", "model.layers.14.block_sparse_moe.experts.210.w3", "model.layers.14.block_sparse_moe.experts.211.w3", "model.layers.14.block_sparse_moe.experts.212.w3", "model.layers.14.block_sparse_moe.experts.213.w3", "model.layers.14.block_sparse_moe.experts.214.w3", "model.layers.14.block_sparse_moe.experts.215.w3", "model.layers.14.block_sparse_moe.experts.216.w3", "model.layers.14.block_sparse_moe.experts.217.w3", "model.layers.14.block_sparse_moe.experts.218.w3", "model.layers.14.block_sparse_moe.experts.219.w3", "model.layers.14.block_sparse_moe.experts.220.w3", "model.layers.14.block_sparse_moe.experts.221.w3", "model.layers.14.block_sparse_moe.experts.222.w3", "model.layers.14.block_sparse_moe.experts.223.w3", "model.layers.14.block_sparse_moe.experts.224.w3", "model.layers.14.block_sparse_moe.experts.225.w3", "model.layers.14.block_sparse_moe.experts.226.w3", "model.layers.14.block_sparse_moe.experts.227.w3", "model.layers.14.block_sparse_moe.experts.228.w3", "model.layers.14.block_sparse_moe.experts.229.w3", "model.layers.14.block_sparse_moe.experts.230.w3", "model.layers.14.block_sparse_moe.experts.231.w3", "model.layers.14.block_sparse_moe.experts.232.w3", "model.layers.14.block_sparse_moe.experts.233.w3", "model.layers.14.block_sparse_moe.experts.234.w3", "model.layers.14.block_sparse_moe.experts.235.w3", "model.layers.14.block_sparse_moe.experts.236.w3", "model.layers.14.block_sparse_moe.experts.237.w3", "model.layers.14.block_sparse_moe.experts.238.w3", "model.layers.14.block_sparse_moe.experts.239.w3", "model.layers.14.block_sparse_moe.experts.240.w3", "model.layers.14.block_sparse_moe.experts.241.w3", "model.layers.14.block_sparse_moe.experts.242.w3", "model.layers.14.block_sparse_moe.experts.243.w3", "model.layers.14.block_sparse_moe.experts.244.w3", "model.layers.14.block_sparse_moe.experts.245.w3", "model.layers.14.block_sparse_moe.experts.246.w3", "model.layers.14.block_sparse_moe.experts.247.w3", "model.layers.14.block_sparse_moe.experts.248.w3", "model.layers.14.block_sparse_moe.experts.249.w3", "model.layers.14.block_sparse_moe.experts.250.w3", "model.layers.14.block_sparse_moe.experts.251.w3", "model.layers.14.block_sparse_moe.experts.252.w3", "model.layers.14.block_sparse_moe.experts.253.w3", "model.layers.14.block_sparse_moe.experts.254.w3", "model.layers.14.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0025367677211760586, "dbits": 2415919104 } ] }, { "idx": 74, "layers": [ "model.layers.14.block_sparse_moe.experts.0.w2", "model.layers.14.block_sparse_moe.experts.1.w2", "model.layers.14.block_sparse_moe.experts.2.w2", "model.layers.14.block_sparse_moe.experts.3.w2", "model.layers.14.block_sparse_moe.experts.4.w2", "model.layers.14.block_sparse_moe.experts.5.w2", "model.layers.14.block_sparse_moe.experts.6.w2", "model.layers.14.block_sparse_moe.experts.7.w2", "model.layers.14.block_sparse_moe.experts.8.w2", "model.layers.14.block_sparse_moe.experts.9.w2", "model.layers.14.block_sparse_moe.experts.10.w2", "model.layers.14.block_sparse_moe.experts.11.w2", "model.layers.14.block_sparse_moe.experts.12.w2", "model.layers.14.block_sparse_moe.experts.13.w2", "model.layers.14.block_sparse_moe.experts.14.w2", "model.layers.14.block_sparse_moe.experts.15.w2", "model.layers.14.block_sparse_moe.experts.16.w2", "model.layers.14.block_sparse_moe.experts.17.w2", "model.layers.14.block_sparse_moe.experts.18.w2", "model.layers.14.block_sparse_moe.experts.19.w2", "model.layers.14.block_sparse_moe.experts.20.w2", "model.layers.14.block_sparse_moe.experts.21.w2", "model.layers.14.block_sparse_moe.experts.22.w2", "model.layers.14.block_sparse_moe.experts.23.w2", "model.layers.14.block_sparse_moe.experts.24.w2", "model.layers.14.block_sparse_moe.experts.25.w2", "model.layers.14.block_sparse_moe.experts.26.w2", "model.layers.14.block_sparse_moe.experts.27.w2", "model.layers.14.block_sparse_moe.experts.28.w2", "model.layers.14.block_sparse_moe.experts.29.w2", "model.layers.14.block_sparse_moe.experts.30.w2", "model.layers.14.block_sparse_moe.experts.31.w2", "model.layers.14.block_sparse_moe.experts.32.w2", "model.layers.14.block_sparse_moe.experts.33.w2", "model.layers.14.block_sparse_moe.experts.34.w2", "model.layers.14.block_sparse_moe.experts.35.w2", "model.layers.14.block_sparse_moe.experts.36.w2", "model.layers.14.block_sparse_moe.experts.37.w2", "model.layers.14.block_sparse_moe.experts.38.w2", "model.layers.14.block_sparse_moe.experts.39.w2", "model.layers.14.block_sparse_moe.experts.40.w2", "model.layers.14.block_sparse_moe.experts.41.w2", "model.layers.14.block_sparse_moe.experts.42.w2", "model.layers.14.block_sparse_moe.experts.43.w2", "model.layers.14.block_sparse_moe.experts.44.w2", "model.layers.14.block_sparse_moe.experts.45.w2", "model.layers.14.block_sparse_moe.experts.46.w2", "model.layers.14.block_sparse_moe.experts.47.w2", "model.layers.14.block_sparse_moe.experts.48.w2", "model.layers.14.block_sparse_moe.experts.49.w2", "model.layers.14.block_sparse_moe.experts.50.w2", "model.layers.14.block_sparse_moe.experts.51.w2", "model.layers.14.block_sparse_moe.experts.52.w2", "model.layers.14.block_sparse_moe.experts.53.w2", "model.layers.14.block_sparse_moe.experts.54.w2", "model.layers.14.block_sparse_moe.experts.55.w2", "model.layers.14.block_sparse_moe.experts.56.w2", "model.layers.14.block_sparse_moe.experts.57.w2", "model.layers.14.block_sparse_moe.experts.58.w2", "model.layers.14.block_sparse_moe.experts.59.w2", "model.layers.14.block_sparse_moe.experts.60.w2", "model.layers.14.block_sparse_moe.experts.61.w2", "model.layers.14.block_sparse_moe.experts.62.w2", "model.layers.14.block_sparse_moe.experts.63.w2", "model.layers.14.block_sparse_moe.experts.64.w2", "model.layers.14.block_sparse_moe.experts.65.w2", "model.layers.14.block_sparse_moe.experts.66.w2", "model.layers.14.block_sparse_moe.experts.67.w2", "model.layers.14.block_sparse_moe.experts.68.w2", "model.layers.14.block_sparse_moe.experts.69.w2", "model.layers.14.block_sparse_moe.experts.70.w2", "model.layers.14.block_sparse_moe.experts.71.w2", "model.layers.14.block_sparse_moe.experts.72.w2", "model.layers.14.block_sparse_moe.experts.73.w2", "model.layers.14.block_sparse_moe.experts.74.w2", "model.layers.14.block_sparse_moe.experts.75.w2", "model.layers.14.block_sparse_moe.experts.76.w2", "model.layers.14.block_sparse_moe.experts.77.w2", "model.layers.14.block_sparse_moe.experts.78.w2", "model.layers.14.block_sparse_moe.experts.79.w2", "model.layers.14.block_sparse_moe.experts.80.w2", "model.layers.14.block_sparse_moe.experts.81.w2", "model.layers.14.block_sparse_moe.experts.82.w2", "model.layers.14.block_sparse_moe.experts.83.w2", "model.layers.14.block_sparse_moe.experts.84.w2", "model.layers.14.block_sparse_moe.experts.85.w2", "model.layers.14.block_sparse_moe.experts.86.w2", "model.layers.14.block_sparse_moe.experts.87.w2", "model.layers.14.block_sparse_moe.experts.88.w2", "model.layers.14.block_sparse_moe.experts.89.w2", "model.layers.14.block_sparse_moe.experts.90.w2", "model.layers.14.block_sparse_moe.experts.91.w2", "model.layers.14.block_sparse_moe.experts.92.w2", "model.layers.14.block_sparse_moe.experts.93.w2", "model.layers.14.block_sparse_moe.experts.94.w2", "model.layers.14.block_sparse_moe.experts.95.w2", "model.layers.14.block_sparse_moe.experts.96.w2", "model.layers.14.block_sparse_moe.experts.97.w2", "model.layers.14.block_sparse_moe.experts.98.w2", "model.layers.14.block_sparse_moe.experts.99.w2", "model.layers.14.block_sparse_moe.experts.100.w2", "model.layers.14.block_sparse_moe.experts.101.w2", "model.layers.14.block_sparse_moe.experts.102.w2", "model.layers.14.block_sparse_moe.experts.103.w2", "model.layers.14.block_sparse_moe.experts.104.w2", "model.layers.14.block_sparse_moe.experts.105.w2", "model.layers.14.block_sparse_moe.experts.106.w2", "model.layers.14.block_sparse_moe.experts.107.w2", "model.layers.14.block_sparse_moe.experts.108.w2", "model.layers.14.block_sparse_moe.experts.109.w2", "model.layers.14.block_sparse_moe.experts.110.w2", "model.layers.14.block_sparse_moe.experts.111.w2", "model.layers.14.block_sparse_moe.experts.112.w2", "model.layers.14.block_sparse_moe.experts.113.w2", "model.layers.14.block_sparse_moe.experts.114.w2", "model.layers.14.block_sparse_moe.experts.115.w2", "model.layers.14.block_sparse_moe.experts.116.w2", "model.layers.14.block_sparse_moe.experts.117.w2", "model.layers.14.block_sparse_moe.experts.118.w2", "model.layers.14.block_sparse_moe.experts.119.w2", "model.layers.14.block_sparse_moe.experts.120.w2", "model.layers.14.block_sparse_moe.experts.121.w2", "model.layers.14.block_sparse_moe.experts.122.w2", "model.layers.14.block_sparse_moe.experts.123.w2", "model.layers.14.block_sparse_moe.experts.124.w2", "model.layers.14.block_sparse_moe.experts.125.w2", "model.layers.14.block_sparse_moe.experts.126.w2", "model.layers.14.block_sparse_moe.experts.127.w2", "model.layers.14.block_sparse_moe.experts.128.w2", "model.layers.14.block_sparse_moe.experts.129.w2", "model.layers.14.block_sparse_moe.experts.130.w2", "model.layers.14.block_sparse_moe.experts.131.w2", "model.layers.14.block_sparse_moe.experts.132.w2", "model.layers.14.block_sparse_moe.experts.133.w2", "model.layers.14.block_sparse_moe.experts.134.w2", "model.layers.14.block_sparse_moe.experts.135.w2", "model.layers.14.block_sparse_moe.experts.136.w2", "model.layers.14.block_sparse_moe.experts.137.w2", "model.layers.14.block_sparse_moe.experts.138.w2", "model.layers.14.block_sparse_moe.experts.139.w2", "model.layers.14.block_sparse_moe.experts.140.w2", "model.layers.14.block_sparse_moe.experts.141.w2", "model.layers.14.block_sparse_moe.experts.142.w2", "model.layers.14.block_sparse_moe.experts.143.w2", "model.layers.14.block_sparse_moe.experts.144.w2", "model.layers.14.block_sparse_moe.experts.145.w2", "model.layers.14.block_sparse_moe.experts.146.w2", "model.layers.14.block_sparse_moe.experts.147.w2", "model.layers.14.block_sparse_moe.experts.148.w2", "model.layers.14.block_sparse_moe.experts.149.w2", "model.layers.14.block_sparse_moe.experts.150.w2", "model.layers.14.block_sparse_moe.experts.151.w2", "model.layers.14.block_sparse_moe.experts.152.w2", "model.layers.14.block_sparse_moe.experts.153.w2", "model.layers.14.block_sparse_moe.experts.154.w2", "model.layers.14.block_sparse_moe.experts.155.w2", "model.layers.14.block_sparse_moe.experts.156.w2", "model.layers.14.block_sparse_moe.experts.157.w2", "model.layers.14.block_sparse_moe.experts.158.w2", "model.layers.14.block_sparse_moe.experts.159.w2", "model.layers.14.block_sparse_moe.experts.160.w2", "model.layers.14.block_sparse_moe.experts.161.w2", "model.layers.14.block_sparse_moe.experts.162.w2", "model.layers.14.block_sparse_moe.experts.163.w2", "model.layers.14.block_sparse_moe.experts.164.w2", "model.layers.14.block_sparse_moe.experts.165.w2", "model.layers.14.block_sparse_moe.experts.166.w2", "model.layers.14.block_sparse_moe.experts.167.w2", "model.layers.14.block_sparse_moe.experts.168.w2", "model.layers.14.block_sparse_moe.experts.169.w2", "model.layers.14.block_sparse_moe.experts.170.w2", "model.layers.14.block_sparse_moe.experts.171.w2", "model.layers.14.block_sparse_moe.experts.172.w2", "model.layers.14.block_sparse_moe.experts.173.w2", "model.layers.14.block_sparse_moe.experts.174.w2", "model.layers.14.block_sparse_moe.experts.175.w2", "model.layers.14.block_sparse_moe.experts.176.w2", "model.layers.14.block_sparse_moe.experts.177.w2", "model.layers.14.block_sparse_moe.experts.178.w2", "model.layers.14.block_sparse_moe.experts.179.w2", "model.layers.14.block_sparse_moe.experts.180.w2", "model.layers.14.block_sparse_moe.experts.181.w2", "model.layers.14.block_sparse_moe.experts.182.w2", "model.layers.14.block_sparse_moe.experts.183.w2", "model.layers.14.block_sparse_moe.experts.184.w2", "model.layers.14.block_sparse_moe.experts.185.w2", "model.layers.14.block_sparse_moe.experts.186.w2", "model.layers.14.block_sparse_moe.experts.187.w2", "model.layers.14.block_sparse_moe.experts.188.w2", "model.layers.14.block_sparse_moe.experts.189.w2", "model.layers.14.block_sparse_moe.experts.190.w2", "model.layers.14.block_sparse_moe.experts.191.w2", "model.layers.14.block_sparse_moe.experts.192.w2", "model.layers.14.block_sparse_moe.experts.193.w2", "model.layers.14.block_sparse_moe.experts.194.w2", "model.layers.14.block_sparse_moe.experts.195.w2", "model.layers.14.block_sparse_moe.experts.196.w2", "model.layers.14.block_sparse_moe.experts.197.w2", "model.layers.14.block_sparse_moe.experts.198.w2", "model.layers.14.block_sparse_moe.experts.199.w2", "model.layers.14.block_sparse_moe.experts.200.w2", "model.layers.14.block_sparse_moe.experts.201.w2", "model.layers.14.block_sparse_moe.experts.202.w2", "model.layers.14.block_sparse_moe.experts.203.w2", "model.layers.14.block_sparse_moe.experts.204.w2", "model.layers.14.block_sparse_moe.experts.205.w2", "model.layers.14.block_sparse_moe.experts.206.w2", "model.layers.14.block_sparse_moe.experts.207.w2", "model.layers.14.block_sparse_moe.experts.208.w2", "model.layers.14.block_sparse_moe.experts.209.w2", "model.layers.14.block_sparse_moe.experts.210.w2", "model.layers.14.block_sparse_moe.experts.211.w2", "model.layers.14.block_sparse_moe.experts.212.w2", "model.layers.14.block_sparse_moe.experts.213.w2", "model.layers.14.block_sparse_moe.experts.214.w2", "model.layers.14.block_sparse_moe.experts.215.w2", "model.layers.14.block_sparse_moe.experts.216.w2", "model.layers.14.block_sparse_moe.experts.217.w2", "model.layers.14.block_sparse_moe.experts.218.w2", "model.layers.14.block_sparse_moe.experts.219.w2", "model.layers.14.block_sparse_moe.experts.220.w2", "model.layers.14.block_sparse_moe.experts.221.w2", "model.layers.14.block_sparse_moe.experts.222.w2", "model.layers.14.block_sparse_moe.experts.223.w2", "model.layers.14.block_sparse_moe.experts.224.w2", "model.layers.14.block_sparse_moe.experts.225.w2", "model.layers.14.block_sparse_moe.experts.226.w2", "model.layers.14.block_sparse_moe.experts.227.w2", "model.layers.14.block_sparse_moe.experts.228.w2", "model.layers.14.block_sparse_moe.experts.229.w2", "model.layers.14.block_sparse_moe.experts.230.w2", "model.layers.14.block_sparse_moe.experts.231.w2", "model.layers.14.block_sparse_moe.experts.232.w2", "model.layers.14.block_sparse_moe.experts.233.w2", "model.layers.14.block_sparse_moe.experts.234.w2", "model.layers.14.block_sparse_moe.experts.235.w2", "model.layers.14.block_sparse_moe.experts.236.w2", "model.layers.14.block_sparse_moe.experts.237.w2", "model.layers.14.block_sparse_moe.experts.238.w2", "model.layers.14.block_sparse_moe.experts.239.w2", "model.layers.14.block_sparse_moe.experts.240.w2", "model.layers.14.block_sparse_moe.experts.241.w2", "model.layers.14.block_sparse_moe.experts.242.w2", "model.layers.14.block_sparse_moe.experts.243.w2", "model.layers.14.block_sparse_moe.experts.244.w2", "model.layers.14.block_sparse_moe.experts.245.w2", "model.layers.14.block_sparse_moe.experts.246.w2", "model.layers.14.block_sparse_moe.experts.247.w2", "model.layers.14.block_sparse_moe.experts.248.w2", "model.layers.14.block_sparse_moe.experts.249.w2", "model.layers.14.block_sparse_moe.experts.250.w2", "model.layers.14.block_sparse_moe.experts.251.w2", "model.layers.14.block_sparse_moe.experts.252.w2", "model.layers.14.block_sparse_moe.experts.253.w2", "model.layers.14.block_sparse_moe.experts.254.w2", "model.layers.14.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0004603743553160955, "dbits": 1207959552 } ] }, { "idx": 75, "layers": [ "model.layers.15.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0028500080108643244, "dbits": 18874368 } ] }, { "idx": 76, "layers": [ "model.layers.15.self_attn.k_proj", "model.layers.15.self_attn.v_proj" ], "candidates": [ { "dkld": 0.005987074971199102, "dbits": 6291456 } ] }, { "idx": 77, "layers": [ "model.layers.15.self_attn.o_proj" ], "candidates": [ { "dkld": -0.015546464920043856, "dbits": 18874368 } ] }, { "idx": 78, "layers": [ "model.layers.15.block_sparse_moe.experts.0.w1", "model.layers.15.block_sparse_moe.experts.1.w1", "model.layers.15.block_sparse_moe.experts.2.w1", "model.layers.15.block_sparse_moe.experts.3.w1", "model.layers.15.block_sparse_moe.experts.4.w1", "model.layers.15.block_sparse_moe.experts.5.w1", "model.layers.15.block_sparse_moe.experts.6.w1", "model.layers.15.block_sparse_moe.experts.7.w1", "model.layers.15.block_sparse_moe.experts.8.w1", "model.layers.15.block_sparse_moe.experts.9.w1", "model.layers.15.block_sparse_moe.experts.10.w1", "model.layers.15.block_sparse_moe.experts.11.w1", "model.layers.15.block_sparse_moe.experts.12.w1", "model.layers.15.block_sparse_moe.experts.13.w1", "model.layers.15.block_sparse_moe.experts.14.w1", "model.layers.15.block_sparse_moe.experts.15.w1", "model.layers.15.block_sparse_moe.experts.16.w1", "model.layers.15.block_sparse_moe.experts.17.w1", "model.layers.15.block_sparse_moe.experts.18.w1", "model.layers.15.block_sparse_moe.experts.19.w1", "model.layers.15.block_sparse_moe.experts.20.w1", "model.layers.15.block_sparse_moe.experts.21.w1", "model.layers.15.block_sparse_moe.experts.22.w1", "model.layers.15.block_sparse_moe.experts.23.w1", "model.layers.15.block_sparse_moe.experts.24.w1", "model.layers.15.block_sparse_moe.experts.25.w1", "model.layers.15.block_sparse_moe.experts.26.w1", "model.layers.15.block_sparse_moe.experts.27.w1", "model.layers.15.block_sparse_moe.experts.28.w1", "model.layers.15.block_sparse_moe.experts.29.w1", "model.layers.15.block_sparse_moe.experts.30.w1", "model.layers.15.block_sparse_moe.experts.31.w1", "model.layers.15.block_sparse_moe.experts.32.w1", "model.layers.15.block_sparse_moe.experts.33.w1", "model.layers.15.block_sparse_moe.experts.34.w1", "model.layers.15.block_sparse_moe.experts.35.w1", "model.layers.15.block_sparse_moe.experts.36.w1", "model.layers.15.block_sparse_moe.experts.37.w1", "model.layers.15.block_sparse_moe.experts.38.w1", "model.layers.15.block_sparse_moe.experts.39.w1", "model.layers.15.block_sparse_moe.experts.40.w1", "model.layers.15.block_sparse_moe.experts.41.w1", "model.layers.15.block_sparse_moe.experts.42.w1", "model.layers.15.block_sparse_moe.experts.43.w1", "model.layers.15.block_sparse_moe.experts.44.w1", "model.layers.15.block_sparse_moe.experts.45.w1", "model.layers.15.block_sparse_moe.experts.46.w1", "model.layers.15.block_sparse_moe.experts.47.w1", "model.layers.15.block_sparse_moe.experts.48.w1", "model.layers.15.block_sparse_moe.experts.49.w1", "model.layers.15.block_sparse_moe.experts.50.w1", "model.layers.15.block_sparse_moe.experts.51.w1", "model.layers.15.block_sparse_moe.experts.52.w1", "model.layers.15.block_sparse_moe.experts.53.w1", "model.layers.15.block_sparse_moe.experts.54.w1", "model.layers.15.block_sparse_moe.experts.55.w1", "model.layers.15.block_sparse_moe.experts.56.w1", "model.layers.15.block_sparse_moe.experts.57.w1", "model.layers.15.block_sparse_moe.experts.58.w1", "model.layers.15.block_sparse_moe.experts.59.w1", "model.layers.15.block_sparse_moe.experts.60.w1", "model.layers.15.block_sparse_moe.experts.61.w1", "model.layers.15.block_sparse_moe.experts.62.w1", "model.layers.15.block_sparse_moe.experts.63.w1", "model.layers.15.block_sparse_moe.experts.64.w1", "model.layers.15.block_sparse_moe.experts.65.w1", "model.layers.15.block_sparse_moe.experts.66.w1", "model.layers.15.block_sparse_moe.experts.67.w1", "model.layers.15.block_sparse_moe.experts.68.w1", "model.layers.15.block_sparse_moe.experts.69.w1", "model.layers.15.block_sparse_moe.experts.70.w1", "model.layers.15.block_sparse_moe.experts.71.w1", "model.layers.15.block_sparse_moe.experts.72.w1", "model.layers.15.block_sparse_moe.experts.73.w1", "model.layers.15.block_sparse_moe.experts.74.w1", "model.layers.15.block_sparse_moe.experts.75.w1", "model.layers.15.block_sparse_moe.experts.76.w1", "model.layers.15.block_sparse_moe.experts.77.w1", "model.layers.15.block_sparse_moe.experts.78.w1", "model.layers.15.block_sparse_moe.experts.79.w1", "model.layers.15.block_sparse_moe.experts.80.w1", "model.layers.15.block_sparse_moe.experts.81.w1", "model.layers.15.block_sparse_moe.experts.82.w1", "model.layers.15.block_sparse_moe.experts.83.w1", "model.layers.15.block_sparse_moe.experts.84.w1", "model.layers.15.block_sparse_moe.experts.85.w1", "model.layers.15.block_sparse_moe.experts.86.w1", "model.layers.15.block_sparse_moe.experts.87.w1", "model.layers.15.block_sparse_moe.experts.88.w1", "model.layers.15.block_sparse_moe.experts.89.w1", "model.layers.15.block_sparse_moe.experts.90.w1", "model.layers.15.block_sparse_moe.experts.91.w1", "model.layers.15.block_sparse_moe.experts.92.w1", "model.layers.15.block_sparse_moe.experts.93.w1", "model.layers.15.block_sparse_moe.experts.94.w1", "model.layers.15.block_sparse_moe.experts.95.w1", "model.layers.15.block_sparse_moe.experts.96.w1", "model.layers.15.block_sparse_moe.experts.97.w1", "model.layers.15.block_sparse_moe.experts.98.w1", "model.layers.15.block_sparse_moe.experts.99.w1", "model.layers.15.block_sparse_moe.experts.100.w1", "model.layers.15.block_sparse_moe.experts.101.w1", "model.layers.15.block_sparse_moe.experts.102.w1", "model.layers.15.block_sparse_moe.experts.103.w1", "model.layers.15.block_sparse_moe.experts.104.w1", "model.layers.15.block_sparse_moe.experts.105.w1", "model.layers.15.block_sparse_moe.experts.106.w1", "model.layers.15.block_sparse_moe.experts.107.w1", "model.layers.15.block_sparse_moe.experts.108.w1", "model.layers.15.block_sparse_moe.experts.109.w1", "model.layers.15.block_sparse_moe.experts.110.w1", "model.layers.15.block_sparse_moe.experts.111.w1", "model.layers.15.block_sparse_moe.experts.112.w1", "model.layers.15.block_sparse_moe.experts.113.w1", "model.layers.15.block_sparse_moe.experts.114.w1", "model.layers.15.block_sparse_moe.experts.115.w1", "model.layers.15.block_sparse_moe.experts.116.w1", "model.layers.15.block_sparse_moe.experts.117.w1", "model.layers.15.block_sparse_moe.experts.118.w1", "model.layers.15.block_sparse_moe.experts.119.w1", "model.layers.15.block_sparse_moe.experts.120.w1", "model.layers.15.block_sparse_moe.experts.121.w1", "model.layers.15.block_sparse_moe.experts.122.w1", "model.layers.15.block_sparse_moe.experts.123.w1", "model.layers.15.block_sparse_moe.experts.124.w1", "model.layers.15.block_sparse_moe.experts.125.w1", "model.layers.15.block_sparse_moe.experts.126.w1", "model.layers.15.block_sparse_moe.experts.127.w1", "model.layers.15.block_sparse_moe.experts.128.w1", "model.layers.15.block_sparse_moe.experts.129.w1", "model.layers.15.block_sparse_moe.experts.130.w1", "model.layers.15.block_sparse_moe.experts.131.w1", "model.layers.15.block_sparse_moe.experts.132.w1", "model.layers.15.block_sparse_moe.experts.133.w1", "model.layers.15.block_sparse_moe.experts.134.w1", "model.layers.15.block_sparse_moe.experts.135.w1", "model.layers.15.block_sparse_moe.experts.136.w1", "model.layers.15.block_sparse_moe.experts.137.w1", "model.layers.15.block_sparse_moe.experts.138.w1", "model.layers.15.block_sparse_moe.experts.139.w1", "model.layers.15.block_sparse_moe.experts.140.w1", "model.layers.15.block_sparse_moe.experts.141.w1", "model.layers.15.block_sparse_moe.experts.142.w1", "model.layers.15.block_sparse_moe.experts.143.w1", "model.layers.15.block_sparse_moe.experts.144.w1", "model.layers.15.block_sparse_moe.experts.145.w1", "model.layers.15.block_sparse_moe.experts.146.w1", "model.layers.15.block_sparse_moe.experts.147.w1", "model.layers.15.block_sparse_moe.experts.148.w1", "model.layers.15.block_sparse_moe.experts.149.w1", "model.layers.15.block_sparse_moe.experts.150.w1", "model.layers.15.block_sparse_moe.experts.151.w1", "model.layers.15.block_sparse_moe.experts.152.w1", "model.layers.15.block_sparse_moe.experts.153.w1", "model.layers.15.block_sparse_moe.experts.154.w1", "model.layers.15.block_sparse_moe.experts.155.w1", "model.layers.15.block_sparse_moe.experts.156.w1", "model.layers.15.block_sparse_moe.experts.157.w1", "model.layers.15.block_sparse_moe.experts.158.w1", "model.layers.15.block_sparse_moe.experts.159.w1", "model.layers.15.block_sparse_moe.experts.160.w1", "model.layers.15.block_sparse_moe.experts.161.w1", "model.layers.15.block_sparse_moe.experts.162.w1", "model.layers.15.block_sparse_moe.experts.163.w1", "model.layers.15.block_sparse_moe.experts.164.w1", "model.layers.15.block_sparse_moe.experts.165.w1", "model.layers.15.block_sparse_moe.experts.166.w1", "model.layers.15.block_sparse_moe.experts.167.w1", "model.layers.15.block_sparse_moe.experts.168.w1", "model.layers.15.block_sparse_moe.experts.169.w1", "model.layers.15.block_sparse_moe.experts.170.w1", "model.layers.15.block_sparse_moe.experts.171.w1", "model.layers.15.block_sparse_moe.experts.172.w1", "model.layers.15.block_sparse_moe.experts.173.w1", "model.layers.15.block_sparse_moe.experts.174.w1", "model.layers.15.block_sparse_moe.experts.175.w1", "model.layers.15.block_sparse_moe.experts.176.w1", "model.layers.15.block_sparse_moe.experts.177.w1", "model.layers.15.block_sparse_moe.experts.178.w1", "model.layers.15.block_sparse_moe.experts.179.w1", "model.layers.15.block_sparse_moe.experts.180.w1", "model.layers.15.block_sparse_moe.experts.181.w1", "model.layers.15.block_sparse_moe.experts.182.w1", "model.layers.15.block_sparse_moe.experts.183.w1", "model.layers.15.block_sparse_moe.experts.184.w1", "model.layers.15.block_sparse_moe.experts.185.w1", "model.layers.15.block_sparse_moe.experts.186.w1", "model.layers.15.block_sparse_moe.experts.187.w1", "model.layers.15.block_sparse_moe.experts.188.w1", "model.layers.15.block_sparse_moe.experts.189.w1", "model.layers.15.block_sparse_moe.experts.190.w1", "model.layers.15.block_sparse_moe.experts.191.w1", "model.layers.15.block_sparse_moe.experts.192.w1", "model.layers.15.block_sparse_moe.experts.193.w1", "model.layers.15.block_sparse_moe.experts.194.w1", "model.layers.15.block_sparse_moe.experts.195.w1", "model.layers.15.block_sparse_moe.experts.196.w1", "model.layers.15.block_sparse_moe.experts.197.w1", "model.layers.15.block_sparse_moe.experts.198.w1", "model.layers.15.block_sparse_moe.experts.199.w1", "model.layers.15.block_sparse_moe.experts.200.w1", "model.layers.15.block_sparse_moe.experts.201.w1", "model.layers.15.block_sparse_moe.experts.202.w1", "model.layers.15.block_sparse_moe.experts.203.w1", "model.layers.15.block_sparse_moe.experts.204.w1", "model.layers.15.block_sparse_moe.experts.205.w1", "model.layers.15.block_sparse_moe.experts.206.w1", "model.layers.15.block_sparse_moe.experts.207.w1", "model.layers.15.block_sparse_moe.experts.208.w1", "model.layers.15.block_sparse_moe.experts.209.w1", "model.layers.15.block_sparse_moe.experts.210.w1", "model.layers.15.block_sparse_moe.experts.211.w1", "model.layers.15.block_sparse_moe.experts.212.w1", "model.layers.15.block_sparse_moe.experts.213.w1", "model.layers.15.block_sparse_moe.experts.214.w1", "model.layers.15.block_sparse_moe.experts.215.w1", "model.layers.15.block_sparse_moe.experts.216.w1", "model.layers.15.block_sparse_moe.experts.217.w1", "model.layers.15.block_sparse_moe.experts.218.w1", "model.layers.15.block_sparse_moe.experts.219.w1", "model.layers.15.block_sparse_moe.experts.220.w1", "model.layers.15.block_sparse_moe.experts.221.w1", "model.layers.15.block_sparse_moe.experts.222.w1", "model.layers.15.block_sparse_moe.experts.223.w1", "model.layers.15.block_sparse_moe.experts.224.w1", "model.layers.15.block_sparse_moe.experts.225.w1", "model.layers.15.block_sparse_moe.experts.226.w1", "model.layers.15.block_sparse_moe.experts.227.w1", "model.layers.15.block_sparse_moe.experts.228.w1", "model.layers.15.block_sparse_moe.experts.229.w1", "model.layers.15.block_sparse_moe.experts.230.w1", "model.layers.15.block_sparse_moe.experts.231.w1", "model.layers.15.block_sparse_moe.experts.232.w1", "model.layers.15.block_sparse_moe.experts.233.w1", "model.layers.15.block_sparse_moe.experts.234.w1", "model.layers.15.block_sparse_moe.experts.235.w1", "model.layers.15.block_sparse_moe.experts.236.w1", "model.layers.15.block_sparse_moe.experts.237.w1", "model.layers.15.block_sparse_moe.experts.238.w1", "model.layers.15.block_sparse_moe.experts.239.w1", "model.layers.15.block_sparse_moe.experts.240.w1", "model.layers.15.block_sparse_moe.experts.241.w1", "model.layers.15.block_sparse_moe.experts.242.w1", "model.layers.15.block_sparse_moe.experts.243.w1", "model.layers.15.block_sparse_moe.experts.244.w1", "model.layers.15.block_sparse_moe.experts.245.w1", "model.layers.15.block_sparse_moe.experts.246.w1", "model.layers.15.block_sparse_moe.experts.247.w1", "model.layers.15.block_sparse_moe.experts.248.w1", "model.layers.15.block_sparse_moe.experts.249.w1", "model.layers.15.block_sparse_moe.experts.250.w1", "model.layers.15.block_sparse_moe.experts.251.w1", "model.layers.15.block_sparse_moe.experts.252.w1", "model.layers.15.block_sparse_moe.experts.253.w1", "model.layers.15.block_sparse_moe.experts.254.w1", "model.layers.15.block_sparse_moe.experts.255.w1", "model.layers.15.block_sparse_moe.experts.0.w3", "model.layers.15.block_sparse_moe.experts.1.w3", "model.layers.15.block_sparse_moe.experts.2.w3", "model.layers.15.block_sparse_moe.experts.3.w3", "model.layers.15.block_sparse_moe.experts.4.w3", "model.layers.15.block_sparse_moe.experts.5.w3", "model.layers.15.block_sparse_moe.experts.6.w3", "model.layers.15.block_sparse_moe.experts.7.w3", "model.layers.15.block_sparse_moe.experts.8.w3", "model.layers.15.block_sparse_moe.experts.9.w3", "model.layers.15.block_sparse_moe.experts.10.w3", "model.layers.15.block_sparse_moe.experts.11.w3", "model.layers.15.block_sparse_moe.experts.12.w3", "model.layers.15.block_sparse_moe.experts.13.w3", "model.layers.15.block_sparse_moe.experts.14.w3", "model.layers.15.block_sparse_moe.experts.15.w3", "model.layers.15.block_sparse_moe.experts.16.w3", "model.layers.15.block_sparse_moe.experts.17.w3", "model.layers.15.block_sparse_moe.experts.18.w3", "model.layers.15.block_sparse_moe.experts.19.w3", "model.layers.15.block_sparse_moe.experts.20.w3", "model.layers.15.block_sparse_moe.experts.21.w3", "model.layers.15.block_sparse_moe.experts.22.w3", "model.layers.15.block_sparse_moe.experts.23.w3", "model.layers.15.block_sparse_moe.experts.24.w3", "model.layers.15.block_sparse_moe.experts.25.w3", "model.layers.15.block_sparse_moe.experts.26.w3", "model.layers.15.block_sparse_moe.experts.27.w3", "model.layers.15.block_sparse_moe.experts.28.w3", "model.layers.15.block_sparse_moe.experts.29.w3", "model.layers.15.block_sparse_moe.experts.30.w3", "model.layers.15.block_sparse_moe.experts.31.w3", "model.layers.15.block_sparse_moe.experts.32.w3", "model.layers.15.block_sparse_moe.experts.33.w3", "model.layers.15.block_sparse_moe.experts.34.w3", "model.layers.15.block_sparse_moe.experts.35.w3", "model.layers.15.block_sparse_moe.experts.36.w3", "model.layers.15.block_sparse_moe.experts.37.w3", "model.layers.15.block_sparse_moe.experts.38.w3", "model.layers.15.block_sparse_moe.experts.39.w3", "model.layers.15.block_sparse_moe.experts.40.w3", "model.layers.15.block_sparse_moe.experts.41.w3", "model.layers.15.block_sparse_moe.experts.42.w3", "model.layers.15.block_sparse_moe.experts.43.w3", "model.layers.15.block_sparse_moe.experts.44.w3", "model.layers.15.block_sparse_moe.experts.45.w3", "model.layers.15.block_sparse_moe.experts.46.w3", "model.layers.15.block_sparse_moe.experts.47.w3", "model.layers.15.block_sparse_moe.experts.48.w3", "model.layers.15.block_sparse_moe.experts.49.w3", "model.layers.15.block_sparse_moe.experts.50.w3", "model.layers.15.block_sparse_moe.experts.51.w3", "model.layers.15.block_sparse_moe.experts.52.w3", "model.layers.15.block_sparse_moe.experts.53.w3", "model.layers.15.block_sparse_moe.experts.54.w3", "model.layers.15.block_sparse_moe.experts.55.w3", "model.layers.15.block_sparse_moe.experts.56.w3", "model.layers.15.block_sparse_moe.experts.57.w3", "model.layers.15.block_sparse_moe.experts.58.w3", "model.layers.15.block_sparse_moe.experts.59.w3", "model.layers.15.block_sparse_moe.experts.60.w3", "model.layers.15.block_sparse_moe.experts.61.w3", "model.layers.15.block_sparse_moe.experts.62.w3", "model.layers.15.block_sparse_moe.experts.63.w3", "model.layers.15.block_sparse_moe.experts.64.w3", "model.layers.15.block_sparse_moe.experts.65.w3", "model.layers.15.block_sparse_moe.experts.66.w3", "model.layers.15.block_sparse_moe.experts.67.w3", "model.layers.15.block_sparse_moe.experts.68.w3", "model.layers.15.block_sparse_moe.experts.69.w3", "model.layers.15.block_sparse_moe.experts.70.w3", "model.layers.15.block_sparse_moe.experts.71.w3", "model.layers.15.block_sparse_moe.experts.72.w3", "model.layers.15.block_sparse_moe.experts.73.w3", "model.layers.15.block_sparse_moe.experts.74.w3", "model.layers.15.block_sparse_moe.experts.75.w3", "model.layers.15.block_sparse_moe.experts.76.w3", "model.layers.15.block_sparse_moe.experts.77.w3", "model.layers.15.block_sparse_moe.experts.78.w3", "model.layers.15.block_sparse_moe.experts.79.w3", "model.layers.15.block_sparse_moe.experts.80.w3", "model.layers.15.block_sparse_moe.experts.81.w3", "model.layers.15.block_sparse_moe.experts.82.w3", "model.layers.15.block_sparse_moe.experts.83.w3", "model.layers.15.block_sparse_moe.experts.84.w3", "model.layers.15.block_sparse_moe.experts.85.w3", "model.layers.15.block_sparse_moe.experts.86.w3", "model.layers.15.block_sparse_moe.experts.87.w3", "model.layers.15.block_sparse_moe.experts.88.w3", "model.layers.15.block_sparse_moe.experts.89.w3", "model.layers.15.block_sparse_moe.experts.90.w3", "model.layers.15.block_sparse_moe.experts.91.w3", "model.layers.15.block_sparse_moe.experts.92.w3", "model.layers.15.block_sparse_moe.experts.93.w3", "model.layers.15.block_sparse_moe.experts.94.w3", "model.layers.15.block_sparse_moe.experts.95.w3", "model.layers.15.block_sparse_moe.experts.96.w3", "model.layers.15.block_sparse_moe.experts.97.w3", "model.layers.15.block_sparse_moe.experts.98.w3", "model.layers.15.block_sparse_moe.experts.99.w3", "model.layers.15.block_sparse_moe.experts.100.w3", "model.layers.15.block_sparse_moe.experts.101.w3", "model.layers.15.block_sparse_moe.experts.102.w3", "model.layers.15.block_sparse_moe.experts.103.w3", "model.layers.15.block_sparse_moe.experts.104.w3", "model.layers.15.block_sparse_moe.experts.105.w3", "model.layers.15.block_sparse_moe.experts.106.w3", "model.layers.15.block_sparse_moe.experts.107.w3", "model.layers.15.block_sparse_moe.experts.108.w3", "model.layers.15.block_sparse_moe.experts.109.w3", "model.layers.15.block_sparse_moe.experts.110.w3", "model.layers.15.block_sparse_moe.experts.111.w3", "model.layers.15.block_sparse_moe.experts.112.w3", "model.layers.15.block_sparse_moe.experts.113.w3", "model.layers.15.block_sparse_moe.experts.114.w3", "model.layers.15.block_sparse_moe.experts.115.w3", "model.layers.15.block_sparse_moe.experts.116.w3", "model.layers.15.block_sparse_moe.experts.117.w3", "model.layers.15.block_sparse_moe.experts.118.w3", "model.layers.15.block_sparse_moe.experts.119.w3", "model.layers.15.block_sparse_moe.experts.120.w3", "model.layers.15.block_sparse_moe.experts.121.w3", "model.layers.15.block_sparse_moe.experts.122.w3", "model.layers.15.block_sparse_moe.experts.123.w3", "model.layers.15.block_sparse_moe.experts.124.w3", "model.layers.15.block_sparse_moe.experts.125.w3", "model.layers.15.block_sparse_moe.experts.126.w3", "model.layers.15.block_sparse_moe.experts.127.w3", "model.layers.15.block_sparse_moe.experts.128.w3", "model.layers.15.block_sparse_moe.experts.129.w3", "model.layers.15.block_sparse_moe.experts.130.w3", "model.layers.15.block_sparse_moe.experts.131.w3", "model.layers.15.block_sparse_moe.experts.132.w3", "model.layers.15.block_sparse_moe.experts.133.w3", "model.layers.15.block_sparse_moe.experts.134.w3", "model.layers.15.block_sparse_moe.experts.135.w3", "model.layers.15.block_sparse_moe.experts.136.w3", "model.layers.15.block_sparse_moe.experts.137.w3", "model.layers.15.block_sparse_moe.experts.138.w3", "model.layers.15.block_sparse_moe.experts.139.w3", "model.layers.15.block_sparse_moe.experts.140.w3", "model.layers.15.block_sparse_moe.experts.141.w3", "model.layers.15.block_sparse_moe.experts.142.w3", "model.layers.15.block_sparse_moe.experts.143.w3", "model.layers.15.block_sparse_moe.experts.144.w3", "model.layers.15.block_sparse_moe.experts.145.w3", "model.layers.15.block_sparse_moe.experts.146.w3", "model.layers.15.block_sparse_moe.experts.147.w3", "model.layers.15.block_sparse_moe.experts.148.w3", "model.layers.15.block_sparse_moe.experts.149.w3", "model.layers.15.block_sparse_moe.experts.150.w3", "model.layers.15.block_sparse_moe.experts.151.w3", "model.layers.15.block_sparse_moe.experts.152.w3", "model.layers.15.block_sparse_moe.experts.153.w3", "model.layers.15.block_sparse_moe.experts.154.w3", "model.layers.15.block_sparse_moe.experts.155.w3", "model.layers.15.block_sparse_moe.experts.156.w3", "model.layers.15.block_sparse_moe.experts.157.w3", "model.layers.15.block_sparse_moe.experts.158.w3", "model.layers.15.block_sparse_moe.experts.159.w3", "model.layers.15.block_sparse_moe.experts.160.w3", "model.layers.15.block_sparse_moe.experts.161.w3", "model.layers.15.block_sparse_moe.experts.162.w3", "model.layers.15.block_sparse_moe.experts.163.w3", "model.layers.15.block_sparse_moe.experts.164.w3", "model.layers.15.block_sparse_moe.experts.165.w3", "model.layers.15.block_sparse_moe.experts.166.w3", "model.layers.15.block_sparse_moe.experts.167.w3", "model.layers.15.block_sparse_moe.experts.168.w3", "model.layers.15.block_sparse_moe.experts.169.w3", "model.layers.15.block_sparse_moe.experts.170.w3", "model.layers.15.block_sparse_moe.experts.171.w3", "model.layers.15.block_sparse_moe.experts.172.w3", "model.layers.15.block_sparse_moe.experts.173.w3", "model.layers.15.block_sparse_moe.experts.174.w3", "model.layers.15.block_sparse_moe.experts.175.w3", "model.layers.15.block_sparse_moe.experts.176.w3", "model.layers.15.block_sparse_moe.experts.177.w3", "model.layers.15.block_sparse_moe.experts.178.w3", "model.layers.15.block_sparse_moe.experts.179.w3", "model.layers.15.block_sparse_moe.experts.180.w3", "model.layers.15.block_sparse_moe.experts.181.w3", "model.layers.15.block_sparse_moe.experts.182.w3", "model.layers.15.block_sparse_moe.experts.183.w3", "model.layers.15.block_sparse_moe.experts.184.w3", "model.layers.15.block_sparse_moe.experts.185.w3", "model.layers.15.block_sparse_moe.experts.186.w3", "model.layers.15.block_sparse_moe.experts.187.w3", "model.layers.15.block_sparse_moe.experts.188.w3", "model.layers.15.block_sparse_moe.experts.189.w3", "model.layers.15.block_sparse_moe.experts.190.w3", "model.layers.15.block_sparse_moe.experts.191.w3", "model.layers.15.block_sparse_moe.experts.192.w3", "model.layers.15.block_sparse_moe.experts.193.w3", "model.layers.15.block_sparse_moe.experts.194.w3", "model.layers.15.block_sparse_moe.experts.195.w3", "model.layers.15.block_sparse_moe.experts.196.w3", "model.layers.15.block_sparse_moe.experts.197.w3", "model.layers.15.block_sparse_moe.experts.198.w3", "model.layers.15.block_sparse_moe.experts.199.w3", "model.layers.15.block_sparse_moe.experts.200.w3", "model.layers.15.block_sparse_moe.experts.201.w3", "model.layers.15.block_sparse_moe.experts.202.w3", "model.layers.15.block_sparse_moe.experts.203.w3", "model.layers.15.block_sparse_moe.experts.204.w3", "model.layers.15.block_sparse_moe.experts.205.w3", "model.layers.15.block_sparse_moe.experts.206.w3", "model.layers.15.block_sparse_moe.experts.207.w3", "model.layers.15.block_sparse_moe.experts.208.w3", "model.layers.15.block_sparse_moe.experts.209.w3", "model.layers.15.block_sparse_moe.experts.210.w3", "model.layers.15.block_sparse_moe.experts.211.w3", "model.layers.15.block_sparse_moe.experts.212.w3", "model.layers.15.block_sparse_moe.experts.213.w3", "model.layers.15.block_sparse_moe.experts.214.w3", "model.layers.15.block_sparse_moe.experts.215.w3", "model.layers.15.block_sparse_moe.experts.216.w3", "model.layers.15.block_sparse_moe.experts.217.w3", "model.layers.15.block_sparse_moe.experts.218.w3", "model.layers.15.block_sparse_moe.experts.219.w3", "model.layers.15.block_sparse_moe.experts.220.w3", "model.layers.15.block_sparse_moe.experts.221.w3", "model.layers.15.block_sparse_moe.experts.222.w3", "model.layers.15.block_sparse_moe.experts.223.w3", "model.layers.15.block_sparse_moe.experts.224.w3", "model.layers.15.block_sparse_moe.experts.225.w3", "model.layers.15.block_sparse_moe.experts.226.w3", "model.layers.15.block_sparse_moe.experts.227.w3", "model.layers.15.block_sparse_moe.experts.228.w3", "model.layers.15.block_sparse_moe.experts.229.w3", "model.layers.15.block_sparse_moe.experts.230.w3", "model.layers.15.block_sparse_moe.experts.231.w3", "model.layers.15.block_sparse_moe.experts.232.w3", "model.layers.15.block_sparse_moe.experts.233.w3", "model.layers.15.block_sparse_moe.experts.234.w3", "model.layers.15.block_sparse_moe.experts.235.w3", "model.layers.15.block_sparse_moe.experts.236.w3", "model.layers.15.block_sparse_moe.experts.237.w3", "model.layers.15.block_sparse_moe.experts.238.w3", "model.layers.15.block_sparse_moe.experts.239.w3", "model.layers.15.block_sparse_moe.experts.240.w3", "model.layers.15.block_sparse_moe.experts.241.w3", "model.layers.15.block_sparse_moe.experts.242.w3", "model.layers.15.block_sparse_moe.experts.243.w3", "model.layers.15.block_sparse_moe.experts.244.w3", "model.layers.15.block_sparse_moe.experts.245.w3", "model.layers.15.block_sparse_moe.experts.246.w3", "model.layers.15.block_sparse_moe.experts.247.w3", "model.layers.15.block_sparse_moe.experts.248.w3", "model.layers.15.block_sparse_moe.experts.249.w3", "model.layers.15.block_sparse_moe.experts.250.w3", "model.layers.15.block_sparse_moe.experts.251.w3", "model.layers.15.block_sparse_moe.experts.252.w3", "model.layers.15.block_sparse_moe.experts.253.w3", "model.layers.15.block_sparse_moe.experts.254.w3", "model.layers.15.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.001042661070823625, "dbits": 2415919104 } ] }, { "idx": 79, "layers": [ "model.layers.15.block_sparse_moe.experts.0.w2", "model.layers.15.block_sparse_moe.experts.1.w2", "model.layers.15.block_sparse_moe.experts.2.w2", "model.layers.15.block_sparse_moe.experts.3.w2", "model.layers.15.block_sparse_moe.experts.4.w2", "model.layers.15.block_sparse_moe.experts.5.w2", "model.layers.15.block_sparse_moe.experts.6.w2", "model.layers.15.block_sparse_moe.experts.7.w2", "model.layers.15.block_sparse_moe.experts.8.w2", "model.layers.15.block_sparse_moe.experts.9.w2", "model.layers.15.block_sparse_moe.experts.10.w2", "model.layers.15.block_sparse_moe.experts.11.w2", "model.layers.15.block_sparse_moe.experts.12.w2", "model.layers.15.block_sparse_moe.experts.13.w2", "model.layers.15.block_sparse_moe.experts.14.w2", "model.layers.15.block_sparse_moe.experts.15.w2", "model.layers.15.block_sparse_moe.experts.16.w2", "model.layers.15.block_sparse_moe.experts.17.w2", "model.layers.15.block_sparse_moe.experts.18.w2", "model.layers.15.block_sparse_moe.experts.19.w2", "model.layers.15.block_sparse_moe.experts.20.w2", "model.layers.15.block_sparse_moe.experts.21.w2", "model.layers.15.block_sparse_moe.experts.22.w2", "model.layers.15.block_sparse_moe.experts.23.w2", "model.layers.15.block_sparse_moe.experts.24.w2", "model.layers.15.block_sparse_moe.experts.25.w2", "model.layers.15.block_sparse_moe.experts.26.w2", "model.layers.15.block_sparse_moe.experts.27.w2", "model.layers.15.block_sparse_moe.experts.28.w2", "model.layers.15.block_sparse_moe.experts.29.w2", "model.layers.15.block_sparse_moe.experts.30.w2", "model.layers.15.block_sparse_moe.experts.31.w2", "model.layers.15.block_sparse_moe.experts.32.w2", "model.layers.15.block_sparse_moe.experts.33.w2", "model.layers.15.block_sparse_moe.experts.34.w2", "model.layers.15.block_sparse_moe.experts.35.w2", "model.layers.15.block_sparse_moe.experts.36.w2", "model.layers.15.block_sparse_moe.experts.37.w2", "model.layers.15.block_sparse_moe.experts.38.w2", "model.layers.15.block_sparse_moe.experts.39.w2", "model.layers.15.block_sparse_moe.experts.40.w2", "model.layers.15.block_sparse_moe.experts.41.w2", "model.layers.15.block_sparse_moe.experts.42.w2", "model.layers.15.block_sparse_moe.experts.43.w2", "model.layers.15.block_sparse_moe.experts.44.w2", "model.layers.15.block_sparse_moe.experts.45.w2", "model.layers.15.block_sparse_moe.experts.46.w2", "model.layers.15.block_sparse_moe.experts.47.w2", "model.layers.15.block_sparse_moe.experts.48.w2", "model.layers.15.block_sparse_moe.experts.49.w2", "model.layers.15.block_sparse_moe.experts.50.w2", "model.layers.15.block_sparse_moe.experts.51.w2", "model.layers.15.block_sparse_moe.experts.52.w2", "model.layers.15.block_sparse_moe.experts.53.w2", "model.layers.15.block_sparse_moe.experts.54.w2", "model.layers.15.block_sparse_moe.experts.55.w2", "model.layers.15.block_sparse_moe.experts.56.w2", "model.layers.15.block_sparse_moe.experts.57.w2", "model.layers.15.block_sparse_moe.experts.58.w2", "model.layers.15.block_sparse_moe.experts.59.w2", "model.layers.15.block_sparse_moe.experts.60.w2", "model.layers.15.block_sparse_moe.experts.61.w2", "model.layers.15.block_sparse_moe.experts.62.w2", "model.layers.15.block_sparse_moe.experts.63.w2", "model.layers.15.block_sparse_moe.experts.64.w2", "model.layers.15.block_sparse_moe.experts.65.w2", "model.layers.15.block_sparse_moe.experts.66.w2", "model.layers.15.block_sparse_moe.experts.67.w2", "model.layers.15.block_sparse_moe.experts.68.w2", "model.layers.15.block_sparse_moe.experts.69.w2", "model.layers.15.block_sparse_moe.experts.70.w2", "model.layers.15.block_sparse_moe.experts.71.w2", "model.layers.15.block_sparse_moe.experts.72.w2", "model.layers.15.block_sparse_moe.experts.73.w2", "model.layers.15.block_sparse_moe.experts.74.w2", "model.layers.15.block_sparse_moe.experts.75.w2", "model.layers.15.block_sparse_moe.experts.76.w2", "model.layers.15.block_sparse_moe.experts.77.w2", "model.layers.15.block_sparse_moe.experts.78.w2", "model.layers.15.block_sparse_moe.experts.79.w2", "model.layers.15.block_sparse_moe.experts.80.w2", "model.layers.15.block_sparse_moe.experts.81.w2", "model.layers.15.block_sparse_moe.experts.82.w2", "model.layers.15.block_sparse_moe.experts.83.w2", "model.layers.15.block_sparse_moe.experts.84.w2", "model.layers.15.block_sparse_moe.experts.85.w2", "model.layers.15.block_sparse_moe.experts.86.w2", "model.layers.15.block_sparse_moe.experts.87.w2", "model.layers.15.block_sparse_moe.experts.88.w2", "model.layers.15.block_sparse_moe.experts.89.w2", "model.layers.15.block_sparse_moe.experts.90.w2", "model.layers.15.block_sparse_moe.experts.91.w2", "model.layers.15.block_sparse_moe.experts.92.w2", "model.layers.15.block_sparse_moe.experts.93.w2", "model.layers.15.block_sparse_moe.experts.94.w2", "model.layers.15.block_sparse_moe.experts.95.w2", "model.layers.15.block_sparse_moe.experts.96.w2", "model.layers.15.block_sparse_moe.experts.97.w2", "model.layers.15.block_sparse_moe.experts.98.w2", "model.layers.15.block_sparse_moe.experts.99.w2", "model.layers.15.block_sparse_moe.experts.100.w2", "model.layers.15.block_sparse_moe.experts.101.w2", "model.layers.15.block_sparse_moe.experts.102.w2", "model.layers.15.block_sparse_moe.experts.103.w2", "model.layers.15.block_sparse_moe.experts.104.w2", "model.layers.15.block_sparse_moe.experts.105.w2", "model.layers.15.block_sparse_moe.experts.106.w2", "model.layers.15.block_sparse_moe.experts.107.w2", "model.layers.15.block_sparse_moe.experts.108.w2", "model.layers.15.block_sparse_moe.experts.109.w2", "model.layers.15.block_sparse_moe.experts.110.w2", "model.layers.15.block_sparse_moe.experts.111.w2", "model.layers.15.block_sparse_moe.experts.112.w2", "model.layers.15.block_sparse_moe.experts.113.w2", "model.layers.15.block_sparse_moe.experts.114.w2", "model.layers.15.block_sparse_moe.experts.115.w2", "model.layers.15.block_sparse_moe.experts.116.w2", "model.layers.15.block_sparse_moe.experts.117.w2", "model.layers.15.block_sparse_moe.experts.118.w2", "model.layers.15.block_sparse_moe.experts.119.w2", "model.layers.15.block_sparse_moe.experts.120.w2", "model.layers.15.block_sparse_moe.experts.121.w2", "model.layers.15.block_sparse_moe.experts.122.w2", "model.layers.15.block_sparse_moe.experts.123.w2", "model.layers.15.block_sparse_moe.experts.124.w2", "model.layers.15.block_sparse_moe.experts.125.w2", "model.layers.15.block_sparse_moe.experts.126.w2", "model.layers.15.block_sparse_moe.experts.127.w2", "model.layers.15.block_sparse_moe.experts.128.w2", "model.layers.15.block_sparse_moe.experts.129.w2", "model.layers.15.block_sparse_moe.experts.130.w2", "model.layers.15.block_sparse_moe.experts.131.w2", "model.layers.15.block_sparse_moe.experts.132.w2", "model.layers.15.block_sparse_moe.experts.133.w2", "model.layers.15.block_sparse_moe.experts.134.w2", "model.layers.15.block_sparse_moe.experts.135.w2", "model.layers.15.block_sparse_moe.experts.136.w2", "model.layers.15.block_sparse_moe.experts.137.w2", "model.layers.15.block_sparse_moe.experts.138.w2", "model.layers.15.block_sparse_moe.experts.139.w2", "model.layers.15.block_sparse_moe.experts.140.w2", "model.layers.15.block_sparse_moe.experts.141.w2", "model.layers.15.block_sparse_moe.experts.142.w2", "model.layers.15.block_sparse_moe.experts.143.w2", "model.layers.15.block_sparse_moe.experts.144.w2", "model.layers.15.block_sparse_moe.experts.145.w2", "model.layers.15.block_sparse_moe.experts.146.w2", "model.layers.15.block_sparse_moe.experts.147.w2", "model.layers.15.block_sparse_moe.experts.148.w2", "model.layers.15.block_sparse_moe.experts.149.w2", "model.layers.15.block_sparse_moe.experts.150.w2", "model.layers.15.block_sparse_moe.experts.151.w2", "model.layers.15.block_sparse_moe.experts.152.w2", "model.layers.15.block_sparse_moe.experts.153.w2", "model.layers.15.block_sparse_moe.experts.154.w2", "model.layers.15.block_sparse_moe.experts.155.w2", "model.layers.15.block_sparse_moe.experts.156.w2", "model.layers.15.block_sparse_moe.experts.157.w2", "model.layers.15.block_sparse_moe.experts.158.w2", "model.layers.15.block_sparse_moe.experts.159.w2", "model.layers.15.block_sparse_moe.experts.160.w2", "model.layers.15.block_sparse_moe.experts.161.w2", "model.layers.15.block_sparse_moe.experts.162.w2", "model.layers.15.block_sparse_moe.experts.163.w2", "model.layers.15.block_sparse_moe.experts.164.w2", "model.layers.15.block_sparse_moe.experts.165.w2", "model.layers.15.block_sparse_moe.experts.166.w2", "model.layers.15.block_sparse_moe.experts.167.w2", "model.layers.15.block_sparse_moe.experts.168.w2", "model.layers.15.block_sparse_moe.experts.169.w2", "model.layers.15.block_sparse_moe.experts.170.w2", "model.layers.15.block_sparse_moe.experts.171.w2", "model.layers.15.block_sparse_moe.experts.172.w2", "model.layers.15.block_sparse_moe.experts.173.w2", "model.layers.15.block_sparse_moe.experts.174.w2", "model.layers.15.block_sparse_moe.experts.175.w2", "model.layers.15.block_sparse_moe.experts.176.w2", "model.layers.15.block_sparse_moe.experts.177.w2", "model.layers.15.block_sparse_moe.experts.178.w2", "model.layers.15.block_sparse_moe.experts.179.w2", "model.layers.15.block_sparse_moe.experts.180.w2", "model.layers.15.block_sparse_moe.experts.181.w2", "model.layers.15.block_sparse_moe.experts.182.w2", "model.layers.15.block_sparse_moe.experts.183.w2", "model.layers.15.block_sparse_moe.experts.184.w2", "model.layers.15.block_sparse_moe.experts.185.w2", "model.layers.15.block_sparse_moe.experts.186.w2", "model.layers.15.block_sparse_moe.experts.187.w2", "model.layers.15.block_sparse_moe.experts.188.w2", "model.layers.15.block_sparse_moe.experts.189.w2", "model.layers.15.block_sparse_moe.experts.190.w2", "model.layers.15.block_sparse_moe.experts.191.w2", "model.layers.15.block_sparse_moe.experts.192.w2", "model.layers.15.block_sparse_moe.experts.193.w2", "model.layers.15.block_sparse_moe.experts.194.w2", "model.layers.15.block_sparse_moe.experts.195.w2", "model.layers.15.block_sparse_moe.experts.196.w2", "model.layers.15.block_sparse_moe.experts.197.w2", "model.layers.15.block_sparse_moe.experts.198.w2", "model.layers.15.block_sparse_moe.experts.199.w2", "model.layers.15.block_sparse_moe.experts.200.w2", "model.layers.15.block_sparse_moe.experts.201.w2", "model.layers.15.block_sparse_moe.experts.202.w2", "model.layers.15.block_sparse_moe.experts.203.w2", "model.layers.15.block_sparse_moe.experts.204.w2", "model.layers.15.block_sparse_moe.experts.205.w2", "model.layers.15.block_sparse_moe.experts.206.w2", "model.layers.15.block_sparse_moe.experts.207.w2", "model.layers.15.block_sparse_moe.experts.208.w2", "model.layers.15.block_sparse_moe.experts.209.w2", "model.layers.15.block_sparse_moe.experts.210.w2", "model.layers.15.block_sparse_moe.experts.211.w2", "model.layers.15.block_sparse_moe.experts.212.w2", "model.layers.15.block_sparse_moe.experts.213.w2", "model.layers.15.block_sparse_moe.experts.214.w2", "model.layers.15.block_sparse_moe.experts.215.w2", "model.layers.15.block_sparse_moe.experts.216.w2", "model.layers.15.block_sparse_moe.experts.217.w2", "model.layers.15.block_sparse_moe.experts.218.w2", "model.layers.15.block_sparse_moe.experts.219.w2", "model.layers.15.block_sparse_moe.experts.220.w2", "model.layers.15.block_sparse_moe.experts.221.w2", "model.layers.15.block_sparse_moe.experts.222.w2", "model.layers.15.block_sparse_moe.experts.223.w2", "model.layers.15.block_sparse_moe.experts.224.w2", "model.layers.15.block_sparse_moe.experts.225.w2", "model.layers.15.block_sparse_moe.experts.226.w2", "model.layers.15.block_sparse_moe.experts.227.w2", "model.layers.15.block_sparse_moe.experts.228.w2", "model.layers.15.block_sparse_moe.experts.229.w2", "model.layers.15.block_sparse_moe.experts.230.w2", "model.layers.15.block_sparse_moe.experts.231.w2", "model.layers.15.block_sparse_moe.experts.232.w2", "model.layers.15.block_sparse_moe.experts.233.w2", "model.layers.15.block_sparse_moe.experts.234.w2", "model.layers.15.block_sparse_moe.experts.235.w2", "model.layers.15.block_sparse_moe.experts.236.w2", "model.layers.15.block_sparse_moe.experts.237.w2", "model.layers.15.block_sparse_moe.experts.238.w2", "model.layers.15.block_sparse_moe.experts.239.w2", "model.layers.15.block_sparse_moe.experts.240.w2", "model.layers.15.block_sparse_moe.experts.241.w2", "model.layers.15.block_sparse_moe.experts.242.w2", "model.layers.15.block_sparse_moe.experts.243.w2", "model.layers.15.block_sparse_moe.experts.244.w2", "model.layers.15.block_sparse_moe.experts.245.w2", "model.layers.15.block_sparse_moe.experts.246.w2", "model.layers.15.block_sparse_moe.experts.247.w2", "model.layers.15.block_sparse_moe.experts.248.w2", "model.layers.15.block_sparse_moe.experts.249.w2", "model.layers.15.block_sparse_moe.experts.250.w2", "model.layers.15.block_sparse_moe.experts.251.w2", "model.layers.15.block_sparse_moe.experts.252.w2", "model.layers.15.block_sparse_moe.experts.253.w2", "model.layers.15.block_sparse_moe.experts.254.w2", "model.layers.15.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00017550885677342087, "dbits": 1207959552 } ] }, { "idx": 80, "layers": [ "model.layers.16.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0010639429092407227, "dbits": 18874368 } ] }, { "idx": 81, "layers": [ "model.layers.16.self_attn.k_proj", "model.layers.16.self_attn.v_proj" ], "candidates": [ { "dkld": -0.010319110751151994, "dbits": 6291456 } ] }, { "idx": 82, "layers": [ "model.layers.16.self_attn.o_proj" ], "candidates": [ { "dkld": 0.005490726232528775, "dbits": 18874368 } ] }, { "idx": 83, "layers": [ "model.layers.16.block_sparse_moe.experts.0.w1", "model.layers.16.block_sparse_moe.experts.1.w1", "model.layers.16.block_sparse_moe.experts.2.w1", "model.layers.16.block_sparse_moe.experts.3.w1", "model.layers.16.block_sparse_moe.experts.4.w1", "model.layers.16.block_sparse_moe.experts.5.w1", "model.layers.16.block_sparse_moe.experts.6.w1", "model.layers.16.block_sparse_moe.experts.7.w1", "model.layers.16.block_sparse_moe.experts.8.w1", "model.layers.16.block_sparse_moe.experts.9.w1", "model.layers.16.block_sparse_moe.experts.10.w1", "model.layers.16.block_sparse_moe.experts.11.w1", "model.layers.16.block_sparse_moe.experts.12.w1", "model.layers.16.block_sparse_moe.experts.13.w1", "model.layers.16.block_sparse_moe.experts.14.w1", "model.layers.16.block_sparse_moe.experts.15.w1", "model.layers.16.block_sparse_moe.experts.16.w1", "model.layers.16.block_sparse_moe.experts.17.w1", "model.layers.16.block_sparse_moe.experts.18.w1", "model.layers.16.block_sparse_moe.experts.19.w1", "model.layers.16.block_sparse_moe.experts.20.w1", "model.layers.16.block_sparse_moe.experts.21.w1", "model.layers.16.block_sparse_moe.experts.22.w1", "model.layers.16.block_sparse_moe.experts.23.w1", "model.layers.16.block_sparse_moe.experts.24.w1", "model.layers.16.block_sparse_moe.experts.25.w1", "model.layers.16.block_sparse_moe.experts.26.w1", "model.layers.16.block_sparse_moe.experts.27.w1", "model.layers.16.block_sparse_moe.experts.28.w1", "model.layers.16.block_sparse_moe.experts.29.w1", "model.layers.16.block_sparse_moe.experts.30.w1", "model.layers.16.block_sparse_moe.experts.31.w1", "model.layers.16.block_sparse_moe.experts.32.w1", "model.layers.16.block_sparse_moe.experts.33.w1", "model.layers.16.block_sparse_moe.experts.34.w1", "model.layers.16.block_sparse_moe.experts.35.w1", "model.layers.16.block_sparse_moe.experts.36.w1", "model.layers.16.block_sparse_moe.experts.37.w1", "model.layers.16.block_sparse_moe.experts.38.w1", "model.layers.16.block_sparse_moe.experts.39.w1", "model.layers.16.block_sparse_moe.experts.40.w1", "model.layers.16.block_sparse_moe.experts.41.w1", "model.layers.16.block_sparse_moe.experts.42.w1", "model.layers.16.block_sparse_moe.experts.43.w1", "model.layers.16.block_sparse_moe.experts.44.w1", "model.layers.16.block_sparse_moe.experts.45.w1", "model.layers.16.block_sparse_moe.experts.46.w1", "model.layers.16.block_sparse_moe.experts.47.w1", "model.layers.16.block_sparse_moe.experts.48.w1", "model.layers.16.block_sparse_moe.experts.49.w1", "model.layers.16.block_sparse_moe.experts.50.w1", "model.layers.16.block_sparse_moe.experts.51.w1", "model.layers.16.block_sparse_moe.experts.52.w1", "model.layers.16.block_sparse_moe.experts.53.w1", "model.layers.16.block_sparse_moe.experts.54.w1", "model.layers.16.block_sparse_moe.experts.55.w1", "model.layers.16.block_sparse_moe.experts.56.w1", "model.layers.16.block_sparse_moe.experts.57.w1", "model.layers.16.block_sparse_moe.experts.58.w1", "model.layers.16.block_sparse_moe.experts.59.w1", "model.layers.16.block_sparse_moe.experts.60.w1", "model.layers.16.block_sparse_moe.experts.61.w1", "model.layers.16.block_sparse_moe.experts.62.w1", "model.layers.16.block_sparse_moe.experts.63.w1", "model.layers.16.block_sparse_moe.experts.64.w1", "model.layers.16.block_sparse_moe.experts.65.w1", "model.layers.16.block_sparse_moe.experts.66.w1", "model.layers.16.block_sparse_moe.experts.67.w1", "model.layers.16.block_sparse_moe.experts.68.w1", "model.layers.16.block_sparse_moe.experts.69.w1", "model.layers.16.block_sparse_moe.experts.70.w1", "model.layers.16.block_sparse_moe.experts.71.w1", "model.layers.16.block_sparse_moe.experts.72.w1", "model.layers.16.block_sparse_moe.experts.73.w1", "model.layers.16.block_sparse_moe.experts.74.w1", "model.layers.16.block_sparse_moe.experts.75.w1", "model.layers.16.block_sparse_moe.experts.76.w1", "model.layers.16.block_sparse_moe.experts.77.w1", "model.layers.16.block_sparse_moe.experts.78.w1", "model.layers.16.block_sparse_moe.experts.79.w1", "model.layers.16.block_sparse_moe.experts.80.w1", "model.layers.16.block_sparse_moe.experts.81.w1", "model.layers.16.block_sparse_moe.experts.82.w1", "model.layers.16.block_sparse_moe.experts.83.w1", "model.layers.16.block_sparse_moe.experts.84.w1", "model.layers.16.block_sparse_moe.experts.85.w1", "model.layers.16.block_sparse_moe.experts.86.w1", "model.layers.16.block_sparse_moe.experts.87.w1", "model.layers.16.block_sparse_moe.experts.88.w1", "model.layers.16.block_sparse_moe.experts.89.w1", "model.layers.16.block_sparse_moe.experts.90.w1", "model.layers.16.block_sparse_moe.experts.91.w1", "model.layers.16.block_sparse_moe.experts.92.w1", "model.layers.16.block_sparse_moe.experts.93.w1", "model.layers.16.block_sparse_moe.experts.94.w1", "model.layers.16.block_sparse_moe.experts.95.w1", "model.layers.16.block_sparse_moe.experts.96.w1", "model.layers.16.block_sparse_moe.experts.97.w1", "model.layers.16.block_sparse_moe.experts.98.w1", "model.layers.16.block_sparse_moe.experts.99.w1", "model.layers.16.block_sparse_moe.experts.100.w1", "model.layers.16.block_sparse_moe.experts.101.w1", "model.layers.16.block_sparse_moe.experts.102.w1", "model.layers.16.block_sparse_moe.experts.103.w1", "model.layers.16.block_sparse_moe.experts.104.w1", "model.layers.16.block_sparse_moe.experts.105.w1", "model.layers.16.block_sparse_moe.experts.106.w1", "model.layers.16.block_sparse_moe.experts.107.w1", "model.layers.16.block_sparse_moe.experts.108.w1", "model.layers.16.block_sparse_moe.experts.109.w1", "model.layers.16.block_sparse_moe.experts.110.w1", "model.layers.16.block_sparse_moe.experts.111.w1", "model.layers.16.block_sparse_moe.experts.112.w1", "model.layers.16.block_sparse_moe.experts.113.w1", "model.layers.16.block_sparse_moe.experts.114.w1", "model.layers.16.block_sparse_moe.experts.115.w1", "model.layers.16.block_sparse_moe.experts.116.w1", "model.layers.16.block_sparse_moe.experts.117.w1", "model.layers.16.block_sparse_moe.experts.118.w1", "model.layers.16.block_sparse_moe.experts.119.w1", "model.layers.16.block_sparse_moe.experts.120.w1", "model.layers.16.block_sparse_moe.experts.121.w1", "model.layers.16.block_sparse_moe.experts.122.w1", "model.layers.16.block_sparse_moe.experts.123.w1", "model.layers.16.block_sparse_moe.experts.124.w1", "model.layers.16.block_sparse_moe.experts.125.w1", "model.layers.16.block_sparse_moe.experts.126.w1", "model.layers.16.block_sparse_moe.experts.127.w1", "model.layers.16.block_sparse_moe.experts.128.w1", "model.layers.16.block_sparse_moe.experts.129.w1", "model.layers.16.block_sparse_moe.experts.130.w1", "model.layers.16.block_sparse_moe.experts.131.w1", "model.layers.16.block_sparse_moe.experts.132.w1", "model.layers.16.block_sparse_moe.experts.133.w1", "model.layers.16.block_sparse_moe.experts.134.w1", "model.layers.16.block_sparse_moe.experts.135.w1", "model.layers.16.block_sparse_moe.experts.136.w1", "model.layers.16.block_sparse_moe.experts.137.w1", "model.layers.16.block_sparse_moe.experts.138.w1", "model.layers.16.block_sparse_moe.experts.139.w1", "model.layers.16.block_sparse_moe.experts.140.w1", "model.layers.16.block_sparse_moe.experts.141.w1", "model.layers.16.block_sparse_moe.experts.142.w1", "model.layers.16.block_sparse_moe.experts.143.w1", "model.layers.16.block_sparse_moe.experts.144.w1", "model.layers.16.block_sparse_moe.experts.145.w1", "model.layers.16.block_sparse_moe.experts.146.w1", "model.layers.16.block_sparse_moe.experts.147.w1", "model.layers.16.block_sparse_moe.experts.148.w1", "model.layers.16.block_sparse_moe.experts.149.w1", "model.layers.16.block_sparse_moe.experts.150.w1", "model.layers.16.block_sparse_moe.experts.151.w1", "model.layers.16.block_sparse_moe.experts.152.w1", "model.layers.16.block_sparse_moe.experts.153.w1", "model.layers.16.block_sparse_moe.experts.154.w1", "model.layers.16.block_sparse_moe.experts.155.w1", "model.layers.16.block_sparse_moe.experts.156.w1", "model.layers.16.block_sparse_moe.experts.157.w1", "model.layers.16.block_sparse_moe.experts.158.w1", "model.layers.16.block_sparse_moe.experts.159.w1", "model.layers.16.block_sparse_moe.experts.160.w1", "model.layers.16.block_sparse_moe.experts.161.w1", "model.layers.16.block_sparse_moe.experts.162.w1", "model.layers.16.block_sparse_moe.experts.163.w1", "model.layers.16.block_sparse_moe.experts.164.w1", "model.layers.16.block_sparse_moe.experts.165.w1", "model.layers.16.block_sparse_moe.experts.166.w1", "model.layers.16.block_sparse_moe.experts.167.w1", "model.layers.16.block_sparse_moe.experts.168.w1", "model.layers.16.block_sparse_moe.experts.169.w1", "model.layers.16.block_sparse_moe.experts.170.w1", "model.layers.16.block_sparse_moe.experts.171.w1", "model.layers.16.block_sparse_moe.experts.172.w1", "model.layers.16.block_sparse_moe.experts.173.w1", "model.layers.16.block_sparse_moe.experts.174.w1", "model.layers.16.block_sparse_moe.experts.175.w1", "model.layers.16.block_sparse_moe.experts.176.w1", "model.layers.16.block_sparse_moe.experts.177.w1", "model.layers.16.block_sparse_moe.experts.178.w1", "model.layers.16.block_sparse_moe.experts.179.w1", "model.layers.16.block_sparse_moe.experts.180.w1", "model.layers.16.block_sparse_moe.experts.181.w1", "model.layers.16.block_sparse_moe.experts.182.w1", "model.layers.16.block_sparse_moe.experts.183.w1", "model.layers.16.block_sparse_moe.experts.184.w1", "model.layers.16.block_sparse_moe.experts.185.w1", "model.layers.16.block_sparse_moe.experts.186.w1", "model.layers.16.block_sparse_moe.experts.187.w1", "model.layers.16.block_sparse_moe.experts.188.w1", "model.layers.16.block_sparse_moe.experts.189.w1", "model.layers.16.block_sparse_moe.experts.190.w1", "model.layers.16.block_sparse_moe.experts.191.w1", "model.layers.16.block_sparse_moe.experts.192.w1", "model.layers.16.block_sparse_moe.experts.193.w1", "model.layers.16.block_sparse_moe.experts.194.w1", "model.layers.16.block_sparse_moe.experts.195.w1", "model.layers.16.block_sparse_moe.experts.196.w1", "model.layers.16.block_sparse_moe.experts.197.w1", "model.layers.16.block_sparse_moe.experts.198.w1", "model.layers.16.block_sparse_moe.experts.199.w1", "model.layers.16.block_sparse_moe.experts.200.w1", "model.layers.16.block_sparse_moe.experts.201.w1", "model.layers.16.block_sparse_moe.experts.202.w1", "model.layers.16.block_sparse_moe.experts.203.w1", "model.layers.16.block_sparse_moe.experts.204.w1", "model.layers.16.block_sparse_moe.experts.205.w1", "model.layers.16.block_sparse_moe.experts.206.w1", "model.layers.16.block_sparse_moe.experts.207.w1", "model.layers.16.block_sparse_moe.experts.208.w1", "model.layers.16.block_sparse_moe.experts.209.w1", "model.layers.16.block_sparse_moe.experts.210.w1", "model.layers.16.block_sparse_moe.experts.211.w1", "model.layers.16.block_sparse_moe.experts.212.w1", "model.layers.16.block_sparse_moe.experts.213.w1", "model.layers.16.block_sparse_moe.experts.214.w1", "model.layers.16.block_sparse_moe.experts.215.w1", "model.layers.16.block_sparse_moe.experts.216.w1", "model.layers.16.block_sparse_moe.experts.217.w1", "model.layers.16.block_sparse_moe.experts.218.w1", "model.layers.16.block_sparse_moe.experts.219.w1", "model.layers.16.block_sparse_moe.experts.220.w1", "model.layers.16.block_sparse_moe.experts.221.w1", "model.layers.16.block_sparse_moe.experts.222.w1", "model.layers.16.block_sparse_moe.experts.223.w1", "model.layers.16.block_sparse_moe.experts.224.w1", "model.layers.16.block_sparse_moe.experts.225.w1", "model.layers.16.block_sparse_moe.experts.226.w1", "model.layers.16.block_sparse_moe.experts.227.w1", "model.layers.16.block_sparse_moe.experts.228.w1", "model.layers.16.block_sparse_moe.experts.229.w1", "model.layers.16.block_sparse_moe.experts.230.w1", "model.layers.16.block_sparse_moe.experts.231.w1", "model.layers.16.block_sparse_moe.experts.232.w1", "model.layers.16.block_sparse_moe.experts.233.w1", "model.layers.16.block_sparse_moe.experts.234.w1", "model.layers.16.block_sparse_moe.experts.235.w1", "model.layers.16.block_sparse_moe.experts.236.w1", "model.layers.16.block_sparse_moe.experts.237.w1", "model.layers.16.block_sparse_moe.experts.238.w1", "model.layers.16.block_sparse_moe.experts.239.w1", "model.layers.16.block_sparse_moe.experts.240.w1", "model.layers.16.block_sparse_moe.experts.241.w1", "model.layers.16.block_sparse_moe.experts.242.w1", "model.layers.16.block_sparse_moe.experts.243.w1", "model.layers.16.block_sparse_moe.experts.244.w1", "model.layers.16.block_sparse_moe.experts.245.w1", "model.layers.16.block_sparse_moe.experts.246.w1", "model.layers.16.block_sparse_moe.experts.247.w1", "model.layers.16.block_sparse_moe.experts.248.w1", "model.layers.16.block_sparse_moe.experts.249.w1", "model.layers.16.block_sparse_moe.experts.250.w1", "model.layers.16.block_sparse_moe.experts.251.w1", "model.layers.16.block_sparse_moe.experts.252.w1", "model.layers.16.block_sparse_moe.experts.253.w1", "model.layers.16.block_sparse_moe.experts.254.w1", "model.layers.16.block_sparse_moe.experts.255.w1", "model.layers.16.block_sparse_moe.experts.0.w3", "model.layers.16.block_sparse_moe.experts.1.w3", "model.layers.16.block_sparse_moe.experts.2.w3", "model.layers.16.block_sparse_moe.experts.3.w3", "model.layers.16.block_sparse_moe.experts.4.w3", "model.layers.16.block_sparse_moe.experts.5.w3", "model.layers.16.block_sparse_moe.experts.6.w3", "model.layers.16.block_sparse_moe.experts.7.w3", "model.layers.16.block_sparse_moe.experts.8.w3", "model.layers.16.block_sparse_moe.experts.9.w3", "model.layers.16.block_sparse_moe.experts.10.w3", "model.layers.16.block_sparse_moe.experts.11.w3", "model.layers.16.block_sparse_moe.experts.12.w3", "model.layers.16.block_sparse_moe.experts.13.w3", "model.layers.16.block_sparse_moe.experts.14.w3", "model.layers.16.block_sparse_moe.experts.15.w3", "model.layers.16.block_sparse_moe.experts.16.w3", "model.layers.16.block_sparse_moe.experts.17.w3", "model.layers.16.block_sparse_moe.experts.18.w3", "model.layers.16.block_sparse_moe.experts.19.w3", "model.layers.16.block_sparse_moe.experts.20.w3", "model.layers.16.block_sparse_moe.experts.21.w3", "model.layers.16.block_sparse_moe.experts.22.w3", "model.layers.16.block_sparse_moe.experts.23.w3", "model.layers.16.block_sparse_moe.experts.24.w3", "model.layers.16.block_sparse_moe.experts.25.w3", "model.layers.16.block_sparse_moe.experts.26.w3", "model.layers.16.block_sparse_moe.experts.27.w3", "model.layers.16.block_sparse_moe.experts.28.w3", "model.layers.16.block_sparse_moe.experts.29.w3", "model.layers.16.block_sparse_moe.experts.30.w3", "model.layers.16.block_sparse_moe.experts.31.w3", "model.layers.16.block_sparse_moe.experts.32.w3", "model.layers.16.block_sparse_moe.experts.33.w3", "model.layers.16.block_sparse_moe.experts.34.w3", "model.layers.16.block_sparse_moe.experts.35.w3", "model.layers.16.block_sparse_moe.experts.36.w3", "model.layers.16.block_sparse_moe.experts.37.w3", "model.layers.16.block_sparse_moe.experts.38.w3", "model.layers.16.block_sparse_moe.experts.39.w3", "model.layers.16.block_sparse_moe.experts.40.w3", "model.layers.16.block_sparse_moe.experts.41.w3", "model.layers.16.block_sparse_moe.experts.42.w3", "model.layers.16.block_sparse_moe.experts.43.w3", "model.layers.16.block_sparse_moe.experts.44.w3", "model.layers.16.block_sparse_moe.experts.45.w3", "model.layers.16.block_sparse_moe.experts.46.w3", "model.layers.16.block_sparse_moe.experts.47.w3", "model.layers.16.block_sparse_moe.experts.48.w3", "model.layers.16.block_sparse_moe.experts.49.w3", "model.layers.16.block_sparse_moe.experts.50.w3", "model.layers.16.block_sparse_moe.experts.51.w3", "model.layers.16.block_sparse_moe.experts.52.w3", "model.layers.16.block_sparse_moe.experts.53.w3", "model.layers.16.block_sparse_moe.experts.54.w3", "model.layers.16.block_sparse_moe.experts.55.w3", "model.layers.16.block_sparse_moe.experts.56.w3", "model.layers.16.block_sparse_moe.experts.57.w3", "model.layers.16.block_sparse_moe.experts.58.w3", "model.layers.16.block_sparse_moe.experts.59.w3", "model.layers.16.block_sparse_moe.experts.60.w3", "model.layers.16.block_sparse_moe.experts.61.w3", "model.layers.16.block_sparse_moe.experts.62.w3", "model.layers.16.block_sparse_moe.experts.63.w3", "model.layers.16.block_sparse_moe.experts.64.w3", "model.layers.16.block_sparse_moe.experts.65.w3", "model.layers.16.block_sparse_moe.experts.66.w3", "model.layers.16.block_sparse_moe.experts.67.w3", "model.layers.16.block_sparse_moe.experts.68.w3", "model.layers.16.block_sparse_moe.experts.69.w3", "model.layers.16.block_sparse_moe.experts.70.w3", "model.layers.16.block_sparse_moe.experts.71.w3", "model.layers.16.block_sparse_moe.experts.72.w3", "model.layers.16.block_sparse_moe.experts.73.w3", "model.layers.16.block_sparse_moe.experts.74.w3", "model.layers.16.block_sparse_moe.experts.75.w3", "model.layers.16.block_sparse_moe.experts.76.w3", "model.layers.16.block_sparse_moe.experts.77.w3", "model.layers.16.block_sparse_moe.experts.78.w3", "model.layers.16.block_sparse_moe.experts.79.w3", "model.layers.16.block_sparse_moe.experts.80.w3", "model.layers.16.block_sparse_moe.experts.81.w3", "model.layers.16.block_sparse_moe.experts.82.w3", "model.layers.16.block_sparse_moe.experts.83.w3", "model.layers.16.block_sparse_moe.experts.84.w3", "model.layers.16.block_sparse_moe.experts.85.w3", "model.layers.16.block_sparse_moe.experts.86.w3", "model.layers.16.block_sparse_moe.experts.87.w3", "model.layers.16.block_sparse_moe.experts.88.w3", "model.layers.16.block_sparse_moe.experts.89.w3", "model.layers.16.block_sparse_moe.experts.90.w3", "model.layers.16.block_sparse_moe.experts.91.w3", "model.layers.16.block_sparse_moe.experts.92.w3", "model.layers.16.block_sparse_moe.experts.93.w3", "model.layers.16.block_sparse_moe.experts.94.w3", "model.layers.16.block_sparse_moe.experts.95.w3", "model.layers.16.block_sparse_moe.experts.96.w3", "model.layers.16.block_sparse_moe.experts.97.w3", "model.layers.16.block_sparse_moe.experts.98.w3", "model.layers.16.block_sparse_moe.experts.99.w3", "model.layers.16.block_sparse_moe.experts.100.w3", "model.layers.16.block_sparse_moe.experts.101.w3", "model.layers.16.block_sparse_moe.experts.102.w3", "model.layers.16.block_sparse_moe.experts.103.w3", "model.layers.16.block_sparse_moe.experts.104.w3", "model.layers.16.block_sparse_moe.experts.105.w3", "model.layers.16.block_sparse_moe.experts.106.w3", "model.layers.16.block_sparse_moe.experts.107.w3", "model.layers.16.block_sparse_moe.experts.108.w3", "model.layers.16.block_sparse_moe.experts.109.w3", "model.layers.16.block_sparse_moe.experts.110.w3", "model.layers.16.block_sparse_moe.experts.111.w3", "model.layers.16.block_sparse_moe.experts.112.w3", "model.layers.16.block_sparse_moe.experts.113.w3", "model.layers.16.block_sparse_moe.experts.114.w3", "model.layers.16.block_sparse_moe.experts.115.w3", "model.layers.16.block_sparse_moe.experts.116.w3", "model.layers.16.block_sparse_moe.experts.117.w3", "model.layers.16.block_sparse_moe.experts.118.w3", "model.layers.16.block_sparse_moe.experts.119.w3", "model.layers.16.block_sparse_moe.experts.120.w3", "model.layers.16.block_sparse_moe.experts.121.w3", "model.layers.16.block_sparse_moe.experts.122.w3", "model.layers.16.block_sparse_moe.experts.123.w3", "model.layers.16.block_sparse_moe.experts.124.w3", "model.layers.16.block_sparse_moe.experts.125.w3", "model.layers.16.block_sparse_moe.experts.126.w3", "model.layers.16.block_sparse_moe.experts.127.w3", "model.layers.16.block_sparse_moe.experts.128.w3", "model.layers.16.block_sparse_moe.experts.129.w3", "model.layers.16.block_sparse_moe.experts.130.w3", "model.layers.16.block_sparse_moe.experts.131.w3", "model.layers.16.block_sparse_moe.experts.132.w3", "model.layers.16.block_sparse_moe.experts.133.w3", "model.layers.16.block_sparse_moe.experts.134.w3", "model.layers.16.block_sparse_moe.experts.135.w3", "model.layers.16.block_sparse_moe.experts.136.w3", "model.layers.16.block_sparse_moe.experts.137.w3", "model.layers.16.block_sparse_moe.experts.138.w3", "model.layers.16.block_sparse_moe.experts.139.w3", "model.layers.16.block_sparse_moe.experts.140.w3", "model.layers.16.block_sparse_moe.experts.141.w3", "model.layers.16.block_sparse_moe.experts.142.w3", "model.layers.16.block_sparse_moe.experts.143.w3", "model.layers.16.block_sparse_moe.experts.144.w3", "model.layers.16.block_sparse_moe.experts.145.w3", "model.layers.16.block_sparse_moe.experts.146.w3", "model.layers.16.block_sparse_moe.experts.147.w3", "model.layers.16.block_sparse_moe.experts.148.w3", "model.layers.16.block_sparse_moe.experts.149.w3", "model.layers.16.block_sparse_moe.experts.150.w3", "model.layers.16.block_sparse_moe.experts.151.w3", "model.layers.16.block_sparse_moe.experts.152.w3", "model.layers.16.block_sparse_moe.experts.153.w3", "model.layers.16.block_sparse_moe.experts.154.w3", "model.layers.16.block_sparse_moe.experts.155.w3", "model.layers.16.block_sparse_moe.experts.156.w3", "model.layers.16.block_sparse_moe.experts.157.w3", "model.layers.16.block_sparse_moe.experts.158.w3", "model.layers.16.block_sparse_moe.experts.159.w3", "model.layers.16.block_sparse_moe.experts.160.w3", "model.layers.16.block_sparse_moe.experts.161.w3", "model.layers.16.block_sparse_moe.experts.162.w3", "model.layers.16.block_sparse_moe.experts.163.w3", "model.layers.16.block_sparse_moe.experts.164.w3", "model.layers.16.block_sparse_moe.experts.165.w3", "model.layers.16.block_sparse_moe.experts.166.w3", "model.layers.16.block_sparse_moe.experts.167.w3", "model.layers.16.block_sparse_moe.experts.168.w3", "model.layers.16.block_sparse_moe.experts.169.w3", "model.layers.16.block_sparse_moe.experts.170.w3", "model.layers.16.block_sparse_moe.experts.171.w3", "model.layers.16.block_sparse_moe.experts.172.w3", "model.layers.16.block_sparse_moe.experts.173.w3", "model.layers.16.block_sparse_moe.experts.174.w3", "model.layers.16.block_sparse_moe.experts.175.w3", "model.layers.16.block_sparse_moe.experts.176.w3", "model.layers.16.block_sparse_moe.experts.177.w3", "model.layers.16.block_sparse_moe.experts.178.w3", "model.layers.16.block_sparse_moe.experts.179.w3", "model.layers.16.block_sparse_moe.experts.180.w3", "model.layers.16.block_sparse_moe.experts.181.w3", "model.layers.16.block_sparse_moe.experts.182.w3", "model.layers.16.block_sparse_moe.experts.183.w3", "model.layers.16.block_sparse_moe.experts.184.w3", "model.layers.16.block_sparse_moe.experts.185.w3", "model.layers.16.block_sparse_moe.experts.186.w3", "model.layers.16.block_sparse_moe.experts.187.w3", "model.layers.16.block_sparse_moe.experts.188.w3", "model.layers.16.block_sparse_moe.experts.189.w3", "model.layers.16.block_sparse_moe.experts.190.w3", "model.layers.16.block_sparse_moe.experts.191.w3", "model.layers.16.block_sparse_moe.experts.192.w3", "model.layers.16.block_sparse_moe.experts.193.w3", "model.layers.16.block_sparse_moe.experts.194.w3", "model.layers.16.block_sparse_moe.experts.195.w3", "model.layers.16.block_sparse_moe.experts.196.w3", "model.layers.16.block_sparse_moe.experts.197.w3", "model.layers.16.block_sparse_moe.experts.198.w3", "model.layers.16.block_sparse_moe.experts.199.w3", "model.layers.16.block_sparse_moe.experts.200.w3", "model.layers.16.block_sparse_moe.experts.201.w3", "model.layers.16.block_sparse_moe.experts.202.w3", "model.layers.16.block_sparse_moe.experts.203.w3", "model.layers.16.block_sparse_moe.experts.204.w3", "model.layers.16.block_sparse_moe.experts.205.w3", "model.layers.16.block_sparse_moe.experts.206.w3", "model.layers.16.block_sparse_moe.experts.207.w3", "model.layers.16.block_sparse_moe.experts.208.w3", "model.layers.16.block_sparse_moe.experts.209.w3", "model.layers.16.block_sparse_moe.experts.210.w3", "model.layers.16.block_sparse_moe.experts.211.w3", "model.layers.16.block_sparse_moe.experts.212.w3", "model.layers.16.block_sparse_moe.experts.213.w3", "model.layers.16.block_sparse_moe.experts.214.w3", "model.layers.16.block_sparse_moe.experts.215.w3", "model.layers.16.block_sparse_moe.experts.216.w3", "model.layers.16.block_sparse_moe.experts.217.w3", "model.layers.16.block_sparse_moe.experts.218.w3", "model.layers.16.block_sparse_moe.experts.219.w3", "model.layers.16.block_sparse_moe.experts.220.w3", "model.layers.16.block_sparse_moe.experts.221.w3", "model.layers.16.block_sparse_moe.experts.222.w3", "model.layers.16.block_sparse_moe.experts.223.w3", "model.layers.16.block_sparse_moe.experts.224.w3", "model.layers.16.block_sparse_moe.experts.225.w3", "model.layers.16.block_sparse_moe.experts.226.w3", "model.layers.16.block_sparse_moe.experts.227.w3", "model.layers.16.block_sparse_moe.experts.228.w3", "model.layers.16.block_sparse_moe.experts.229.w3", "model.layers.16.block_sparse_moe.experts.230.w3", "model.layers.16.block_sparse_moe.experts.231.w3", "model.layers.16.block_sparse_moe.experts.232.w3", "model.layers.16.block_sparse_moe.experts.233.w3", "model.layers.16.block_sparse_moe.experts.234.w3", "model.layers.16.block_sparse_moe.experts.235.w3", "model.layers.16.block_sparse_moe.experts.236.w3", "model.layers.16.block_sparse_moe.experts.237.w3", "model.layers.16.block_sparse_moe.experts.238.w3", "model.layers.16.block_sparse_moe.experts.239.w3", "model.layers.16.block_sparse_moe.experts.240.w3", "model.layers.16.block_sparse_moe.experts.241.w3", "model.layers.16.block_sparse_moe.experts.242.w3", "model.layers.16.block_sparse_moe.experts.243.w3", "model.layers.16.block_sparse_moe.experts.244.w3", "model.layers.16.block_sparse_moe.experts.245.w3", "model.layers.16.block_sparse_moe.experts.246.w3", "model.layers.16.block_sparse_moe.experts.247.w3", "model.layers.16.block_sparse_moe.experts.248.w3", "model.layers.16.block_sparse_moe.experts.249.w3", "model.layers.16.block_sparse_moe.experts.250.w3", "model.layers.16.block_sparse_moe.experts.251.w3", "model.layers.16.block_sparse_moe.experts.252.w3", "model.layers.16.block_sparse_moe.experts.253.w3", "model.layers.16.block_sparse_moe.experts.254.w3", "model.layers.16.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0018067330121993574, "dbits": 2415919104 } ] }, { "idx": 84, "layers": [ "model.layers.16.block_sparse_moe.experts.0.w2", "model.layers.16.block_sparse_moe.experts.1.w2", "model.layers.16.block_sparse_moe.experts.2.w2", "model.layers.16.block_sparse_moe.experts.3.w2", "model.layers.16.block_sparse_moe.experts.4.w2", "model.layers.16.block_sparse_moe.experts.5.w2", "model.layers.16.block_sparse_moe.experts.6.w2", "model.layers.16.block_sparse_moe.experts.7.w2", "model.layers.16.block_sparse_moe.experts.8.w2", "model.layers.16.block_sparse_moe.experts.9.w2", "model.layers.16.block_sparse_moe.experts.10.w2", "model.layers.16.block_sparse_moe.experts.11.w2", "model.layers.16.block_sparse_moe.experts.12.w2", "model.layers.16.block_sparse_moe.experts.13.w2", "model.layers.16.block_sparse_moe.experts.14.w2", "model.layers.16.block_sparse_moe.experts.15.w2", "model.layers.16.block_sparse_moe.experts.16.w2", "model.layers.16.block_sparse_moe.experts.17.w2", "model.layers.16.block_sparse_moe.experts.18.w2", "model.layers.16.block_sparse_moe.experts.19.w2", "model.layers.16.block_sparse_moe.experts.20.w2", "model.layers.16.block_sparse_moe.experts.21.w2", "model.layers.16.block_sparse_moe.experts.22.w2", "model.layers.16.block_sparse_moe.experts.23.w2", "model.layers.16.block_sparse_moe.experts.24.w2", "model.layers.16.block_sparse_moe.experts.25.w2", "model.layers.16.block_sparse_moe.experts.26.w2", "model.layers.16.block_sparse_moe.experts.27.w2", "model.layers.16.block_sparse_moe.experts.28.w2", "model.layers.16.block_sparse_moe.experts.29.w2", "model.layers.16.block_sparse_moe.experts.30.w2", "model.layers.16.block_sparse_moe.experts.31.w2", "model.layers.16.block_sparse_moe.experts.32.w2", "model.layers.16.block_sparse_moe.experts.33.w2", "model.layers.16.block_sparse_moe.experts.34.w2", "model.layers.16.block_sparse_moe.experts.35.w2", "model.layers.16.block_sparse_moe.experts.36.w2", "model.layers.16.block_sparse_moe.experts.37.w2", "model.layers.16.block_sparse_moe.experts.38.w2", "model.layers.16.block_sparse_moe.experts.39.w2", "model.layers.16.block_sparse_moe.experts.40.w2", "model.layers.16.block_sparse_moe.experts.41.w2", "model.layers.16.block_sparse_moe.experts.42.w2", "model.layers.16.block_sparse_moe.experts.43.w2", "model.layers.16.block_sparse_moe.experts.44.w2", "model.layers.16.block_sparse_moe.experts.45.w2", "model.layers.16.block_sparse_moe.experts.46.w2", "model.layers.16.block_sparse_moe.experts.47.w2", "model.layers.16.block_sparse_moe.experts.48.w2", "model.layers.16.block_sparse_moe.experts.49.w2", "model.layers.16.block_sparse_moe.experts.50.w2", "model.layers.16.block_sparse_moe.experts.51.w2", "model.layers.16.block_sparse_moe.experts.52.w2", "model.layers.16.block_sparse_moe.experts.53.w2", "model.layers.16.block_sparse_moe.experts.54.w2", "model.layers.16.block_sparse_moe.experts.55.w2", "model.layers.16.block_sparse_moe.experts.56.w2", "model.layers.16.block_sparse_moe.experts.57.w2", "model.layers.16.block_sparse_moe.experts.58.w2", "model.layers.16.block_sparse_moe.experts.59.w2", "model.layers.16.block_sparse_moe.experts.60.w2", "model.layers.16.block_sparse_moe.experts.61.w2", "model.layers.16.block_sparse_moe.experts.62.w2", "model.layers.16.block_sparse_moe.experts.63.w2", "model.layers.16.block_sparse_moe.experts.64.w2", "model.layers.16.block_sparse_moe.experts.65.w2", "model.layers.16.block_sparse_moe.experts.66.w2", "model.layers.16.block_sparse_moe.experts.67.w2", "model.layers.16.block_sparse_moe.experts.68.w2", "model.layers.16.block_sparse_moe.experts.69.w2", "model.layers.16.block_sparse_moe.experts.70.w2", "model.layers.16.block_sparse_moe.experts.71.w2", "model.layers.16.block_sparse_moe.experts.72.w2", "model.layers.16.block_sparse_moe.experts.73.w2", "model.layers.16.block_sparse_moe.experts.74.w2", "model.layers.16.block_sparse_moe.experts.75.w2", "model.layers.16.block_sparse_moe.experts.76.w2", "model.layers.16.block_sparse_moe.experts.77.w2", "model.layers.16.block_sparse_moe.experts.78.w2", "model.layers.16.block_sparse_moe.experts.79.w2", "model.layers.16.block_sparse_moe.experts.80.w2", "model.layers.16.block_sparse_moe.experts.81.w2", "model.layers.16.block_sparse_moe.experts.82.w2", "model.layers.16.block_sparse_moe.experts.83.w2", "model.layers.16.block_sparse_moe.experts.84.w2", "model.layers.16.block_sparse_moe.experts.85.w2", "model.layers.16.block_sparse_moe.experts.86.w2", "model.layers.16.block_sparse_moe.experts.87.w2", "model.layers.16.block_sparse_moe.experts.88.w2", "model.layers.16.block_sparse_moe.experts.89.w2", "model.layers.16.block_sparse_moe.experts.90.w2", "model.layers.16.block_sparse_moe.experts.91.w2", "model.layers.16.block_sparse_moe.experts.92.w2", "model.layers.16.block_sparse_moe.experts.93.w2", "model.layers.16.block_sparse_moe.experts.94.w2", "model.layers.16.block_sparse_moe.experts.95.w2", "model.layers.16.block_sparse_moe.experts.96.w2", "model.layers.16.block_sparse_moe.experts.97.w2", "model.layers.16.block_sparse_moe.experts.98.w2", "model.layers.16.block_sparse_moe.experts.99.w2", "model.layers.16.block_sparse_moe.experts.100.w2", "model.layers.16.block_sparse_moe.experts.101.w2", "model.layers.16.block_sparse_moe.experts.102.w2", "model.layers.16.block_sparse_moe.experts.103.w2", "model.layers.16.block_sparse_moe.experts.104.w2", "model.layers.16.block_sparse_moe.experts.105.w2", "model.layers.16.block_sparse_moe.experts.106.w2", "model.layers.16.block_sparse_moe.experts.107.w2", "model.layers.16.block_sparse_moe.experts.108.w2", "model.layers.16.block_sparse_moe.experts.109.w2", "model.layers.16.block_sparse_moe.experts.110.w2", "model.layers.16.block_sparse_moe.experts.111.w2", "model.layers.16.block_sparse_moe.experts.112.w2", "model.layers.16.block_sparse_moe.experts.113.w2", "model.layers.16.block_sparse_moe.experts.114.w2", "model.layers.16.block_sparse_moe.experts.115.w2", "model.layers.16.block_sparse_moe.experts.116.w2", "model.layers.16.block_sparse_moe.experts.117.w2", "model.layers.16.block_sparse_moe.experts.118.w2", "model.layers.16.block_sparse_moe.experts.119.w2", "model.layers.16.block_sparse_moe.experts.120.w2", "model.layers.16.block_sparse_moe.experts.121.w2", "model.layers.16.block_sparse_moe.experts.122.w2", "model.layers.16.block_sparse_moe.experts.123.w2", "model.layers.16.block_sparse_moe.experts.124.w2", "model.layers.16.block_sparse_moe.experts.125.w2", "model.layers.16.block_sparse_moe.experts.126.w2", "model.layers.16.block_sparse_moe.experts.127.w2", "model.layers.16.block_sparse_moe.experts.128.w2", "model.layers.16.block_sparse_moe.experts.129.w2", "model.layers.16.block_sparse_moe.experts.130.w2", "model.layers.16.block_sparse_moe.experts.131.w2", "model.layers.16.block_sparse_moe.experts.132.w2", "model.layers.16.block_sparse_moe.experts.133.w2", "model.layers.16.block_sparse_moe.experts.134.w2", "model.layers.16.block_sparse_moe.experts.135.w2", "model.layers.16.block_sparse_moe.experts.136.w2", "model.layers.16.block_sparse_moe.experts.137.w2", "model.layers.16.block_sparse_moe.experts.138.w2", "model.layers.16.block_sparse_moe.experts.139.w2", "model.layers.16.block_sparse_moe.experts.140.w2", "model.layers.16.block_sparse_moe.experts.141.w2", "model.layers.16.block_sparse_moe.experts.142.w2", "model.layers.16.block_sparse_moe.experts.143.w2", "model.layers.16.block_sparse_moe.experts.144.w2", "model.layers.16.block_sparse_moe.experts.145.w2", "model.layers.16.block_sparse_moe.experts.146.w2", "model.layers.16.block_sparse_moe.experts.147.w2", "model.layers.16.block_sparse_moe.experts.148.w2", "model.layers.16.block_sparse_moe.experts.149.w2", "model.layers.16.block_sparse_moe.experts.150.w2", "model.layers.16.block_sparse_moe.experts.151.w2", "model.layers.16.block_sparse_moe.experts.152.w2", "model.layers.16.block_sparse_moe.experts.153.w2", "model.layers.16.block_sparse_moe.experts.154.w2", "model.layers.16.block_sparse_moe.experts.155.w2", "model.layers.16.block_sparse_moe.experts.156.w2", "model.layers.16.block_sparse_moe.experts.157.w2", "model.layers.16.block_sparse_moe.experts.158.w2", "model.layers.16.block_sparse_moe.experts.159.w2", "model.layers.16.block_sparse_moe.experts.160.w2", "model.layers.16.block_sparse_moe.experts.161.w2", "model.layers.16.block_sparse_moe.experts.162.w2", "model.layers.16.block_sparse_moe.experts.163.w2", "model.layers.16.block_sparse_moe.experts.164.w2", "model.layers.16.block_sparse_moe.experts.165.w2", "model.layers.16.block_sparse_moe.experts.166.w2", "model.layers.16.block_sparse_moe.experts.167.w2", "model.layers.16.block_sparse_moe.experts.168.w2", "model.layers.16.block_sparse_moe.experts.169.w2", "model.layers.16.block_sparse_moe.experts.170.w2", "model.layers.16.block_sparse_moe.experts.171.w2", "model.layers.16.block_sparse_moe.experts.172.w2", "model.layers.16.block_sparse_moe.experts.173.w2", "model.layers.16.block_sparse_moe.experts.174.w2", "model.layers.16.block_sparse_moe.experts.175.w2", "model.layers.16.block_sparse_moe.experts.176.w2", "model.layers.16.block_sparse_moe.experts.177.w2", "model.layers.16.block_sparse_moe.experts.178.w2", "model.layers.16.block_sparse_moe.experts.179.w2", "model.layers.16.block_sparse_moe.experts.180.w2", "model.layers.16.block_sparse_moe.experts.181.w2", "model.layers.16.block_sparse_moe.experts.182.w2", "model.layers.16.block_sparse_moe.experts.183.w2", "model.layers.16.block_sparse_moe.experts.184.w2", "model.layers.16.block_sparse_moe.experts.185.w2", "model.layers.16.block_sparse_moe.experts.186.w2", "model.layers.16.block_sparse_moe.experts.187.w2", "model.layers.16.block_sparse_moe.experts.188.w2", "model.layers.16.block_sparse_moe.experts.189.w2", "model.layers.16.block_sparse_moe.experts.190.w2", "model.layers.16.block_sparse_moe.experts.191.w2", "model.layers.16.block_sparse_moe.experts.192.w2", "model.layers.16.block_sparse_moe.experts.193.w2", "model.layers.16.block_sparse_moe.experts.194.w2", "model.layers.16.block_sparse_moe.experts.195.w2", "model.layers.16.block_sparse_moe.experts.196.w2", "model.layers.16.block_sparse_moe.experts.197.w2", "model.layers.16.block_sparse_moe.experts.198.w2", "model.layers.16.block_sparse_moe.experts.199.w2", "model.layers.16.block_sparse_moe.experts.200.w2", "model.layers.16.block_sparse_moe.experts.201.w2", "model.layers.16.block_sparse_moe.experts.202.w2", "model.layers.16.block_sparse_moe.experts.203.w2", "model.layers.16.block_sparse_moe.experts.204.w2", "model.layers.16.block_sparse_moe.experts.205.w2", "model.layers.16.block_sparse_moe.experts.206.w2", "model.layers.16.block_sparse_moe.experts.207.w2", "model.layers.16.block_sparse_moe.experts.208.w2", "model.layers.16.block_sparse_moe.experts.209.w2", "model.layers.16.block_sparse_moe.experts.210.w2", "model.layers.16.block_sparse_moe.experts.211.w2", "model.layers.16.block_sparse_moe.experts.212.w2", "model.layers.16.block_sparse_moe.experts.213.w2", "model.layers.16.block_sparse_moe.experts.214.w2", "model.layers.16.block_sparse_moe.experts.215.w2", "model.layers.16.block_sparse_moe.experts.216.w2", "model.layers.16.block_sparse_moe.experts.217.w2", "model.layers.16.block_sparse_moe.experts.218.w2", "model.layers.16.block_sparse_moe.experts.219.w2", "model.layers.16.block_sparse_moe.experts.220.w2", "model.layers.16.block_sparse_moe.experts.221.w2", "model.layers.16.block_sparse_moe.experts.222.w2", "model.layers.16.block_sparse_moe.experts.223.w2", "model.layers.16.block_sparse_moe.experts.224.w2", "model.layers.16.block_sparse_moe.experts.225.w2", "model.layers.16.block_sparse_moe.experts.226.w2", "model.layers.16.block_sparse_moe.experts.227.w2", "model.layers.16.block_sparse_moe.experts.228.w2", "model.layers.16.block_sparse_moe.experts.229.w2", "model.layers.16.block_sparse_moe.experts.230.w2", "model.layers.16.block_sparse_moe.experts.231.w2", "model.layers.16.block_sparse_moe.experts.232.w2", "model.layers.16.block_sparse_moe.experts.233.w2", "model.layers.16.block_sparse_moe.experts.234.w2", "model.layers.16.block_sparse_moe.experts.235.w2", "model.layers.16.block_sparse_moe.experts.236.w2", "model.layers.16.block_sparse_moe.experts.237.w2", "model.layers.16.block_sparse_moe.experts.238.w2", "model.layers.16.block_sparse_moe.experts.239.w2", "model.layers.16.block_sparse_moe.experts.240.w2", "model.layers.16.block_sparse_moe.experts.241.w2", "model.layers.16.block_sparse_moe.experts.242.w2", "model.layers.16.block_sparse_moe.experts.243.w2", "model.layers.16.block_sparse_moe.experts.244.w2", "model.layers.16.block_sparse_moe.experts.245.w2", "model.layers.16.block_sparse_moe.experts.246.w2", "model.layers.16.block_sparse_moe.experts.247.w2", "model.layers.16.block_sparse_moe.experts.248.w2", "model.layers.16.block_sparse_moe.experts.249.w2", "model.layers.16.block_sparse_moe.experts.250.w2", "model.layers.16.block_sparse_moe.experts.251.w2", "model.layers.16.block_sparse_moe.experts.252.w2", "model.layers.16.block_sparse_moe.experts.253.w2", "model.layers.16.block_sparse_moe.experts.254.w2", "model.layers.16.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0001648038625716719, "dbits": 1207959552 } ] }, { "idx": 85, "layers": [ "model.layers.17.self_attn.q_proj" ], "candidates": [ { "dkld": 0.001224955916404813, "dbits": 18874368 } ] }, { "idx": 86, "layers": [ "model.layers.17.self_attn.k_proj", "model.layers.17.self_attn.v_proj" ], "candidates": [ { "dkld": -0.018651586771011308, "dbits": 6291456 } ] }, { "idx": 87, "layers": [ "model.layers.17.self_attn.o_proj" ], "candidates": [ { "dkld": 0.005383932590484641, "dbits": 18874368 } ] }, { "idx": 88, "layers": [ "model.layers.17.block_sparse_moe.experts.0.w1", "model.layers.17.block_sparse_moe.experts.1.w1", "model.layers.17.block_sparse_moe.experts.2.w1", "model.layers.17.block_sparse_moe.experts.3.w1", "model.layers.17.block_sparse_moe.experts.4.w1", "model.layers.17.block_sparse_moe.experts.5.w1", "model.layers.17.block_sparse_moe.experts.6.w1", "model.layers.17.block_sparse_moe.experts.7.w1", "model.layers.17.block_sparse_moe.experts.8.w1", "model.layers.17.block_sparse_moe.experts.9.w1", "model.layers.17.block_sparse_moe.experts.10.w1", "model.layers.17.block_sparse_moe.experts.11.w1", "model.layers.17.block_sparse_moe.experts.12.w1", "model.layers.17.block_sparse_moe.experts.13.w1", "model.layers.17.block_sparse_moe.experts.14.w1", "model.layers.17.block_sparse_moe.experts.15.w1", "model.layers.17.block_sparse_moe.experts.16.w1", "model.layers.17.block_sparse_moe.experts.17.w1", "model.layers.17.block_sparse_moe.experts.18.w1", "model.layers.17.block_sparse_moe.experts.19.w1", "model.layers.17.block_sparse_moe.experts.20.w1", "model.layers.17.block_sparse_moe.experts.21.w1", "model.layers.17.block_sparse_moe.experts.22.w1", "model.layers.17.block_sparse_moe.experts.23.w1", "model.layers.17.block_sparse_moe.experts.24.w1", "model.layers.17.block_sparse_moe.experts.25.w1", "model.layers.17.block_sparse_moe.experts.26.w1", "model.layers.17.block_sparse_moe.experts.27.w1", "model.layers.17.block_sparse_moe.experts.28.w1", "model.layers.17.block_sparse_moe.experts.29.w1", "model.layers.17.block_sparse_moe.experts.30.w1", "model.layers.17.block_sparse_moe.experts.31.w1", "model.layers.17.block_sparse_moe.experts.32.w1", "model.layers.17.block_sparse_moe.experts.33.w1", "model.layers.17.block_sparse_moe.experts.34.w1", "model.layers.17.block_sparse_moe.experts.35.w1", "model.layers.17.block_sparse_moe.experts.36.w1", "model.layers.17.block_sparse_moe.experts.37.w1", "model.layers.17.block_sparse_moe.experts.38.w1", "model.layers.17.block_sparse_moe.experts.39.w1", "model.layers.17.block_sparse_moe.experts.40.w1", "model.layers.17.block_sparse_moe.experts.41.w1", "model.layers.17.block_sparse_moe.experts.42.w1", "model.layers.17.block_sparse_moe.experts.43.w1", "model.layers.17.block_sparse_moe.experts.44.w1", "model.layers.17.block_sparse_moe.experts.45.w1", "model.layers.17.block_sparse_moe.experts.46.w1", "model.layers.17.block_sparse_moe.experts.47.w1", "model.layers.17.block_sparse_moe.experts.48.w1", "model.layers.17.block_sparse_moe.experts.49.w1", "model.layers.17.block_sparse_moe.experts.50.w1", "model.layers.17.block_sparse_moe.experts.51.w1", "model.layers.17.block_sparse_moe.experts.52.w1", "model.layers.17.block_sparse_moe.experts.53.w1", "model.layers.17.block_sparse_moe.experts.54.w1", "model.layers.17.block_sparse_moe.experts.55.w1", "model.layers.17.block_sparse_moe.experts.56.w1", "model.layers.17.block_sparse_moe.experts.57.w1", "model.layers.17.block_sparse_moe.experts.58.w1", "model.layers.17.block_sparse_moe.experts.59.w1", "model.layers.17.block_sparse_moe.experts.60.w1", "model.layers.17.block_sparse_moe.experts.61.w1", "model.layers.17.block_sparse_moe.experts.62.w1", "model.layers.17.block_sparse_moe.experts.63.w1", "model.layers.17.block_sparse_moe.experts.64.w1", "model.layers.17.block_sparse_moe.experts.65.w1", "model.layers.17.block_sparse_moe.experts.66.w1", "model.layers.17.block_sparse_moe.experts.67.w1", "model.layers.17.block_sparse_moe.experts.68.w1", "model.layers.17.block_sparse_moe.experts.69.w1", "model.layers.17.block_sparse_moe.experts.70.w1", "model.layers.17.block_sparse_moe.experts.71.w1", "model.layers.17.block_sparse_moe.experts.72.w1", "model.layers.17.block_sparse_moe.experts.73.w1", "model.layers.17.block_sparse_moe.experts.74.w1", "model.layers.17.block_sparse_moe.experts.75.w1", "model.layers.17.block_sparse_moe.experts.76.w1", "model.layers.17.block_sparse_moe.experts.77.w1", "model.layers.17.block_sparse_moe.experts.78.w1", "model.layers.17.block_sparse_moe.experts.79.w1", "model.layers.17.block_sparse_moe.experts.80.w1", "model.layers.17.block_sparse_moe.experts.81.w1", "model.layers.17.block_sparse_moe.experts.82.w1", "model.layers.17.block_sparse_moe.experts.83.w1", "model.layers.17.block_sparse_moe.experts.84.w1", "model.layers.17.block_sparse_moe.experts.85.w1", "model.layers.17.block_sparse_moe.experts.86.w1", "model.layers.17.block_sparse_moe.experts.87.w1", "model.layers.17.block_sparse_moe.experts.88.w1", "model.layers.17.block_sparse_moe.experts.89.w1", "model.layers.17.block_sparse_moe.experts.90.w1", "model.layers.17.block_sparse_moe.experts.91.w1", "model.layers.17.block_sparse_moe.experts.92.w1", "model.layers.17.block_sparse_moe.experts.93.w1", "model.layers.17.block_sparse_moe.experts.94.w1", "model.layers.17.block_sparse_moe.experts.95.w1", "model.layers.17.block_sparse_moe.experts.96.w1", "model.layers.17.block_sparse_moe.experts.97.w1", "model.layers.17.block_sparse_moe.experts.98.w1", "model.layers.17.block_sparse_moe.experts.99.w1", "model.layers.17.block_sparse_moe.experts.100.w1", "model.layers.17.block_sparse_moe.experts.101.w1", "model.layers.17.block_sparse_moe.experts.102.w1", "model.layers.17.block_sparse_moe.experts.103.w1", "model.layers.17.block_sparse_moe.experts.104.w1", "model.layers.17.block_sparse_moe.experts.105.w1", "model.layers.17.block_sparse_moe.experts.106.w1", "model.layers.17.block_sparse_moe.experts.107.w1", "model.layers.17.block_sparse_moe.experts.108.w1", "model.layers.17.block_sparse_moe.experts.109.w1", "model.layers.17.block_sparse_moe.experts.110.w1", "model.layers.17.block_sparse_moe.experts.111.w1", "model.layers.17.block_sparse_moe.experts.112.w1", "model.layers.17.block_sparse_moe.experts.113.w1", "model.layers.17.block_sparse_moe.experts.114.w1", "model.layers.17.block_sparse_moe.experts.115.w1", "model.layers.17.block_sparse_moe.experts.116.w1", "model.layers.17.block_sparse_moe.experts.117.w1", "model.layers.17.block_sparse_moe.experts.118.w1", "model.layers.17.block_sparse_moe.experts.119.w1", "model.layers.17.block_sparse_moe.experts.120.w1", "model.layers.17.block_sparse_moe.experts.121.w1", "model.layers.17.block_sparse_moe.experts.122.w1", "model.layers.17.block_sparse_moe.experts.123.w1", "model.layers.17.block_sparse_moe.experts.124.w1", "model.layers.17.block_sparse_moe.experts.125.w1", "model.layers.17.block_sparse_moe.experts.126.w1", "model.layers.17.block_sparse_moe.experts.127.w1", "model.layers.17.block_sparse_moe.experts.128.w1", "model.layers.17.block_sparse_moe.experts.129.w1", "model.layers.17.block_sparse_moe.experts.130.w1", "model.layers.17.block_sparse_moe.experts.131.w1", "model.layers.17.block_sparse_moe.experts.132.w1", "model.layers.17.block_sparse_moe.experts.133.w1", "model.layers.17.block_sparse_moe.experts.134.w1", "model.layers.17.block_sparse_moe.experts.135.w1", "model.layers.17.block_sparse_moe.experts.136.w1", "model.layers.17.block_sparse_moe.experts.137.w1", "model.layers.17.block_sparse_moe.experts.138.w1", "model.layers.17.block_sparse_moe.experts.139.w1", "model.layers.17.block_sparse_moe.experts.140.w1", "model.layers.17.block_sparse_moe.experts.141.w1", "model.layers.17.block_sparse_moe.experts.142.w1", "model.layers.17.block_sparse_moe.experts.143.w1", "model.layers.17.block_sparse_moe.experts.144.w1", "model.layers.17.block_sparse_moe.experts.145.w1", "model.layers.17.block_sparse_moe.experts.146.w1", "model.layers.17.block_sparse_moe.experts.147.w1", "model.layers.17.block_sparse_moe.experts.148.w1", "model.layers.17.block_sparse_moe.experts.149.w1", "model.layers.17.block_sparse_moe.experts.150.w1", "model.layers.17.block_sparse_moe.experts.151.w1", "model.layers.17.block_sparse_moe.experts.152.w1", "model.layers.17.block_sparse_moe.experts.153.w1", "model.layers.17.block_sparse_moe.experts.154.w1", "model.layers.17.block_sparse_moe.experts.155.w1", "model.layers.17.block_sparse_moe.experts.156.w1", "model.layers.17.block_sparse_moe.experts.157.w1", "model.layers.17.block_sparse_moe.experts.158.w1", "model.layers.17.block_sparse_moe.experts.159.w1", "model.layers.17.block_sparse_moe.experts.160.w1", "model.layers.17.block_sparse_moe.experts.161.w1", "model.layers.17.block_sparse_moe.experts.162.w1", "model.layers.17.block_sparse_moe.experts.163.w1", "model.layers.17.block_sparse_moe.experts.164.w1", "model.layers.17.block_sparse_moe.experts.165.w1", "model.layers.17.block_sparse_moe.experts.166.w1", "model.layers.17.block_sparse_moe.experts.167.w1", "model.layers.17.block_sparse_moe.experts.168.w1", "model.layers.17.block_sparse_moe.experts.169.w1", "model.layers.17.block_sparse_moe.experts.170.w1", "model.layers.17.block_sparse_moe.experts.171.w1", "model.layers.17.block_sparse_moe.experts.172.w1", "model.layers.17.block_sparse_moe.experts.173.w1", "model.layers.17.block_sparse_moe.experts.174.w1", "model.layers.17.block_sparse_moe.experts.175.w1", "model.layers.17.block_sparse_moe.experts.176.w1", "model.layers.17.block_sparse_moe.experts.177.w1", "model.layers.17.block_sparse_moe.experts.178.w1", "model.layers.17.block_sparse_moe.experts.179.w1", "model.layers.17.block_sparse_moe.experts.180.w1", "model.layers.17.block_sparse_moe.experts.181.w1", "model.layers.17.block_sparse_moe.experts.182.w1", "model.layers.17.block_sparse_moe.experts.183.w1", "model.layers.17.block_sparse_moe.experts.184.w1", "model.layers.17.block_sparse_moe.experts.185.w1", "model.layers.17.block_sparse_moe.experts.186.w1", "model.layers.17.block_sparse_moe.experts.187.w1", "model.layers.17.block_sparse_moe.experts.188.w1", "model.layers.17.block_sparse_moe.experts.189.w1", "model.layers.17.block_sparse_moe.experts.190.w1", "model.layers.17.block_sparse_moe.experts.191.w1", "model.layers.17.block_sparse_moe.experts.192.w1", "model.layers.17.block_sparse_moe.experts.193.w1", "model.layers.17.block_sparse_moe.experts.194.w1", "model.layers.17.block_sparse_moe.experts.195.w1", "model.layers.17.block_sparse_moe.experts.196.w1", "model.layers.17.block_sparse_moe.experts.197.w1", "model.layers.17.block_sparse_moe.experts.198.w1", "model.layers.17.block_sparse_moe.experts.199.w1", "model.layers.17.block_sparse_moe.experts.200.w1", "model.layers.17.block_sparse_moe.experts.201.w1", "model.layers.17.block_sparse_moe.experts.202.w1", "model.layers.17.block_sparse_moe.experts.203.w1", "model.layers.17.block_sparse_moe.experts.204.w1", "model.layers.17.block_sparse_moe.experts.205.w1", "model.layers.17.block_sparse_moe.experts.206.w1", "model.layers.17.block_sparse_moe.experts.207.w1", "model.layers.17.block_sparse_moe.experts.208.w1", "model.layers.17.block_sparse_moe.experts.209.w1", "model.layers.17.block_sparse_moe.experts.210.w1", "model.layers.17.block_sparse_moe.experts.211.w1", "model.layers.17.block_sparse_moe.experts.212.w1", "model.layers.17.block_sparse_moe.experts.213.w1", "model.layers.17.block_sparse_moe.experts.214.w1", "model.layers.17.block_sparse_moe.experts.215.w1", "model.layers.17.block_sparse_moe.experts.216.w1", "model.layers.17.block_sparse_moe.experts.217.w1", "model.layers.17.block_sparse_moe.experts.218.w1", "model.layers.17.block_sparse_moe.experts.219.w1", "model.layers.17.block_sparse_moe.experts.220.w1", "model.layers.17.block_sparse_moe.experts.221.w1", "model.layers.17.block_sparse_moe.experts.222.w1", "model.layers.17.block_sparse_moe.experts.223.w1", "model.layers.17.block_sparse_moe.experts.224.w1", "model.layers.17.block_sparse_moe.experts.225.w1", "model.layers.17.block_sparse_moe.experts.226.w1", "model.layers.17.block_sparse_moe.experts.227.w1", "model.layers.17.block_sparse_moe.experts.228.w1", "model.layers.17.block_sparse_moe.experts.229.w1", "model.layers.17.block_sparse_moe.experts.230.w1", "model.layers.17.block_sparse_moe.experts.231.w1", "model.layers.17.block_sparse_moe.experts.232.w1", "model.layers.17.block_sparse_moe.experts.233.w1", "model.layers.17.block_sparse_moe.experts.234.w1", "model.layers.17.block_sparse_moe.experts.235.w1", "model.layers.17.block_sparse_moe.experts.236.w1", "model.layers.17.block_sparse_moe.experts.237.w1", "model.layers.17.block_sparse_moe.experts.238.w1", "model.layers.17.block_sparse_moe.experts.239.w1", "model.layers.17.block_sparse_moe.experts.240.w1", "model.layers.17.block_sparse_moe.experts.241.w1", "model.layers.17.block_sparse_moe.experts.242.w1", "model.layers.17.block_sparse_moe.experts.243.w1", "model.layers.17.block_sparse_moe.experts.244.w1", "model.layers.17.block_sparse_moe.experts.245.w1", "model.layers.17.block_sparse_moe.experts.246.w1", "model.layers.17.block_sparse_moe.experts.247.w1", "model.layers.17.block_sparse_moe.experts.248.w1", "model.layers.17.block_sparse_moe.experts.249.w1", "model.layers.17.block_sparse_moe.experts.250.w1", "model.layers.17.block_sparse_moe.experts.251.w1", "model.layers.17.block_sparse_moe.experts.252.w1", "model.layers.17.block_sparse_moe.experts.253.w1", "model.layers.17.block_sparse_moe.experts.254.w1", "model.layers.17.block_sparse_moe.experts.255.w1", "model.layers.17.block_sparse_moe.experts.0.w3", "model.layers.17.block_sparse_moe.experts.1.w3", "model.layers.17.block_sparse_moe.experts.2.w3", "model.layers.17.block_sparse_moe.experts.3.w3", "model.layers.17.block_sparse_moe.experts.4.w3", "model.layers.17.block_sparse_moe.experts.5.w3", "model.layers.17.block_sparse_moe.experts.6.w3", "model.layers.17.block_sparse_moe.experts.7.w3", "model.layers.17.block_sparse_moe.experts.8.w3", "model.layers.17.block_sparse_moe.experts.9.w3", "model.layers.17.block_sparse_moe.experts.10.w3", "model.layers.17.block_sparse_moe.experts.11.w3", "model.layers.17.block_sparse_moe.experts.12.w3", "model.layers.17.block_sparse_moe.experts.13.w3", "model.layers.17.block_sparse_moe.experts.14.w3", "model.layers.17.block_sparse_moe.experts.15.w3", "model.layers.17.block_sparse_moe.experts.16.w3", "model.layers.17.block_sparse_moe.experts.17.w3", "model.layers.17.block_sparse_moe.experts.18.w3", "model.layers.17.block_sparse_moe.experts.19.w3", "model.layers.17.block_sparse_moe.experts.20.w3", "model.layers.17.block_sparse_moe.experts.21.w3", "model.layers.17.block_sparse_moe.experts.22.w3", "model.layers.17.block_sparse_moe.experts.23.w3", "model.layers.17.block_sparse_moe.experts.24.w3", "model.layers.17.block_sparse_moe.experts.25.w3", "model.layers.17.block_sparse_moe.experts.26.w3", "model.layers.17.block_sparse_moe.experts.27.w3", "model.layers.17.block_sparse_moe.experts.28.w3", "model.layers.17.block_sparse_moe.experts.29.w3", "model.layers.17.block_sparse_moe.experts.30.w3", "model.layers.17.block_sparse_moe.experts.31.w3", "model.layers.17.block_sparse_moe.experts.32.w3", "model.layers.17.block_sparse_moe.experts.33.w3", "model.layers.17.block_sparse_moe.experts.34.w3", "model.layers.17.block_sparse_moe.experts.35.w3", "model.layers.17.block_sparse_moe.experts.36.w3", "model.layers.17.block_sparse_moe.experts.37.w3", "model.layers.17.block_sparse_moe.experts.38.w3", "model.layers.17.block_sparse_moe.experts.39.w3", "model.layers.17.block_sparse_moe.experts.40.w3", "model.layers.17.block_sparse_moe.experts.41.w3", "model.layers.17.block_sparse_moe.experts.42.w3", "model.layers.17.block_sparse_moe.experts.43.w3", "model.layers.17.block_sparse_moe.experts.44.w3", "model.layers.17.block_sparse_moe.experts.45.w3", "model.layers.17.block_sparse_moe.experts.46.w3", "model.layers.17.block_sparse_moe.experts.47.w3", "model.layers.17.block_sparse_moe.experts.48.w3", "model.layers.17.block_sparse_moe.experts.49.w3", "model.layers.17.block_sparse_moe.experts.50.w3", "model.layers.17.block_sparse_moe.experts.51.w3", "model.layers.17.block_sparse_moe.experts.52.w3", "model.layers.17.block_sparse_moe.experts.53.w3", "model.layers.17.block_sparse_moe.experts.54.w3", "model.layers.17.block_sparse_moe.experts.55.w3", "model.layers.17.block_sparse_moe.experts.56.w3", "model.layers.17.block_sparse_moe.experts.57.w3", "model.layers.17.block_sparse_moe.experts.58.w3", "model.layers.17.block_sparse_moe.experts.59.w3", "model.layers.17.block_sparse_moe.experts.60.w3", "model.layers.17.block_sparse_moe.experts.61.w3", "model.layers.17.block_sparse_moe.experts.62.w3", "model.layers.17.block_sparse_moe.experts.63.w3", "model.layers.17.block_sparse_moe.experts.64.w3", "model.layers.17.block_sparse_moe.experts.65.w3", "model.layers.17.block_sparse_moe.experts.66.w3", "model.layers.17.block_sparse_moe.experts.67.w3", "model.layers.17.block_sparse_moe.experts.68.w3", "model.layers.17.block_sparse_moe.experts.69.w3", "model.layers.17.block_sparse_moe.experts.70.w3", "model.layers.17.block_sparse_moe.experts.71.w3", "model.layers.17.block_sparse_moe.experts.72.w3", "model.layers.17.block_sparse_moe.experts.73.w3", "model.layers.17.block_sparse_moe.experts.74.w3", "model.layers.17.block_sparse_moe.experts.75.w3", "model.layers.17.block_sparse_moe.experts.76.w3", "model.layers.17.block_sparse_moe.experts.77.w3", "model.layers.17.block_sparse_moe.experts.78.w3", "model.layers.17.block_sparse_moe.experts.79.w3", "model.layers.17.block_sparse_moe.experts.80.w3", "model.layers.17.block_sparse_moe.experts.81.w3", "model.layers.17.block_sparse_moe.experts.82.w3", "model.layers.17.block_sparse_moe.experts.83.w3", "model.layers.17.block_sparse_moe.experts.84.w3", "model.layers.17.block_sparse_moe.experts.85.w3", "model.layers.17.block_sparse_moe.experts.86.w3", "model.layers.17.block_sparse_moe.experts.87.w3", "model.layers.17.block_sparse_moe.experts.88.w3", "model.layers.17.block_sparse_moe.experts.89.w3", "model.layers.17.block_sparse_moe.experts.90.w3", "model.layers.17.block_sparse_moe.experts.91.w3", "model.layers.17.block_sparse_moe.experts.92.w3", "model.layers.17.block_sparse_moe.experts.93.w3", "model.layers.17.block_sparse_moe.experts.94.w3", "model.layers.17.block_sparse_moe.experts.95.w3", "model.layers.17.block_sparse_moe.experts.96.w3", "model.layers.17.block_sparse_moe.experts.97.w3", "model.layers.17.block_sparse_moe.experts.98.w3", "model.layers.17.block_sparse_moe.experts.99.w3", "model.layers.17.block_sparse_moe.experts.100.w3", "model.layers.17.block_sparse_moe.experts.101.w3", "model.layers.17.block_sparse_moe.experts.102.w3", "model.layers.17.block_sparse_moe.experts.103.w3", "model.layers.17.block_sparse_moe.experts.104.w3", "model.layers.17.block_sparse_moe.experts.105.w3", "model.layers.17.block_sparse_moe.experts.106.w3", "model.layers.17.block_sparse_moe.experts.107.w3", "model.layers.17.block_sparse_moe.experts.108.w3", "model.layers.17.block_sparse_moe.experts.109.w3", "model.layers.17.block_sparse_moe.experts.110.w3", "model.layers.17.block_sparse_moe.experts.111.w3", "model.layers.17.block_sparse_moe.experts.112.w3", "model.layers.17.block_sparse_moe.experts.113.w3", "model.layers.17.block_sparse_moe.experts.114.w3", "model.layers.17.block_sparse_moe.experts.115.w3", "model.layers.17.block_sparse_moe.experts.116.w3", "model.layers.17.block_sparse_moe.experts.117.w3", "model.layers.17.block_sparse_moe.experts.118.w3", "model.layers.17.block_sparse_moe.experts.119.w3", "model.layers.17.block_sparse_moe.experts.120.w3", "model.layers.17.block_sparse_moe.experts.121.w3", "model.layers.17.block_sparse_moe.experts.122.w3", "model.layers.17.block_sparse_moe.experts.123.w3", "model.layers.17.block_sparse_moe.experts.124.w3", "model.layers.17.block_sparse_moe.experts.125.w3", "model.layers.17.block_sparse_moe.experts.126.w3", "model.layers.17.block_sparse_moe.experts.127.w3", "model.layers.17.block_sparse_moe.experts.128.w3", "model.layers.17.block_sparse_moe.experts.129.w3", "model.layers.17.block_sparse_moe.experts.130.w3", "model.layers.17.block_sparse_moe.experts.131.w3", "model.layers.17.block_sparse_moe.experts.132.w3", "model.layers.17.block_sparse_moe.experts.133.w3", "model.layers.17.block_sparse_moe.experts.134.w3", "model.layers.17.block_sparse_moe.experts.135.w3", "model.layers.17.block_sparse_moe.experts.136.w3", "model.layers.17.block_sparse_moe.experts.137.w3", "model.layers.17.block_sparse_moe.experts.138.w3", "model.layers.17.block_sparse_moe.experts.139.w3", "model.layers.17.block_sparse_moe.experts.140.w3", "model.layers.17.block_sparse_moe.experts.141.w3", "model.layers.17.block_sparse_moe.experts.142.w3", "model.layers.17.block_sparse_moe.experts.143.w3", "model.layers.17.block_sparse_moe.experts.144.w3", "model.layers.17.block_sparse_moe.experts.145.w3", "model.layers.17.block_sparse_moe.experts.146.w3", "model.layers.17.block_sparse_moe.experts.147.w3", "model.layers.17.block_sparse_moe.experts.148.w3", "model.layers.17.block_sparse_moe.experts.149.w3", "model.layers.17.block_sparse_moe.experts.150.w3", "model.layers.17.block_sparse_moe.experts.151.w3", "model.layers.17.block_sparse_moe.experts.152.w3", "model.layers.17.block_sparse_moe.experts.153.w3", "model.layers.17.block_sparse_moe.experts.154.w3", "model.layers.17.block_sparse_moe.experts.155.w3", "model.layers.17.block_sparse_moe.experts.156.w3", "model.layers.17.block_sparse_moe.experts.157.w3", "model.layers.17.block_sparse_moe.experts.158.w3", "model.layers.17.block_sparse_moe.experts.159.w3", "model.layers.17.block_sparse_moe.experts.160.w3", "model.layers.17.block_sparse_moe.experts.161.w3", "model.layers.17.block_sparse_moe.experts.162.w3", "model.layers.17.block_sparse_moe.experts.163.w3", "model.layers.17.block_sparse_moe.experts.164.w3", "model.layers.17.block_sparse_moe.experts.165.w3", "model.layers.17.block_sparse_moe.experts.166.w3", "model.layers.17.block_sparse_moe.experts.167.w3", "model.layers.17.block_sparse_moe.experts.168.w3", "model.layers.17.block_sparse_moe.experts.169.w3", "model.layers.17.block_sparse_moe.experts.170.w3", "model.layers.17.block_sparse_moe.experts.171.w3", "model.layers.17.block_sparse_moe.experts.172.w3", "model.layers.17.block_sparse_moe.experts.173.w3", "model.layers.17.block_sparse_moe.experts.174.w3", "model.layers.17.block_sparse_moe.experts.175.w3", "model.layers.17.block_sparse_moe.experts.176.w3", "model.layers.17.block_sparse_moe.experts.177.w3", "model.layers.17.block_sparse_moe.experts.178.w3", "model.layers.17.block_sparse_moe.experts.179.w3", "model.layers.17.block_sparse_moe.experts.180.w3", "model.layers.17.block_sparse_moe.experts.181.w3", "model.layers.17.block_sparse_moe.experts.182.w3", "model.layers.17.block_sparse_moe.experts.183.w3", "model.layers.17.block_sparse_moe.experts.184.w3", "model.layers.17.block_sparse_moe.experts.185.w3", "model.layers.17.block_sparse_moe.experts.186.w3", "model.layers.17.block_sparse_moe.experts.187.w3", "model.layers.17.block_sparse_moe.experts.188.w3", "model.layers.17.block_sparse_moe.experts.189.w3", "model.layers.17.block_sparse_moe.experts.190.w3", "model.layers.17.block_sparse_moe.experts.191.w3", "model.layers.17.block_sparse_moe.experts.192.w3", "model.layers.17.block_sparse_moe.experts.193.w3", "model.layers.17.block_sparse_moe.experts.194.w3", "model.layers.17.block_sparse_moe.experts.195.w3", "model.layers.17.block_sparse_moe.experts.196.w3", "model.layers.17.block_sparse_moe.experts.197.w3", "model.layers.17.block_sparse_moe.experts.198.w3", "model.layers.17.block_sparse_moe.experts.199.w3", "model.layers.17.block_sparse_moe.experts.200.w3", "model.layers.17.block_sparse_moe.experts.201.w3", "model.layers.17.block_sparse_moe.experts.202.w3", "model.layers.17.block_sparse_moe.experts.203.w3", "model.layers.17.block_sparse_moe.experts.204.w3", "model.layers.17.block_sparse_moe.experts.205.w3", "model.layers.17.block_sparse_moe.experts.206.w3", "model.layers.17.block_sparse_moe.experts.207.w3", "model.layers.17.block_sparse_moe.experts.208.w3", "model.layers.17.block_sparse_moe.experts.209.w3", "model.layers.17.block_sparse_moe.experts.210.w3", "model.layers.17.block_sparse_moe.experts.211.w3", "model.layers.17.block_sparse_moe.experts.212.w3", "model.layers.17.block_sparse_moe.experts.213.w3", "model.layers.17.block_sparse_moe.experts.214.w3", "model.layers.17.block_sparse_moe.experts.215.w3", "model.layers.17.block_sparse_moe.experts.216.w3", "model.layers.17.block_sparse_moe.experts.217.w3", "model.layers.17.block_sparse_moe.experts.218.w3", "model.layers.17.block_sparse_moe.experts.219.w3", "model.layers.17.block_sparse_moe.experts.220.w3", "model.layers.17.block_sparse_moe.experts.221.w3", "model.layers.17.block_sparse_moe.experts.222.w3", "model.layers.17.block_sparse_moe.experts.223.w3", "model.layers.17.block_sparse_moe.experts.224.w3", "model.layers.17.block_sparse_moe.experts.225.w3", "model.layers.17.block_sparse_moe.experts.226.w3", "model.layers.17.block_sparse_moe.experts.227.w3", "model.layers.17.block_sparse_moe.experts.228.w3", "model.layers.17.block_sparse_moe.experts.229.w3", "model.layers.17.block_sparse_moe.experts.230.w3", "model.layers.17.block_sparse_moe.experts.231.w3", "model.layers.17.block_sparse_moe.experts.232.w3", "model.layers.17.block_sparse_moe.experts.233.w3", "model.layers.17.block_sparse_moe.experts.234.w3", "model.layers.17.block_sparse_moe.experts.235.w3", "model.layers.17.block_sparse_moe.experts.236.w3", "model.layers.17.block_sparse_moe.experts.237.w3", "model.layers.17.block_sparse_moe.experts.238.w3", "model.layers.17.block_sparse_moe.experts.239.w3", "model.layers.17.block_sparse_moe.experts.240.w3", "model.layers.17.block_sparse_moe.experts.241.w3", "model.layers.17.block_sparse_moe.experts.242.w3", "model.layers.17.block_sparse_moe.experts.243.w3", "model.layers.17.block_sparse_moe.experts.244.w3", "model.layers.17.block_sparse_moe.experts.245.w3", "model.layers.17.block_sparse_moe.experts.246.w3", "model.layers.17.block_sparse_moe.experts.247.w3", "model.layers.17.block_sparse_moe.experts.248.w3", "model.layers.17.block_sparse_moe.experts.249.w3", "model.layers.17.block_sparse_moe.experts.250.w3", "model.layers.17.block_sparse_moe.experts.251.w3", "model.layers.17.block_sparse_moe.experts.252.w3", "model.layers.17.block_sparse_moe.experts.253.w3", "model.layers.17.block_sparse_moe.experts.254.w3", "model.layers.17.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00026360154151916504, "dbits": 2415919104 } ] }, { "idx": 89, "layers": [ "model.layers.17.block_sparse_moe.experts.0.w2", "model.layers.17.block_sparse_moe.experts.1.w2", "model.layers.17.block_sparse_moe.experts.2.w2", "model.layers.17.block_sparse_moe.experts.3.w2", "model.layers.17.block_sparse_moe.experts.4.w2", "model.layers.17.block_sparse_moe.experts.5.w2", "model.layers.17.block_sparse_moe.experts.6.w2", "model.layers.17.block_sparse_moe.experts.7.w2", "model.layers.17.block_sparse_moe.experts.8.w2", "model.layers.17.block_sparse_moe.experts.9.w2", "model.layers.17.block_sparse_moe.experts.10.w2", "model.layers.17.block_sparse_moe.experts.11.w2", "model.layers.17.block_sparse_moe.experts.12.w2", "model.layers.17.block_sparse_moe.experts.13.w2", "model.layers.17.block_sparse_moe.experts.14.w2", "model.layers.17.block_sparse_moe.experts.15.w2", "model.layers.17.block_sparse_moe.experts.16.w2", "model.layers.17.block_sparse_moe.experts.17.w2", "model.layers.17.block_sparse_moe.experts.18.w2", "model.layers.17.block_sparse_moe.experts.19.w2", "model.layers.17.block_sparse_moe.experts.20.w2", "model.layers.17.block_sparse_moe.experts.21.w2", "model.layers.17.block_sparse_moe.experts.22.w2", "model.layers.17.block_sparse_moe.experts.23.w2", "model.layers.17.block_sparse_moe.experts.24.w2", "model.layers.17.block_sparse_moe.experts.25.w2", "model.layers.17.block_sparse_moe.experts.26.w2", "model.layers.17.block_sparse_moe.experts.27.w2", "model.layers.17.block_sparse_moe.experts.28.w2", "model.layers.17.block_sparse_moe.experts.29.w2", "model.layers.17.block_sparse_moe.experts.30.w2", "model.layers.17.block_sparse_moe.experts.31.w2", "model.layers.17.block_sparse_moe.experts.32.w2", "model.layers.17.block_sparse_moe.experts.33.w2", "model.layers.17.block_sparse_moe.experts.34.w2", "model.layers.17.block_sparse_moe.experts.35.w2", "model.layers.17.block_sparse_moe.experts.36.w2", "model.layers.17.block_sparse_moe.experts.37.w2", "model.layers.17.block_sparse_moe.experts.38.w2", "model.layers.17.block_sparse_moe.experts.39.w2", "model.layers.17.block_sparse_moe.experts.40.w2", "model.layers.17.block_sparse_moe.experts.41.w2", "model.layers.17.block_sparse_moe.experts.42.w2", "model.layers.17.block_sparse_moe.experts.43.w2", "model.layers.17.block_sparse_moe.experts.44.w2", "model.layers.17.block_sparse_moe.experts.45.w2", "model.layers.17.block_sparse_moe.experts.46.w2", "model.layers.17.block_sparse_moe.experts.47.w2", "model.layers.17.block_sparse_moe.experts.48.w2", "model.layers.17.block_sparse_moe.experts.49.w2", "model.layers.17.block_sparse_moe.experts.50.w2", "model.layers.17.block_sparse_moe.experts.51.w2", "model.layers.17.block_sparse_moe.experts.52.w2", "model.layers.17.block_sparse_moe.experts.53.w2", "model.layers.17.block_sparse_moe.experts.54.w2", "model.layers.17.block_sparse_moe.experts.55.w2", "model.layers.17.block_sparse_moe.experts.56.w2", "model.layers.17.block_sparse_moe.experts.57.w2", "model.layers.17.block_sparse_moe.experts.58.w2", "model.layers.17.block_sparse_moe.experts.59.w2", "model.layers.17.block_sparse_moe.experts.60.w2", "model.layers.17.block_sparse_moe.experts.61.w2", "model.layers.17.block_sparse_moe.experts.62.w2", "model.layers.17.block_sparse_moe.experts.63.w2", "model.layers.17.block_sparse_moe.experts.64.w2", "model.layers.17.block_sparse_moe.experts.65.w2", "model.layers.17.block_sparse_moe.experts.66.w2", "model.layers.17.block_sparse_moe.experts.67.w2", "model.layers.17.block_sparse_moe.experts.68.w2", "model.layers.17.block_sparse_moe.experts.69.w2", "model.layers.17.block_sparse_moe.experts.70.w2", "model.layers.17.block_sparse_moe.experts.71.w2", "model.layers.17.block_sparse_moe.experts.72.w2", "model.layers.17.block_sparse_moe.experts.73.w2", "model.layers.17.block_sparse_moe.experts.74.w2", "model.layers.17.block_sparse_moe.experts.75.w2", "model.layers.17.block_sparse_moe.experts.76.w2", "model.layers.17.block_sparse_moe.experts.77.w2", "model.layers.17.block_sparse_moe.experts.78.w2", "model.layers.17.block_sparse_moe.experts.79.w2", "model.layers.17.block_sparse_moe.experts.80.w2", "model.layers.17.block_sparse_moe.experts.81.w2", "model.layers.17.block_sparse_moe.experts.82.w2", "model.layers.17.block_sparse_moe.experts.83.w2", "model.layers.17.block_sparse_moe.experts.84.w2", "model.layers.17.block_sparse_moe.experts.85.w2", "model.layers.17.block_sparse_moe.experts.86.w2", "model.layers.17.block_sparse_moe.experts.87.w2", "model.layers.17.block_sparse_moe.experts.88.w2", "model.layers.17.block_sparse_moe.experts.89.w2", "model.layers.17.block_sparse_moe.experts.90.w2", "model.layers.17.block_sparse_moe.experts.91.w2", "model.layers.17.block_sparse_moe.experts.92.w2", "model.layers.17.block_sparse_moe.experts.93.w2", "model.layers.17.block_sparse_moe.experts.94.w2", "model.layers.17.block_sparse_moe.experts.95.w2", "model.layers.17.block_sparse_moe.experts.96.w2", "model.layers.17.block_sparse_moe.experts.97.w2", "model.layers.17.block_sparse_moe.experts.98.w2", "model.layers.17.block_sparse_moe.experts.99.w2", "model.layers.17.block_sparse_moe.experts.100.w2", "model.layers.17.block_sparse_moe.experts.101.w2", "model.layers.17.block_sparse_moe.experts.102.w2", "model.layers.17.block_sparse_moe.experts.103.w2", "model.layers.17.block_sparse_moe.experts.104.w2", "model.layers.17.block_sparse_moe.experts.105.w2", "model.layers.17.block_sparse_moe.experts.106.w2", "model.layers.17.block_sparse_moe.experts.107.w2", "model.layers.17.block_sparse_moe.experts.108.w2", "model.layers.17.block_sparse_moe.experts.109.w2", "model.layers.17.block_sparse_moe.experts.110.w2", "model.layers.17.block_sparse_moe.experts.111.w2", "model.layers.17.block_sparse_moe.experts.112.w2", "model.layers.17.block_sparse_moe.experts.113.w2", "model.layers.17.block_sparse_moe.experts.114.w2", "model.layers.17.block_sparse_moe.experts.115.w2", "model.layers.17.block_sparse_moe.experts.116.w2", "model.layers.17.block_sparse_moe.experts.117.w2", "model.layers.17.block_sparse_moe.experts.118.w2", "model.layers.17.block_sparse_moe.experts.119.w2", "model.layers.17.block_sparse_moe.experts.120.w2", "model.layers.17.block_sparse_moe.experts.121.w2", "model.layers.17.block_sparse_moe.experts.122.w2", "model.layers.17.block_sparse_moe.experts.123.w2", "model.layers.17.block_sparse_moe.experts.124.w2", "model.layers.17.block_sparse_moe.experts.125.w2", "model.layers.17.block_sparse_moe.experts.126.w2", "model.layers.17.block_sparse_moe.experts.127.w2", "model.layers.17.block_sparse_moe.experts.128.w2", "model.layers.17.block_sparse_moe.experts.129.w2", "model.layers.17.block_sparse_moe.experts.130.w2", "model.layers.17.block_sparse_moe.experts.131.w2", "model.layers.17.block_sparse_moe.experts.132.w2", "model.layers.17.block_sparse_moe.experts.133.w2", "model.layers.17.block_sparse_moe.experts.134.w2", "model.layers.17.block_sparse_moe.experts.135.w2", "model.layers.17.block_sparse_moe.experts.136.w2", "model.layers.17.block_sparse_moe.experts.137.w2", "model.layers.17.block_sparse_moe.experts.138.w2", "model.layers.17.block_sparse_moe.experts.139.w2", "model.layers.17.block_sparse_moe.experts.140.w2", "model.layers.17.block_sparse_moe.experts.141.w2", "model.layers.17.block_sparse_moe.experts.142.w2", "model.layers.17.block_sparse_moe.experts.143.w2", "model.layers.17.block_sparse_moe.experts.144.w2", "model.layers.17.block_sparse_moe.experts.145.w2", "model.layers.17.block_sparse_moe.experts.146.w2", "model.layers.17.block_sparse_moe.experts.147.w2", "model.layers.17.block_sparse_moe.experts.148.w2", "model.layers.17.block_sparse_moe.experts.149.w2", "model.layers.17.block_sparse_moe.experts.150.w2", "model.layers.17.block_sparse_moe.experts.151.w2", "model.layers.17.block_sparse_moe.experts.152.w2", "model.layers.17.block_sparse_moe.experts.153.w2", "model.layers.17.block_sparse_moe.experts.154.w2", "model.layers.17.block_sparse_moe.experts.155.w2", "model.layers.17.block_sparse_moe.experts.156.w2", "model.layers.17.block_sparse_moe.experts.157.w2", "model.layers.17.block_sparse_moe.experts.158.w2", "model.layers.17.block_sparse_moe.experts.159.w2", "model.layers.17.block_sparse_moe.experts.160.w2", "model.layers.17.block_sparse_moe.experts.161.w2", "model.layers.17.block_sparse_moe.experts.162.w2", "model.layers.17.block_sparse_moe.experts.163.w2", "model.layers.17.block_sparse_moe.experts.164.w2", "model.layers.17.block_sparse_moe.experts.165.w2", "model.layers.17.block_sparse_moe.experts.166.w2", "model.layers.17.block_sparse_moe.experts.167.w2", "model.layers.17.block_sparse_moe.experts.168.w2", "model.layers.17.block_sparse_moe.experts.169.w2", "model.layers.17.block_sparse_moe.experts.170.w2", "model.layers.17.block_sparse_moe.experts.171.w2", "model.layers.17.block_sparse_moe.experts.172.w2", "model.layers.17.block_sparse_moe.experts.173.w2", "model.layers.17.block_sparse_moe.experts.174.w2", "model.layers.17.block_sparse_moe.experts.175.w2", "model.layers.17.block_sparse_moe.experts.176.w2", "model.layers.17.block_sparse_moe.experts.177.w2", "model.layers.17.block_sparse_moe.experts.178.w2", "model.layers.17.block_sparse_moe.experts.179.w2", "model.layers.17.block_sparse_moe.experts.180.w2", "model.layers.17.block_sparse_moe.experts.181.w2", "model.layers.17.block_sparse_moe.experts.182.w2", "model.layers.17.block_sparse_moe.experts.183.w2", "model.layers.17.block_sparse_moe.experts.184.w2", "model.layers.17.block_sparse_moe.experts.185.w2", "model.layers.17.block_sparse_moe.experts.186.w2", "model.layers.17.block_sparse_moe.experts.187.w2", "model.layers.17.block_sparse_moe.experts.188.w2", "model.layers.17.block_sparse_moe.experts.189.w2", "model.layers.17.block_sparse_moe.experts.190.w2", "model.layers.17.block_sparse_moe.experts.191.w2", "model.layers.17.block_sparse_moe.experts.192.w2", "model.layers.17.block_sparse_moe.experts.193.w2", "model.layers.17.block_sparse_moe.experts.194.w2", "model.layers.17.block_sparse_moe.experts.195.w2", "model.layers.17.block_sparse_moe.experts.196.w2", "model.layers.17.block_sparse_moe.experts.197.w2", "model.layers.17.block_sparse_moe.experts.198.w2", "model.layers.17.block_sparse_moe.experts.199.w2", "model.layers.17.block_sparse_moe.experts.200.w2", "model.layers.17.block_sparse_moe.experts.201.w2", "model.layers.17.block_sparse_moe.experts.202.w2", "model.layers.17.block_sparse_moe.experts.203.w2", "model.layers.17.block_sparse_moe.experts.204.w2", "model.layers.17.block_sparse_moe.experts.205.w2", "model.layers.17.block_sparse_moe.experts.206.w2", "model.layers.17.block_sparse_moe.experts.207.w2", "model.layers.17.block_sparse_moe.experts.208.w2", "model.layers.17.block_sparse_moe.experts.209.w2", "model.layers.17.block_sparse_moe.experts.210.w2", "model.layers.17.block_sparse_moe.experts.211.w2", "model.layers.17.block_sparse_moe.experts.212.w2", "model.layers.17.block_sparse_moe.experts.213.w2", "model.layers.17.block_sparse_moe.experts.214.w2", "model.layers.17.block_sparse_moe.experts.215.w2", "model.layers.17.block_sparse_moe.experts.216.w2", "model.layers.17.block_sparse_moe.experts.217.w2", "model.layers.17.block_sparse_moe.experts.218.w2", "model.layers.17.block_sparse_moe.experts.219.w2", "model.layers.17.block_sparse_moe.experts.220.w2", "model.layers.17.block_sparse_moe.experts.221.w2", "model.layers.17.block_sparse_moe.experts.222.w2", "model.layers.17.block_sparse_moe.experts.223.w2", "model.layers.17.block_sparse_moe.experts.224.w2", "model.layers.17.block_sparse_moe.experts.225.w2", "model.layers.17.block_sparse_moe.experts.226.w2", "model.layers.17.block_sparse_moe.experts.227.w2", "model.layers.17.block_sparse_moe.experts.228.w2", "model.layers.17.block_sparse_moe.experts.229.w2", "model.layers.17.block_sparse_moe.experts.230.w2", "model.layers.17.block_sparse_moe.experts.231.w2", "model.layers.17.block_sparse_moe.experts.232.w2", "model.layers.17.block_sparse_moe.experts.233.w2", "model.layers.17.block_sparse_moe.experts.234.w2", "model.layers.17.block_sparse_moe.experts.235.w2", "model.layers.17.block_sparse_moe.experts.236.w2", "model.layers.17.block_sparse_moe.experts.237.w2", "model.layers.17.block_sparse_moe.experts.238.w2", "model.layers.17.block_sparse_moe.experts.239.w2", "model.layers.17.block_sparse_moe.experts.240.w2", "model.layers.17.block_sparse_moe.experts.241.w2", "model.layers.17.block_sparse_moe.experts.242.w2", "model.layers.17.block_sparse_moe.experts.243.w2", "model.layers.17.block_sparse_moe.experts.244.w2", "model.layers.17.block_sparse_moe.experts.245.w2", "model.layers.17.block_sparse_moe.experts.246.w2", "model.layers.17.block_sparse_moe.experts.247.w2", "model.layers.17.block_sparse_moe.experts.248.w2", "model.layers.17.block_sparse_moe.experts.249.w2", "model.layers.17.block_sparse_moe.experts.250.w2", "model.layers.17.block_sparse_moe.experts.251.w2", "model.layers.17.block_sparse_moe.experts.252.w2", "model.layers.17.block_sparse_moe.experts.253.w2", "model.layers.17.block_sparse_moe.experts.254.w2", "model.layers.17.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0011320739984511663, "dbits": 1207959552 } ] }, { "idx": 90, "layers": [ "model.layers.18.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0013271778821946079, "dbits": 18874368 } ] }, { "idx": 91, "layers": [ "model.layers.18.self_attn.k_proj", "model.layers.18.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00283597111701972, "dbits": 6291456 } ] }, { "idx": 92, "layers": [ "model.layers.18.self_attn.o_proj" ], "candidates": [ { "dkld": 0.002864733338356018, "dbits": 18874368 } ] }, { "idx": 93, "layers": [ "model.layers.18.block_sparse_moe.experts.0.w1", "model.layers.18.block_sparse_moe.experts.1.w1", "model.layers.18.block_sparse_moe.experts.2.w1", "model.layers.18.block_sparse_moe.experts.3.w1", "model.layers.18.block_sparse_moe.experts.4.w1", "model.layers.18.block_sparse_moe.experts.5.w1", "model.layers.18.block_sparse_moe.experts.6.w1", "model.layers.18.block_sparse_moe.experts.7.w1", "model.layers.18.block_sparse_moe.experts.8.w1", "model.layers.18.block_sparse_moe.experts.9.w1", "model.layers.18.block_sparse_moe.experts.10.w1", "model.layers.18.block_sparse_moe.experts.11.w1", "model.layers.18.block_sparse_moe.experts.12.w1", "model.layers.18.block_sparse_moe.experts.13.w1", "model.layers.18.block_sparse_moe.experts.14.w1", "model.layers.18.block_sparse_moe.experts.15.w1", "model.layers.18.block_sparse_moe.experts.16.w1", "model.layers.18.block_sparse_moe.experts.17.w1", "model.layers.18.block_sparse_moe.experts.18.w1", "model.layers.18.block_sparse_moe.experts.19.w1", "model.layers.18.block_sparse_moe.experts.20.w1", "model.layers.18.block_sparse_moe.experts.21.w1", "model.layers.18.block_sparse_moe.experts.22.w1", "model.layers.18.block_sparse_moe.experts.23.w1", "model.layers.18.block_sparse_moe.experts.24.w1", "model.layers.18.block_sparse_moe.experts.25.w1", "model.layers.18.block_sparse_moe.experts.26.w1", "model.layers.18.block_sparse_moe.experts.27.w1", "model.layers.18.block_sparse_moe.experts.28.w1", "model.layers.18.block_sparse_moe.experts.29.w1", "model.layers.18.block_sparse_moe.experts.30.w1", "model.layers.18.block_sparse_moe.experts.31.w1", "model.layers.18.block_sparse_moe.experts.32.w1", "model.layers.18.block_sparse_moe.experts.33.w1", "model.layers.18.block_sparse_moe.experts.34.w1", "model.layers.18.block_sparse_moe.experts.35.w1", "model.layers.18.block_sparse_moe.experts.36.w1", "model.layers.18.block_sparse_moe.experts.37.w1", "model.layers.18.block_sparse_moe.experts.38.w1", "model.layers.18.block_sparse_moe.experts.39.w1", "model.layers.18.block_sparse_moe.experts.40.w1", "model.layers.18.block_sparse_moe.experts.41.w1", "model.layers.18.block_sparse_moe.experts.42.w1", "model.layers.18.block_sparse_moe.experts.43.w1", "model.layers.18.block_sparse_moe.experts.44.w1", "model.layers.18.block_sparse_moe.experts.45.w1", "model.layers.18.block_sparse_moe.experts.46.w1", "model.layers.18.block_sparse_moe.experts.47.w1", "model.layers.18.block_sparse_moe.experts.48.w1", "model.layers.18.block_sparse_moe.experts.49.w1", "model.layers.18.block_sparse_moe.experts.50.w1", "model.layers.18.block_sparse_moe.experts.51.w1", "model.layers.18.block_sparse_moe.experts.52.w1", "model.layers.18.block_sparse_moe.experts.53.w1", "model.layers.18.block_sparse_moe.experts.54.w1", "model.layers.18.block_sparse_moe.experts.55.w1", "model.layers.18.block_sparse_moe.experts.56.w1", "model.layers.18.block_sparse_moe.experts.57.w1", "model.layers.18.block_sparse_moe.experts.58.w1", "model.layers.18.block_sparse_moe.experts.59.w1", "model.layers.18.block_sparse_moe.experts.60.w1", "model.layers.18.block_sparse_moe.experts.61.w1", "model.layers.18.block_sparse_moe.experts.62.w1", "model.layers.18.block_sparse_moe.experts.63.w1", "model.layers.18.block_sparse_moe.experts.64.w1", "model.layers.18.block_sparse_moe.experts.65.w1", "model.layers.18.block_sparse_moe.experts.66.w1", "model.layers.18.block_sparse_moe.experts.67.w1", "model.layers.18.block_sparse_moe.experts.68.w1", "model.layers.18.block_sparse_moe.experts.69.w1", "model.layers.18.block_sparse_moe.experts.70.w1", "model.layers.18.block_sparse_moe.experts.71.w1", "model.layers.18.block_sparse_moe.experts.72.w1", "model.layers.18.block_sparse_moe.experts.73.w1", "model.layers.18.block_sparse_moe.experts.74.w1", "model.layers.18.block_sparse_moe.experts.75.w1", "model.layers.18.block_sparse_moe.experts.76.w1", "model.layers.18.block_sparse_moe.experts.77.w1", "model.layers.18.block_sparse_moe.experts.78.w1", "model.layers.18.block_sparse_moe.experts.79.w1", "model.layers.18.block_sparse_moe.experts.80.w1", "model.layers.18.block_sparse_moe.experts.81.w1", "model.layers.18.block_sparse_moe.experts.82.w1", "model.layers.18.block_sparse_moe.experts.83.w1", "model.layers.18.block_sparse_moe.experts.84.w1", "model.layers.18.block_sparse_moe.experts.85.w1", "model.layers.18.block_sparse_moe.experts.86.w1", "model.layers.18.block_sparse_moe.experts.87.w1", "model.layers.18.block_sparse_moe.experts.88.w1", "model.layers.18.block_sparse_moe.experts.89.w1", "model.layers.18.block_sparse_moe.experts.90.w1", "model.layers.18.block_sparse_moe.experts.91.w1", "model.layers.18.block_sparse_moe.experts.92.w1", "model.layers.18.block_sparse_moe.experts.93.w1", "model.layers.18.block_sparse_moe.experts.94.w1", "model.layers.18.block_sparse_moe.experts.95.w1", "model.layers.18.block_sparse_moe.experts.96.w1", "model.layers.18.block_sparse_moe.experts.97.w1", "model.layers.18.block_sparse_moe.experts.98.w1", "model.layers.18.block_sparse_moe.experts.99.w1", "model.layers.18.block_sparse_moe.experts.100.w1", "model.layers.18.block_sparse_moe.experts.101.w1", "model.layers.18.block_sparse_moe.experts.102.w1", "model.layers.18.block_sparse_moe.experts.103.w1", "model.layers.18.block_sparse_moe.experts.104.w1", "model.layers.18.block_sparse_moe.experts.105.w1", "model.layers.18.block_sparse_moe.experts.106.w1", "model.layers.18.block_sparse_moe.experts.107.w1", "model.layers.18.block_sparse_moe.experts.108.w1", "model.layers.18.block_sparse_moe.experts.109.w1", "model.layers.18.block_sparse_moe.experts.110.w1", "model.layers.18.block_sparse_moe.experts.111.w1", "model.layers.18.block_sparse_moe.experts.112.w1", "model.layers.18.block_sparse_moe.experts.113.w1", "model.layers.18.block_sparse_moe.experts.114.w1", "model.layers.18.block_sparse_moe.experts.115.w1", "model.layers.18.block_sparse_moe.experts.116.w1", "model.layers.18.block_sparse_moe.experts.117.w1", "model.layers.18.block_sparse_moe.experts.118.w1", "model.layers.18.block_sparse_moe.experts.119.w1", "model.layers.18.block_sparse_moe.experts.120.w1", "model.layers.18.block_sparse_moe.experts.121.w1", "model.layers.18.block_sparse_moe.experts.122.w1", "model.layers.18.block_sparse_moe.experts.123.w1", "model.layers.18.block_sparse_moe.experts.124.w1", "model.layers.18.block_sparse_moe.experts.125.w1", "model.layers.18.block_sparse_moe.experts.126.w1", "model.layers.18.block_sparse_moe.experts.127.w1", "model.layers.18.block_sparse_moe.experts.128.w1", "model.layers.18.block_sparse_moe.experts.129.w1", "model.layers.18.block_sparse_moe.experts.130.w1", "model.layers.18.block_sparse_moe.experts.131.w1", "model.layers.18.block_sparse_moe.experts.132.w1", "model.layers.18.block_sparse_moe.experts.133.w1", "model.layers.18.block_sparse_moe.experts.134.w1", "model.layers.18.block_sparse_moe.experts.135.w1", "model.layers.18.block_sparse_moe.experts.136.w1", "model.layers.18.block_sparse_moe.experts.137.w1", "model.layers.18.block_sparse_moe.experts.138.w1", "model.layers.18.block_sparse_moe.experts.139.w1", "model.layers.18.block_sparse_moe.experts.140.w1", "model.layers.18.block_sparse_moe.experts.141.w1", "model.layers.18.block_sparse_moe.experts.142.w1", "model.layers.18.block_sparse_moe.experts.143.w1", "model.layers.18.block_sparse_moe.experts.144.w1", "model.layers.18.block_sparse_moe.experts.145.w1", "model.layers.18.block_sparse_moe.experts.146.w1", "model.layers.18.block_sparse_moe.experts.147.w1", "model.layers.18.block_sparse_moe.experts.148.w1", "model.layers.18.block_sparse_moe.experts.149.w1", "model.layers.18.block_sparse_moe.experts.150.w1", "model.layers.18.block_sparse_moe.experts.151.w1", "model.layers.18.block_sparse_moe.experts.152.w1", "model.layers.18.block_sparse_moe.experts.153.w1", "model.layers.18.block_sparse_moe.experts.154.w1", "model.layers.18.block_sparse_moe.experts.155.w1", "model.layers.18.block_sparse_moe.experts.156.w1", "model.layers.18.block_sparse_moe.experts.157.w1", "model.layers.18.block_sparse_moe.experts.158.w1", "model.layers.18.block_sparse_moe.experts.159.w1", "model.layers.18.block_sparse_moe.experts.160.w1", "model.layers.18.block_sparse_moe.experts.161.w1", "model.layers.18.block_sparse_moe.experts.162.w1", "model.layers.18.block_sparse_moe.experts.163.w1", "model.layers.18.block_sparse_moe.experts.164.w1", "model.layers.18.block_sparse_moe.experts.165.w1", "model.layers.18.block_sparse_moe.experts.166.w1", "model.layers.18.block_sparse_moe.experts.167.w1", "model.layers.18.block_sparse_moe.experts.168.w1", "model.layers.18.block_sparse_moe.experts.169.w1", "model.layers.18.block_sparse_moe.experts.170.w1", "model.layers.18.block_sparse_moe.experts.171.w1", "model.layers.18.block_sparse_moe.experts.172.w1", "model.layers.18.block_sparse_moe.experts.173.w1", "model.layers.18.block_sparse_moe.experts.174.w1", "model.layers.18.block_sparse_moe.experts.175.w1", "model.layers.18.block_sparse_moe.experts.176.w1", "model.layers.18.block_sparse_moe.experts.177.w1", "model.layers.18.block_sparse_moe.experts.178.w1", "model.layers.18.block_sparse_moe.experts.179.w1", "model.layers.18.block_sparse_moe.experts.180.w1", "model.layers.18.block_sparse_moe.experts.181.w1", "model.layers.18.block_sparse_moe.experts.182.w1", "model.layers.18.block_sparse_moe.experts.183.w1", "model.layers.18.block_sparse_moe.experts.184.w1", "model.layers.18.block_sparse_moe.experts.185.w1", "model.layers.18.block_sparse_moe.experts.186.w1", "model.layers.18.block_sparse_moe.experts.187.w1", "model.layers.18.block_sparse_moe.experts.188.w1", "model.layers.18.block_sparse_moe.experts.189.w1", "model.layers.18.block_sparse_moe.experts.190.w1", "model.layers.18.block_sparse_moe.experts.191.w1", "model.layers.18.block_sparse_moe.experts.192.w1", "model.layers.18.block_sparse_moe.experts.193.w1", "model.layers.18.block_sparse_moe.experts.194.w1", "model.layers.18.block_sparse_moe.experts.195.w1", "model.layers.18.block_sparse_moe.experts.196.w1", "model.layers.18.block_sparse_moe.experts.197.w1", "model.layers.18.block_sparse_moe.experts.198.w1", "model.layers.18.block_sparse_moe.experts.199.w1", "model.layers.18.block_sparse_moe.experts.200.w1", "model.layers.18.block_sparse_moe.experts.201.w1", "model.layers.18.block_sparse_moe.experts.202.w1", "model.layers.18.block_sparse_moe.experts.203.w1", "model.layers.18.block_sparse_moe.experts.204.w1", "model.layers.18.block_sparse_moe.experts.205.w1", "model.layers.18.block_sparse_moe.experts.206.w1", "model.layers.18.block_sparse_moe.experts.207.w1", "model.layers.18.block_sparse_moe.experts.208.w1", "model.layers.18.block_sparse_moe.experts.209.w1", "model.layers.18.block_sparse_moe.experts.210.w1", "model.layers.18.block_sparse_moe.experts.211.w1", "model.layers.18.block_sparse_moe.experts.212.w1", "model.layers.18.block_sparse_moe.experts.213.w1", "model.layers.18.block_sparse_moe.experts.214.w1", "model.layers.18.block_sparse_moe.experts.215.w1", "model.layers.18.block_sparse_moe.experts.216.w1", "model.layers.18.block_sparse_moe.experts.217.w1", "model.layers.18.block_sparse_moe.experts.218.w1", "model.layers.18.block_sparse_moe.experts.219.w1", "model.layers.18.block_sparse_moe.experts.220.w1", "model.layers.18.block_sparse_moe.experts.221.w1", "model.layers.18.block_sparse_moe.experts.222.w1", "model.layers.18.block_sparse_moe.experts.223.w1", "model.layers.18.block_sparse_moe.experts.224.w1", "model.layers.18.block_sparse_moe.experts.225.w1", "model.layers.18.block_sparse_moe.experts.226.w1", "model.layers.18.block_sparse_moe.experts.227.w1", "model.layers.18.block_sparse_moe.experts.228.w1", "model.layers.18.block_sparse_moe.experts.229.w1", "model.layers.18.block_sparse_moe.experts.230.w1", "model.layers.18.block_sparse_moe.experts.231.w1", "model.layers.18.block_sparse_moe.experts.232.w1", "model.layers.18.block_sparse_moe.experts.233.w1", "model.layers.18.block_sparse_moe.experts.234.w1", "model.layers.18.block_sparse_moe.experts.235.w1", "model.layers.18.block_sparse_moe.experts.236.w1", "model.layers.18.block_sparse_moe.experts.237.w1", "model.layers.18.block_sparse_moe.experts.238.w1", "model.layers.18.block_sparse_moe.experts.239.w1", "model.layers.18.block_sparse_moe.experts.240.w1", "model.layers.18.block_sparse_moe.experts.241.w1", "model.layers.18.block_sparse_moe.experts.242.w1", "model.layers.18.block_sparse_moe.experts.243.w1", "model.layers.18.block_sparse_moe.experts.244.w1", "model.layers.18.block_sparse_moe.experts.245.w1", "model.layers.18.block_sparse_moe.experts.246.w1", "model.layers.18.block_sparse_moe.experts.247.w1", "model.layers.18.block_sparse_moe.experts.248.w1", "model.layers.18.block_sparse_moe.experts.249.w1", "model.layers.18.block_sparse_moe.experts.250.w1", "model.layers.18.block_sparse_moe.experts.251.w1", "model.layers.18.block_sparse_moe.experts.252.w1", "model.layers.18.block_sparse_moe.experts.253.w1", "model.layers.18.block_sparse_moe.experts.254.w1", "model.layers.18.block_sparse_moe.experts.255.w1", "model.layers.18.block_sparse_moe.experts.0.w3", "model.layers.18.block_sparse_moe.experts.1.w3", "model.layers.18.block_sparse_moe.experts.2.w3", "model.layers.18.block_sparse_moe.experts.3.w3", "model.layers.18.block_sparse_moe.experts.4.w3", "model.layers.18.block_sparse_moe.experts.5.w3", "model.layers.18.block_sparse_moe.experts.6.w3", "model.layers.18.block_sparse_moe.experts.7.w3", "model.layers.18.block_sparse_moe.experts.8.w3", "model.layers.18.block_sparse_moe.experts.9.w3", "model.layers.18.block_sparse_moe.experts.10.w3", "model.layers.18.block_sparse_moe.experts.11.w3", "model.layers.18.block_sparse_moe.experts.12.w3", "model.layers.18.block_sparse_moe.experts.13.w3", "model.layers.18.block_sparse_moe.experts.14.w3", "model.layers.18.block_sparse_moe.experts.15.w3", "model.layers.18.block_sparse_moe.experts.16.w3", "model.layers.18.block_sparse_moe.experts.17.w3", "model.layers.18.block_sparse_moe.experts.18.w3", "model.layers.18.block_sparse_moe.experts.19.w3", "model.layers.18.block_sparse_moe.experts.20.w3", "model.layers.18.block_sparse_moe.experts.21.w3", "model.layers.18.block_sparse_moe.experts.22.w3", "model.layers.18.block_sparse_moe.experts.23.w3", "model.layers.18.block_sparse_moe.experts.24.w3", "model.layers.18.block_sparse_moe.experts.25.w3", "model.layers.18.block_sparse_moe.experts.26.w3", "model.layers.18.block_sparse_moe.experts.27.w3", "model.layers.18.block_sparse_moe.experts.28.w3", "model.layers.18.block_sparse_moe.experts.29.w3", "model.layers.18.block_sparse_moe.experts.30.w3", "model.layers.18.block_sparse_moe.experts.31.w3", "model.layers.18.block_sparse_moe.experts.32.w3", "model.layers.18.block_sparse_moe.experts.33.w3", "model.layers.18.block_sparse_moe.experts.34.w3", "model.layers.18.block_sparse_moe.experts.35.w3", "model.layers.18.block_sparse_moe.experts.36.w3", "model.layers.18.block_sparse_moe.experts.37.w3", "model.layers.18.block_sparse_moe.experts.38.w3", "model.layers.18.block_sparse_moe.experts.39.w3", "model.layers.18.block_sparse_moe.experts.40.w3", "model.layers.18.block_sparse_moe.experts.41.w3", "model.layers.18.block_sparse_moe.experts.42.w3", "model.layers.18.block_sparse_moe.experts.43.w3", "model.layers.18.block_sparse_moe.experts.44.w3", "model.layers.18.block_sparse_moe.experts.45.w3", "model.layers.18.block_sparse_moe.experts.46.w3", "model.layers.18.block_sparse_moe.experts.47.w3", "model.layers.18.block_sparse_moe.experts.48.w3", "model.layers.18.block_sparse_moe.experts.49.w3", "model.layers.18.block_sparse_moe.experts.50.w3", "model.layers.18.block_sparse_moe.experts.51.w3", "model.layers.18.block_sparse_moe.experts.52.w3", "model.layers.18.block_sparse_moe.experts.53.w3", "model.layers.18.block_sparse_moe.experts.54.w3", "model.layers.18.block_sparse_moe.experts.55.w3", "model.layers.18.block_sparse_moe.experts.56.w3", "model.layers.18.block_sparse_moe.experts.57.w3", "model.layers.18.block_sparse_moe.experts.58.w3", "model.layers.18.block_sparse_moe.experts.59.w3", "model.layers.18.block_sparse_moe.experts.60.w3", "model.layers.18.block_sparse_moe.experts.61.w3", "model.layers.18.block_sparse_moe.experts.62.w3", "model.layers.18.block_sparse_moe.experts.63.w3", "model.layers.18.block_sparse_moe.experts.64.w3", "model.layers.18.block_sparse_moe.experts.65.w3", "model.layers.18.block_sparse_moe.experts.66.w3", "model.layers.18.block_sparse_moe.experts.67.w3", "model.layers.18.block_sparse_moe.experts.68.w3", "model.layers.18.block_sparse_moe.experts.69.w3", "model.layers.18.block_sparse_moe.experts.70.w3", "model.layers.18.block_sparse_moe.experts.71.w3", "model.layers.18.block_sparse_moe.experts.72.w3", "model.layers.18.block_sparse_moe.experts.73.w3", "model.layers.18.block_sparse_moe.experts.74.w3", "model.layers.18.block_sparse_moe.experts.75.w3", "model.layers.18.block_sparse_moe.experts.76.w3", "model.layers.18.block_sparse_moe.experts.77.w3", "model.layers.18.block_sparse_moe.experts.78.w3", "model.layers.18.block_sparse_moe.experts.79.w3", "model.layers.18.block_sparse_moe.experts.80.w3", "model.layers.18.block_sparse_moe.experts.81.w3", "model.layers.18.block_sparse_moe.experts.82.w3", "model.layers.18.block_sparse_moe.experts.83.w3", "model.layers.18.block_sparse_moe.experts.84.w3", "model.layers.18.block_sparse_moe.experts.85.w3", "model.layers.18.block_sparse_moe.experts.86.w3", "model.layers.18.block_sparse_moe.experts.87.w3", "model.layers.18.block_sparse_moe.experts.88.w3", "model.layers.18.block_sparse_moe.experts.89.w3", "model.layers.18.block_sparse_moe.experts.90.w3", "model.layers.18.block_sparse_moe.experts.91.w3", "model.layers.18.block_sparse_moe.experts.92.w3", "model.layers.18.block_sparse_moe.experts.93.w3", "model.layers.18.block_sparse_moe.experts.94.w3", "model.layers.18.block_sparse_moe.experts.95.w3", "model.layers.18.block_sparse_moe.experts.96.w3", "model.layers.18.block_sparse_moe.experts.97.w3", "model.layers.18.block_sparse_moe.experts.98.w3", "model.layers.18.block_sparse_moe.experts.99.w3", "model.layers.18.block_sparse_moe.experts.100.w3", "model.layers.18.block_sparse_moe.experts.101.w3", "model.layers.18.block_sparse_moe.experts.102.w3", "model.layers.18.block_sparse_moe.experts.103.w3", "model.layers.18.block_sparse_moe.experts.104.w3", "model.layers.18.block_sparse_moe.experts.105.w3", "model.layers.18.block_sparse_moe.experts.106.w3", "model.layers.18.block_sparse_moe.experts.107.w3", "model.layers.18.block_sparse_moe.experts.108.w3", "model.layers.18.block_sparse_moe.experts.109.w3", "model.layers.18.block_sparse_moe.experts.110.w3", "model.layers.18.block_sparse_moe.experts.111.w3", "model.layers.18.block_sparse_moe.experts.112.w3", "model.layers.18.block_sparse_moe.experts.113.w3", "model.layers.18.block_sparse_moe.experts.114.w3", "model.layers.18.block_sparse_moe.experts.115.w3", "model.layers.18.block_sparse_moe.experts.116.w3", "model.layers.18.block_sparse_moe.experts.117.w3", "model.layers.18.block_sparse_moe.experts.118.w3", "model.layers.18.block_sparse_moe.experts.119.w3", "model.layers.18.block_sparse_moe.experts.120.w3", "model.layers.18.block_sparse_moe.experts.121.w3", "model.layers.18.block_sparse_moe.experts.122.w3", "model.layers.18.block_sparse_moe.experts.123.w3", "model.layers.18.block_sparse_moe.experts.124.w3", "model.layers.18.block_sparse_moe.experts.125.w3", "model.layers.18.block_sparse_moe.experts.126.w3", "model.layers.18.block_sparse_moe.experts.127.w3", "model.layers.18.block_sparse_moe.experts.128.w3", "model.layers.18.block_sparse_moe.experts.129.w3", "model.layers.18.block_sparse_moe.experts.130.w3", "model.layers.18.block_sparse_moe.experts.131.w3", "model.layers.18.block_sparse_moe.experts.132.w3", "model.layers.18.block_sparse_moe.experts.133.w3", "model.layers.18.block_sparse_moe.experts.134.w3", "model.layers.18.block_sparse_moe.experts.135.w3", "model.layers.18.block_sparse_moe.experts.136.w3", "model.layers.18.block_sparse_moe.experts.137.w3", "model.layers.18.block_sparse_moe.experts.138.w3", "model.layers.18.block_sparse_moe.experts.139.w3", "model.layers.18.block_sparse_moe.experts.140.w3", "model.layers.18.block_sparse_moe.experts.141.w3", "model.layers.18.block_sparse_moe.experts.142.w3", "model.layers.18.block_sparse_moe.experts.143.w3", "model.layers.18.block_sparse_moe.experts.144.w3", "model.layers.18.block_sparse_moe.experts.145.w3", "model.layers.18.block_sparse_moe.experts.146.w3", "model.layers.18.block_sparse_moe.experts.147.w3", "model.layers.18.block_sparse_moe.experts.148.w3", "model.layers.18.block_sparse_moe.experts.149.w3", "model.layers.18.block_sparse_moe.experts.150.w3", "model.layers.18.block_sparse_moe.experts.151.w3", "model.layers.18.block_sparse_moe.experts.152.w3", "model.layers.18.block_sparse_moe.experts.153.w3", "model.layers.18.block_sparse_moe.experts.154.w3", "model.layers.18.block_sparse_moe.experts.155.w3", "model.layers.18.block_sparse_moe.experts.156.w3", "model.layers.18.block_sparse_moe.experts.157.w3", "model.layers.18.block_sparse_moe.experts.158.w3", "model.layers.18.block_sparse_moe.experts.159.w3", "model.layers.18.block_sparse_moe.experts.160.w3", "model.layers.18.block_sparse_moe.experts.161.w3", "model.layers.18.block_sparse_moe.experts.162.w3", "model.layers.18.block_sparse_moe.experts.163.w3", "model.layers.18.block_sparse_moe.experts.164.w3", "model.layers.18.block_sparse_moe.experts.165.w3", "model.layers.18.block_sparse_moe.experts.166.w3", "model.layers.18.block_sparse_moe.experts.167.w3", "model.layers.18.block_sparse_moe.experts.168.w3", "model.layers.18.block_sparse_moe.experts.169.w3", "model.layers.18.block_sparse_moe.experts.170.w3", "model.layers.18.block_sparse_moe.experts.171.w3", "model.layers.18.block_sparse_moe.experts.172.w3", "model.layers.18.block_sparse_moe.experts.173.w3", "model.layers.18.block_sparse_moe.experts.174.w3", "model.layers.18.block_sparse_moe.experts.175.w3", "model.layers.18.block_sparse_moe.experts.176.w3", "model.layers.18.block_sparse_moe.experts.177.w3", "model.layers.18.block_sparse_moe.experts.178.w3", "model.layers.18.block_sparse_moe.experts.179.w3", "model.layers.18.block_sparse_moe.experts.180.w3", "model.layers.18.block_sparse_moe.experts.181.w3", "model.layers.18.block_sparse_moe.experts.182.w3", "model.layers.18.block_sparse_moe.experts.183.w3", "model.layers.18.block_sparse_moe.experts.184.w3", "model.layers.18.block_sparse_moe.experts.185.w3", "model.layers.18.block_sparse_moe.experts.186.w3", "model.layers.18.block_sparse_moe.experts.187.w3", "model.layers.18.block_sparse_moe.experts.188.w3", "model.layers.18.block_sparse_moe.experts.189.w3", "model.layers.18.block_sparse_moe.experts.190.w3", "model.layers.18.block_sparse_moe.experts.191.w3", "model.layers.18.block_sparse_moe.experts.192.w3", "model.layers.18.block_sparse_moe.experts.193.w3", "model.layers.18.block_sparse_moe.experts.194.w3", "model.layers.18.block_sparse_moe.experts.195.w3", "model.layers.18.block_sparse_moe.experts.196.w3", "model.layers.18.block_sparse_moe.experts.197.w3", "model.layers.18.block_sparse_moe.experts.198.w3", "model.layers.18.block_sparse_moe.experts.199.w3", "model.layers.18.block_sparse_moe.experts.200.w3", "model.layers.18.block_sparse_moe.experts.201.w3", "model.layers.18.block_sparse_moe.experts.202.w3", "model.layers.18.block_sparse_moe.experts.203.w3", "model.layers.18.block_sparse_moe.experts.204.w3", "model.layers.18.block_sparse_moe.experts.205.w3", "model.layers.18.block_sparse_moe.experts.206.w3", "model.layers.18.block_sparse_moe.experts.207.w3", "model.layers.18.block_sparse_moe.experts.208.w3", "model.layers.18.block_sparse_moe.experts.209.w3", "model.layers.18.block_sparse_moe.experts.210.w3", "model.layers.18.block_sparse_moe.experts.211.w3", "model.layers.18.block_sparse_moe.experts.212.w3", "model.layers.18.block_sparse_moe.experts.213.w3", "model.layers.18.block_sparse_moe.experts.214.w3", "model.layers.18.block_sparse_moe.experts.215.w3", "model.layers.18.block_sparse_moe.experts.216.w3", "model.layers.18.block_sparse_moe.experts.217.w3", "model.layers.18.block_sparse_moe.experts.218.w3", "model.layers.18.block_sparse_moe.experts.219.w3", "model.layers.18.block_sparse_moe.experts.220.w3", "model.layers.18.block_sparse_moe.experts.221.w3", "model.layers.18.block_sparse_moe.experts.222.w3", "model.layers.18.block_sparse_moe.experts.223.w3", "model.layers.18.block_sparse_moe.experts.224.w3", "model.layers.18.block_sparse_moe.experts.225.w3", "model.layers.18.block_sparse_moe.experts.226.w3", "model.layers.18.block_sparse_moe.experts.227.w3", "model.layers.18.block_sparse_moe.experts.228.w3", "model.layers.18.block_sparse_moe.experts.229.w3", "model.layers.18.block_sparse_moe.experts.230.w3", "model.layers.18.block_sparse_moe.experts.231.w3", "model.layers.18.block_sparse_moe.experts.232.w3", "model.layers.18.block_sparse_moe.experts.233.w3", "model.layers.18.block_sparse_moe.experts.234.w3", "model.layers.18.block_sparse_moe.experts.235.w3", "model.layers.18.block_sparse_moe.experts.236.w3", "model.layers.18.block_sparse_moe.experts.237.w3", "model.layers.18.block_sparse_moe.experts.238.w3", "model.layers.18.block_sparse_moe.experts.239.w3", "model.layers.18.block_sparse_moe.experts.240.w3", "model.layers.18.block_sparse_moe.experts.241.w3", "model.layers.18.block_sparse_moe.experts.242.w3", "model.layers.18.block_sparse_moe.experts.243.w3", "model.layers.18.block_sparse_moe.experts.244.w3", "model.layers.18.block_sparse_moe.experts.245.w3", "model.layers.18.block_sparse_moe.experts.246.w3", "model.layers.18.block_sparse_moe.experts.247.w3", "model.layers.18.block_sparse_moe.experts.248.w3", "model.layers.18.block_sparse_moe.experts.249.w3", "model.layers.18.block_sparse_moe.experts.250.w3", "model.layers.18.block_sparse_moe.experts.251.w3", "model.layers.18.block_sparse_moe.experts.252.w3", "model.layers.18.block_sparse_moe.experts.253.w3", "model.layers.18.block_sparse_moe.experts.254.w3", "model.layers.18.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.001528900861740068, "dbits": 2415919104 } ] }, { "idx": 94, "layers": [ "model.layers.18.block_sparse_moe.experts.0.w2", "model.layers.18.block_sparse_moe.experts.1.w2", "model.layers.18.block_sparse_moe.experts.2.w2", "model.layers.18.block_sparse_moe.experts.3.w2", "model.layers.18.block_sparse_moe.experts.4.w2", "model.layers.18.block_sparse_moe.experts.5.w2", "model.layers.18.block_sparse_moe.experts.6.w2", "model.layers.18.block_sparse_moe.experts.7.w2", "model.layers.18.block_sparse_moe.experts.8.w2", "model.layers.18.block_sparse_moe.experts.9.w2", "model.layers.18.block_sparse_moe.experts.10.w2", "model.layers.18.block_sparse_moe.experts.11.w2", "model.layers.18.block_sparse_moe.experts.12.w2", "model.layers.18.block_sparse_moe.experts.13.w2", "model.layers.18.block_sparse_moe.experts.14.w2", "model.layers.18.block_sparse_moe.experts.15.w2", "model.layers.18.block_sparse_moe.experts.16.w2", "model.layers.18.block_sparse_moe.experts.17.w2", "model.layers.18.block_sparse_moe.experts.18.w2", "model.layers.18.block_sparse_moe.experts.19.w2", "model.layers.18.block_sparse_moe.experts.20.w2", "model.layers.18.block_sparse_moe.experts.21.w2", "model.layers.18.block_sparse_moe.experts.22.w2", "model.layers.18.block_sparse_moe.experts.23.w2", "model.layers.18.block_sparse_moe.experts.24.w2", "model.layers.18.block_sparse_moe.experts.25.w2", "model.layers.18.block_sparse_moe.experts.26.w2", "model.layers.18.block_sparse_moe.experts.27.w2", "model.layers.18.block_sparse_moe.experts.28.w2", "model.layers.18.block_sparse_moe.experts.29.w2", "model.layers.18.block_sparse_moe.experts.30.w2", "model.layers.18.block_sparse_moe.experts.31.w2", "model.layers.18.block_sparse_moe.experts.32.w2", "model.layers.18.block_sparse_moe.experts.33.w2", "model.layers.18.block_sparse_moe.experts.34.w2", "model.layers.18.block_sparse_moe.experts.35.w2", "model.layers.18.block_sparse_moe.experts.36.w2", "model.layers.18.block_sparse_moe.experts.37.w2", "model.layers.18.block_sparse_moe.experts.38.w2", "model.layers.18.block_sparse_moe.experts.39.w2", "model.layers.18.block_sparse_moe.experts.40.w2", "model.layers.18.block_sparse_moe.experts.41.w2", "model.layers.18.block_sparse_moe.experts.42.w2", "model.layers.18.block_sparse_moe.experts.43.w2", "model.layers.18.block_sparse_moe.experts.44.w2", "model.layers.18.block_sparse_moe.experts.45.w2", "model.layers.18.block_sparse_moe.experts.46.w2", "model.layers.18.block_sparse_moe.experts.47.w2", "model.layers.18.block_sparse_moe.experts.48.w2", "model.layers.18.block_sparse_moe.experts.49.w2", "model.layers.18.block_sparse_moe.experts.50.w2", "model.layers.18.block_sparse_moe.experts.51.w2", "model.layers.18.block_sparse_moe.experts.52.w2", "model.layers.18.block_sparse_moe.experts.53.w2", "model.layers.18.block_sparse_moe.experts.54.w2", "model.layers.18.block_sparse_moe.experts.55.w2", "model.layers.18.block_sparse_moe.experts.56.w2", "model.layers.18.block_sparse_moe.experts.57.w2", "model.layers.18.block_sparse_moe.experts.58.w2", "model.layers.18.block_sparse_moe.experts.59.w2", "model.layers.18.block_sparse_moe.experts.60.w2", "model.layers.18.block_sparse_moe.experts.61.w2", "model.layers.18.block_sparse_moe.experts.62.w2", "model.layers.18.block_sparse_moe.experts.63.w2", "model.layers.18.block_sparse_moe.experts.64.w2", "model.layers.18.block_sparse_moe.experts.65.w2", "model.layers.18.block_sparse_moe.experts.66.w2", "model.layers.18.block_sparse_moe.experts.67.w2", "model.layers.18.block_sparse_moe.experts.68.w2", "model.layers.18.block_sparse_moe.experts.69.w2", "model.layers.18.block_sparse_moe.experts.70.w2", "model.layers.18.block_sparse_moe.experts.71.w2", "model.layers.18.block_sparse_moe.experts.72.w2", "model.layers.18.block_sparse_moe.experts.73.w2", "model.layers.18.block_sparse_moe.experts.74.w2", "model.layers.18.block_sparse_moe.experts.75.w2", "model.layers.18.block_sparse_moe.experts.76.w2", "model.layers.18.block_sparse_moe.experts.77.w2", "model.layers.18.block_sparse_moe.experts.78.w2", "model.layers.18.block_sparse_moe.experts.79.w2", "model.layers.18.block_sparse_moe.experts.80.w2", "model.layers.18.block_sparse_moe.experts.81.w2", "model.layers.18.block_sparse_moe.experts.82.w2", "model.layers.18.block_sparse_moe.experts.83.w2", "model.layers.18.block_sparse_moe.experts.84.w2", "model.layers.18.block_sparse_moe.experts.85.w2", "model.layers.18.block_sparse_moe.experts.86.w2", "model.layers.18.block_sparse_moe.experts.87.w2", "model.layers.18.block_sparse_moe.experts.88.w2", "model.layers.18.block_sparse_moe.experts.89.w2", "model.layers.18.block_sparse_moe.experts.90.w2", "model.layers.18.block_sparse_moe.experts.91.w2", "model.layers.18.block_sparse_moe.experts.92.w2", "model.layers.18.block_sparse_moe.experts.93.w2", "model.layers.18.block_sparse_moe.experts.94.w2", "model.layers.18.block_sparse_moe.experts.95.w2", "model.layers.18.block_sparse_moe.experts.96.w2", "model.layers.18.block_sparse_moe.experts.97.w2", "model.layers.18.block_sparse_moe.experts.98.w2", "model.layers.18.block_sparse_moe.experts.99.w2", "model.layers.18.block_sparse_moe.experts.100.w2", "model.layers.18.block_sparse_moe.experts.101.w2", "model.layers.18.block_sparse_moe.experts.102.w2", "model.layers.18.block_sparse_moe.experts.103.w2", "model.layers.18.block_sparse_moe.experts.104.w2", "model.layers.18.block_sparse_moe.experts.105.w2", "model.layers.18.block_sparse_moe.experts.106.w2", "model.layers.18.block_sparse_moe.experts.107.w2", "model.layers.18.block_sparse_moe.experts.108.w2", "model.layers.18.block_sparse_moe.experts.109.w2", "model.layers.18.block_sparse_moe.experts.110.w2", "model.layers.18.block_sparse_moe.experts.111.w2", "model.layers.18.block_sparse_moe.experts.112.w2", "model.layers.18.block_sparse_moe.experts.113.w2", "model.layers.18.block_sparse_moe.experts.114.w2", "model.layers.18.block_sparse_moe.experts.115.w2", "model.layers.18.block_sparse_moe.experts.116.w2", "model.layers.18.block_sparse_moe.experts.117.w2", "model.layers.18.block_sparse_moe.experts.118.w2", "model.layers.18.block_sparse_moe.experts.119.w2", "model.layers.18.block_sparse_moe.experts.120.w2", "model.layers.18.block_sparse_moe.experts.121.w2", "model.layers.18.block_sparse_moe.experts.122.w2", "model.layers.18.block_sparse_moe.experts.123.w2", "model.layers.18.block_sparse_moe.experts.124.w2", "model.layers.18.block_sparse_moe.experts.125.w2", "model.layers.18.block_sparse_moe.experts.126.w2", "model.layers.18.block_sparse_moe.experts.127.w2", "model.layers.18.block_sparse_moe.experts.128.w2", "model.layers.18.block_sparse_moe.experts.129.w2", "model.layers.18.block_sparse_moe.experts.130.w2", "model.layers.18.block_sparse_moe.experts.131.w2", "model.layers.18.block_sparse_moe.experts.132.w2", "model.layers.18.block_sparse_moe.experts.133.w2", "model.layers.18.block_sparse_moe.experts.134.w2", "model.layers.18.block_sparse_moe.experts.135.w2", "model.layers.18.block_sparse_moe.experts.136.w2", "model.layers.18.block_sparse_moe.experts.137.w2", "model.layers.18.block_sparse_moe.experts.138.w2", "model.layers.18.block_sparse_moe.experts.139.w2", "model.layers.18.block_sparse_moe.experts.140.w2", "model.layers.18.block_sparse_moe.experts.141.w2", "model.layers.18.block_sparse_moe.experts.142.w2", "model.layers.18.block_sparse_moe.experts.143.w2", "model.layers.18.block_sparse_moe.experts.144.w2", "model.layers.18.block_sparse_moe.experts.145.w2", "model.layers.18.block_sparse_moe.experts.146.w2", "model.layers.18.block_sparse_moe.experts.147.w2", "model.layers.18.block_sparse_moe.experts.148.w2", "model.layers.18.block_sparse_moe.experts.149.w2", "model.layers.18.block_sparse_moe.experts.150.w2", "model.layers.18.block_sparse_moe.experts.151.w2", "model.layers.18.block_sparse_moe.experts.152.w2", "model.layers.18.block_sparse_moe.experts.153.w2", "model.layers.18.block_sparse_moe.experts.154.w2", "model.layers.18.block_sparse_moe.experts.155.w2", "model.layers.18.block_sparse_moe.experts.156.w2", "model.layers.18.block_sparse_moe.experts.157.w2", "model.layers.18.block_sparse_moe.experts.158.w2", "model.layers.18.block_sparse_moe.experts.159.w2", "model.layers.18.block_sparse_moe.experts.160.w2", "model.layers.18.block_sparse_moe.experts.161.w2", "model.layers.18.block_sparse_moe.experts.162.w2", "model.layers.18.block_sparse_moe.experts.163.w2", "model.layers.18.block_sparse_moe.experts.164.w2", "model.layers.18.block_sparse_moe.experts.165.w2", "model.layers.18.block_sparse_moe.experts.166.w2", "model.layers.18.block_sparse_moe.experts.167.w2", "model.layers.18.block_sparse_moe.experts.168.w2", "model.layers.18.block_sparse_moe.experts.169.w2", "model.layers.18.block_sparse_moe.experts.170.w2", "model.layers.18.block_sparse_moe.experts.171.w2", "model.layers.18.block_sparse_moe.experts.172.w2", "model.layers.18.block_sparse_moe.experts.173.w2", "model.layers.18.block_sparse_moe.experts.174.w2", "model.layers.18.block_sparse_moe.experts.175.w2", "model.layers.18.block_sparse_moe.experts.176.w2", "model.layers.18.block_sparse_moe.experts.177.w2", "model.layers.18.block_sparse_moe.experts.178.w2", "model.layers.18.block_sparse_moe.experts.179.w2", "model.layers.18.block_sparse_moe.experts.180.w2", "model.layers.18.block_sparse_moe.experts.181.w2", "model.layers.18.block_sparse_moe.experts.182.w2", "model.layers.18.block_sparse_moe.experts.183.w2", "model.layers.18.block_sparse_moe.experts.184.w2", "model.layers.18.block_sparse_moe.experts.185.w2", "model.layers.18.block_sparse_moe.experts.186.w2", "model.layers.18.block_sparse_moe.experts.187.w2", "model.layers.18.block_sparse_moe.experts.188.w2", "model.layers.18.block_sparse_moe.experts.189.w2", "model.layers.18.block_sparse_moe.experts.190.w2", "model.layers.18.block_sparse_moe.experts.191.w2", "model.layers.18.block_sparse_moe.experts.192.w2", "model.layers.18.block_sparse_moe.experts.193.w2", "model.layers.18.block_sparse_moe.experts.194.w2", "model.layers.18.block_sparse_moe.experts.195.w2", "model.layers.18.block_sparse_moe.experts.196.w2", "model.layers.18.block_sparse_moe.experts.197.w2", "model.layers.18.block_sparse_moe.experts.198.w2", "model.layers.18.block_sparse_moe.experts.199.w2", "model.layers.18.block_sparse_moe.experts.200.w2", "model.layers.18.block_sparse_moe.experts.201.w2", "model.layers.18.block_sparse_moe.experts.202.w2", "model.layers.18.block_sparse_moe.experts.203.w2", "model.layers.18.block_sparse_moe.experts.204.w2", "model.layers.18.block_sparse_moe.experts.205.w2", "model.layers.18.block_sparse_moe.experts.206.w2", "model.layers.18.block_sparse_moe.experts.207.w2", "model.layers.18.block_sparse_moe.experts.208.w2", "model.layers.18.block_sparse_moe.experts.209.w2", "model.layers.18.block_sparse_moe.experts.210.w2", "model.layers.18.block_sparse_moe.experts.211.w2", "model.layers.18.block_sparse_moe.experts.212.w2", "model.layers.18.block_sparse_moe.experts.213.w2", "model.layers.18.block_sparse_moe.experts.214.w2", "model.layers.18.block_sparse_moe.experts.215.w2", "model.layers.18.block_sparse_moe.experts.216.w2", "model.layers.18.block_sparse_moe.experts.217.w2", "model.layers.18.block_sparse_moe.experts.218.w2", "model.layers.18.block_sparse_moe.experts.219.w2", "model.layers.18.block_sparse_moe.experts.220.w2", "model.layers.18.block_sparse_moe.experts.221.w2", "model.layers.18.block_sparse_moe.experts.222.w2", "model.layers.18.block_sparse_moe.experts.223.w2", "model.layers.18.block_sparse_moe.experts.224.w2", "model.layers.18.block_sparse_moe.experts.225.w2", "model.layers.18.block_sparse_moe.experts.226.w2", "model.layers.18.block_sparse_moe.experts.227.w2", "model.layers.18.block_sparse_moe.experts.228.w2", "model.layers.18.block_sparse_moe.experts.229.w2", "model.layers.18.block_sparse_moe.experts.230.w2", "model.layers.18.block_sparse_moe.experts.231.w2", "model.layers.18.block_sparse_moe.experts.232.w2", "model.layers.18.block_sparse_moe.experts.233.w2", "model.layers.18.block_sparse_moe.experts.234.w2", "model.layers.18.block_sparse_moe.experts.235.w2", "model.layers.18.block_sparse_moe.experts.236.w2", "model.layers.18.block_sparse_moe.experts.237.w2", "model.layers.18.block_sparse_moe.experts.238.w2", "model.layers.18.block_sparse_moe.experts.239.w2", "model.layers.18.block_sparse_moe.experts.240.w2", "model.layers.18.block_sparse_moe.experts.241.w2", "model.layers.18.block_sparse_moe.experts.242.w2", "model.layers.18.block_sparse_moe.experts.243.w2", "model.layers.18.block_sparse_moe.experts.244.w2", "model.layers.18.block_sparse_moe.experts.245.w2", "model.layers.18.block_sparse_moe.experts.246.w2", "model.layers.18.block_sparse_moe.experts.247.w2", "model.layers.18.block_sparse_moe.experts.248.w2", "model.layers.18.block_sparse_moe.experts.249.w2", "model.layers.18.block_sparse_moe.experts.250.w2", "model.layers.18.block_sparse_moe.experts.251.w2", "model.layers.18.block_sparse_moe.experts.252.w2", "model.layers.18.block_sparse_moe.experts.253.w2", "model.layers.18.block_sparse_moe.experts.254.w2", "model.layers.18.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0003660947084426214, "dbits": 1207959552 } ] }, { "idx": 95, "layers": [ "model.layers.19.self_attn.q_proj" ], "candidates": [ { "dkld": 0.002102652192115806, "dbits": 18874368 } ] }, { "idx": 96, "layers": [ "model.layers.19.self_attn.k_proj", "model.layers.19.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00183787643909461, "dbits": 6291456 } ] }, { "idx": 97, "layers": [ "model.layers.19.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00418356657028196, "dbits": 18874368 } ] }, { "idx": 98, "layers": [ "model.layers.19.block_sparse_moe.experts.0.w1", "model.layers.19.block_sparse_moe.experts.1.w1", "model.layers.19.block_sparse_moe.experts.2.w1", "model.layers.19.block_sparse_moe.experts.3.w1", "model.layers.19.block_sparse_moe.experts.4.w1", "model.layers.19.block_sparse_moe.experts.5.w1", "model.layers.19.block_sparse_moe.experts.6.w1", "model.layers.19.block_sparse_moe.experts.7.w1", "model.layers.19.block_sparse_moe.experts.8.w1", "model.layers.19.block_sparse_moe.experts.9.w1", "model.layers.19.block_sparse_moe.experts.10.w1", "model.layers.19.block_sparse_moe.experts.11.w1", "model.layers.19.block_sparse_moe.experts.12.w1", "model.layers.19.block_sparse_moe.experts.13.w1", "model.layers.19.block_sparse_moe.experts.14.w1", "model.layers.19.block_sparse_moe.experts.15.w1", "model.layers.19.block_sparse_moe.experts.16.w1", "model.layers.19.block_sparse_moe.experts.17.w1", "model.layers.19.block_sparse_moe.experts.18.w1", "model.layers.19.block_sparse_moe.experts.19.w1", "model.layers.19.block_sparse_moe.experts.20.w1", "model.layers.19.block_sparse_moe.experts.21.w1", "model.layers.19.block_sparse_moe.experts.22.w1", "model.layers.19.block_sparse_moe.experts.23.w1", "model.layers.19.block_sparse_moe.experts.24.w1", "model.layers.19.block_sparse_moe.experts.25.w1", "model.layers.19.block_sparse_moe.experts.26.w1", "model.layers.19.block_sparse_moe.experts.27.w1", "model.layers.19.block_sparse_moe.experts.28.w1", "model.layers.19.block_sparse_moe.experts.29.w1", "model.layers.19.block_sparse_moe.experts.30.w1", "model.layers.19.block_sparse_moe.experts.31.w1", "model.layers.19.block_sparse_moe.experts.32.w1", "model.layers.19.block_sparse_moe.experts.33.w1", "model.layers.19.block_sparse_moe.experts.34.w1", "model.layers.19.block_sparse_moe.experts.35.w1", "model.layers.19.block_sparse_moe.experts.36.w1", "model.layers.19.block_sparse_moe.experts.37.w1", "model.layers.19.block_sparse_moe.experts.38.w1", "model.layers.19.block_sparse_moe.experts.39.w1", "model.layers.19.block_sparse_moe.experts.40.w1", "model.layers.19.block_sparse_moe.experts.41.w1", "model.layers.19.block_sparse_moe.experts.42.w1", "model.layers.19.block_sparse_moe.experts.43.w1", "model.layers.19.block_sparse_moe.experts.44.w1", "model.layers.19.block_sparse_moe.experts.45.w1", "model.layers.19.block_sparse_moe.experts.46.w1", "model.layers.19.block_sparse_moe.experts.47.w1", "model.layers.19.block_sparse_moe.experts.48.w1", "model.layers.19.block_sparse_moe.experts.49.w1", "model.layers.19.block_sparse_moe.experts.50.w1", "model.layers.19.block_sparse_moe.experts.51.w1", "model.layers.19.block_sparse_moe.experts.52.w1", "model.layers.19.block_sparse_moe.experts.53.w1", "model.layers.19.block_sparse_moe.experts.54.w1", "model.layers.19.block_sparse_moe.experts.55.w1", "model.layers.19.block_sparse_moe.experts.56.w1", "model.layers.19.block_sparse_moe.experts.57.w1", "model.layers.19.block_sparse_moe.experts.58.w1", "model.layers.19.block_sparse_moe.experts.59.w1", "model.layers.19.block_sparse_moe.experts.60.w1", "model.layers.19.block_sparse_moe.experts.61.w1", "model.layers.19.block_sparse_moe.experts.62.w1", "model.layers.19.block_sparse_moe.experts.63.w1", "model.layers.19.block_sparse_moe.experts.64.w1", "model.layers.19.block_sparse_moe.experts.65.w1", "model.layers.19.block_sparse_moe.experts.66.w1", "model.layers.19.block_sparse_moe.experts.67.w1", "model.layers.19.block_sparse_moe.experts.68.w1", "model.layers.19.block_sparse_moe.experts.69.w1", "model.layers.19.block_sparse_moe.experts.70.w1", "model.layers.19.block_sparse_moe.experts.71.w1", "model.layers.19.block_sparse_moe.experts.72.w1", "model.layers.19.block_sparse_moe.experts.73.w1", "model.layers.19.block_sparse_moe.experts.74.w1", "model.layers.19.block_sparse_moe.experts.75.w1", "model.layers.19.block_sparse_moe.experts.76.w1", "model.layers.19.block_sparse_moe.experts.77.w1", "model.layers.19.block_sparse_moe.experts.78.w1", "model.layers.19.block_sparse_moe.experts.79.w1", "model.layers.19.block_sparse_moe.experts.80.w1", "model.layers.19.block_sparse_moe.experts.81.w1", "model.layers.19.block_sparse_moe.experts.82.w1", "model.layers.19.block_sparse_moe.experts.83.w1", "model.layers.19.block_sparse_moe.experts.84.w1", "model.layers.19.block_sparse_moe.experts.85.w1", "model.layers.19.block_sparse_moe.experts.86.w1", "model.layers.19.block_sparse_moe.experts.87.w1", "model.layers.19.block_sparse_moe.experts.88.w1", "model.layers.19.block_sparse_moe.experts.89.w1", "model.layers.19.block_sparse_moe.experts.90.w1", "model.layers.19.block_sparse_moe.experts.91.w1", "model.layers.19.block_sparse_moe.experts.92.w1", "model.layers.19.block_sparse_moe.experts.93.w1", "model.layers.19.block_sparse_moe.experts.94.w1", "model.layers.19.block_sparse_moe.experts.95.w1", "model.layers.19.block_sparse_moe.experts.96.w1", "model.layers.19.block_sparse_moe.experts.97.w1", "model.layers.19.block_sparse_moe.experts.98.w1", "model.layers.19.block_sparse_moe.experts.99.w1", "model.layers.19.block_sparse_moe.experts.100.w1", "model.layers.19.block_sparse_moe.experts.101.w1", "model.layers.19.block_sparse_moe.experts.102.w1", "model.layers.19.block_sparse_moe.experts.103.w1", "model.layers.19.block_sparse_moe.experts.104.w1", "model.layers.19.block_sparse_moe.experts.105.w1", "model.layers.19.block_sparse_moe.experts.106.w1", "model.layers.19.block_sparse_moe.experts.107.w1", "model.layers.19.block_sparse_moe.experts.108.w1", "model.layers.19.block_sparse_moe.experts.109.w1", "model.layers.19.block_sparse_moe.experts.110.w1", "model.layers.19.block_sparse_moe.experts.111.w1", "model.layers.19.block_sparse_moe.experts.112.w1", "model.layers.19.block_sparse_moe.experts.113.w1", "model.layers.19.block_sparse_moe.experts.114.w1", "model.layers.19.block_sparse_moe.experts.115.w1", "model.layers.19.block_sparse_moe.experts.116.w1", "model.layers.19.block_sparse_moe.experts.117.w1", "model.layers.19.block_sparse_moe.experts.118.w1", "model.layers.19.block_sparse_moe.experts.119.w1", "model.layers.19.block_sparse_moe.experts.120.w1", "model.layers.19.block_sparse_moe.experts.121.w1", "model.layers.19.block_sparse_moe.experts.122.w1", "model.layers.19.block_sparse_moe.experts.123.w1", "model.layers.19.block_sparse_moe.experts.124.w1", "model.layers.19.block_sparse_moe.experts.125.w1", "model.layers.19.block_sparse_moe.experts.126.w1", "model.layers.19.block_sparse_moe.experts.127.w1", "model.layers.19.block_sparse_moe.experts.128.w1", "model.layers.19.block_sparse_moe.experts.129.w1", "model.layers.19.block_sparse_moe.experts.130.w1", "model.layers.19.block_sparse_moe.experts.131.w1", "model.layers.19.block_sparse_moe.experts.132.w1", "model.layers.19.block_sparse_moe.experts.133.w1", "model.layers.19.block_sparse_moe.experts.134.w1", "model.layers.19.block_sparse_moe.experts.135.w1", "model.layers.19.block_sparse_moe.experts.136.w1", "model.layers.19.block_sparse_moe.experts.137.w1", "model.layers.19.block_sparse_moe.experts.138.w1", "model.layers.19.block_sparse_moe.experts.139.w1", "model.layers.19.block_sparse_moe.experts.140.w1", "model.layers.19.block_sparse_moe.experts.141.w1", "model.layers.19.block_sparse_moe.experts.142.w1", "model.layers.19.block_sparse_moe.experts.143.w1", "model.layers.19.block_sparse_moe.experts.144.w1", "model.layers.19.block_sparse_moe.experts.145.w1", "model.layers.19.block_sparse_moe.experts.146.w1", "model.layers.19.block_sparse_moe.experts.147.w1", "model.layers.19.block_sparse_moe.experts.148.w1", "model.layers.19.block_sparse_moe.experts.149.w1", "model.layers.19.block_sparse_moe.experts.150.w1", "model.layers.19.block_sparse_moe.experts.151.w1", "model.layers.19.block_sparse_moe.experts.152.w1", "model.layers.19.block_sparse_moe.experts.153.w1", "model.layers.19.block_sparse_moe.experts.154.w1", "model.layers.19.block_sparse_moe.experts.155.w1", "model.layers.19.block_sparse_moe.experts.156.w1", "model.layers.19.block_sparse_moe.experts.157.w1", "model.layers.19.block_sparse_moe.experts.158.w1", "model.layers.19.block_sparse_moe.experts.159.w1", "model.layers.19.block_sparse_moe.experts.160.w1", "model.layers.19.block_sparse_moe.experts.161.w1", "model.layers.19.block_sparse_moe.experts.162.w1", "model.layers.19.block_sparse_moe.experts.163.w1", "model.layers.19.block_sparse_moe.experts.164.w1", "model.layers.19.block_sparse_moe.experts.165.w1", "model.layers.19.block_sparse_moe.experts.166.w1", "model.layers.19.block_sparse_moe.experts.167.w1", "model.layers.19.block_sparse_moe.experts.168.w1", "model.layers.19.block_sparse_moe.experts.169.w1", "model.layers.19.block_sparse_moe.experts.170.w1", "model.layers.19.block_sparse_moe.experts.171.w1", "model.layers.19.block_sparse_moe.experts.172.w1", "model.layers.19.block_sparse_moe.experts.173.w1", "model.layers.19.block_sparse_moe.experts.174.w1", "model.layers.19.block_sparse_moe.experts.175.w1", "model.layers.19.block_sparse_moe.experts.176.w1", "model.layers.19.block_sparse_moe.experts.177.w1", "model.layers.19.block_sparse_moe.experts.178.w1", "model.layers.19.block_sparse_moe.experts.179.w1", "model.layers.19.block_sparse_moe.experts.180.w1", "model.layers.19.block_sparse_moe.experts.181.w1", "model.layers.19.block_sparse_moe.experts.182.w1", "model.layers.19.block_sparse_moe.experts.183.w1", "model.layers.19.block_sparse_moe.experts.184.w1", "model.layers.19.block_sparse_moe.experts.185.w1", "model.layers.19.block_sparse_moe.experts.186.w1", "model.layers.19.block_sparse_moe.experts.187.w1", "model.layers.19.block_sparse_moe.experts.188.w1", "model.layers.19.block_sparse_moe.experts.189.w1", "model.layers.19.block_sparse_moe.experts.190.w1", "model.layers.19.block_sparse_moe.experts.191.w1", "model.layers.19.block_sparse_moe.experts.192.w1", "model.layers.19.block_sparse_moe.experts.193.w1", "model.layers.19.block_sparse_moe.experts.194.w1", "model.layers.19.block_sparse_moe.experts.195.w1", "model.layers.19.block_sparse_moe.experts.196.w1", "model.layers.19.block_sparse_moe.experts.197.w1", "model.layers.19.block_sparse_moe.experts.198.w1", "model.layers.19.block_sparse_moe.experts.199.w1", "model.layers.19.block_sparse_moe.experts.200.w1", "model.layers.19.block_sparse_moe.experts.201.w1", "model.layers.19.block_sparse_moe.experts.202.w1", "model.layers.19.block_sparse_moe.experts.203.w1", "model.layers.19.block_sparse_moe.experts.204.w1", "model.layers.19.block_sparse_moe.experts.205.w1", "model.layers.19.block_sparse_moe.experts.206.w1", "model.layers.19.block_sparse_moe.experts.207.w1", "model.layers.19.block_sparse_moe.experts.208.w1", "model.layers.19.block_sparse_moe.experts.209.w1", "model.layers.19.block_sparse_moe.experts.210.w1", "model.layers.19.block_sparse_moe.experts.211.w1", "model.layers.19.block_sparse_moe.experts.212.w1", "model.layers.19.block_sparse_moe.experts.213.w1", "model.layers.19.block_sparse_moe.experts.214.w1", "model.layers.19.block_sparse_moe.experts.215.w1", "model.layers.19.block_sparse_moe.experts.216.w1", "model.layers.19.block_sparse_moe.experts.217.w1", "model.layers.19.block_sparse_moe.experts.218.w1", "model.layers.19.block_sparse_moe.experts.219.w1", "model.layers.19.block_sparse_moe.experts.220.w1", "model.layers.19.block_sparse_moe.experts.221.w1", "model.layers.19.block_sparse_moe.experts.222.w1", "model.layers.19.block_sparse_moe.experts.223.w1", "model.layers.19.block_sparse_moe.experts.224.w1", "model.layers.19.block_sparse_moe.experts.225.w1", "model.layers.19.block_sparse_moe.experts.226.w1", "model.layers.19.block_sparse_moe.experts.227.w1", "model.layers.19.block_sparse_moe.experts.228.w1", "model.layers.19.block_sparse_moe.experts.229.w1", "model.layers.19.block_sparse_moe.experts.230.w1", "model.layers.19.block_sparse_moe.experts.231.w1", "model.layers.19.block_sparse_moe.experts.232.w1", "model.layers.19.block_sparse_moe.experts.233.w1", "model.layers.19.block_sparse_moe.experts.234.w1", "model.layers.19.block_sparse_moe.experts.235.w1", "model.layers.19.block_sparse_moe.experts.236.w1", "model.layers.19.block_sparse_moe.experts.237.w1", "model.layers.19.block_sparse_moe.experts.238.w1", "model.layers.19.block_sparse_moe.experts.239.w1", "model.layers.19.block_sparse_moe.experts.240.w1", "model.layers.19.block_sparse_moe.experts.241.w1", "model.layers.19.block_sparse_moe.experts.242.w1", "model.layers.19.block_sparse_moe.experts.243.w1", "model.layers.19.block_sparse_moe.experts.244.w1", "model.layers.19.block_sparse_moe.experts.245.w1", "model.layers.19.block_sparse_moe.experts.246.w1", "model.layers.19.block_sparse_moe.experts.247.w1", "model.layers.19.block_sparse_moe.experts.248.w1", "model.layers.19.block_sparse_moe.experts.249.w1", "model.layers.19.block_sparse_moe.experts.250.w1", "model.layers.19.block_sparse_moe.experts.251.w1", "model.layers.19.block_sparse_moe.experts.252.w1", "model.layers.19.block_sparse_moe.experts.253.w1", "model.layers.19.block_sparse_moe.experts.254.w1", "model.layers.19.block_sparse_moe.experts.255.w1", "model.layers.19.block_sparse_moe.experts.0.w3", "model.layers.19.block_sparse_moe.experts.1.w3", "model.layers.19.block_sparse_moe.experts.2.w3", "model.layers.19.block_sparse_moe.experts.3.w3", "model.layers.19.block_sparse_moe.experts.4.w3", "model.layers.19.block_sparse_moe.experts.5.w3", "model.layers.19.block_sparse_moe.experts.6.w3", "model.layers.19.block_sparse_moe.experts.7.w3", "model.layers.19.block_sparse_moe.experts.8.w3", "model.layers.19.block_sparse_moe.experts.9.w3", "model.layers.19.block_sparse_moe.experts.10.w3", "model.layers.19.block_sparse_moe.experts.11.w3", "model.layers.19.block_sparse_moe.experts.12.w3", "model.layers.19.block_sparse_moe.experts.13.w3", "model.layers.19.block_sparse_moe.experts.14.w3", "model.layers.19.block_sparse_moe.experts.15.w3", "model.layers.19.block_sparse_moe.experts.16.w3", "model.layers.19.block_sparse_moe.experts.17.w3", "model.layers.19.block_sparse_moe.experts.18.w3", "model.layers.19.block_sparse_moe.experts.19.w3", "model.layers.19.block_sparse_moe.experts.20.w3", "model.layers.19.block_sparse_moe.experts.21.w3", "model.layers.19.block_sparse_moe.experts.22.w3", "model.layers.19.block_sparse_moe.experts.23.w3", "model.layers.19.block_sparse_moe.experts.24.w3", "model.layers.19.block_sparse_moe.experts.25.w3", "model.layers.19.block_sparse_moe.experts.26.w3", "model.layers.19.block_sparse_moe.experts.27.w3", "model.layers.19.block_sparse_moe.experts.28.w3", "model.layers.19.block_sparse_moe.experts.29.w3", "model.layers.19.block_sparse_moe.experts.30.w3", "model.layers.19.block_sparse_moe.experts.31.w3", "model.layers.19.block_sparse_moe.experts.32.w3", "model.layers.19.block_sparse_moe.experts.33.w3", "model.layers.19.block_sparse_moe.experts.34.w3", "model.layers.19.block_sparse_moe.experts.35.w3", "model.layers.19.block_sparse_moe.experts.36.w3", "model.layers.19.block_sparse_moe.experts.37.w3", "model.layers.19.block_sparse_moe.experts.38.w3", "model.layers.19.block_sparse_moe.experts.39.w3", "model.layers.19.block_sparse_moe.experts.40.w3", "model.layers.19.block_sparse_moe.experts.41.w3", "model.layers.19.block_sparse_moe.experts.42.w3", "model.layers.19.block_sparse_moe.experts.43.w3", "model.layers.19.block_sparse_moe.experts.44.w3", "model.layers.19.block_sparse_moe.experts.45.w3", "model.layers.19.block_sparse_moe.experts.46.w3", "model.layers.19.block_sparse_moe.experts.47.w3", "model.layers.19.block_sparse_moe.experts.48.w3", "model.layers.19.block_sparse_moe.experts.49.w3", "model.layers.19.block_sparse_moe.experts.50.w3", "model.layers.19.block_sparse_moe.experts.51.w3", "model.layers.19.block_sparse_moe.experts.52.w3", "model.layers.19.block_sparse_moe.experts.53.w3", "model.layers.19.block_sparse_moe.experts.54.w3", "model.layers.19.block_sparse_moe.experts.55.w3", "model.layers.19.block_sparse_moe.experts.56.w3", "model.layers.19.block_sparse_moe.experts.57.w3", "model.layers.19.block_sparse_moe.experts.58.w3", "model.layers.19.block_sparse_moe.experts.59.w3", "model.layers.19.block_sparse_moe.experts.60.w3", "model.layers.19.block_sparse_moe.experts.61.w3", "model.layers.19.block_sparse_moe.experts.62.w3", "model.layers.19.block_sparse_moe.experts.63.w3", "model.layers.19.block_sparse_moe.experts.64.w3", "model.layers.19.block_sparse_moe.experts.65.w3", "model.layers.19.block_sparse_moe.experts.66.w3", "model.layers.19.block_sparse_moe.experts.67.w3", "model.layers.19.block_sparse_moe.experts.68.w3", "model.layers.19.block_sparse_moe.experts.69.w3", "model.layers.19.block_sparse_moe.experts.70.w3", "model.layers.19.block_sparse_moe.experts.71.w3", "model.layers.19.block_sparse_moe.experts.72.w3", "model.layers.19.block_sparse_moe.experts.73.w3", "model.layers.19.block_sparse_moe.experts.74.w3", "model.layers.19.block_sparse_moe.experts.75.w3", "model.layers.19.block_sparse_moe.experts.76.w3", "model.layers.19.block_sparse_moe.experts.77.w3", "model.layers.19.block_sparse_moe.experts.78.w3", "model.layers.19.block_sparse_moe.experts.79.w3", "model.layers.19.block_sparse_moe.experts.80.w3", "model.layers.19.block_sparse_moe.experts.81.w3", "model.layers.19.block_sparse_moe.experts.82.w3", "model.layers.19.block_sparse_moe.experts.83.w3", "model.layers.19.block_sparse_moe.experts.84.w3", "model.layers.19.block_sparse_moe.experts.85.w3", "model.layers.19.block_sparse_moe.experts.86.w3", "model.layers.19.block_sparse_moe.experts.87.w3", "model.layers.19.block_sparse_moe.experts.88.w3", "model.layers.19.block_sparse_moe.experts.89.w3", "model.layers.19.block_sparse_moe.experts.90.w3", "model.layers.19.block_sparse_moe.experts.91.w3", "model.layers.19.block_sparse_moe.experts.92.w3", "model.layers.19.block_sparse_moe.experts.93.w3", "model.layers.19.block_sparse_moe.experts.94.w3", "model.layers.19.block_sparse_moe.experts.95.w3", "model.layers.19.block_sparse_moe.experts.96.w3", "model.layers.19.block_sparse_moe.experts.97.w3", "model.layers.19.block_sparse_moe.experts.98.w3", "model.layers.19.block_sparse_moe.experts.99.w3", "model.layers.19.block_sparse_moe.experts.100.w3", "model.layers.19.block_sparse_moe.experts.101.w3", "model.layers.19.block_sparse_moe.experts.102.w3", "model.layers.19.block_sparse_moe.experts.103.w3", "model.layers.19.block_sparse_moe.experts.104.w3", "model.layers.19.block_sparse_moe.experts.105.w3", "model.layers.19.block_sparse_moe.experts.106.w3", "model.layers.19.block_sparse_moe.experts.107.w3", "model.layers.19.block_sparse_moe.experts.108.w3", "model.layers.19.block_sparse_moe.experts.109.w3", "model.layers.19.block_sparse_moe.experts.110.w3", "model.layers.19.block_sparse_moe.experts.111.w3", "model.layers.19.block_sparse_moe.experts.112.w3", "model.layers.19.block_sparse_moe.experts.113.w3", "model.layers.19.block_sparse_moe.experts.114.w3", "model.layers.19.block_sparse_moe.experts.115.w3", "model.layers.19.block_sparse_moe.experts.116.w3", "model.layers.19.block_sparse_moe.experts.117.w3", "model.layers.19.block_sparse_moe.experts.118.w3", "model.layers.19.block_sparse_moe.experts.119.w3", "model.layers.19.block_sparse_moe.experts.120.w3", "model.layers.19.block_sparse_moe.experts.121.w3", "model.layers.19.block_sparse_moe.experts.122.w3", "model.layers.19.block_sparse_moe.experts.123.w3", "model.layers.19.block_sparse_moe.experts.124.w3", "model.layers.19.block_sparse_moe.experts.125.w3", "model.layers.19.block_sparse_moe.experts.126.w3", "model.layers.19.block_sparse_moe.experts.127.w3", "model.layers.19.block_sparse_moe.experts.128.w3", "model.layers.19.block_sparse_moe.experts.129.w3", "model.layers.19.block_sparse_moe.experts.130.w3", "model.layers.19.block_sparse_moe.experts.131.w3", "model.layers.19.block_sparse_moe.experts.132.w3", "model.layers.19.block_sparse_moe.experts.133.w3", "model.layers.19.block_sparse_moe.experts.134.w3", "model.layers.19.block_sparse_moe.experts.135.w3", "model.layers.19.block_sparse_moe.experts.136.w3", "model.layers.19.block_sparse_moe.experts.137.w3", "model.layers.19.block_sparse_moe.experts.138.w3", "model.layers.19.block_sparse_moe.experts.139.w3", "model.layers.19.block_sparse_moe.experts.140.w3", "model.layers.19.block_sparse_moe.experts.141.w3", "model.layers.19.block_sparse_moe.experts.142.w3", "model.layers.19.block_sparse_moe.experts.143.w3", "model.layers.19.block_sparse_moe.experts.144.w3", "model.layers.19.block_sparse_moe.experts.145.w3", "model.layers.19.block_sparse_moe.experts.146.w3", "model.layers.19.block_sparse_moe.experts.147.w3", "model.layers.19.block_sparse_moe.experts.148.w3", "model.layers.19.block_sparse_moe.experts.149.w3", "model.layers.19.block_sparse_moe.experts.150.w3", "model.layers.19.block_sparse_moe.experts.151.w3", "model.layers.19.block_sparse_moe.experts.152.w3", "model.layers.19.block_sparse_moe.experts.153.w3", "model.layers.19.block_sparse_moe.experts.154.w3", "model.layers.19.block_sparse_moe.experts.155.w3", "model.layers.19.block_sparse_moe.experts.156.w3", "model.layers.19.block_sparse_moe.experts.157.w3", "model.layers.19.block_sparse_moe.experts.158.w3", "model.layers.19.block_sparse_moe.experts.159.w3", "model.layers.19.block_sparse_moe.experts.160.w3", "model.layers.19.block_sparse_moe.experts.161.w3", "model.layers.19.block_sparse_moe.experts.162.w3", "model.layers.19.block_sparse_moe.experts.163.w3", "model.layers.19.block_sparse_moe.experts.164.w3", "model.layers.19.block_sparse_moe.experts.165.w3", "model.layers.19.block_sparse_moe.experts.166.w3", "model.layers.19.block_sparse_moe.experts.167.w3", "model.layers.19.block_sparse_moe.experts.168.w3", "model.layers.19.block_sparse_moe.experts.169.w3", "model.layers.19.block_sparse_moe.experts.170.w3", "model.layers.19.block_sparse_moe.experts.171.w3", "model.layers.19.block_sparse_moe.experts.172.w3", "model.layers.19.block_sparse_moe.experts.173.w3", "model.layers.19.block_sparse_moe.experts.174.w3", "model.layers.19.block_sparse_moe.experts.175.w3", "model.layers.19.block_sparse_moe.experts.176.w3", "model.layers.19.block_sparse_moe.experts.177.w3", "model.layers.19.block_sparse_moe.experts.178.w3", "model.layers.19.block_sparse_moe.experts.179.w3", "model.layers.19.block_sparse_moe.experts.180.w3", "model.layers.19.block_sparse_moe.experts.181.w3", "model.layers.19.block_sparse_moe.experts.182.w3", "model.layers.19.block_sparse_moe.experts.183.w3", "model.layers.19.block_sparse_moe.experts.184.w3", "model.layers.19.block_sparse_moe.experts.185.w3", "model.layers.19.block_sparse_moe.experts.186.w3", "model.layers.19.block_sparse_moe.experts.187.w3", "model.layers.19.block_sparse_moe.experts.188.w3", "model.layers.19.block_sparse_moe.experts.189.w3", "model.layers.19.block_sparse_moe.experts.190.w3", "model.layers.19.block_sparse_moe.experts.191.w3", "model.layers.19.block_sparse_moe.experts.192.w3", "model.layers.19.block_sparse_moe.experts.193.w3", "model.layers.19.block_sparse_moe.experts.194.w3", "model.layers.19.block_sparse_moe.experts.195.w3", "model.layers.19.block_sparse_moe.experts.196.w3", "model.layers.19.block_sparse_moe.experts.197.w3", "model.layers.19.block_sparse_moe.experts.198.w3", "model.layers.19.block_sparse_moe.experts.199.w3", "model.layers.19.block_sparse_moe.experts.200.w3", "model.layers.19.block_sparse_moe.experts.201.w3", "model.layers.19.block_sparse_moe.experts.202.w3", "model.layers.19.block_sparse_moe.experts.203.w3", "model.layers.19.block_sparse_moe.experts.204.w3", "model.layers.19.block_sparse_moe.experts.205.w3", "model.layers.19.block_sparse_moe.experts.206.w3", "model.layers.19.block_sparse_moe.experts.207.w3", "model.layers.19.block_sparse_moe.experts.208.w3", "model.layers.19.block_sparse_moe.experts.209.w3", "model.layers.19.block_sparse_moe.experts.210.w3", "model.layers.19.block_sparse_moe.experts.211.w3", "model.layers.19.block_sparse_moe.experts.212.w3", "model.layers.19.block_sparse_moe.experts.213.w3", "model.layers.19.block_sparse_moe.experts.214.w3", "model.layers.19.block_sparse_moe.experts.215.w3", "model.layers.19.block_sparse_moe.experts.216.w3", "model.layers.19.block_sparse_moe.experts.217.w3", "model.layers.19.block_sparse_moe.experts.218.w3", "model.layers.19.block_sparse_moe.experts.219.w3", "model.layers.19.block_sparse_moe.experts.220.w3", "model.layers.19.block_sparse_moe.experts.221.w3", "model.layers.19.block_sparse_moe.experts.222.w3", "model.layers.19.block_sparse_moe.experts.223.w3", "model.layers.19.block_sparse_moe.experts.224.w3", "model.layers.19.block_sparse_moe.experts.225.w3", "model.layers.19.block_sparse_moe.experts.226.w3", "model.layers.19.block_sparse_moe.experts.227.w3", "model.layers.19.block_sparse_moe.experts.228.w3", "model.layers.19.block_sparse_moe.experts.229.w3", "model.layers.19.block_sparse_moe.experts.230.w3", "model.layers.19.block_sparse_moe.experts.231.w3", "model.layers.19.block_sparse_moe.experts.232.w3", "model.layers.19.block_sparse_moe.experts.233.w3", "model.layers.19.block_sparse_moe.experts.234.w3", "model.layers.19.block_sparse_moe.experts.235.w3", "model.layers.19.block_sparse_moe.experts.236.w3", "model.layers.19.block_sparse_moe.experts.237.w3", "model.layers.19.block_sparse_moe.experts.238.w3", "model.layers.19.block_sparse_moe.experts.239.w3", "model.layers.19.block_sparse_moe.experts.240.w3", "model.layers.19.block_sparse_moe.experts.241.w3", "model.layers.19.block_sparse_moe.experts.242.w3", "model.layers.19.block_sparse_moe.experts.243.w3", "model.layers.19.block_sparse_moe.experts.244.w3", "model.layers.19.block_sparse_moe.experts.245.w3", "model.layers.19.block_sparse_moe.experts.246.w3", "model.layers.19.block_sparse_moe.experts.247.w3", "model.layers.19.block_sparse_moe.experts.248.w3", "model.layers.19.block_sparse_moe.experts.249.w3", "model.layers.19.block_sparse_moe.experts.250.w3", "model.layers.19.block_sparse_moe.experts.251.w3", "model.layers.19.block_sparse_moe.experts.252.w3", "model.layers.19.block_sparse_moe.experts.253.w3", "model.layers.19.block_sparse_moe.experts.254.w3", "model.layers.19.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0004091203212737149, "dbits": 2415919104 } ] }, { "idx": 99, "layers": [ "model.layers.19.block_sparse_moe.experts.0.w2", "model.layers.19.block_sparse_moe.experts.1.w2", "model.layers.19.block_sparse_moe.experts.2.w2", "model.layers.19.block_sparse_moe.experts.3.w2", "model.layers.19.block_sparse_moe.experts.4.w2", "model.layers.19.block_sparse_moe.experts.5.w2", "model.layers.19.block_sparse_moe.experts.6.w2", "model.layers.19.block_sparse_moe.experts.7.w2", "model.layers.19.block_sparse_moe.experts.8.w2", "model.layers.19.block_sparse_moe.experts.9.w2", "model.layers.19.block_sparse_moe.experts.10.w2", "model.layers.19.block_sparse_moe.experts.11.w2", "model.layers.19.block_sparse_moe.experts.12.w2", "model.layers.19.block_sparse_moe.experts.13.w2", "model.layers.19.block_sparse_moe.experts.14.w2", "model.layers.19.block_sparse_moe.experts.15.w2", "model.layers.19.block_sparse_moe.experts.16.w2", "model.layers.19.block_sparse_moe.experts.17.w2", "model.layers.19.block_sparse_moe.experts.18.w2", "model.layers.19.block_sparse_moe.experts.19.w2", "model.layers.19.block_sparse_moe.experts.20.w2", "model.layers.19.block_sparse_moe.experts.21.w2", "model.layers.19.block_sparse_moe.experts.22.w2", "model.layers.19.block_sparse_moe.experts.23.w2", "model.layers.19.block_sparse_moe.experts.24.w2", "model.layers.19.block_sparse_moe.experts.25.w2", "model.layers.19.block_sparse_moe.experts.26.w2", "model.layers.19.block_sparse_moe.experts.27.w2", "model.layers.19.block_sparse_moe.experts.28.w2", "model.layers.19.block_sparse_moe.experts.29.w2", "model.layers.19.block_sparse_moe.experts.30.w2", "model.layers.19.block_sparse_moe.experts.31.w2", "model.layers.19.block_sparse_moe.experts.32.w2", "model.layers.19.block_sparse_moe.experts.33.w2", "model.layers.19.block_sparse_moe.experts.34.w2", "model.layers.19.block_sparse_moe.experts.35.w2", "model.layers.19.block_sparse_moe.experts.36.w2", "model.layers.19.block_sparse_moe.experts.37.w2", "model.layers.19.block_sparse_moe.experts.38.w2", "model.layers.19.block_sparse_moe.experts.39.w2", "model.layers.19.block_sparse_moe.experts.40.w2", "model.layers.19.block_sparse_moe.experts.41.w2", "model.layers.19.block_sparse_moe.experts.42.w2", "model.layers.19.block_sparse_moe.experts.43.w2", "model.layers.19.block_sparse_moe.experts.44.w2", "model.layers.19.block_sparse_moe.experts.45.w2", "model.layers.19.block_sparse_moe.experts.46.w2", "model.layers.19.block_sparse_moe.experts.47.w2", "model.layers.19.block_sparse_moe.experts.48.w2", "model.layers.19.block_sparse_moe.experts.49.w2", "model.layers.19.block_sparse_moe.experts.50.w2", "model.layers.19.block_sparse_moe.experts.51.w2", "model.layers.19.block_sparse_moe.experts.52.w2", "model.layers.19.block_sparse_moe.experts.53.w2", "model.layers.19.block_sparse_moe.experts.54.w2", "model.layers.19.block_sparse_moe.experts.55.w2", "model.layers.19.block_sparse_moe.experts.56.w2", "model.layers.19.block_sparse_moe.experts.57.w2", "model.layers.19.block_sparse_moe.experts.58.w2", "model.layers.19.block_sparse_moe.experts.59.w2", "model.layers.19.block_sparse_moe.experts.60.w2", "model.layers.19.block_sparse_moe.experts.61.w2", "model.layers.19.block_sparse_moe.experts.62.w2", "model.layers.19.block_sparse_moe.experts.63.w2", "model.layers.19.block_sparse_moe.experts.64.w2", "model.layers.19.block_sparse_moe.experts.65.w2", "model.layers.19.block_sparse_moe.experts.66.w2", "model.layers.19.block_sparse_moe.experts.67.w2", "model.layers.19.block_sparse_moe.experts.68.w2", "model.layers.19.block_sparse_moe.experts.69.w2", "model.layers.19.block_sparse_moe.experts.70.w2", "model.layers.19.block_sparse_moe.experts.71.w2", "model.layers.19.block_sparse_moe.experts.72.w2", "model.layers.19.block_sparse_moe.experts.73.w2", "model.layers.19.block_sparse_moe.experts.74.w2", "model.layers.19.block_sparse_moe.experts.75.w2", "model.layers.19.block_sparse_moe.experts.76.w2", "model.layers.19.block_sparse_moe.experts.77.w2", "model.layers.19.block_sparse_moe.experts.78.w2", "model.layers.19.block_sparse_moe.experts.79.w2", "model.layers.19.block_sparse_moe.experts.80.w2", "model.layers.19.block_sparse_moe.experts.81.w2", "model.layers.19.block_sparse_moe.experts.82.w2", "model.layers.19.block_sparse_moe.experts.83.w2", "model.layers.19.block_sparse_moe.experts.84.w2", "model.layers.19.block_sparse_moe.experts.85.w2", "model.layers.19.block_sparse_moe.experts.86.w2", "model.layers.19.block_sparse_moe.experts.87.w2", "model.layers.19.block_sparse_moe.experts.88.w2", "model.layers.19.block_sparse_moe.experts.89.w2", "model.layers.19.block_sparse_moe.experts.90.w2", "model.layers.19.block_sparse_moe.experts.91.w2", "model.layers.19.block_sparse_moe.experts.92.w2", "model.layers.19.block_sparse_moe.experts.93.w2", "model.layers.19.block_sparse_moe.experts.94.w2", "model.layers.19.block_sparse_moe.experts.95.w2", "model.layers.19.block_sparse_moe.experts.96.w2", "model.layers.19.block_sparse_moe.experts.97.w2", "model.layers.19.block_sparse_moe.experts.98.w2", "model.layers.19.block_sparse_moe.experts.99.w2", "model.layers.19.block_sparse_moe.experts.100.w2", "model.layers.19.block_sparse_moe.experts.101.w2", "model.layers.19.block_sparse_moe.experts.102.w2", "model.layers.19.block_sparse_moe.experts.103.w2", "model.layers.19.block_sparse_moe.experts.104.w2", "model.layers.19.block_sparse_moe.experts.105.w2", "model.layers.19.block_sparse_moe.experts.106.w2", "model.layers.19.block_sparse_moe.experts.107.w2", "model.layers.19.block_sparse_moe.experts.108.w2", "model.layers.19.block_sparse_moe.experts.109.w2", "model.layers.19.block_sparse_moe.experts.110.w2", "model.layers.19.block_sparse_moe.experts.111.w2", "model.layers.19.block_sparse_moe.experts.112.w2", "model.layers.19.block_sparse_moe.experts.113.w2", "model.layers.19.block_sparse_moe.experts.114.w2", "model.layers.19.block_sparse_moe.experts.115.w2", "model.layers.19.block_sparse_moe.experts.116.w2", "model.layers.19.block_sparse_moe.experts.117.w2", "model.layers.19.block_sparse_moe.experts.118.w2", "model.layers.19.block_sparse_moe.experts.119.w2", "model.layers.19.block_sparse_moe.experts.120.w2", "model.layers.19.block_sparse_moe.experts.121.w2", "model.layers.19.block_sparse_moe.experts.122.w2", "model.layers.19.block_sparse_moe.experts.123.w2", "model.layers.19.block_sparse_moe.experts.124.w2", "model.layers.19.block_sparse_moe.experts.125.w2", "model.layers.19.block_sparse_moe.experts.126.w2", "model.layers.19.block_sparse_moe.experts.127.w2", "model.layers.19.block_sparse_moe.experts.128.w2", "model.layers.19.block_sparse_moe.experts.129.w2", "model.layers.19.block_sparse_moe.experts.130.w2", "model.layers.19.block_sparse_moe.experts.131.w2", "model.layers.19.block_sparse_moe.experts.132.w2", "model.layers.19.block_sparse_moe.experts.133.w2", "model.layers.19.block_sparse_moe.experts.134.w2", "model.layers.19.block_sparse_moe.experts.135.w2", "model.layers.19.block_sparse_moe.experts.136.w2", "model.layers.19.block_sparse_moe.experts.137.w2", "model.layers.19.block_sparse_moe.experts.138.w2", "model.layers.19.block_sparse_moe.experts.139.w2", "model.layers.19.block_sparse_moe.experts.140.w2", "model.layers.19.block_sparse_moe.experts.141.w2", "model.layers.19.block_sparse_moe.experts.142.w2", "model.layers.19.block_sparse_moe.experts.143.w2", "model.layers.19.block_sparse_moe.experts.144.w2", "model.layers.19.block_sparse_moe.experts.145.w2", "model.layers.19.block_sparse_moe.experts.146.w2", "model.layers.19.block_sparse_moe.experts.147.w2", "model.layers.19.block_sparse_moe.experts.148.w2", "model.layers.19.block_sparse_moe.experts.149.w2", "model.layers.19.block_sparse_moe.experts.150.w2", "model.layers.19.block_sparse_moe.experts.151.w2", "model.layers.19.block_sparse_moe.experts.152.w2", "model.layers.19.block_sparse_moe.experts.153.w2", "model.layers.19.block_sparse_moe.experts.154.w2", "model.layers.19.block_sparse_moe.experts.155.w2", "model.layers.19.block_sparse_moe.experts.156.w2", "model.layers.19.block_sparse_moe.experts.157.w2", "model.layers.19.block_sparse_moe.experts.158.w2", "model.layers.19.block_sparse_moe.experts.159.w2", "model.layers.19.block_sparse_moe.experts.160.w2", "model.layers.19.block_sparse_moe.experts.161.w2", "model.layers.19.block_sparse_moe.experts.162.w2", "model.layers.19.block_sparse_moe.experts.163.w2", "model.layers.19.block_sparse_moe.experts.164.w2", "model.layers.19.block_sparse_moe.experts.165.w2", "model.layers.19.block_sparse_moe.experts.166.w2", "model.layers.19.block_sparse_moe.experts.167.w2", "model.layers.19.block_sparse_moe.experts.168.w2", "model.layers.19.block_sparse_moe.experts.169.w2", "model.layers.19.block_sparse_moe.experts.170.w2", "model.layers.19.block_sparse_moe.experts.171.w2", "model.layers.19.block_sparse_moe.experts.172.w2", "model.layers.19.block_sparse_moe.experts.173.w2", "model.layers.19.block_sparse_moe.experts.174.w2", "model.layers.19.block_sparse_moe.experts.175.w2", "model.layers.19.block_sparse_moe.experts.176.w2", "model.layers.19.block_sparse_moe.experts.177.w2", "model.layers.19.block_sparse_moe.experts.178.w2", "model.layers.19.block_sparse_moe.experts.179.w2", "model.layers.19.block_sparse_moe.experts.180.w2", "model.layers.19.block_sparse_moe.experts.181.w2", "model.layers.19.block_sparse_moe.experts.182.w2", "model.layers.19.block_sparse_moe.experts.183.w2", "model.layers.19.block_sparse_moe.experts.184.w2", "model.layers.19.block_sparse_moe.experts.185.w2", "model.layers.19.block_sparse_moe.experts.186.w2", "model.layers.19.block_sparse_moe.experts.187.w2", "model.layers.19.block_sparse_moe.experts.188.w2", "model.layers.19.block_sparse_moe.experts.189.w2", "model.layers.19.block_sparse_moe.experts.190.w2", "model.layers.19.block_sparse_moe.experts.191.w2", "model.layers.19.block_sparse_moe.experts.192.w2", "model.layers.19.block_sparse_moe.experts.193.w2", "model.layers.19.block_sparse_moe.experts.194.w2", "model.layers.19.block_sparse_moe.experts.195.w2", "model.layers.19.block_sparse_moe.experts.196.w2", "model.layers.19.block_sparse_moe.experts.197.w2", "model.layers.19.block_sparse_moe.experts.198.w2", "model.layers.19.block_sparse_moe.experts.199.w2", "model.layers.19.block_sparse_moe.experts.200.w2", "model.layers.19.block_sparse_moe.experts.201.w2", "model.layers.19.block_sparse_moe.experts.202.w2", "model.layers.19.block_sparse_moe.experts.203.w2", "model.layers.19.block_sparse_moe.experts.204.w2", "model.layers.19.block_sparse_moe.experts.205.w2", "model.layers.19.block_sparse_moe.experts.206.w2", "model.layers.19.block_sparse_moe.experts.207.w2", "model.layers.19.block_sparse_moe.experts.208.w2", "model.layers.19.block_sparse_moe.experts.209.w2", "model.layers.19.block_sparse_moe.experts.210.w2", "model.layers.19.block_sparse_moe.experts.211.w2", "model.layers.19.block_sparse_moe.experts.212.w2", "model.layers.19.block_sparse_moe.experts.213.w2", "model.layers.19.block_sparse_moe.experts.214.w2", "model.layers.19.block_sparse_moe.experts.215.w2", "model.layers.19.block_sparse_moe.experts.216.w2", "model.layers.19.block_sparse_moe.experts.217.w2", "model.layers.19.block_sparse_moe.experts.218.w2", "model.layers.19.block_sparse_moe.experts.219.w2", "model.layers.19.block_sparse_moe.experts.220.w2", "model.layers.19.block_sparse_moe.experts.221.w2", "model.layers.19.block_sparse_moe.experts.222.w2", "model.layers.19.block_sparse_moe.experts.223.w2", "model.layers.19.block_sparse_moe.experts.224.w2", "model.layers.19.block_sparse_moe.experts.225.w2", "model.layers.19.block_sparse_moe.experts.226.w2", "model.layers.19.block_sparse_moe.experts.227.w2", "model.layers.19.block_sparse_moe.experts.228.w2", "model.layers.19.block_sparse_moe.experts.229.w2", "model.layers.19.block_sparse_moe.experts.230.w2", "model.layers.19.block_sparse_moe.experts.231.w2", "model.layers.19.block_sparse_moe.experts.232.w2", "model.layers.19.block_sparse_moe.experts.233.w2", "model.layers.19.block_sparse_moe.experts.234.w2", "model.layers.19.block_sparse_moe.experts.235.w2", "model.layers.19.block_sparse_moe.experts.236.w2", "model.layers.19.block_sparse_moe.experts.237.w2", "model.layers.19.block_sparse_moe.experts.238.w2", "model.layers.19.block_sparse_moe.experts.239.w2", "model.layers.19.block_sparse_moe.experts.240.w2", "model.layers.19.block_sparse_moe.experts.241.w2", "model.layers.19.block_sparse_moe.experts.242.w2", "model.layers.19.block_sparse_moe.experts.243.w2", "model.layers.19.block_sparse_moe.experts.244.w2", "model.layers.19.block_sparse_moe.experts.245.w2", "model.layers.19.block_sparse_moe.experts.246.w2", "model.layers.19.block_sparse_moe.experts.247.w2", "model.layers.19.block_sparse_moe.experts.248.w2", "model.layers.19.block_sparse_moe.experts.249.w2", "model.layers.19.block_sparse_moe.experts.250.w2", "model.layers.19.block_sparse_moe.experts.251.w2", "model.layers.19.block_sparse_moe.experts.252.w2", "model.layers.19.block_sparse_moe.experts.253.w2", "model.layers.19.block_sparse_moe.experts.254.w2", "model.layers.19.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0003536134958266324, "dbits": 1207959552 } ] }, { "idx": 100, "layers": [ "model.layers.20.self_attn.q_proj" ], "candidates": [ { "dkld": 0.003748461604118347, "dbits": 18874368 } ] }, { "idx": 101, "layers": [ "model.layers.20.self_attn.k_proj", "model.layers.20.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0008100152015685369, "dbits": 6291456 } ] }, { "idx": 102, "layers": [ "model.layers.20.self_attn.o_proj" ], "candidates": [ { "dkld": -0.01493175923824308, "dbits": 18874368 } ] }, { "idx": 103, "layers": [ "model.layers.20.block_sparse_moe.experts.0.w1", "model.layers.20.block_sparse_moe.experts.1.w1", "model.layers.20.block_sparse_moe.experts.2.w1", "model.layers.20.block_sparse_moe.experts.3.w1", "model.layers.20.block_sparse_moe.experts.4.w1", "model.layers.20.block_sparse_moe.experts.5.w1", "model.layers.20.block_sparse_moe.experts.6.w1", "model.layers.20.block_sparse_moe.experts.7.w1", "model.layers.20.block_sparse_moe.experts.8.w1", "model.layers.20.block_sparse_moe.experts.9.w1", "model.layers.20.block_sparse_moe.experts.10.w1", "model.layers.20.block_sparse_moe.experts.11.w1", "model.layers.20.block_sparse_moe.experts.12.w1", "model.layers.20.block_sparse_moe.experts.13.w1", "model.layers.20.block_sparse_moe.experts.14.w1", "model.layers.20.block_sparse_moe.experts.15.w1", "model.layers.20.block_sparse_moe.experts.16.w1", "model.layers.20.block_sparse_moe.experts.17.w1", "model.layers.20.block_sparse_moe.experts.18.w1", "model.layers.20.block_sparse_moe.experts.19.w1", "model.layers.20.block_sparse_moe.experts.20.w1", "model.layers.20.block_sparse_moe.experts.21.w1", "model.layers.20.block_sparse_moe.experts.22.w1", "model.layers.20.block_sparse_moe.experts.23.w1", "model.layers.20.block_sparse_moe.experts.24.w1", "model.layers.20.block_sparse_moe.experts.25.w1", "model.layers.20.block_sparse_moe.experts.26.w1", "model.layers.20.block_sparse_moe.experts.27.w1", "model.layers.20.block_sparse_moe.experts.28.w1", "model.layers.20.block_sparse_moe.experts.29.w1", "model.layers.20.block_sparse_moe.experts.30.w1", "model.layers.20.block_sparse_moe.experts.31.w1", "model.layers.20.block_sparse_moe.experts.32.w1", "model.layers.20.block_sparse_moe.experts.33.w1", "model.layers.20.block_sparse_moe.experts.34.w1", "model.layers.20.block_sparse_moe.experts.35.w1", "model.layers.20.block_sparse_moe.experts.36.w1", "model.layers.20.block_sparse_moe.experts.37.w1", "model.layers.20.block_sparse_moe.experts.38.w1", "model.layers.20.block_sparse_moe.experts.39.w1", "model.layers.20.block_sparse_moe.experts.40.w1", "model.layers.20.block_sparse_moe.experts.41.w1", "model.layers.20.block_sparse_moe.experts.42.w1", "model.layers.20.block_sparse_moe.experts.43.w1", "model.layers.20.block_sparse_moe.experts.44.w1", "model.layers.20.block_sparse_moe.experts.45.w1", "model.layers.20.block_sparse_moe.experts.46.w1", "model.layers.20.block_sparse_moe.experts.47.w1", "model.layers.20.block_sparse_moe.experts.48.w1", "model.layers.20.block_sparse_moe.experts.49.w1", "model.layers.20.block_sparse_moe.experts.50.w1", "model.layers.20.block_sparse_moe.experts.51.w1", "model.layers.20.block_sparse_moe.experts.52.w1", "model.layers.20.block_sparse_moe.experts.53.w1", "model.layers.20.block_sparse_moe.experts.54.w1", "model.layers.20.block_sparse_moe.experts.55.w1", "model.layers.20.block_sparse_moe.experts.56.w1", "model.layers.20.block_sparse_moe.experts.57.w1", "model.layers.20.block_sparse_moe.experts.58.w1", "model.layers.20.block_sparse_moe.experts.59.w1", "model.layers.20.block_sparse_moe.experts.60.w1", "model.layers.20.block_sparse_moe.experts.61.w1", "model.layers.20.block_sparse_moe.experts.62.w1", "model.layers.20.block_sparse_moe.experts.63.w1", "model.layers.20.block_sparse_moe.experts.64.w1", "model.layers.20.block_sparse_moe.experts.65.w1", "model.layers.20.block_sparse_moe.experts.66.w1", "model.layers.20.block_sparse_moe.experts.67.w1", "model.layers.20.block_sparse_moe.experts.68.w1", "model.layers.20.block_sparse_moe.experts.69.w1", "model.layers.20.block_sparse_moe.experts.70.w1", "model.layers.20.block_sparse_moe.experts.71.w1", "model.layers.20.block_sparse_moe.experts.72.w1", "model.layers.20.block_sparse_moe.experts.73.w1", "model.layers.20.block_sparse_moe.experts.74.w1", "model.layers.20.block_sparse_moe.experts.75.w1", "model.layers.20.block_sparse_moe.experts.76.w1", "model.layers.20.block_sparse_moe.experts.77.w1", "model.layers.20.block_sparse_moe.experts.78.w1", "model.layers.20.block_sparse_moe.experts.79.w1", "model.layers.20.block_sparse_moe.experts.80.w1", "model.layers.20.block_sparse_moe.experts.81.w1", "model.layers.20.block_sparse_moe.experts.82.w1", "model.layers.20.block_sparse_moe.experts.83.w1", "model.layers.20.block_sparse_moe.experts.84.w1", "model.layers.20.block_sparse_moe.experts.85.w1", "model.layers.20.block_sparse_moe.experts.86.w1", "model.layers.20.block_sparse_moe.experts.87.w1", "model.layers.20.block_sparse_moe.experts.88.w1", "model.layers.20.block_sparse_moe.experts.89.w1", "model.layers.20.block_sparse_moe.experts.90.w1", "model.layers.20.block_sparse_moe.experts.91.w1", "model.layers.20.block_sparse_moe.experts.92.w1", "model.layers.20.block_sparse_moe.experts.93.w1", "model.layers.20.block_sparse_moe.experts.94.w1", "model.layers.20.block_sparse_moe.experts.95.w1", "model.layers.20.block_sparse_moe.experts.96.w1", "model.layers.20.block_sparse_moe.experts.97.w1", "model.layers.20.block_sparse_moe.experts.98.w1", "model.layers.20.block_sparse_moe.experts.99.w1", "model.layers.20.block_sparse_moe.experts.100.w1", "model.layers.20.block_sparse_moe.experts.101.w1", "model.layers.20.block_sparse_moe.experts.102.w1", "model.layers.20.block_sparse_moe.experts.103.w1", "model.layers.20.block_sparse_moe.experts.104.w1", "model.layers.20.block_sparse_moe.experts.105.w1", "model.layers.20.block_sparse_moe.experts.106.w1", "model.layers.20.block_sparse_moe.experts.107.w1", "model.layers.20.block_sparse_moe.experts.108.w1", "model.layers.20.block_sparse_moe.experts.109.w1", "model.layers.20.block_sparse_moe.experts.110.w1", "model.layers.20.block_sparse_moe.experts.111.w1", "model.layers.20.block_sparse_moe.experts.112.w1", "model.layers.20.block_sparse_moe.experts.113.w1", "model.layers.20.block_sparse_moe.experts.114.w1", "model.layers.20.block_sparse_moe.experts.115.w1", "model.layers.20.block_sparse_moe.experts.116.w1", "model.layers.20.block_sparse_moe.experts.117.w1", "model.layers.20.block_sparse_moe.experts.118.w1", "model.layers.20.block_sparse_moe.experts.119.w1", "model.layers.20.block_sparse_moe.experts.120.w1", "model.layers.20.block_sparse_moe.experts.121.w1", "model.layers.20.block_sparse_moe.experts.122.w1", "model.layers.20.block_sparse_moe.experts.123.w1", "model.layers.20.block_sparse_moe.experts.124.w1", "model.layers.20.block_sparse_moe.experts.125.w1", "model.layers.20.block_sparse_moe.experts.126.w1", "model.layers.20.block_sparse_moe.experts.127.w1", "model.layers.20.block_sparse_moe.experts.128.w1", "model.layers.20.block_sparse_moe.experts.129.w1", "model.layers.20.block_sparse_moe.experts.130.w1", "model.layers.20.block_sparse_moe.experts.131.w1", "model.layers.20.block_sparse_moe.experts.132.w1", "model.layers.20.block_sparse_moe.experts.133.w1", "model.layers.20.block_sparse_moe.experts.134.w1", "model.layers.20.block_sparse_moe.experts.135.w1", "model.layers.20.block_sparse_moe.experts.136.w1", "model.layers.20.block_sparse_moe.experts.137.w1", "model.layers.20.block_sparse_moe.experts.138.w1", "model.layers.20.block_sparse_moe.experts.139.w1", "model.layers.20.block_sparse_moe.experts.140.w1", "model.layers.20.block_sparse_moe.experts.141.w1", "model.layers.20.block_sparse_moe.experts.142.w1", "model.layers.20.block_sparse_moe.experts.143.w1", "model.layers.20.block_sparse_moe.experts.144.w1", "model.layers.20.block_sparse_moe.experts.145.w1", "model.layers.20.block_sparse_moe.experts.146.w1", "model.layers.20.block_sparse_moe.experts.147.w1", "model.layers.20.block_sparse_moe.experts.148.w1", "model.layers.20.block_sparse_moe.experts.149.w1", "model.layers.20.block_sparse_moe.experts.150.w1", "model.layers.20.block_sparse_moe.experts.151.w1", "model.layers.20.block_sparse_moe.experts.152.w1", "model.layers.20.block_sparse_moe.experts.153.w1", "model.layers.20.block_sparse_moe.experts.154.w1", "model.layers.20.block_sparse_moe.experts.155.w1", "model.layers.20.block_sparse_moe.experts.156.w1", "model.layers.20.block_sparse_moe.experts.157.w1", "model.layers.20.block_sparse_moe.experts.158.w1", "model.layers.20.block_sparse_moe.experts.159.w1", "model.layers.20.block_sparse_moe.experts.160.w1", "model.layers.20.block_sparse_moe.experts.161.w1", "model.layers.20.block_sparse_moe.experts.162.w1", "model.layers.20.block_sparse_moe.experts.163.w1", "model.layers.20.block_sparse_moe.experts.164.w1", "model.layers.20.block_sparse_moe.experts.165.w1", "model.layers.20.block_sparse_moe.experts.166.w1", "model.layers.20.block_sparse_moe.experts.167.w1", "model.layers.20.block_sparse_moe.experts.168.w1", "model.layers.20.block_sparse_moe.experts.169.w1", "model.layers.20.block_sparse_moe.experts.170.w1", "model.layers.20.block_sparse_moe.experts.171.w1", "model.layers.20.block_sparse_moe.experts.172.w1", "model.layers.20.block_sparse_moe.experts.173.w1", "model.layers.20.block_sparse_moe.experts.174.w1", "model.layers.20.block_sparse_moe.experts.175.w1", "model.layers.20.block_sparse_moe.experts.176.w1", "model.layers.20.block_sparse_moe.experts.177.w1", "model.layers.20.block_sparse_moe.experts.178.w1", "model.layers.20.block_sparse_moe.experts.179.w1", "model.layers.20.block_sparse_moe.experts.180.w1", "model.layers.20.block_sparse_moe.experts.181.w1", "model.layers.20.block_sparse_moe.experts.182.w1", "model.layers.20.block_sparse_moe.experts.183.w1", "model.layers.20.block_sparse_moe.experts.184.w1", "model.layers.20.block_sparse_moe.experts.185.w1", "model.layers.20.block_sparse_moe.experts.186.w1", "model.layers.20.block_sparse_moe.experts.187.w1", "model.layers.20.block_sparse_moe.experts.188.w1", "model.layers.20.block_sparse_moe.experts.189.w1", "model.layers.20.block_sparse_moe.experts.190.w1", "model.layers.20.block_sparse_moe.experts.191.w1", "model.layers.20.block_sparse_moe.experts.192.w1", "model.layers.20.block_sparse_moe.experts.193.w1", "model.layers.20.block_sparse_moe.experts.194.w1", "model.layers.20.block_sparse_moe.experts.195.w1", "model.layers.20.block_sparse_moe.experts.196.w1", "model.layers.20.block_sparse_moe.experts.197.w1", "model.layers.20.block_sparse_moe.experts.198.w1", "model.layers.20.block_sparse_moe.experts.199.w1", "model.layers.20.block_sparse_moe.experts.200.w1", "model.layers.20.block_sparse_moe.experts.201.w1", "model.layers.20.block_sparse_moe.experts.202.w1", "model.layers.20.block_sparse_moe.experts.203.w1", "model.layers.20.block_sparse_moe.experts.204.w1", "model.layers.20.block_sparse_moe.experts.205.w1", "model.layers.20.block_sparse_moe.experts.206.w1", "model.layers.20.block_sparse_moe.experts.207.w1", "model.layers.20.block_sparse_moe.experts.208.w1", "model.layers.20.block_sparse_moe.experts.209.w1", "model.layers.20.block_sparse_moe.experts.210.w1", "model.layers.20.block_sparse_moe.experts.211.w1", "model.layers.20.block_sparse_moe.experts.212.w1", "model.layers.20.block_sparse_moe.experts.213.w1", "model.layers.20.block_sparse_moe.experts.214.w1", "model.layers.20.block_sparse_moe.experts.215.w1", "model.layers.20.block_sparse_moe.experts.216.w1", "model.layers.20.block_sparse_moe.experts.217.w1", "model.layers.20.block_sparse_moe.experts.218.w1", "model.layers.20.block_sparse_moe.experts.219.w1", "model.layers.20.block_sparse_moe.experts.220.w1", "model.layers.20.block_sparse_moe.experts.221.w1", "model.layers.20.block_sparse_moe.experts.222.w1", "model.layers.20.block_sparse_moe.experts.223.w1", "model.layers.20.block_sparse_moe.experts.224.w1", "model.layers.20.block_sparse_moe.experts.225.w1", "model.layers.20.block_sparse_moe.experts.226.w1", "model.layers.20.block_sparse_moe.experts.227.w1", "model.layers.20.block_sparse_moe.experts.228.w1", "model.layers.20.block_sparse_moe.experts.229.w1", "model.layers.20.block_sparse_moe.experts.230.w1", "model.layers.20.block_sparse_moe.experts.231.w1", "model.layers.20.block_sparse_moe.experts.232.w1", "model.layers.20.block_sparse_moe.experts.233.w1", "model.layers.20.block_sparse_moe.experts.234.w1", "model.layers.20.block_sparse_moe.experts.235.w1", "model.layers.20.block_sparse_moe.experts.236.w1", "model.layers.20.block_sparse_moe.experts.237.w1", "model.layers.20.block_sparse_moe.experts.238.w1", "model.layers.20.block_sparse_moe.experts.239.w1", "model.layers.20.block_sparse_moe.experts.240.w1", "model.layers.20.block_sparse_moe.experts.241.w1", "model.layers.20.block_sparse_moe.experts.242.w1", "model.layers.20.block_sparse_moe.experts.243.w1", "model.layers.20.block_sparse_moe.experts.244.w1", "model.layers.20.block_sparse_moe.experts.245.w1", "model.layers.20.block_sparse_moe.experts.246.w1", "model.layers.20.block_sparse_moe.experts.247.w1", "model.layers.20.block_sparse_moe.experts.248.w1", "model.layers.20.block_sparse_moe.experts.249.w1", "model.layers.20.block_sparse_moe.experts.250.w1", "model.layers.20.block_sparse_moe.experts.251.w1", "model.layers.20.block_sparse_moe.experts.252.w1", "model.layers.20.block_sparse_moe.experts.253.w1", "model.layers.20.block_sparse_moe.experts.254.w1", "model.layers.20.block_sparse_moe.experts.255.w1", "model.layers.20.block_sparse_moe.experts.0.w3", "model.layers.20.block_sparse_moe.experts.1.w3", "model.layers.20.block_sparse_moe.experts.2.w3", "model.layers.20.block_sparse_moe.experts.3.w3", "model.layers.20.block_sparse_moe.experts.4.w3", "model.layers.20.block_sparse_moe.experts.5.w3", "model.layers.20.block_sparse_moe.experts.6.w3", "model.layers.20.block_sparse_moe.experts.7.w3", "model.layers.20.block_sparse_moe.experts.8.w3", "model.layers.20.block_sparse_moe.experts.9.w3", "model.layers.20.block_sparse_moe.experts.10.w3", "model.layers.20.block_sparse_moe.experts.11.w3", "model.layers.20.block_sparse_moe.experts.12.w3", "model.layers.20.block_sparse_moe.experts.13.w3", "model.layers.20.block_sparse_moe.experts.14.w3", "model.layers.20.block_sparse_moe.experts.15.w3", "model.layers.20.block_sparse_moe.experts.16.w3", "model.layers.20.block_sparse_moe.experts.17.w3", "model.layers.20.block_sparse_moe.experts.18.w3", "model.layers.20.block_sparse_moe.experts.19.w3", "model.layers.20.block_sparse_moe.experts.20.w3", "model.layers.20.block_sparse_moe.experts.21.w3", "model.layers.20.block_sparse_moe.experts.22.w3", "model.layers.20.block_sparse_moe.experts.23.w3", "model.layers.20.block_sparse_moe.experts.24.w3", "model.layers.20.block_sparse_moe.experts.25.w3", "model.layers.20.block_sparse_moe.experts.26.w3", "model.layers.20.block_sparse_moe.experts.27.w3", "model.layers.20.block_sparse_moe.experts.28.w3", "model.layers.20.block_sparse_moe.experts.29.w3", "model.layers.20.block_sparse_moe.experts.30.w3", "model.layers.20.block_sparse_moe.experts.31.w3", "model.layers.20.block_sparse_moe.experts.32.w3", "model.layers.20.block_sparse_moe.experts.33.w3", "model.layers.20.block_sparse_moe.experts.34.w3", "model.layers.20.block_sparse_moe.experts.35.w3", "model.layers.20.block_sparse_moe.experts.36.w3", "model.layers.20.block_sparse_moe.experts.37.w3", "model.layers.20.block_sparse_moe.experts.38.w3", "model.layers.20.block_sparse_moe.experts.39.w3", "model.layers.20.block_sparse_moe.experts.40.w3", "model.layers.20.block_sparse_moe.experts.41.w3", "model.layers.20.block_sparse_moe.experts.42.w3", "model.layers.20.block_sparse_moe.experts.43.w3", "model.layers.20.block_sparse_moe.experts.44.w3", "model.layers.20.block_sparse_moe.experts.45.w3", "model.layers.20.block_sparse_moe.experts.46.w3", "model.layers.20.block_sparse_moe.experts.47.w3", "model.layers.20.block_sparse_moe.experts.48.w3", "model.layers.20.block_sparse_moe.experts.49.w3", "model.layers.20.block_sparse_moe.experts.50.w3", "model.layers.20.block_sparse_moe.experts.51.w3", "model.layers.20.block_sparse_moe.experts.52.w3", "model.layers.20.block_sparse_moe.experts.53.w3", "model.layers.20.block_sparse_moe.experts.54.w3", "model.layers.20.block_sparse_moe.experts.55.w3", "model.layers.20.block_sparse_moe.experts.56.w3", "model.layers.20.block_sparse_moe.experts.57.w3", "model.layers.20.block_sparse_moe.experts.58.w3", "model.layers.20.block_sparse_moe.experts.59.w3", "model.layers.20.block_sparse_moe.experts.60.w3", "model.layers.20.block_sparse_moe.experts.61.w3", "model.layers.20.block_sparse_moe.experts.62.w3", "model.layers.20.block_sparse_moe.experts.63.w3", "model.layers.20.block_sparse_moe.experts.64.w3", "model.layers.20.block_sparse_moe.experts.65.w3", "model.layers.20.block_sparse_moe.experts.66.w3", "model.layers.20.block_sparse_moe.experts.67.w3", "model.layers.20.block_sparse_moe.experts.68.w3", "model.layers.20.block_sparse_moe.experts.69.w3", "model.layers.20.block_sparse_moe.experts.70.w3", "model.layers.20.block_sparse_moe.experts.71.w3", "model.layers.20.block_sparse_moe.experts.72.w3", "model.layers.20.block_sparse_moe.experts.73.w3", "model.layers.20.block_sparse_moe.experts.74.w3", "model.layers.20.block_sparse_moe.experts.75.w3", "model.layers.20.block_sparse_moe.experts.76.w3", "model.layers.20.block_sparse_moe.experts.77.w3", "model.layers.20.block_sparse_moe.experts.78.w3", "model.layers.20.block_sparse_moe.experts.79.w3", "model.layers.20.block_sparse_moe.experts.80.w3", "model.layers.20.block_sparse_moe.experts.81.w3", "model.layers.20.block_sparse_moe.experts.82.w3", "model.layers.20.block_sparse_moe.experts.83.w3", "model.layers.20.block_sparse_moe.experts.84.w3", "model.layers.20.block_sparse_moe.experts.85.w3", "model.layers.20.block_sparse_moe.experts.86.w3", "model.layers.20.block_sparse_moe.experts.87.w3", "model.layers.20.block_sparse_moe.experts.88.w3", "model.layers.20.block_sparse_moe.experts.89.w3", "model.layers.20.block_sparse_moe.experts.90.w3", "model.layers.20.block_sparse_moe.experts.91.w3", "model.layers.20.block_sparse_moe.experts.92.w3", "model.layers.20.block_sparse_moe.experts.93.w3", "model.layers.20.block_sparse_moe.experts.94.w3", "model.layers.20.block_sparse_moe.experts.95.w3", "model.layers.20.block_sparse_moe.experts.96.w3", "model.layers.20.block_sparse_moe.experts.97.w3", "model.layers.20.block_sparse_moe.experts.98.w3", "model.layers.20.block_sparse_moe.experts.99.w3", "model.layers.20.block_sparse_moe.experts.100.w3", "model.layers.20.block_sparse_moe.experts.101.w3", "model.layers.20.block_sparse_moe.experts.102.w3", "model.layers.20.block_sparse_moe.experts.103.w3", "model.layers.20.block_sparse_moe.experts.104.w3", "model.layers.20.block_sparse_moe.experts.105.w3", "model.layers.20.block_sparse_moe.experts.106.w3", "model.layers.20.block_sparse_moe.experts.107.w3", "model.layers.20.block_sparse_moe.experts.108.w3", "model.layers.20.block_sparse_moe.experts.109.w3", "model.layers.20.block_sparse_moe.experts.110.w3", "model.layers.20.block_sparse_moe.experts.111.w3", "model.layers.20.block_sparse_moe.experts.112.w3", "model.layers.20.block_sparse_moe.experts.113.w3", "model.layers.20.block_sparse_moe.experts.114.w3", "model.layers.20.block_sparse_moe.experts.115.w3", "model.layers.20.block_sparse_moe.experts.116.w3", "model.layers.20.block_sparse_moe.experts.117.w3", "model.layers.20.block_sparse_moe.experts.118.w3", "model.layers.20.block_sparse_moe.experts.119.w3", "model.layers.20.block_sparse_moe.experts.120.w3", "model.layers.20.block_sparse_moe.experts.121.w3", "model.layers.20.block_sparse_moe.experts.122.w3", "model.layers.20.block_sparse_moe.experts.123.w3", "model.layers.20.block_sparse_moe.experts.124.w3", "model.layers.20.block_sparse_moe.experts.125.w3", "model.layers.20.block_sparse_moe.experts.126.w3", "model.layers.20.block_sparse_moe.experts.127.w3", "model.layers.20.block_sparse_moe.experts.128.w3", "model.layers.20.block_sparse_moe.experts.129.w3", "model.layers.20.block_sparse_moe.experts.130.w3", "model.layers.20.block_sparse_moe.experts.131.w3", "model.layers.20.block_sparse_moe.experts.132.w3", "model.layers.20.block_sparse_moe.experts.133.w3", "model.layers.20.block_sparse_moe.experts.134.w3", "model.layers.20.block_sparse_moe.experts.135.w3", "model.layers.20.block_sparse_moe.experts.136.w3", "model.layers.20.block_sparse_moe.experts.137.w3", "model.layers.20.block_sparse_moe.experts.138.w3", "model.layers.20.block_sparse_moe.experts.139.w3", "model.layers.20.block_sparse_moe.experts.140.w3", "model.layers.20.block_sparse_moe.experts.141.w3", "model.layers.20.block_sparse_moe.experts.142.w3", "model.layers.20.block_sparse_moe.experts.143.w3", "model.layers.20.block_sparse_moe.experts.144.w3", "model.layers.20.block_sparse_moe.experts.145.w3", "model.layers.20.block_sparse_moe.experts.146.w3", "model.layers.20.block_sparse_moe.experts.147.w3", "model.layers.20.block_sparse_moe.experts.148.w3", "model.layers.20.block_sparse_moe.experts.149.w3", "model.layers.20.block_sparse_moe.experts.150.w3", "model.layers.20.block_sparse_moe.experts.151.w3", "model.layers.20.block_sparse_moe.experts.152.w3", "model.layers.20.block_sparse_moe.experts.153.w3", "model.layers.20.block_sparse_moe.experts.154.w3", "model.layers.20.block_sparse_moe.experts.155.w3", "model.layers.20.block_sparse_moe.experts.156.w3", "model.layers.20.block_sparse_moe.experts.157.w3", "model.layers.20.block_sparse_moe.experts.158.w3", "model.layers.20.block_sparse_moe.experts.159.w3", "model.layers.20.block_sparse_moe.experts.160.w3", "model.layers.20.block_sparse_moe.experts.161.w3", "model.layers.20.block_sparse_moe.experts.162.w3", "model.layers.20.block_sparse_moe.experts.163.w3", "model.layers.20.block_sparse_moe.experts.164.w3", "model.layers.20.block_sparse_moe.experts.165.w3", "model.layers.20.block_sparse_moe.experts.166.w3", "model.layers.20.block_sparse_moe.experts.167.w3", "model.layers.20.block_sparse_moe.experts.168.w3", "model.layers.20.block_sparse_moe.experts.169.w3", "model.layers.20.block_sparse_moe.experts.170.w3", "model.layers.20.block_sparse_moe.experts.171.w3", "model.layers.20.block_sparse_moe.experts.172.w3", "model.layers.20.block_sparse_moe.experts.173.w3", "model.layers.20.block_sparse_moe.experts.174.w3", "model.layers.20.block_sparse_moe.experts.175.w3", "model.layers.20.block_sparse_moe.experts.176.w3", "model.layers.20.block_sparse_moe.experts.177.w3", "model.layers.20.block_sparse_moe.experts.178.w3", "model.layers.20.block_sparse_moe.experts.179.w3", "model.layers.20.block_sparse_moe.experts.180.w3", "model.layers.20.block_sparse_moe.experts.181.w3", "model.layers.20.block_sparse_moe.experts.182.w3", "model.layers.20.block_sparse_moe.experts.183.w3", "model.layers.20.block_sparse_moe.experts.184.w3", "model.layers.20.block_sparse_moe.experts.185.w3", "model.layers.20.block_sparse_moe.experts.186.w3", "model.layers.20.block_sparse_moe.experts.187.w3", "model.layers.20.block_sparse_moe.experts.188.w3", "model.layers.20.block_sparse_moe.experts.189.w3", "model.layers.20.block_sparse_moe.experts.190.w3", "model.layers.20.block_sparse_moe.experts.191.w3", "model.layers.20.block_sparse_moe.experts.192.w3", "model.layers.20.block_sparse_moe.experts.193.w3", "model.layers.20.block_sparse_moe.experts.194.w3", "model.layers.20.block_sparse_moe.experts.195.w3", "model.layers.20.block_sparse_moe.experts.196.w3", "model.layers.20.block_sparse_moe.experts.197.w3", "model.layers.20.block_sparse_moe.experts.198.w3", "model.layers.20.block_sparse_moe.experts.199.w3", "model.layers.20.block_sparse_moe.experts.200.w3", "model.layers.20.block_sparse_moe.experts.201.w3", "model.layers.20.block_sparse_moe.experts.202.w3", "model.layers.20.block_sparse_moe.experts.203.w3", "model.layers.20.block_sparse_moe.experts.204.w3", "model.layers.20.block_sparse_moe.experts.205.w3", "model.layers.20.block_sparse_moe.experts.206.w3", "model.layers.20.block_sparse_moe.experts.207.w3", "model.layers.20.block_sparse_moe.experts.208.w3", "model.layers.20.block_sparse_moe.experts.209.w3", "model.layers.20.block_sparse_moe.experts.210.w3", "model.layers.20.block_sparse_moe.experts.211.w3", "model.layers.20.block_sparse_moe.experts.212.w3", "model.layers.20.block_sparse_moe.experts.213.w3", "model.layers.20.block_sparse_moe.experts.214.w3", "model.layers.20.block_sparse_moe.experts.215.w3", "model.layers.20.block_sparse_moe.experts.216.w3", "model.layers.20.block_sparse_moe.experts.217.w3", "model.layers.20.block_sparse_moe.experts.218.w3", "model.layers.20.block_sparse_moe.experts.219.w3", "model.layers.20.block_sparse_moe.experts.220.w3", "model.layers.20.block_sparse_moe.experts.221.w3", "model.layers.20.block_sparse_moe.experts.222.w3", "model.layers.20.block_sparse_moe.experts.223.w3", "model.layers.20.block_sparse_moe.experts.224.w3", "model.layers.20.block_sparse_moe.experts.225.w3", "model.layers.20.block_sparse_moe.experts.226.w3", "model.layers.20.block_sparse_moe.experts.227.w3", "model.layers.20.block_sparse_moe.experts.228.w3", "model.layers.20.block_sparse_moe.experts.229.w3", "model.layers.20.block_sparse_moe.experts.230.w3", "model.layers.20.block_sparse_moe.experts.231.w3", "model.layers.20.block_sparse_moe.experts.232.w3", "model.layers.20.block_sparse_moe.experts.233.w3", "model.layers.20.block_sparse_moe.experts.234.w3", "model.layers.20.block_sparse_moe.experts.235.w3", "model.layers.20.block_sparse_moe.experts.236.w3", "model.layers.20.block_sparse_moe.experts.237.w3", "model.layers.20.block_sparse_moe.experts.238.w3", "model.layers.20.block_sparse_moe.experts.239.w3", "model.layers.20.block_sparse_moe.experts.240.w3", "model.layers.20.block_sparse_moe.experts.241.w3", "model.layers.20.block_sparse_moe.experts.242.w3", "model.layers.20.block_sparse_moe.experts.243.w3", "model.layers.20.block_sparse_moe.experts.244.w3", "model.layers.20.block_sparse_moe.experts.245.w3", "model.layers.20.block_sparse_moe.experts.246.w3", "model.layers.20.block_sparse_moe.experts.247.w3", "model.layers.20.block_sparse_moe.experts.248.w3", "model.layers.20.block_sparse_moe.experts.249.w3", "model.layers.20.block_sparse_moe.experts.250.w3", "model.layers.20.block_sparse_moe.experts.251.w3", "model.layers.20.block_sparse_moe.experts.252.w3", "model.layers.20.block_sparse_moe.experts.253.w3", "model.layers.20.block_sparse_moe.experts.254.w3", "model.layers.20.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00030180513858790725, "dbits": 2415919104 } ] }, { "idx": 104, "layers": [ "model.layers.20.block_sparse_moe.experts.0.w2", "model.layers.20.block_sparse_moe.experts.1.w2", "model.layers.20.block_sparse_moe.experts.2.w2", "model.layers.20.block_sparse_moe.experts.3.w2", "model.layers.20.block_sparse_moe.experts.4.w2", "model.layers.20.block_sparse_moe.experts.5.w2", "model.layers.20.block_sparse_moe.experts.6.w2", "model.layers.20.block_sparse_moe.experts.7.w2", "model.layers.20.block_sparse_moe.experts.8.w2", "model.layers.20.block_sparse_moe.experts.9.w2", "model.layers.20.block_sparse_moe.experts.10.w2", "model.layers.20.block_sparse_moe.experts.11.w2", "model.layers.20.block_sparse_moe.experts.12.w2", "model.layers.20.block_sparse_moe.experts.13.w2", "model.layers.20.block_sparse_moe.experts.14.w2", "model.layers.20.block_sparse_moe.experts.15.w2", "model.layers.20.block_sparse_moe.experts.16.w2", "model.layers.20.block_sparse_moe.experts.17.w2", "model.layers.20.block_sparse_moe.experts.18.w2", "model.layers.20.block_sparse_moe.experts.19.w2", "model.layers.20.block_sparse_moe.experts.20.w2", "model.layers.20.block_sparse_moe.experts.21.w2", "model.layers.20.block_sparse_moe.experts.22.w2", "model.layers.20.block_sparse_moe.experts.23.w2", "model.layers.20.block_sparse_moe.experts.24.w2", "model.layers.20.block_sparse_moe.experts.25.w2", "model.layers.20.block_sparse_moe.experts.26.w2", "model.layers.20.block_sparse_moe.experts.27.w2", "model.layers.20.block_sparse_moe.experts.28.w2", "model.layers.20.block_sparse_moe.experts.29.w2", "model.layers.20.block_sparse_moe.experts.30.w2", "model.layers.20.block_sparse_moe.experts.31.w2", "model.layers.20.block_sparse_moe.experts.32.w2", "model.layers.20.block_sparse_moe.experts.33.w2", "model.layers.20.block_sparse_moe.experts.34.w2", "model.layers.20.block_sparse_moe.experts.35.w2", "model.layers.20.block_sparse_moe.experts.36.w2", "model.layers.20.block_sparse_moe.experts.37.w2", "model.layers.20.block_sparse_moe.experts.38.w2", "model.layers.20.block_sparse_moe.experts.39.w2", "model.layers.20.block_sparse_moe.experts.40.w2", "model.layers.20.block_sparse_moe.experts.41.w2", "model.layers.20.block_sparse_moe.experts.42.w2", "model.layers.20.block_sparse_moe.experts.43.w2", "model.layers.20.block_sparse_moe.experts.44.w2", "model.layers.20.block_sparse_moe.experts.45.w2", "model.layers.20.block_sparse_moe.experts.46.w2", "model.layers.20.block_sparse_moe.experts.47.w2", "model.layers.20.block_sparse_moe.experts.48.w2", "model.layers.20.block_sparse_moe.experts.49.w2", "model.layers.20.block_sparse_moe.experts.50.w2", "model.layers.20.block_sparse_moe.experts.51.w2", "model.layers.20.block_sparse_moe.experts.52.w2", "model.layers.20.block_sparse_moe.experts.53.w2", "model.layers.20.block_sparse_moe.experts.54.w2", "model.layers.20.block_sparse_moe.experts.55.w2", "model.layers.20.block_sparse_moe.experts.56.w2", "model.layers.20.block_sparse_moe.experts.57.w2", "model.layers.20.block_sparse_moe.experts.58.w2", "model.layers.20.block_sparse_moe.experts.59.w2", "model.layers.20.block_sparse_moe.experts.60.w2", "model.layers.20.block_sparse_moe.experts.61.w2", "model.layers.20.block_sparse_moe.experts.62.w2", "model.layers.20.block_sparse_moe.experts.63.w2", "model.layers.20.block_sparse_moe.experts.64.w2", "model.layers.20.block_sparse_moe.experts.65.w2", "model.layers.20.block_sparse_moe.experts.66.w2", "model.layers.20.block_sparse_moe.experts.67.w2", "model.layers.20.block_sparse_moe.experts.68.w2", "model.layers.20.block_sparse_moe.experts.69.w2", "model.layers.20.block_sparse_moe.experts.70.w2", "model.layers.20.block_sparse_moe.experts.71.w2", "model.layers.20.block_sparse_moe.experts.72.w2", "model.layers.20.block_sparse_moe.experts.73.w2", "model.layers.20.block_sparse_moe.experts.74.w2", "model.layers.20.block_sparse_moe.experts.75.w2", "model.layers.20.block_sparse_moe.experts.76.w2", "model.layers.20.block_sparse_moe.experts.77.w2", "model.layers.20.block_sparse_moe.experts.78.w2", "model.layers.20.block_sparse_moe.experts.79.w2", "model.layers.20.block_sparse_moe.experts.80.w2", "model.layers.20.block_sparse_moe.experts.81.w2", "model.layers.20.block_sparse_moe.experts.82.w2", "model.layers.20.block_sparse_moe.experts.83.w2", "model.layers.20.block_sparse_moe.experts.84.w2", "model.layers.20.block_sparse_moe.experts.85.w2", "model.layers.20.block_sparse_moe.experts.86.w2", "model.layers.20.block_sparse_moe.experts.87.w2", "model.layers.20.block_sparse_moe.experts.88.w2", "model.layers.20.block_sparse_moe.experts.89.w2", "model.layers.20.block_sparse_moe.experts.90.w2", "model.layers.20.block_sparse_moe.experts.91.w2", "model.layers.20.block_sparse_moe.experts.92.w2", "model.layers.20.block_sparse_moe.experts.93.w2", "model.layers.20.block_sparse_moe.experts.94.w2", "model.layers.20.block_sparse_moe.experts.95.w2", "model.layers.20.block_sparse_moe.experts.96.w2", "model.layers.20.block_sparse_moe.experts.97.w2", "model.layers.20.block_sparse_moe.experts.98.w2", "model.layers.20.block_sparse_moe.experts.99.w2", "model.layers.20.block_sparse_moe.experts.100.w2", "model.layers.20.block_sparse_moe.experts.101.w2", "model.layers.20.block_sparse_moe.experts.102.w2", "model.layers.20.block_sparse_moe.experts.103.w2", "model.layers.20.block_sparse_moe.experts.104.w2", "model.layers.20.block_sparse_moe.experts.105.w2", "model.layers.20.block_sparse_moe.experts.106.w2", "model.layers.20.block_sparse_moe.experts.107.w2", "model.layers.20.block_sparse_moe.experts.108.w2", "model.layers.20.block_sparse_moe.experts.109.w2", "model.layers.20.block_sparse_moe.experts.110.w2", "model.layers.20.block_sparse_moe.experts.111.w2", "model.layers.20.block_sparse_moe.experts.112.w2", "model.layers.20.block_sparse_moe.experts.113.w2", "model.layers.20.block_sparse_moe.experts.114.w2", "model.layers.20.block_sparse_moe.experts.115.w2", "model.layers.20.block_sparse_moe.experts.116.w2", "model.layers.20.block_sparse_moe.experts.117.w2", "model.layers.20.block_sparse_moe.experts.118.w2", "model.layers.20.block_sparse_moe.experts.119.w2", "model.layers.20.block_sparse_moe.experts.120.w2", "model.layers.20.block_sparse_moe.experts.121.w2", "model.layers.20.block_sparse_moe.experts.122.w2", "model.layers.20.block_sparse_moe.experts.123.w2", "model.layers.20.block_sparse_moe.experts.124.w2", "model.layers.20.block_sparse_moe.experts.125.w2", "model.layers.20.block_sparse_moe.experts.126.w2", "model.layers.20.block_sparse_moe.experts.127.w2", "model.layers.20.block_sparse_moe.experts.128.w2", "model.layers.20.block_sparse_moe.experts.129.w2", "model.layers.20.block_sparse_moe.experts.130.w2", "model.layers.20.block_sparse_moe.experts.131.w2", "model.layers.20.block_sparse_moe.experts.132.w2", "model.layers.20.block_sparse_moe.experts.133.w2", "model.layers.20.block_sparse_moe.experts.134.w2", "model.layers.20.block_sparse_moe.experts.135.w2", "model.layers.20.block_sparse_moe.experts.136.w2", "model.layers.20.block_sparse_moe.experts.137.w2", "model.layers.20.block_sparse_moe.experts.138.w2", "model.layers.20.block_sparse_moe.experts.139.w2", "model.layers.20.block_sparse_moe.experts.140.w2", "model.layers.20.block_sparse_moe.experts.141.w2", "model.layers.20.block_sparse_moe.experts.142.w2", "model.layers.20.block_sparse_moe.experts.143.w2", "model.layers.20.block_sparse_moe.experts.144.w2", "model.layers.20.block_sparse_moe.experts.145.w2", "model.layers.20.block_sparse_moe.experts.146.w2", "model.layers.20.block_sparse_moe.experts.147.w2", "model.layers.20.block_sparse_moe.experts.148.w2", "model.layers.20.block_sparse_moe.experts.149.w2", "model.layers.20.block_sparse_moe.experts.150.w2", "model.layers.20.block_sparse_moe.experts.151.w2", "model.layers.20.block_sparse_moe.experts.152.w2", "model.layers.20.block_sparse_moe.experts.153.w2", "model.layers.20.block_sparse_moe.experts.154.w2", "model.layers.20.block_sparse_moe.experts.155.w2", "model.layers.20.block_sparse_moe.experts.156.w2", "model.layers.20.block_sparse_moe.experts.157.w2", "model.layers.20.block_sparse_moe.experts.158.w2", "model.layers.20.block_sparse_moe.experts.159.w2", "model.layers.20.block_sparse_moe.experts.160.w2", "model.layers.20.block_sparse_moe.experts.161.w2", "model.layers.20.block_sparse_moe.experts.162.w2", "model.layers.20.block_sparse_moe.experts.163.w2", "model.layers.20.block_sparse_moe.experts.164.w2", "model.layers.20.block_sparse_moe.experts.165.w2", "model.layers.20.block_sparse_moe.experts.166.w2", "model.layers.20.block_sparse_moe.experts.167.w2", "model.layers.20.block_sparse_moe.experts.168.w2", "model.layers.20.block_sparse_moe.experts.169.w2", "model.layers.20.block_sparse_moe.experts.170.w2", "model.layers.20.block_sparse_moe.experts.171.w2", "model.layers.20.block_sparse_moe.experts.172.w2", "model.layers.20.block_sparse_moe.experts.173.w2", "model.layers.20.block_sparse_moe.experts.174.w2", "model.layers.20.block_sparse_moe.experts.175.w2", "model.layers.20.block_sparse_moe.experts.176.w2", "model.layers.20.block_sparse_moe.experts.177.w2", "model.layers.20.block_sparse_moe.experts.178.w2", "model.layers.20.block_sparse_moe.experts.179.w2", "model.layers.20.block_sparse_moe.experts.180.w2", "model.layers.20.block_sparse_moe.experts.181.w2", "model.layers.20.block_sparse_moe.experts.182.w2", "model.layers.20.block_sparse_moe.experts.183.w2", "model.layers.20.block_sparse_moe.experts.184.w2", "model.layers.20.block_sparse_moe.experts.185.w2", "model.layers.20.block_sparse_moe.experts.186.w2", "model.layers.20.block_sparse_moe.experts.187.w2", "model.layers.20.block_sparse_moe.experts.188.w2", "model.layers.20.block_sparse_moe.experts.189.w2", "model.layers.20.block_sparse_moe.experts.190.w2", "model.layers.20.block_sparse_moe.experts.191.w2", "model.layers.20.block_sparse_moe.experts.192.w2", "model.layers.20.block_sparse_moe.experts.193.w2", "model.layers.20.block_sparse_moe.experts.194.w2", "model.layers.20.block_sparse_moe.experts.195.w2", "model.layers.20.block_sparse_moe.experts.196.w2", "model.layers.20.block_sparse_moe.experts.197.w2", "model.layers.20.block_sparse_moe.experts.198.w2", "model.layers.20.block_sparse_moe.experts.199.w2", "model.layers.20.block_sparse_moe.experts.200.w2", "model.layers.20.block_sparse_moe.experts.201.w2", "model.layers.20.block_sparse_moe.experts.202.w2", "model.layers.20.block_sparse_moe.experts.203.w2", "model.layers.20.block_sparse_moe.experts.204.w2", "model.layers.20.block_sparse_moe.experts.205.w2", "model.layers.20.block_sparse_moe.experts.206.w2", "model.layers.20.block_sparse_moe.experts.207.w2", "model.layers.20.block_sparse_moe.experts.208.w2", "model.layers.20.block_sparse_moe.experts.209.w2", "model.layers.20.block_sparse_moe.experts.210.w2", "model.layers.20.block_sparse_moe.experts.211.w2", "model.layers.20.block_sparse_moe.experts.212.w2", "model.layers.20.block_sparse_moe.experts.213.w2", "model.layers.20.block_sparse_moe.experts.214.w2", "model.layers.20.block_sparse_moe.experts.215.w2", "model.layers.20.block_sparse_moe.experts.216.w2", "model.layers.20.block_sparse_moe.experts.217.w2", "model.layers.20.block_sparse_moe.experts.218.w2", "model.layers.20.block_sparse_moe.experts.219.w2", "model.layers.20.block_sparse_moe.experts.220.w2", "model.layers.20.block_sparse_moe.experts.221.w2", "model.layers.20.block_sparse_moe.experts.222.w2", "model.layers.20.block_sparse_moe.experts.223.w2", "model.layers.20.block_sparse_moe.experts.224.w2", "model.layers.20.block_sparse_moe.experts.225.w2", "model.layers.20.block_sparse_moe.experts.226.w2", "model.layers.20.block_sparse_moe.experts.227.w2", "model.layers.20.block_sparse_moe.experts.228.w2", "model.layers.20.block_sparse_moe.experts.229.w2", "model.layers.20.block_sparse_moe.experts.230.w2", "model.layers.20.block_sparse_moe.experts.231.w2", "model.layers.20.block_sparse_moe.experts.232.w2", "model.layers.20.block_sparse_moe.experts.233.w2", "model.layers.20.block_sparse_moe.experts.234.w2", "model.layers.20.block_sparse_moe.experts.235.w2", "model.layers.20.block_sparse_moe.experts.236.w2", "model.layers.20.block_sparse_moe.experts.237.w2", "model.layers.20.block_sparse_moe.experts.238.w2", "model.layers.20.block_sparse_moe.experts.239.w2", "model.layers.20.block_sparse_moe.experts.240.w2", "model.layers.20.block_sparse_moe.experts.241.w2", "model.layers.20.block_sparse_moe.experts.242.w2", "model.layers.20.block_sparse_moe.experts.243.w2", "model.layers.20.block_sparse_moe.experts.244.w2", "model.layers.20.block_sparse_moe.experts.245.w2", "model.layers.20.block_sparse_moe.experts.246.w2", "model.layers.20.block_sparse_moe.experts.247.w2", "model.layers.20.block_sparse_moe.experts.248.w2", "model.layers.20.block_sparse_moe.experts.249.w2", "model.layers.20.block_sparse_moe.experts.250.w2", "model.layers.20.block_sparse_moe.experts.251.w2", "model.layers.20.block_sparse_moe.experts.252.w2", "model.layers.20.block_sparse_moe.experts.253.w2", "model.layers.20.block_sparse_moe.experts.254.w2", "model.layers.20.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0009939879179000632, "dbits": 1207959552 } ] }, { "idx": 105, "layers": [ "model.layers.21.self_attn.q_proj" ], "candidates": [ { "dkld": 0.001000154018402144, "dbits": 18874368 } ] }, { "idx": 106, "layers": [ "model.layers.21.self_attn.k_proj", "model.layers.21.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0024877160787583064, "dbits": 6291456 } ] }, { "idx": 107, "layers": [ "model.layers.21.self_attn.o_proj" ], "candidates": [ { "dkld": 0.00031420886516575486, "dbits": 18874368 } ] }, { "idx": 108, "layers": [ "model.layers.21.block_sparse_moe.experts.0.w1", "model.layers.21.block_sparse_moe.experts.1.w1", "model.layers.21.block_sparse_moe.experts.2.w1", "model.layers.21.block_sparse_moe.experts.3.w1", "model.layers.21.block_sparse_moe.experts.4.w1", "model.layers.21.block_sparse_moe.experts.5.w1", "model.layers.21.block_sparse_moe.experts.6.w1", "model.layers.21.block_sparse_moe.experts.7.w1", "model.layers.21.block_sparse_moe.experts.8.w1", "model.layers.21.block_sparse_moe.experts.9.w1", "model.layers.21.block_sparse_moe.experts.10.w1", "model.layers.21.block_sparse_moe.experts.11.w1", "model.layers.21.block_sparse_moe.experts.12.w1", "model.layers.21.block_sparse_moe.experts.13.w1", "model.layers.21.block_sparse_moe.experts.14.w1", "model.layers.21.block_sparse_moe.experts.15.w1", "model.layers.21.block_sparse_moe.experts.16.w1", "model.layers.21.block_sparse_moe.experts.17.w1", "model.layers.21.block_sparse_moe.experts.18.w1", "model.layers.21.block_sparse_moe.experts.19.w1", "model.layers.21.block_sparse_moe.experts.20.w1", "model.layers.21.block_sparse_moe.experts.21.w1", "model.layers.21.block_sparse_moe.experts.22.w1", "model.layers.21.block_sparse_moe.experts.23.w1", "model.layers.21.block_sparse_moe.experts.24.w1", "model.layers.21.block_sparse_moe.experts.25.w1", "model.layers.21.block_sparse_moe.experts.26.w1", "model.layers.21.block_sparse_moe.experts.27.w1", "model.layers.21.block_sparse_moe.experts.28.w1", "model.layers.21.block_sparse_moe.experts.29.w1", "model.layers.21.block_sparse_moe.experts.30.w1", "model.layers.21.block_sparse_moe.experts.31.w1", "model.layers.21.block_sparse_moe.experts.32.w1", "model.layers.21.block_sparse_moe.experts.33.w1", "model.layers.21.block_sparse_moe.experts.34.w1", "model.layers.21.block_sparse_moe.experts.35.w1", "model.layers.21.block_sparse_moe.experts.36.w1", "model.layers.21.block_sparse_moe.experts.37.w1", "model.layers.21.block_sparse_moe.experts.38.w1", "model.layers.21.block_sparse_moe.experts.39.w1", "model.layers.21.block_sparse_moe.experts.40.w1", "model.layers.21.block_sparse_moe.experts.41.w1", "model.layers.21.block_sparse_moe.experts.42.w1", "model.layers.21.block_sparse_moe.experts.43.w1", "model.layers.21.block_sparse_moe.experts.44.w1", "model.layers.21.block_sparse_moe.experts.45.w1", "model.layers.21.block_sparse_moe.experts.46.w1", "model.layers.21.block_sparse_moe.experts.47.w1", "model.layers.21.block_sparse_moe.experts.48.w1", "model.layers.21.block_sparse_moe.experts.49.w1", "model.layers.21.block_sparse_moe.experts.50.w1", "model.layers.21.block_sparse_moe.experts.51.w1", "model.layers.21.block_sparse_moe.experts.52.w1", "model.layers.21.block_sparse_moe.experts.53.w1", "model.layers.21.block_sparse_moe.experts.54.w1", "model.layers.21.block_sparse_moe.experts.55.w1", "model.layers.21.block_sparse_moe.experts.56.w1", "model.layers.21.block_sparse_moe.experts.57.w1", "model.layers.21.block_sparse_moe.experts.58.w1", "model.layers.21.block_sparse_moe.experts.59.w1", "model.layers.21.block_sparse_moe.experts.60.w1", "model.layers.21.block_sparse_moe.experts.61.w1", "model.layers.21.block_sparse_moe.experts.62.w1", "model.layers.21.block_sparse_moe.experts.63.w1", "model.layers.21.block_sparse_moe.experts.64.w1", "model.layers.21.block_sparse_moe.experts.65.w1", "model.layers.21.block_sparse_moe.experts.66.w1", "model.layers.21.block_sparse_moe.experts.67.w1", "model.layers.21.block_sparse_moe.experts.68.w1", "model.layers.21.block_sparse_moe.experts.69.w1", "model.layers.21.block_sparse_moe.experts.70.w1", "model.layers.21.block_sparse_moe.experts.71.w1", "model.layers.21.block_sparse_moe.experts.72.w1", "model.layers.21.block_sparse_moe.experts.73.w1", "model.layers.21.block_sparse_moe.experts.74.w1", "model.layers.21.block_sparse_moe.experts.75.w1", "model.layers.21.block_sparse_moe.experts.76.w1", "model.layers.21.block_sparse_moe.experts.77.w1", "model.layers.21.block_sparse_moe.experts.78.w1", "model.layers.21.block_sparse_moe.experts.79.w1", "model.layers.21.block_sparse_moe.experts.80.w1", "model.layers.21.block_sparse_moe.experts.81.w1", "model.layers.21.block_sparse_moe.experts.82.w1", "model.layers.21.block_sparse_moe.experts.83.w1", "model.layers.21.block_sparse_moe.experts.84.w1", "model.layers.21.block_sparse_moe.experts.85.w1", "model.layers.21.block_sparse_moe.experts.86.w1", "model.layers.21.block_sparse_moe.experts.87.w1", "model.layers.21.block_sparse_moe.experts.88.w1", "model.layers.21.block_sparse_moe.experts.89.w1", "model.layers.21.block_sparse_moe.experts.90.w1", "model.layers.21.block_sparse_moe.experts.91.w1", "model.layers.21.block_sparse_moe.experts.92.w1", "model.layers.21.block_sparse_moe.experts.93.w1", "model.layers.21.block_sparse_moe.experts.94.w1", "model.layers.21.block_sparse_moe.experts.95.w1", "model.layers.21.block_sparse_moe.experts.96.w1", "model.layers.21.block_sparse_moe.experts.97.w1", "model.layers.21.block_sparse_moe.experts.98.w1", "model.layers.21.block_sparse_moe.experts.99.w1", "model.layers.21.block_sparse_moe.experts.100.w1", "model.layers.21.block_sparse_moe.experts.101.w1", "model.layers.21.block_sparse_moe.experts.102.w1", "model.layers.21.block_sparse_moe.experts.103.w1", "model.layers.21.block_sparse_moe.experts.104.w1", "model.layers.21.block_sparse_moe.experts.105.w1", "model.layers.21.block_sparse_moe.experts.106.w1", "model.layers.21.block_sparse_moe.experts.107.w1", "model.layers.21.block_sparse_moe.experts.108.w1", "model.layers.21.block_sparse_moe.experts.109.w1", "model.layers.21.block_sparse_moe.experts.110.w1", "model.layers.21.block_sparse_moe.experts.111.w1", "model.layers.21.block_sparse_moe.experts.112.w1", "model.layers.21.block_sparse_moe.experts.113.w1", "model.layers.21.block_sparse_moe.experts.114.w1", "model.layers.21.block_sparse_moe.experts.115.w1", "model.layers.21.block_sparse_moe.experts.116.w1", "model.layers.21.block_sparse_moe.experts.117.w1", "model.layers.21.block_sparse_moe.experts.118.w1", "model.layers.21.block_sparse_moe.experts.119.w1", "model.layers.21.block_sparse_moe.experts.120.w1", "model.layers.21.block_sparse_moe.experts.121.w1", "model.layers.21.block_sparse_moe.experts.122.w1", "model.layers.21.block_sparse_moe.experts.123.w1", "model.layers.21.block_sparse_moe.experts.124.w1", "model.layers.21.block_sparse_moe.experts.125.w1", "model.layers.21.block_sparse_moe.experts.126.w1", "model.layers.21.block_sparse_moe.experts.127.w1", "model.layers.21.block_sparse_moe.experts.128.w1", "model.layers.21.block_sparse_moe.experts.129.w1", "model.layers.21.block_sparse_moe.experts.130.w1", "model.layers.21.block_sparse_moe.experts.131.w1", "model.layers.21.block_sparse_moe.experts.132.w1", "model.layers.21.block_sparse_moe.experts.133.w1", "model.layers.21.block_sparse_moe.experts.134.w1", "model.layers.21.block_sparse_moe.experts.135.w1", "model.layers.21.block_sparse_moe.experts.136.w1", "model.layers.21.block_sparse_moe.experts.137.w1", "model.layers.21.block_sparse_moe.experts.138.w1", "model.layers.21.block_sparse_moe.experts.139.w1", "model.layers.21.block_sparse_moe.experts.140.w1", "model.layers.21.block_sparse_moe.experts.141.w1", "model.layers.21.block_sparse_moe.experts.142.w1", "model.layers.21.block_sparse_moe.experts.143.w1", "model.layers.21.block_sparse_moe.experts.144.w1", "model.layers.21.block_sparse_moe.experts.145.w1", "model.layers.21.block_sparse_moe.experts.146.w1", "model.layers.21.block_sparse_moe.experts.147.w1", "model.layers.21.block_sparse_moe.experts.148.w1", "model.layers.21.block_sparse_moe.experts.149.w1", "model.layers.21.block_sparse_moe.experts.150.w1", "model.layers.21.block_sparse_moe.experts.151.w1", "model.layers.21.block_sparse_moe.experts.152.w1", "model.layers.21.block_sparse_moe.experts.153.w1", "model.layers.21.block_sparse_moe.experts.154.w1", "model.layers.21.block_sparse_moe.experts.155.w1", "model.layers.21.block_sparse_moe.experts.156.w1", "model.layers.21.block_sparse_moe.experts.157.w1", "model.layers.21.block_sparse_moe.experts.158.w1", "model.layers.21.block_sparse_moe.experts.159.w1", "model.layers.21.block_sparse_moe.experts.160.w1", "model.layers.21.block_sparse_moe.experts.161.w1", "model.layers.21.block_sparse_moe.experts.162.w1", "model.layers.21.block_sparse_moe.experts.163.w1", "model.layers.21.block_sparse_moe.experts.164.w1", "model.layers.21.block_sparse_moe.experts.165.w1", "model.layers.21.block_sparse_moe.experts.166.w1", "model.layers.21.block_sparse_moe.experts.167.w1", "model.layers.21.block_sparse_moe.experts.168.w1", "model.layers.21.block_sparse_moe.experts.169.w1", "model.layers.21.block_sparse_moe.experts.170.w1", "model.layers.21.block_sparse_moe.experts.171.w1", "model.layers.21.block_sparse_moe.experts.172.w1", "model.layers.21.block_sparse_moe.experts.173.w1", "model.layers.21.block_sparse_moe.experts.174.w1", "model.layers.21.block_sparse_moe.experts.175.w1", "model.layers.21.block_sparse_moe.experts.176.w1", "model.layers.21.block_sparse_moe.experts.177.w1", "model.layers.21.block_sparse_moe.experts.178.w1", "model.layers.21.block_sparse_moe.experts.179.w1", "model.layers.21.block_sparse_moe.experts.180.w1", "model.layers.21.block_sparse_moe.experts.181.w1", "model.layers.21.block_sparse_moe.experts.182.w1", "model.layers.21.block_sparse_moe.experts.183.w1", "model.layers.21.block_sparse_moe.experts.184.w1", "model.layers.21.block_sparse_moe.experts.185.w1", "model.layers.21.block_sparse_moe.experts.186.w1", "model.layers.21.block_sparse_moe.experts.187.w1", "model.layers.21.block_sparse_moe.experts.188.w1", "model.layers.21.block_sparse_moe.experts.189.w1", "model.layers.21.block_sparse_moe.experts.190.w1", "model.layers.21.block_sparse_moe.experts.191.w1", "model.layers.21.block_sparse_moe.experts.192.w1", "model.layers.21.block_sparse_moe.experts.193.w1", "model.layers.21.block_sparse_moe.experts.194.w1", "model.layers.21.block_sparse_moe.experts.195.w1", "model.layers.21.block_sparse_moe.experts.196.w1", "model.layers.21.block_sparse_moe.experts.197.w1", "model.layers.21.block_sparse_moe.experts.198.w1", "model.layers.21.block_sparse_moe.experts.199.w1", "model.layers.21.block_sparse_moe.experts.200.w1", "model.layers.21.block_sparse_moe.experts.201.w1", "model.layers.21.block_sparse_moe.experts.202.w1", "model.layers.21.block_sparse_moe.experts.203.w1", "model.layers.21.block_sparse_moe.experts.204.w1", "model.layers.21.block_sparse_moe.experts.205.w1", "model.layers.21.block_sparse_moe.experts.206.w1", "model.layers.21.block_sparse_moe.experts.207.w1", "model.layers.21.block_sparse_moe.experts.208.w1", "model.layers.21.block_sparse_moe.experts.209.w1", "model.layers.21.block_sparse_moe.experts.210.w1", "model.layers.21.block_sparse_moe.experts.211.w1", "model.layers.21.block_sparse_moe.experts.212.w1", "model.layers.21.block_sparse_moe.experts.213.w1", "model.layers.21.block_sparse_moe.experts.214.w1", "model.layers.21.block_sparse_moe.experts.215.w1", "model.layers.21.block_sparse_moe.experts.216.w1", "model.layers.21.block_sparse_moe.experts.217.w1", "model.layers.21.block_sparse_moe.experts.218.w1", "model.layers.21.block_sparse_moe.experts.219.w1", "model.layers.21.block_sparse_moe.experts.220.w1", "model.layers.21.block_sparse_moe.experts.221.w1", "model.layers.21.block_sparse_moe.experts.222.w1", "model.layers.21.block_sparse_moe.experts.223.w1", "model.layers.21.block_sparse_moe.experts.224.w1", "model.layers.21.block_sparse_moe.experts.225.w1", "model.layers.21.block_sparse_moe.experts.226.w1", "model.layers.21.block_sparse_moe.experts.227.w1", "model.layers.21.block_sparse_moe.experts.228.w1", "model.layers.21.block_sparse_moe.experts.229.w1", "model.layers.21.block_sparse_moe.experts.230.w1", "model.layers.21.block_sparse_moe.experts.231.w1", "model.layers.21.block_sparse_moe.experts.232.w1", "model.layers.21.block_sparse_moe.experts.233.w1", "model.layers.21.block_sparse_moe.experts.234.w1", "model.layers.21.block_sparse_moe.experts.235.w1", "model.layers.21.block_sparse_moe.experts.236.w1", "model.layers.21.block_sparse_moe.experts.237.w1", "model.layers.21.block_sparse_moe.experts.238.w1", "model.layers.21.block_sparse_moe.experts.239.w1", "model.layers.21.block_sparse_moe.experts.240.w1", "model.layers.21.block_sparse_moe.experts.241.w1", "model.layers.21.block_sparse_moe.experts.242.w1", "model.layers.21.block_sparse_moe.experts.243.w1", "model.layers.21.block_sparse_moe.experts.244.w1", "model.layers.21.block_sparse_moe.experts.245.w1", "model.layers.21.block_sparse_moe.experts.246.w1", "model.layers.21.block_sparse_moe.experts.247.w1", "model.layers.21.block_sparse_moe.experts.248.w1", "model.layers.21.block_sparse_moe.experts.249.w1", "model.layers.21.block_sparse_moe.experts.250.w1", "model.layers.21.block_sparse_moe.experts.251.w1", "model.layers.21.block_sparse_moe.experts.252.w1", "model.layers.21.block_sparse_moe.experts.253.w1", "model.layers.21.block_sparse_moe.experts.254.w1", "model.layers.21.block_sparse_moe.experts.255.w1", "model.layers.21.block_sparse_moe.experts.0.w3", "model.layers.21.block_sparse_moe.experts.1.w3", "model.layers.21.block_sparse_moe.experts.2.w3", "model.layers.21.block_sparse_moe.experts.3.w3", "model.layers.21.block_sparse_moe.experts.4.w3", "model.layers.21.block_sparse_moe.experts.5.w3", "model.layers.21.block_sparse_moe.experts.6.w3", "model.layers.21.block_sparse_moe.experts.7.w3", "model.layers.21.block_sparse_moe.experts.8.w3", "model.layers.21.block_sparse_moe.experts.9.w3", "model.layers.21.block_sparse_moe.experts.10.w3", "model.layers.21.block_sparse_moe.experts.11.w3", "model.layers.21.block_sparse_moe.experts.12.w3", "model.layers.21.block_sparse_moe.experts.13.w3", "model.layers.21.block_sparse_moe.experts.14.w3", "model.layers.21.block_sparse_moe.experts.15.w3", "model.layers.21.block_sparse_moe.experts.16.w3", "model.layers.21.block_sparse_moe.experts.17.w3", "model.layers.21.block_sparse_moe.experts.18.w3", "model.layers.21.block_sparse_moe.experts.19.w3", "model.layers.21.block_sparse_moe.experts.20.w3", "model.layers.21.block_sparse_moe.experts.21.w3", "model.layers.21.block_sparse_moe.experts.22.w3", "model.layers.21.block_sparse_moe.experts.23.w3", "model.layers.21.block_sparse_moe.experts.24.w3", "model.layers.21.block_sparse_moe.experts.25.w3", "model.layers.21.block_sparse_moe.experts.26.w3", "model.layers.21.block_sparse_moe.experts.27.w3", "model.layers.21.block_sparse_moe.experts.28.w3", "model.layers.21.block_sparse_moe.experts.29.w3", "model.layers.21.block_sparse_moe.experts.30.w3", "model.layers.21.block_sparse_moe.experts.31.w3", "model.layers.21.block_sparse_moe.experts.32.w3", "model.layers.21.block_sparse_moe.experts.33.w3", "model.layers.21.block_sparse_moe.experts.34.w3", "model.layers.21.block_sparse_moe.experts.35.w3", "model.layers.21.block_sparse_moe.experts.36.w3", "model.layers.21.block_sparse_moe.experts.37.w3", "model.layers.21.block_sparse_moe.experts.38.w3", "model.layers.21.block_sparse_moe.experts.39.w3", "model.layers.21.block_sparse_moe.experts.40.w3", "model.layers.21.block_sparse_moe.experts.41.w3", "model.layers.21.block_sparse_moe.experts.42.w3", "model.layers.21.block_sparse_moe.experts.43.w3", "model.layers.21.block_sparse_moe.experts.44.w3", "model.layers.21.block_sparse_moe.experts.45.w3", "model.layers.21.block_sparse_moe.experts.46.w3", "model.layers.21.block_sparse_moe.experts.47.w3", "model.layers.21.block_sparse_moe.experts.48.w3", "model.layers.21.block_sparse_moe.experts.49.w3", "model.layers.21.block_sparse_moe.experts.50.w3", "model.layers.21.block_sparse_moe.experts.51.w3", "model.layers.21.block_sparse_moe.experts.52.w3", "model.layers.21.block_sparse_moe.experts.53.w3", "model.layers.21.block_sparse_moe.experts.54.w3", "model.layers.21.block_sparse_moe.experts.55.w3", "model.layers.21.block_sparse_moe.experts.56.w3", "model.layers.21.block_sparse_moe.experts.57.w3", "model.layers.21.block_sparse_moe.experts.58.w3", "model.layers.21.block_sparse_moe.experts.59.w3", "model.layers.21.block_sparse_moe.experts.60.w3", "model.layers.21.block_sparse_moe.experts.61.w3", "model.layers.21.block_sparse_moe.experts.62.w3", "model.layers.21.block_sparse_moe.experts.63.w3", "model.layers.21.block_sparse_moe.experts.64.w3", "model.layers.21.block_sparse_moe.experts.65.w3", "model.layers.21.block_sparse_moe.experts.66.w3", "model.layers.21.block_sparse_moe.experts.67.w3", "model.layers.21.block_sparse_moe.experts.68.w3", "model.layers.21.block_sparse_moe.experts.69.w3", "model.layers.21.block_sparse_moe.experts.70.w3", "model.layers.21.block_sparse_moe.experts.71.w3", "model.layers.21.block_sparse_moe.experts.72.w3", "model.layers.21.block_sparse_moe.experts.73.w3", "model.layers.21.block_sparse_moe.experts.74.w3", "model.layers.21.block_sparse_moe.experts.75.w3", "model.layers.21.block_sparse_moe.experts.76.w3", "model.layers.21.block_sparse_moe.experts.77.w3", "model.layers.21.block_sparse_moe.experts.78.w3", "model.layers.21.block_sparse_moe.experts.79.w3", "model.layers.21.block_sparse_moe.experts.80.w3", "model.layers.21.block_sparse_moe.experts.81.w3", "model.layers.21.block_sparse_moe.experts.82.w3", "model.layers.21.block_sparse_moe.experts.83.w3", "model.layers.21.block_sparse_moe.experts.84.w3", "model.layers.21.block_sparse_moe.experts.85.w3", "model.layers.21.block_sparse_moe.experts.86.w3", "model.layers.21.block_sparse_moe.experts.87.w3", "model.layers.21.block_sparse_moe.experts.88.w3", "model.layers.21.block_sparse_moe.experts.89.w3", "model.layers.21.block_sparse_moe.experts.90.w3", "model.layers.21.block_sparse_moe.experts.91.w3", "model.layers.21.block_sparse_moe.experts.92.w3", "model.layers.21.block_sparse_moe.experts.93.w3", "model.layers.21.block_sparse_moe.experts.94.w3", "model.layers.21.block_sparse_moe.experts.95.w3", "model.layers.21.block_sparse_moe.experts.96.w3", "model.layers.21.block_sparse_moe.experts.97.w3", "model.layers.21.block_sparse_moe.experts.98.w3", "model.layers.21.block_sparse_moe.experts.99.w3", "model.layers.21.block_sparse_moe.experts.100.w3", "model.layers.21.block_sparse_moe.experts.101.w3", "model.layers.21.block_sparse_moe.experts.102.w3", "model.layers.21.block_sparse_moe.experts.103.w3", "model.layers.21.block_sparse_moe.experts.104.w3", "model.layers.21.block_sparse_moe.experts.105.w3", "model.layers.21.block_sparse_moe.experts.106.w3", "model.layers.21.block_sparse_moe.experts.107.w3", "model.layers.21.block_sparse_moe.experts.108.w3", "model.layers.21.block_sparse_moe.experts.109.w3", "model.layers.21.block_sparse_moe.experts.110.w3", "model.layers.21.block_sparse_moe.experts.111.w3", "model.layers.21.block_sparse_moe.experts.112.w3", "model.layers.21.block_sparse_moe.experts.113.w3", "model.layers.21.block_sparse_moe.experts.114.w3", "model.layers.21.block_sparse_moe.experts.115.w3", "model.layers.21.block_sparse_moe.experts.116.w3", "model.layers.21.block_sparse_moe.experts.117.w3", "model.layers.21.block_sparse_moe.experts.118.w3", "model.layers.21.block_sparse_moe.experts.119.w3", "model.layers.21.block_sparse_moe.experts.120.w3", "model.layers.21.block_sparse_moe.experts.121.w3", "model.layers.21.block_sparse_moe.experts.122.w3", "model.layers.21.block_sparse_moe.experts.123.w3", "model.layers.21.block_sparse_moe.experts.124.w3", "model.layers.21.block_sparse_moe.experts.125.w3", "model.layers.21.block_sparse_moe.experts.126.w3", "model.layers.21.block_sparse_moe.experts.127.w3", "model.layers.21.block_sparse_moe.experts.128.w3", "model.layers.21.block_sparse_moe.experts.129.w3", "model.layers.21.block_sparse_moe.experts.130.w3", "model.layers.21.block_sparse_moe.experts.131.w3", "model.layers.21.block_sparse_moe.experts.132.w3", "model.layers.21.block_sparse_moe.experts.133.w3", "model.layers.21.block_sparse_moe.experts.134.w3", "model.layers.21.block_sparse_moe.experts.135.w3", "model.layers.21.block_sparse_moe.experts.136.w3", "model.layers.21.block_sparse_moe.experts.137.w3", "model.layers.21.block_sparse_moe.experts.138.w3", "model.layers.21.block_sparse_moe.experts.139.w3", "model.layers.21.block_sparse_moe.experts.140.w3", "model.layers.21.block_sparse_moe.experts.141.w3", "model.layers.21.block_sparse_moe.experts.142.w3", "model.layers.21.block_sparse_moe.experts.143.w3", "model.layers.21.block_sparse_moe.experts.144.w3", "model.layers.21.block_sparse_moe.experts.145.w3", "model.layers.21.block_sparse_moe.experts.146.w3", "model.layers.21.block_sparse_moe.experts.147.w3", "model.layers.21.block_sparse_moe.experts.148.w3", "model.layers.21.block_sparse_moe.experts.149.w3", "model.layers.21.block_sparse_moe.experts.150.w3", "model.layers.21.block_sparse_moe.experts.151.w3", "model.layers.21.block_sparse_moe.experts.152.w3", "model.layers.21.block_sparse_moe.experts.153.w3", "model.layers.21.block_sparse_moe.experts.154.w3", "model.layers.21.block_sparse_moe.experts.155.w3", "model.layers.21.block_sparse_moe.experts.156.w3", "model.layers.21.block_sparse_moe.experts.157.w3", "model.layers.21.block_sparse_moe.experts.158.w3", "model.layers.21.block_sparse_moe.experts.159.w3", "model.layers.21.block_sparse_moe.experts.160.w3", "model.layers.21.block_sparse_moe.experts.161.w3", "model.layers.21.block_sparse_moe.experts.162.w3", "model.layers.21.block_sparse_moe.experts.163.w3", "model.layers.21.block_sparse_moe.experts.164.w3", "model.layers.21.block_sparse_moe.experts.165.w3", "model.layers.21.block_sparse_moe.experts.166.w3", "model.layers.21.block_sparse_moe.experts.167.w3", "model.layers.21.block_sparse_moe.experts.168.w3", "model.layers.21.block_sparse_moe.experts.169.w3", "model.layers.21.block_sparse_moe.experts.170.w3", "model.layers.21.block_sparse_moe.experts.171.w3", "model.layers.21.block_sparse_moe.experts.172.w3", "model.layers.21.block_sparse_moe.experts.173.w3", "model.layers.21.block_sparse_moe.experts.174.w3", "model.layers.21.block_sparse_moe.experts.175.w3", "model.layers.21.block_sparse_moe.experts.176.w3", "model.layers.21.block_sparse_moe.experts.177.w3", "model.layers.21.block_sparse_moe.experts.178.w3", "model.layers.21.block_sparse_moe.experts.179.w3", "model.layers.21.block_sparse_moe.experts.180.w3", "model.layers.21.block_sparse_moe.experts.181.w3", "model.layers.21.block_sparse_moe.experts.182.w3", "model.layers.21.block_sparse_moe.experts.183.w3", "model.layers.21.block_sparse_moe.experts.184.w3", "model.layers.21.block_sparse_moe.experts.185.w3", "model.layers.21.block_sparse_moe.experts.186.w3", "model.layers.21.block_sparse_moe.experts.187.w3", "model.layers.21.block_sparse_moe.experts.188.w3", "model.layers.21.block_sparse_moe.experts.189.w3", "model.layers.21.block_sparse_moe.experts.190.w3", "model.layers.21.block_sparse_moe.experts.191.w3", "model.layers.21.block_sparse_moe.experts.192.w3", "model.layers.21.block_sparse_moe.experts.193.w3", "model.layers.21.block_sparse_moe.experts.194.w3", "model.layers.21.block_sparse_moe.experts.195.w3", "model.layers.21.block_sparse_moe.experts.196.w3", "model.layers.21.block_sparse_moe.experts.197.w3", "model.layers.21.block_sparse_moe.experts.198.w3", "model.layers.21.block_sparse_moe.experts.199.w3", "model.layers.21.block_sparse_moe.experts.200.w3", "model.layers.21.block_sparse_moe.experts.201.w3", "model.layers.21.block_sparse_moe.experts.202.w3", "model.layers.21.block_sparse_moe.experts.203.w3", "model.layers.21.block_sparse_moe.experts.204.w3", "model.layers.21.block_sparse_moe.experts.205.w3", "model.layers.21.block_sparse_moe.experts.206.w3", "model.layers.21.block_sparse_moe.experts.207.w3", "model.layers.21.block_sparse_moe.experts.208.w3", "model.layers.21.block_sparse_moe.experts.209.w3", "model.layers.21.block_sparse_moe.experts.210.w3", "model.layers.21.block_sparse_moe.experts.211.w3", "model.layers.21.block_sparse_moe.experts.212.w3", "model.layers.21.block_sparse_moe.experts.213.w3", "model.layers.21.block_sparse_moe.experts.214.w3", "model.layers.21.block_sparse_moe.experts.215.w3", "model.layers.21.block_sparse_moe.experts.216.w3", "model.layers.21.block_sparse_moe.experts.217.w3", "model.layers.21.block_sparse_moe.experts.218.w3", "model.layers.21.block_sparse_moe.experts.219.w3", "model.layers.21.block_sparse_moe.experts.220.w3", "model.layers.21.block_sparse_moe.experts.221.w3", "model.layers.21.block_sparse_moe.experts.222.w3", "model.layers.21.block_sparse_moe.experts.223.w3", "model.layers.21.block_sparse_moe.experts.224.w3", "model.layers.21.block_sparse_moe.experts.225.w3", "model.layers.21.block_sparse_moe.experts.226.w3", "model.layers.21.block_sparse_moe.experts.227.w3", "model.layers.21.block_sparse_moe.experts.228.w3", "model.layers.21.block_sparse_moe.experts.229.w3", "model.layers.21.block_sparse_moe.experts.230.w3", "model.layers.21.block_sparse_moe.experts.231.w3", "model.layers.21.block_sparse_moe.experts.232.w3", "model.layers.21.block_sparse_moe.experts.233.w3", "model.layers.21.block_sparse_moe.experts.234.w3", "model.layers.21.block_sparse_moe.experts.235.w3", "model.layers.21.block_sparse_moe.experts.236.w3", "model.layers.21.block_sparse_moe.experts.237.w3", "model.layers.21.block_sparse_moe.experts.238.w3", "model.layers.21.block_sparse_moe.experts.239.w3", "model.layers.21.block_sparse_moe.experts.240.w3", "model.layers.21.block_sparse_moe.experts.241.w3", "model.layers.21.block_sparse_moe.experts.242.w3", "model.layers.21.block_sparse_moe.experts.243.w3", "model.layers.21.block_sparse_moe.experts.244.w3", "model.layers.21.block_sparse_moe.experts.245.w3", "model.layers.21.block_sparse_moe.experts.246.w3", "model.layers.21.block_sparse_moe.experts.247.w3", "model.layers.21.block_sparse_moe.experts.248.w3", "model.layers.21.block_sparse_moe.experts.249.w3", "model.layers.21.block_sparse_moe.experts.250.w3", "model.layers.21.block_sparse_moe.experts.251.w3", "model.layers.21.block_sparse_moe.experts.252.w3", "model.layers.21.block_sparse_moe.experts.253.w3", "model.layers.21.block_sparse_moe.experts.254.w3", "model.layers.21.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0028874874114990234, "dbits": 2415919104 } ] }, { "idx": 109, "layers": [ "model.layers.21.block_sparse_moe.experts.0.w2", "model.layers.21.block_sparse_moe.experts.1.w2", "model.layers.21.block_sparse_moe.experts.2.w2", "model.layers.21.block_sparse_moe.experts.3.w2", "model.layers.21.block_sparse_moe.experts.4.w2", "model.layers.21.block_sparse_moe.experts.5.w2", "model.layers.21.block_sparse_moe.experts.6.w2", "model.layers.21.block_sparse_moe.experts.7.w2", "model.layers.21.block_sparse_moe.experts.8.w2", "model.layers.21.block_sparse_moe.experts.9.w2", "model.layers.21.block_sparse_moe.experts.10.w2", "model.layers.21.block_sparse_moe.experts.11.w2", "model.layers.21.block_sparse_moe.experts.12.w2", "model.layers.21.block_sparse_moe.experts.13.w2", "model.layers.21.block_sparse_moe.experts.14.w2", "model.layers.21.block_sparse_moe.experts.15.w2", "model.layers.21.block_sparse_moe.experts.16.w2", "model.layers.21.block_sparse_moe.experts.17.w2", "model.layers.21.block_sparse_moe.experts.18.w2", "model.layers.21.block_sparse_moe.experts.19.w2", "model.layers.21.block_sparse_moe.experts.20.w2", "model.layers.21.block_sparse_moe.experts.21.w2", "model.layers.21.block_sparse_moe.experts.22.w2", "model.layers.21.block_sparse_moe.experts.23.w2", "model.layers.21.block_sparse_moe.experts.24.w2", "model.layers.21.block_sparse_moe.experts.25.w2", "model.layers.21.block_sparse_moe.experts.26.w2", "model.layers.21.block_sparse_moe.experts.27.w2", "model.layers.21.block_sparse_moe.experts.28.w2", "model.layers.21.block_sparse_moe.experts.29.w2", "model.layers.21.block_sparse_moe.experts.30.w2", "model.layers.21.block_sparse_moe.experts.31.w2", "model.layers.21.block_sparse_moe.experts.32.w2", "model.layers.21.block_sparse_moe.experts.33.w2", "model.layers.21.block_sparse_moe.experts.34.w2", "model.layers.21.block_sparse_moe.experts.35.w2", "model.layers.21.block_sparse_moe.experts.36.w2", "model.layers.21.block_sparse_moe.experts.37.w2", "model.layers.21.block_sparse_moe.experts.38.w2", "model.layers.21.block_sparse_moe.experts.39.w2", "model.layers.21.block_sparse_moe.experts.40.w2", "model.layers.21.block_sparse_moe.experts.41.w2", "model.layers.21.block_sparse_moe.experts.42.w2", "model.layers.21.block_sparse_moe.experts.43.w2", "model.layers.21.block_sparse_moe.experts.44.w2", "model.layers.21.block_sparse_moe.experts.45.w2", "model.layers.21.block_sparse_moe.experts.46.w2", "model.layers.21.block_sparse_moe.experts.47.w2", "model.layers.21.block_sparse_moe.experts.48.w2", "model.layers.21.block_sparse_moe.experts.49.w2", "model.layers.21.block_sparse_moe.experts.50.w2", "model.layers.21.block_sparse_moe.experts.51.w2", "model.layers.21.block_sparse_moe.experts.52.w2", "model.layers.21.block_sparse_moe.experts.53.w2", "model.layers.21.block_sparse_moe.experts.54.w2", "model.layers.21.block_sparse_moe.experts.55.w2", "model.layers.21.block_sparse_moe.experts.56.w2", "model.layers.21.block_sparse_moe.experts.57.w2", "model.layers.21.block_sparse_moe.experts.58.w2", "model.layers.21.block_sparse_moe.experts.59.w2", "model.layers.21.block_sparse_moe.experts.60.w2", "model.layers.21.block_sparse_moe.experts.61.w2", "model.layers.21.block_sparse_moe.experts.62.w2", "model.layers.21.block_sparse_moe.experts.63.w2", "model.layers.21.block_sparse_moe.experts.64.w2", "model.layers.21.block_sparse_moe.experts.65.w2", "model.layers.21.block_sparse_moe.experts.66.w2", "model.layers.21.block_sparse_moe.experts.67.w2", "model.layers.21.block_sparse_moe.experts.68.w2", "model.layers.21.block_sparse_moe.experts.69.w2", "model.layers.21.block_sparse_moe.experts.70.w2", "model.layers.21.block_sparse_moe.experts.71.w2", "model.layers.21.block_sparse_moe.experts.72.w2", "model.layers.21.block_sparse_moe.experts.73.w2", "model.layers.21.block_sparse_moe.experts.74.w2", "model.layers.21.block_sparse_moe.experts.75.w2", "model.layers.21.block_sparse_moe.experts.76.w2", "model.layers.21.block_sparse_moe.experts.77.w2", "model.layers.21.block_sparse_moe.experts.78.w2", "model.layers.21.block_sparse_moe.experts.79.w2", "model.layers.21.block_sparse_moe.experts.80.w2", "model.layers.21.block_sparse_moe.experts.81.w2", "model.layers.21.block_sparse_moe.experts.82.w2", "model.layers.21.block_sparse_moe.experts.83.w2", "model.layers.21.block_sparse_moe.experts.84.w2", "model.layers.21.block_sparse_moe.experts.85.w2", "model.layers.21.block_sparse_moe.experts.86.w2", "model.layers.21.block_sparse_moe.experts.87.w2", "model.layers.21.block_sparse_moe.experts.88.w2", "model.layers.21.block_sparse_moe.experts.89.w2", "model.layers.21.block_sparse_moe.experts.90.w2", "model.layers.21.block_sparse_moe.experts.91.w2", "model.layers.21.block_sparse_moe.experts.92.w2", "model.layers.21.block_sparse_moe.experts.93.w2", "model.layers.21.block_sparse_moe.experts.94.w2", "model.layers.21.block_sparse_moe.experts.95.w2", "model.layers.21.block_sparse_moe.experts.96.w2", "model.layers.21.block_sparse_moe.experts.97.w2", "model.layers.21.block_sparse_moe.experts.98.w2", "model.layers.21.block_sparse_moe.experts.99.w2", "model.layers.21.block_sparse_moe.experts.100.w2", "model.layers.21.block_sparse_moe.experts.101.w2", "model.layers.21.block_sparse_moe.experts.102.w2", "model.layers.21.block_sparse_moe.experts.103.w2", "model.layers.21.block_sparse_moe.experts.104.w2", "model.layers.21.block_sparse_moe.experts.105.w2", "model.layers.21.block_sparse_moe.experts.106.w2", "model.layers.21.block_sparse_moe.experts.107.w2", "model.layers.21.block_sparse_moe.experts.108.w2", "model.layers.21.block_sparse_moe.experts.109.w2", "model.layers.21.block_sparse_moe.experts.110.w2", "model.layers.21.block_sparse_moe.experts.111.w2", "model.layers.21.block_sparse_moe.experts.112.w2", "model.layers.21.block_sparse_moe.experts.113.w2", "model.layers.21.block_sparse_moe.experts.114.w2", "model.layers.21.block_sparse_moe.experts.115.w2", "model.layers.21.block_sparse_moe.experts.116.w2", "model.layers.21.block_sparse_moe.experts.117.w2", "model.layers.21.block_sparse_moe.experts.118.w2", "model.layers.21.block_sparse_moe.experts.119.w2", "model.layers.21.block_sparse_moe.experts.120.w2", "model.layers.21.block_sparse_moe.experts.121.w2", "model.layers.21.block_sparse_moe.experts.122.w2", "model.layers.21.block_sparse_moe.experts.123.w2", "model.layers.21.block_sparse_moe.experts.124.w2", "model.layers.21.block_sparse_moe.experts.125.w2", "model.layers.21.block_sparse_moe.experts.126.w2", "model.layers.21.block_sparse_moe.experts.127.w2", "model.layers.21.block_sparse_moe.experts.128.w2", "model.layers.21.block_sparse_moe.experts.129.w2", "model.layers.21.block_sparse_moe.experts.130.w2", "model.layers.21.block_sparse_moe.experts.131.w2", "model.layers.21.block_sparse_moe.experts.132.w2", "model.layers.21.block_sparse_moe.experts.133.w2", "model.layers.21.block_sparse_moe.experts.134.w2", "model.layers.21.block_sparse_moe.experts.135.w2", "model.layers.21.block_sparse_moe.experts.136.w2", "model.layers.21.block_sparse_moe.experts.137.w2", "model.layers.21.block_sparse_moe.experts.138.w2", "model.layers.21.block_sparse_moe.experts.139.w2", "model.layers.21.block_sparse_moe.experts.140.w2", "model.layers.21.block_sparse_moe.experts.141.w2", "model.layers.21.block_sparse_moe.experts.142.w2", "model.layers.21.block_sparse_moe.experts.143.w2", "model.layers.21.block_sparse_moe.experts.144.w2", "model.layers.21.block_sparse_moe.experts.145.w2", "model.layers.21.block_sparse_moe.experts.146.w2", "model.layers.21.block_sparse_moe.experts.147.w2", "model.layers.21.block_sparse_moe.experts.148.w2", "model.layers.21.block_sparse_moe.experts.149.w2", "model.layers.21.block_sparse_moe.experts.150.w2", "model.layers.21.block_sparse_moe.experts.151.w2", "model.layers.21.block_sparse_moe.experts.152.w2", "model.layers.21.block_sparse_moe.experts.153.w2", "model.layers.21.block_sparse_moe.experts.154.w2", "model.layers.21.block_sparse_moe.experts.155.w2", "model.layers.21.block_sparse_moe.experts.156.w2", "model.layers.21.block_sparse_moe.experts.157.w2", "model.layers.21.block_sparse_moe.experts.158.w2", "model.layers.21.block_sparse_moe.experts.159.w2", "model.layers.21.block_sparse_moe.experts.160.w2", "model.layers.21.block_sparse_moe.experts.161.w2", "model.layers.21.block_sparse_moe.experts.162.w2", "model.layers.21.block_sparse_moe.experts.163.w2", "model.layers.21.block_sparse_moe.experts.164.w2", "model.layers.21.block_sparse_moe.experts.165.w2", "model.layers.21.block_sparse_moe.experts.166.w2", "model.layers.21.block_sparse_moe.experts.167.w2", "model.layers.21.block_sparse_moe.experts.168.w2", "model.layers.21.block_sparse_moe.experts.169.w2", "model.layers.21.block_sparse_moe.experts.170.w2", "model.layers.21.block_sparse_moe.experts.171.w2", "model.layers.21.block_sparse_moe.experts.172.w2", "model.layers.21.block_sparse_moe.experts.173.w2", "model.layers.21.block_sparse_moe.experts.174.w2", "model.layers.21.block_sparse_moe.experts.175.w2", "model.layers.21.block_sparse_moe.experts.176.w2", "model.layers.21.block_sparse_moe.experts.177.w2", "model.layers.21.block_sparse_moe.experts.178.w2", "model.layers.21.block_sparse_moe.experts.179.w2", "model.layers.21.block_sparse_moe.experts.180.w2", "model.layers.21.block_sparse_moe.experts.181.w2", "model.layers.21.block_sparse_moe.experts.182.w2", "model.layers.21.block_sparse_moe.experts.183.w2", "model.layers.21.block_sparse_moe.experts.184.w2", "model.layers.21.block_sparse_moe.experts.185.w2", "model.layers.21.block_sparse_moe.experts.186.w2", "model.layers.21.block_sparse_moe.experts.187.w2", "model.layers.21.block_sparse_moe.experts.188.w2", "model.layers.21.block_sparse_moe.experts.189.w2", "model.layers.21.block_sparse_moe.experts.190.w2", "model.layers.21.block_sparse_moe.experts.191.w2", "model.layers.21.block_sparse_moe.experts.192.w2", "model.layers.21.block_sparse_moe.experts.193.w2", "model.layers.21.block_sparse_moe.experts.194.w2", "model.layers.21.block_sparse_moe.experts.195.w2", "model.layers.21.block_sparse_moe.experts.196.w2", "model.layers.21.block_sparse_moe.experts.197.w2", "model.layers.21.block_sparse_moe.experts.198.w2", "model.layers.21.block_sparse_moe.experts.199.w2", "model.layers.21.block_sparse_moe.experts.200.w2", "model.layers.21.block_sparse_moe.experts.201.w2", "model.layers.21.block_sparse_moe.experts.202.w2", "model.layers.21.block_sparse_moe.experts.203.w2", "model.layers.21.block_sparse_moe.experts.204.w2", "model.layers.21.block_sparse_moe.experts.205.w2", "model.layers.21.block_sparse_moe.experts.206.w2", "model.layers.21.block_sparse_moe.experts.207.w2", "model.layers.21.block_sparse_moe.experts.208.w2", "model.layers.21.block_sparse_moe.experts.209.w2", "model.layers.21.block_sparse_moe.experts.210.w2", "model.layers.21.block_sparse_moe.experts.211.w2", "model.layers.21.block_sparse_moe.experts.212.w2", "model.layers.21.block_sparse_moe.experts.213.w2", "model.layers.21.block_sparse_moe.experts.214.w2", "model.layers.21.block_sparse_moe.experts.215.w2", "model.layers.21.block_sparse_moe.experts.216.w2", "model.layers.21.block_sparse_moe.experts.217.w2", "model.layers.21.block_sparse_moe.experts.218.w2", "model.layers.21.block_sparse_moe.experts.219.w2", "model.layers.21.block_sparse_moe.experts.220.w2", "model.layers.21.block_sparse_moe.experts.221.w2", "model.layers.21.block_sparse_moe.experts.222.w2", "model.layers.21.block_sparse_moe.experts.223.w2", "model.layers.21.block_sparse_moe.experts.224.w2", "model.layers.21.block_sparse_moe.experts.225.w2", "model.layers.21.block_sparse_moe.experts.226.w2", "model.layers.21.block_sparse_moe.experts.227.w2", "model.layers.21.block_sparse_moe.experts.228.w2", "model.layers.21.block_sparse_moe.experts.229.w2", "model.layers.21.block_sparse_moe.experts.230.w2", "model.layers.21.block_sparse_moe.experts.231.w2", "model.layers.21.block_sparse_moe.experts.232.w2", "model.layers.21.block_sparse_moe.experts.233.w2", "model.layers.21.block_sparse_moe.experts.234.w2", "model.layers.21.block_sparse_moe.experts.235.w2", "model.layers.21.block_sparse_moe.experts.236.w2", "model.layers.21.block_sparse_moe.experts.237.w2", "model.layers.21.block_sparse_moe.experts.238.w2", "model.layers.21.block_sparse_moe.experts.239.w2", "model.layers.21.block_sparse_moe.experts.240.w2", "model.layers.21.block_sparse_moe.experts.241.w2", "model.layers.21.block_sparse_moe.experts.242.w2", "model.layers.21.block_sparse_moe.experts.243.w2", "model.layers.21.block_sparse_moe.experts.244.w2", "model.layers.21.block_sparse_moe.experts.245.w2", "model.layers.21.block_sparse_moe.experts.246.w2", "model.layers.21.block_sparse_moe.experts.247.w2", "model.layers.21.block_sparse_moe.experts.248.w2", "model.layers.21.block_sparse_moe.experts.249.w2", "model.layers.21.block_sparse_moe.experts.250.w2", "model.layers.21.block_sparse_moe.experts.251.w2", "model.layers.21.block_sparse_moe.experts.252.w2", "model.layers.21.block_sparse_moe.experts.253.w2", "model.layers.21.block_sparse_moe.experts.254.w2", "model.layers.21.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00015052855014796584, "dbits": 1207959552 } ] }, { "idx": 110, "layers": [ "model.layers.22.self_attn.q_proj" ], "candidates": [ { "dkld": 0.003691628575325012, "dbits": 18874368 } ] }, { "idx": 111, "layers": [ "model.layers.22.self_attn.k_proj", "model.layers.22.self_attn.v_proj" ], "candidates": [ { "dkld": -0.00010401010513305664, "dbits": 6291456 } ] }, { "idx": 112, "layers": [ "model.layers.22.self_attn.o_proj" ], "candidates": [ { "dkld": -0.012904393672943049, "dbits": 18874368 } ] }, { "idx": 113, "layers": [ "model.layers.22.block_sparse_moe.experts.0.w1", "model.layers.22.block_sparse_moe.experts.1.w1", "model.layers.22.block_sparse_moe.experts.2.w1", "model.layers.22.block_sparse_moe.experts.3.w1", "model.layers.22.block_sparse_moe.experts.4.w1", "model.layers.22.block_sparse_moe.experts.5.w1", "model.layers.22.block_sparse_moe.experts.6.w1", "model.layers.22.block_sparse_moe.experts.7.w1", "model.layers.22.block_sparse_moe.experts.8.w1", "model.layers.22.block_sparse_moe.experts.9.w1", "model.layers.22.block_sparse_moe.experts.10.w1", "model.layers.22.block_sparse_moe.experts.11.w1", "model.layers.22.block_sparse_moe.experts.12.w1", "model.layers.22.block_sparse_moe.experts.13.w1", "model.layers.22.block_sparse_moe.experts.14.w1", "model.layers.22.block_sparse_moe.experts.15.w1", "model.layers.22.block_sparse_moe.experts.16.w1", "model.layers.22.block_sparse_moe.experts.17.w1", "model.layers.22.block_sparse_moe.experts.18.w1", "model.layers.22.block_sparse_moe.experts.19.w1", "model.layers.22.block_sparse_moe.experts.20.w1", "model.layers.22.block_sparse_moe.experts.21.w1", "model.layers.22.block_sparse_moe.experts.22.w1", "model.layers.22.block_sparse_moe.experts.23.w1", "model.layers.22.block_sparse_moe.experts.24.w1", "model.layers.22.block_sparse_moe.experts.25.w1", "model.layers.22.block_sparse_moe.experts.26.w1", "model.layers.22.block_sparse_moe.experts.27.w1", "model.layers.22.block_sparse_moe.experts.28.w1", "model.layers.22.block_sparse_moe.experts.29.w1", "model.layers.22.block_sparse_moe.experts.30.w1", "model.layers.22.block_sparse_moe.experts.31.w1", "model.layers.22.block_sparse_moe.experts.32.w1", "model.layers.22.block_sparse_moe.experts.33.w1", "model.layers.22.block_sparse_moe.experts.34.w1", "model.layers.22.block_sparse_moe.experts.35.w1", "model.layers.22.block_sparse_moe.experts.36.w1", "model.layers.22.block_sparse_moe.experts.37.w1", "model.layers.22.block_sparse_moe.experts.38.w1", "model.layers.22.block_sparse_moe.experts.39.w1", "model.layers.22.block_sparse_moe.experts.40.w1", "model.layers.22.block_sparse_moe.experts.41.w1", "model.layers.22.block_sparse_moe.experts.42.w1", "model.layers.22.block_sparse_moe.experts.43.w1", "model.layers.22.block_sparse_moe.experts.44.w1", "model.layers.22.block_sparse_moe.experts.45.w1", "model.layers.22.block_sparse_moe.experts.46.w1", "model.layers.22.block_sparse_moe.experts.47.w1", "model.layers.22.block_sparse_moe.experts.48.w1", "model.layers.22.block_sparse_moe.experts.49.w1", "model.layers.22.block_sparse_moe.experts.50.w1", "model.layers.22.block_sparse_moe.experts.51.w1", "model.layers.22.block_sparse_moe.experts.52.w1", "model.layers.22.block_sparse_moe.experts.53.w1", "model.layers.22.block_sparse_moe.experts.54.w1", "model.layers.22.block_sparse_moe.experts.55.w1", "model.layers.22.block_sparse_moe.experts.56.w1", "model.layers.22.block_sparse_moe.experts.57.w1", "model.layers.22.block_sparse_moe.experts.58.w1", "model.layers.22.block_sparse_moe.experts.59.w1", "model.layers.22.block_sparse_moe.experts.60.w1", "model.layers.22.block_sparse_moe.experts.61.w1", "model.layers.22.block_sparse_moe.experts.62.w1", "model.layers.22.block_sparse_moe.experts.63.w1", "model.layers.22.block_sparse_moe.experts.64.w1", "model.layers.22.block_sparse_moe.experts.65.w1", "model.layers.22.block_sparse_moe.experts.66.w1", "model.layers.22.block_sparse_moe.experts.67.w1", "model.layers.22.block_sparse_moe.experts.68.w1", "model.layers.22.block_sparse_moe.experts.69.w1", "model.layers.22.block_sparse_moe.experts.70.w1", "model.layers.22.block_sparse_moe.experts.71.w1", "model.layers.22.block_sparse_moe.experts.72.w1", "model.layers.22.block_sparse_moe.experts.73.w1", "model.layers.22.block_sparse_moe.experts.74.w1", "model.layers.22.block_sparse_moe.experts.75.w1", "model.layers.22.block_sparse_moe.experts.76.w1", "model.layers.22.block_sparse_moe.experts.77.w1", "model.layers.22.block_sparse_moe.experts.78.w1", "model.layers.22.block_sparse_moe.experts.79.w1", "model.layers.22.block_sparse_moe.experts.80.w1", "model.layers.22.block_sparse_moe.experts.81.w1", "model.layers.22.block_sparse_moe.experts.82.w1", "model.layers.22.block_sparse_moe.experts.83.w1", "model.layers.22.block_sparse_moe.experts.84.w1", "model.layers.22.block_sparse_moe.experts.85.w1", "model.layers.22.block_sparse_moe.experts.86.w1", "model.layers.22.block_sparse_moe.experts.87.w1", "model.layers.22.block_sparse_moe.experts.88.w1", "model.layers.22.block_sparse_moe.experts.89.w1", "model.layers.22.block_sparse_moe.experts.90.w1", "model.layers.22.block_sparse_moe.experts.91.w1", "model.layers.22.block_sparse_moe.experts.92.w1", "model.layers.22.block_sparse_moe.experts.93.w1", "model.layers.22.block_sparse_moe.experts.94.w1", "model.layers.22.block_sparse_moe.experts.95.w1", "model.layers.22.block_sparse_moe.experts.96.w1", "model.layers.22.block_sparse_moe.experts.97.w1", "model.layers.22.block_sparse_moe.experts.98.w1", "model.layers.22.block_sparse_moe.experts.99.w1", "model.layers.22.block_sparse_moe.experts.100.w1", "model.layers.22.block_sparse_moe.experts.101.w1", "model.layers.22.block_sparse_moe.experts.102.w1", "model.layers.22.block_sparse_moe.experts.103.w1", "model.layers.22.block_sparse_moe.experts.104.w1", "model.layers.22.block_sparse_moe.experts.105.w1", "model.layers.22.block_sparse_moe.experts.106.w1", "model.layers.22.block_sparse_moe.experts.107.w1", "model.layers.22.block_sparse_moe.experts.108.w1", "model.layers.22.block_sparse_moe.experts.109.w1", "model.layers.22.block_sparse_moe.experts.110.w1", "model.layers.22.block_sparse_moe.experts.111.w1", "model.layers.22.block_sparse_moe.experts.112.w1", "model.layers.22.block_sparse_moe.experts.113.w1", "model.layers.22.block_sparse_moe.experts.114.w1", "model.layers.22.block_sparse_moe.experts.115.w1", "model.layers.22.block_sparse_moe.experts.116.w1", "model.layers.22.block_sparse_moe.experts.117.w1", "model.layers.22.block_sparse_moe.experts.118.w1", "model.layers.22.block_sparse_moe.experts.119.w1", "model.layers.22.block_sparse_moe.experts.120.w1", "model.layers.22.block_sparse_moe.experts.121.w1", "model.layers.22.block_sparse_moe.experts.122.w1", "model.layers.22.block_sparse_moe.experts.123.w1", "model.layers.22.block_sparse_moe.experts.124.w1", "model.layers.22.block_sparse_moe.experts.125.w1", "model.layers.22.block_sparse_moe.experts.126.w1", "model.layers.22.block_sparse_moe.experts.127.w1", "model.layers.22.block_sparse_moe.experts.128.w1", "model.layers.22.block_sparse_moe.experts.129.w1", "model.layers.22.block_sparse_moe.experts.130.w1", "model.layers.22.block_sparse_moe.experts.131.w1", "model.layers.22.block_sparse_moe.experts.132.w1", "model.layers.22.block_sparse_moe.experts.133.w1", "model.layers.22.block_sparse_moe.experts.134.w1", "model.layers.22.block_sparse_moe.experts.135.w1", "model.layers.22.block_sparse_moe.experts.136.w1", "model.layers.22.block_sparse_moe.experts.137.w1", "model.layers.22.block_sparse_moe.experts.138.w1", "model.layers.22.block_sparse_moe.experts.139.w1", "model.layers.22.block_sparse_moe.experts.140.w1", "model.layers.22.block_sparse_moe.experts.141.w1", "model.layers.22.block_sparse_moe.experts.142.w1", "model.layers.22.block_sparse_moe.experts.143.w1", "model.layers.22.block_sparse_moe.experts.144.w1", "model.layers.22.block_sparse_moe.experts.145.w1", "model.layers.22.block_sparse_moe.experts.146.w1", "model.layers.22.block_sparse_moe.experts.147.w1", "model.layers.22.block_sparse_moe.experts.148.w1", "model.layers.22.block_sparse_moe.experts.149.w1", "model.layers.22.block_sparse_moe.experts.150.w1", "model.layers.22.block_sparse_moe.experts.151.w1", "model.layers.22.block_sparse_moe.experts.152.w1", "model.layers.22.block_sparse_moe.experts.153.w1", "model.layers.22.block_sparse_moe.experts.154.w1", "model.layers.22.block_sparse_moe.experts.155.w1", "model.layers.22.block_sparse_moe.experts.156.w1", "model.layers.22.block_sparse_moe.experts.157.w1", "model.layers.22.block_sparse_moe.experts.158.w1", "model.layers.22.block_sparse_moe.experts.159.w1", "model.layers.22.block_sparse_moe.experts.160.w1", "model.layers.22.block_sparse_moe.experts.161.w1", "model.layers.22.block_sparse_moe.experts.162.w1", "model.layers.22.block_sparse_moe.experts.163.w1", "model.layers.22.block_sparse_moe.experts.164.w1", "model.layers.22.block_sparse_moe.experts.165.w1", "model.layers.22.block_sparse_moe.experts.166.w1", "model.layers.22.block_sparse_moe.experts.167.w1", "model.layers.22.block_sparse_moe.experts.168.w1", "model.layers.22.block_sparse_moe.experts.169.w1", "model.layers.22.block_sparse_moe.experts.170.w1", "model.layers.22.block_sparse_moe.experts.171.w1", "model.layers.22.block_sparse_moe.experts.172.w1", "model.layers.22.block_sparse_moe.experts.173.w1", "model.layers.22.block_sparse_moe.experts.174.w1", "model.layers.22.block_sparse_moe.experts.175.w1", "model.layers.22.block_sparse_moe.experts.176.w1", "model.layers.22.block_sparse_moe.experts.177.w1", "model.layers.22.block_sparse_moe.experts.178.w1", "model.layers.22.block_sparse_moe.experts.179.w1", "model.layers.22.block_sparse_moe.experts.180.w1", "model.layers.22.block_sparse_moe.experts.181.w1", "model.layers.22.block_sparse_moe.experts.182.w1", "model.layers.22.block_sparse_moe.experts.183.w1", "model.layers.22.block_sparse_moe.experts.184.w1", "model.layers.22.block_sparse_moe.experts.185.w1", "model.layers.22.block_sparse_moe.experts.186.w1", "model.layers.22.block_sparse_moe.experts.187.w1", "model.layers.22.block_sparse_moe.experts.188.w1", "model.layers.22.block_sparse_moe.experts.189.w1", "model.layers.22.block_sparse_moe.experts.190.w1", "model.layers.22.block_sparse_moe.experts.191.w1", "model.layers.22.block_sparse_moe.experts.192.w1", "model.layers.22.block_sparse_moe.experts.193.w1", "model.layers.22.block_sparse_moe.experts.194.w1", "model.layers.22.block_sparse_moe.experts.195.w1", "model.layers.22.block_sparse_moe.experts.196.w1", "model.layers.22.block_sparse_moe.experts.197.w1", "model.layers.22.block_sparse_moe.experts.198.w1", "model.layers.22.block_sparse_moe.experts.199.w1", "model.layers.22.block_sparse_moe.experts.200.w1", "model.layers.22.block_sparse_moe.experts.201.w1", "model.layers.22.block_sparse_moe.experts.202.w1", "model.layers.22.block_sparse_moe.experts.203.w1", "model.layers.22.block_sparse_moe.experts.204.w1", "model.layers.22.block_sparse_moe.experts.205.w1", "model.layers.22.block_sparse_moe.experts.206.w1", "model.layers.22.block_sparse_moe.experts.207.w1", "model.layers.22.block_sparse_moe.experts.208.w1", "model.layers.22.block_sparse_moe.experts.209.w1", "model.layers.22.block_sparse_moe.experts.210.w1", "model.layers.22.block_sparse_moe.experts.211.w1", "model.layers.22.block_sparse_moe.experts.212.w1", "model.layers.22.block_sparse_moe.experts.213.w1", "model.layers.22.block_sparse_moe.experts.214.w1", "model.layers.22.block_sparse_moe.experts.215.w1", "model.layers.22.block_sparse_moe.experts.216.w1", "model.layers.22.block_sparse_moe.experts.217.w1", "model.layers.22.block_sparse_moe.experts.218.w1", "model.layers.22.block_sparse_moe.experts.219.w1", "model.layers.22.block_sparse_moe.experts.220.w1", "model.layers.22.block_sparse_moe.experts.221.w1", "model.layers.22.block_sparse_moe.experts.222.w1", "model.layers.22.block_sparse_moe.experts.223.w1", "model.layers.22.block_sparse_moe.experts.224.w1", "model.layers.22.block_sparse_moe.experts.225.w1", "model.layers.22.block_sparse_moe.experts.226.w1", "model.layers.22.block_sparse_moe.experts.227.w1", "model.layers.22.block_sparse_moe.experts.228.w1", "model.layers.22.block_sparse_moe.experts.229.w1", "model.layers.22.block_sparse_moe.experts.230.w1", "model.layers.22.block_sparse_moe.experts.231.w1", "model.layers.22.block_sparse_moe.experts.232.w1", "model.layers.22.block_sparse_moe.experts.233.w1", "model.layers.22.block_sparse_moe.experts.234.w1", "model.layers.22.block_sparse_moe.experts.235.w1", "model.layers.22.block_sparse_moe.experts.236.w1", "model.layers.22.block_sparse_moe.experts.237.w1", "model.layers.22.block_sparse_moe.experts.238.w1", "model.layers.22.block_sparse_moe.experts.239.w1", "model.layers.22.block_sparse_moe.experts.240.w1", "model.layers.22.block_sparse_moe.experts.241.w1", "model.layers.22.block_sparse_moe.experts.242.w1", "model.layers.22.block_sparse_moe.experts.243.w1", "model.layers.22.block_sparse_moe.experts.244.w1", "model.layers.22.block_sparse_moe.experts.245.w1", "model.layers.22.block_sparse_moe.experts.246.w1", "model.layers.22.block_sparse_moe.experts.247.w1", "model.layers.22.block_sparse_moe.experts.248.w1", "model.layers.22.block_sparse_moe.experts.249.w1", "model.layers.22.block_sparse_moe.experts.250.w1", "model.layers.22.block_sparse_moe.experts.251.w1", "model.layers.22.block_sparse_moe.experts.252.w1", "model.layers.22.block_sparse_moe.experts.253.w1", "model.layers.22.block_sparse_moe.experts.254.w1", "model.layers.22.block_sparse_moe.experts.255.w1", "model.layers.22.block_sparse_moe.experts.0.w3", "model.layers.22.block_sparse_moe.experts.1.w3", "model.layers.22.block_sparse_moe.experts.2.w3", "model.layers.22.block_sparse_moe.experts.3.w3", "model.layers.22.block_sparse_moe.experts.4.w3", "model.layers.22.block_sparse_moe.experts.5.w3", "model.layers.22.block_sparse_moe.experts.6.w3", "model.layers.22.block_sparse_moe.experts.7.w3", "model.layers.22.block_sparse_moe.experts.8.w3", "model.layers.22.block_sparse_moe.experts.9.w3", "model.layers.22.block_sparse_moe.experts.10.w3", "model.layers.22.block_sparse_moe.experts.11.w3", "model.layers.22.block_sparse_moe.experts.12.w3", "model.layers.22.block_sparse_moe.experts.13.w3", "model.layers.22.block_sparse_moe.experts.14.w3", "model.layers.22.block_sparse_moe.experts.15.w3", "model.layers.22.block_sparse_moe.experts.16.w3", "model.layers.22.block_sparse_moe.experts.17.w3", "model.layers.22.block_sparse_moe.experts.18.w3", "model.layers.22.block_sparse_moe.experts.19.w3", "model.layers.22.block_sparse_moe.experts.20.w3", "model.layers.22.block_sparse_moe.experts.21.w3", "model.layers.22.block_sparse_moe.experts.22.w3", "model.layers.22.block_sparse_moe.experts.23.w3", "model.layers.22.block_sparse_moe.experts.24.w3", "model.layers.22.block_sparse_moe.experts.25.w3", "model.layers.22.block_sparse_moe.experts.26.w3", "model.layers.22.block_sparse_moe.experts.27.w3", "model.layers.22.block_sparse_moe.experts.28.w3", "model.layers.22.block_sparse_moe.experts.29.w3", "model.layers.22.block_sparse_moe.experts.30.w3", "model.layers.22.block_sparse_moe.experts.31.w3", "model.layers.22.block_sparse_moe.experts.32.w3", "model.layers.22.block_sparse_moe.experts.33.w3", "model.layers.22.block_sparse_moe.experts.34.w3", "model.layers.22.block_sparse_moe.experts.35.w3", "model.layers.22.block_sparse_moe.experts.36.w3", "model.layers.22.block_sparse_moe.experts.37.w3", "model.layers.22.block_sparse_moe.experts.38.w3", "model.layers.22.block_sparse_moe.experts.39.w3", "model.layers.22.block_sparse_moe.experts.40.w3", "model.layers.22.block_sparse_moe.experts.41.w3", "model.layers.22.block_sparse_moe.experts.42.w3", "model.layers.22.block_sparse_moe.experts.43.w3", "model.layers.22.block_sparse_moe.experts.44.w3", "model.layers.22.block_sparse_moe.experts.45.w3", "model.layers.22.block_sparse_moe.experts.46.w3", "model.layers.22.block_sparse_moe.experts.47.w3", "model.layers.22.block_sparse_moe.experts.48.w3", "model.layers.22.block_sparse_moe.experts.49.w3", "model.layers.22.block_sparse_moe.experts.50.w3", "model.layers.22.block_sparse_moe.experts.51.w3", "model.layers.22.block_sparse_moe.experts.52.w3", "model.layers.22.block_sparse_moe.experts.53.w3", "model.layers.22.block_sparse_moe.experts.54.w3", "model.layers.22.block_sparse_moe.experts.55.w3", "model.layers.22.block_sparse_moe.experts.56.w3", "model.layers.22.block_sparse_moe.experts.57.w3", "model.layers.22.block_sparse_moe.experts.58.w3", "model.layers.22.block_sparse_moe.experts.59.w3", "model.layers.22.block_sparse_moe.experts.60.w3", "model.layers.22.block_sparse_moe.experts.61.w3", "model.layers.22.block_sparse_moe.experts.62.w3", "model.layers.22.block_sparse_moe.experts.63.w3", "model.layers.22.block_sparse_moe.experts.64.w3", "model.layers.22.block_sparse_moe.experts.65.w3", "model.layers.22.block_sparse_moe.experts.66.w3", "model.layers.22.block_sparse_moe.experts.67.w3", "model.layers.22.block_sparse_moe.experts.68.w3", "model.layers.22.block_sparse_moe.experts.69.w3", "model.layers.22.block_sparse_moe.experts.70.w3", "model.layers.22.block_sparse_moe.experts.71.w3", "model.layers.22.block_sparse_moe.experts.72.w3", "model.layers.22.block_sparse_moe.experts.73.w3", "model.layers.22.block_sparse_moe.experts.74.w3", "model.layers.22.block_sparse_moe.experts.75.w3", "model.layers.22.block_sparse_moe.experts.76.w3", "model.layers.22.block_sparse_moe.experts.77.w3", "model.layers.22.block_sparse_moe.experts.78.w3", "model.layers.22.block_sparse_moe.experts.79.w3", "model.layers.22.block_sparse_moe.experts.80.w3", "model.layers.22.block_sparse_moe.experts.81.w3", "model.layers.22.block_sparse_moe.experts.82.w3", "model.layers.22.block_sparse_moe.experts.83.w3", "model.layers.22.block_sparse_moe.experts.84.w3", "model.layers.22.block_sparse_moe.experts.85.w3", "model.layers.22.block_sparse_moe.experts.86.w3", "model.layers.22.block_sparse_moe.experts.87.w3", "model.layers.22.block_sparse_moe.experts.88.w3", "model.layers.22.block_sparse_moe.experts.89.w3", "model.layers.22.block_sparse_moe.experts.90.w3", "model.layers.22.block_sparse_moe.experts.91.w3", "model.layers.22.block_sparse_moe.experts.92.w3", "model.layers.22.block_sparse_moe.experts.93.w3", "model.layers.22.block_sparse_moe.experts.94.w3", "model.layers.22.block_sparse_moe.experts.95.w3", "model.layers.22.block_sparse_moe.experts.96.w3", "model.layers.22.block_sparse_moe.experts.97.w3", "model.layers.22.block_sparse_moe.experts.98.w3", "model.layers.22.block_sparse_moe.experts.99.w3", "model.layers.22.block_sparse_moe.experts.100.w3", "model.layers.22.block_sparse_moe.experts.101.w3", "model.layers.22.block_sparse_moe.experts.102.w3", "model.layers.22.block_sparse_moe.experts.103.w3", "model.layers.22.block_sparse_moe.experts.104.w3", "model.layers.22.block_sparse_moe.experts.105.w3", "model.layers.22.block_sparse_moe.experts.106.w3", "model.layers.22.block_sparse_moe.experts.107.w3", "model.layers.22.block_sparse_moe.experts.108.w3", "model.layers.22.block_sparse_moe.experts.109.w3", "model.layers.22.block_sparse_moe.experts.110.w3", "model.layers.22.block_sparse_moe.experts.111.w3", "model.layers.22.block_sparse_moe.experts.112.w3", "model.layers.22.block_sparse_moe.experts.113.w3", "model.layers.22.block_sparse_moe.experts.114.w3", "model.layers.22.block_sparse_moe.experts.115.w3", "model.layers.22.block_sparse_moe.experts.116.w3", "model.layers.22.block_sparse_moe.experts.117.w3", "model.layers.22.block_sparse_moe.experts.118.w3", "model.layers.22.block_sparse_moe.experts.119.w3", "model.layers.22.block_sparse_moe.experts.120.w3", "model.layers.22.block_sparse_moe.experts.121.w3", "model.layers.22.block_sparse_moe.experts.122.w3", "model.layers.22.block_sparse_moe.experts.123.w3", "model.layers.22.block_sparse_moe.experts.124.w3", "model.layers.22.block_sparse_moe.experts.125.w3", "model.layers.22.block_sparse_moe.experts.126.w3", "model.layers.22.block_sparse_moe.experts.127.w3", "model.layers.22.block_sparse_moe.experts.128.w3", "model.layers.22.block_sparse_moe.experts.129.w3", "model.layers.22.block_sparse_moe.experts.130.w3", "model.layers.22.block_sparse_moe.experts.131.w3", "model.layers.22.block_sparse_moe.experts.132.w3", "model.layers.22.block_sparse_moe.experts.133.w3", "model.layers.22.block_sparse_moe.experts.134.w3", "model.layers.22.block_sparse_moe.experts.135.w3", "model.layers.22.block_sparse_moe.experts.136.w3", "model.layers.22.block_sparse_moe.experts.137.w3", "model.layers.22.block_sparse_moe.experts.138.w3", "model.layers.22.block_sparse_moe.experts.139.w3", "model.layers.22.block_sparse_moe.experts.140.w3", "model.layers.22.block_sparse_moe.experts.141.w3", "model.layers.22.block_sparse_moe.experts.142.w3", "model.layers.22.block_sparse_moe.experts.143.w3", "model.layers.22.block_sparse_moe.experts.144.w3", "model.layers.22.block_sparse_moe.experts.145.w3", "model.layers.22.block_sparse_moe.experts.146.w3", "model.layers.22.block_sparse_moe.experts.147.w3", "model.layers.22.block_sparse_moe.experts.148.w3", "model.layers.22.block_sparse_moe.experts.149.w3", "model.layers.22.block_sparse_moe.experts.150.w3", "model.layers.22.block_sparse_moe.experts.151.w3", "model.layers.22.block_sparse_moe.experts.152.w3", "model.layers.22.block_sparse_moe.experts.153.w3", "model.layers.22.block_sparse_moe.experts.154.w3", "model.layers.22.block_sparse_moe.experts.155.w3", "model.layers.22.block_sparse_moe.experts.156.w3", "model.layers.22.block_sparse_moe.experts.157.w3", "model.layers.22.block_sparse_moe.experts.158.w3", "model.layers.22.block_sparse_moe.experts.159.w3", "model.layers.22.block_sparse_moe.experts.160.w3", "model.layers.22.block_sparse_moe.experts.161.w3", "model.layers.22.block_sparse_moe.experts.162.w3", "model.layers.22.block_sparse_moe.experts.163.w3", "model.layers.22.block_sparse_moe.experts.164.w3", "model.layers.22.block_sparse_moe.experts.165.w3", "model.layers.22.block_sparse_moe.experts.166.w3", "model.layers.22.block_sparse_moe.experts.167.w3", "model.layers.22.block_sparse_moe.experts.168.w3", "model.layers.22.block_sparse_moe.experts.169.w3", "model.layers.22.block_sparse_moe.experts.170.w3", "model.layers.22.block_sparse_moe.experts.171.w3", "model.layers.22.block_sparse_moe.experts.172.w3", "model.layers.22.block_sparse_moe.experts.173.w3", "model.layers.22.block_sparse_moe.experts.174.w3", "model.layers.22.block_sparse_moe.experts.175.w3", "model.layers.22.block_sparse_moe.experts.176.w3", "model.layers.22.block_sparse_moe.experts.177.w3", "model.layers.22.block_sparse_moe.experts.178.w3", "model.layers.22.block_sparse_moe.experts.179.w3", "model.layers.22.block_sparse_moe.experts.180.w3", "model.layers.22.block_sparse_moe.experts.181.w3", "model.layers.22.block_sparse_moe.experts.182.w3", "model.layers.22.block_sparse_moe.experts.183.w3", "model.layers.22.block_sparse_moe.experts.184.w3", "model.layers.22.block_sparse_moe.experts.185.w3", "model.layers.22.block_sparse_moe.experts.186.w3", "model.layers.22.block_sparse_moe.experts.187.w3", "model.layers.22.block_sparse_moe.experts.188.w3", "model.layers.22.block_sparse_moe.experts.189.w3", "model.layers.22.block_sparse_moe.experts.190.w3", "model.layers.22.block_sparse_moe.experts.191.w3", "model.layers.22.block_sparse_moe.experts.192.w3", "model.layers.22.block_sparse_moe.experts.193.w3", "model.layers.22.block_sparse_moe.experts.194.w3", "model.layers.22.block_sparse_moe.experts.195.w3", "model.layers.22.block_sparse_moe.experts.196.w3", "model.layers.22.block_sparse_moe.experts.197.w3", "model.layers.22.block_sparse_moe.experts.198.w3", "model.layers.22.block_sparse_moe.experts.199.w3", "model.layers.22.block_sparse_moe.experts.200.w3", "model.layers.22.block_sparse_moe.experts.201.w3", "model.layers.22.block_sparse_moe.experts.202.w3", "model.layers.22.block_sparse_moe.experts.203.w3", "model.layers.22.block_sparse_moe.experts.204.w3", "model.layers.22.block_sparse_moe.experts.205.w3", "model.layers.22.block_sparse_moe.experts.206.w3", "model.layers.22.block_sparse_moe.experts.207.w3", "model.layers.22.block_sparse_moe.experts.208.w3", "model.layers.22.block_sparse_moe.experts.209.w3", "model.layers.22.block_sparse_moe.experts.210.w3", "model.layers.22.block_sparse_moe.experts.211.w3", "model.layers.22.block_sparse_moe.experts.212.w3", "model.layers.22.block_sparse_moe.experts.213.w3", "model.layers.22.block_sparse_moe.experts.214.w3", "model.layers.22.block_sparse_moe.experts.215.w3", "model.layers.22.block_sparse_moe.experts.216.w3", "model.layers.22.block_sparse_moe.experts.217.w3", "model.layers.22.block_sparse_moe.experts.218.w3", "model.layers.22.block_sparse_moe.experts.219.w3", "model.layers.22.block_sparse_moe.experts.220.w3", "model.layers.22.block_sparse_moe.experts.221.w3", "model.layers.22.block_sparse_moe.experts.222.w3", "model.layers.22.block_sparse_moe.experts.223.w3", "model.layers.22.block_sparse_moe.experts.224.w3", "model.layers.22.block_sparse_moe.experts.225.w3", "model.layers.22.block_sparse_moe.experts.226.w3", "model.layers.22.block_sparse_moe.experts.227.w3", "model.layers.22.block_sparse_moe.experts.228.w3", "model.layers.22.block_sparse_moe.experts.229.w3", "model.layers.22.block_sparse_moe.experts.230.w3", "model.layers.22.block_sparse_moe.experts.231.w3", "model.layers.22.block_sparse_moe.experts.232.w3", "model.layers.22.block_sparse_moe.experts.233.w3", "model.layers.22.block_sparse_moe.experts.234.w3", "model.layers.22.block_sparse_moe.experts.235.w3", "model.layers.22.block_sparse_moe.experts.236.w3", "model.layers.22.block_sparse_moe.experts.237.w3", "model.layers.22.block_sparse_moe.experts.238.w3", "model.layers.22.block_sparse_moe.experts.239.w3", "model.layers.22.block_sparse_moe.experts.240.w3", "model.layers.22.block_sparse_moe.experts.241.w3", "model.layers.22.block_sparse_moe.experts.242.w3", "model.layers.22.block_sparse_moe.experts.243.w3", "model.layers.22.block_sparse_moe.experts.244.w3", "model.layers.22.block_sparse_moe.experts.245.w3", "model.layers.22.block_sparse_moe.experts.246.w3", "model.layers.22.block_sparse_moe.experts.247.w3", "model.layers.22.block_sparse_moe.experts.248.w3", "model.layers.22.block_sparse_moe.experts.249.w3", "model.layers.22.block_sparse_moe.experts.250.w3", "model.layers.22.block_sparse_moe.experts.251.w3", "model.layers.22.block_sparse_moe.experts.252.w3", "model.layers.22.block_sparse_moe.experts.253.w3", "model.layers.22.block_sparse_moe.experts.254.w3", "model.layers.22.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.002900314331054643, "dbits": 2415919104 } ] }, { "idx": 114, "layers": [ "model.layers.22.block_sparse_moe.experts.0.w2", "model.layers.22.block_sparse_moe.experts.1.w2", "model.layers.22.block_sparse_moe.experts.2.w2", "model.layers.22.block_sparse_moe.experts.3.w2", "model.layers.22.block_sparse_moe.experts.4.w2", "model.layers.22.block_sparse_moe.experts.5.w2", "model.layers.22.block_sparse_moe.experts.6.w2", "model.layers.22.block_sparse_moe.experts.7.w2", "model.layers.22.block_sparse_moe.experts.8.w2", "model.layers.22.block_sparse_moe.experts.9.w2", "model.layers.22.block_sparse_moe.experts.10.w2", "model.layers.22.block_sparse_moe.experts.11.w2", "model.layers.22.block_sparse_moe.experts.12.w2", "model.layers.22.block_sparse_moe.experts.13.w2", "model.layers.22.block_sparse_moe.experts.14.w2", "model.layers.22.block_sparse_moe.experts.15.w2", "model.layers.22.block_sparse_moe.experts.16.w2", "model.layers.22.block_sparse_moe.experts.17.w2", "model.layers.22.block_sparse_moe.experts.18.w2", "model.layers.22.block_sparse_moe.experts.19.w2", "model.layers.22.block_sparse_moe.experts.20.w2", "model.layers.22.block_sparse_moe.experts.21.w2", "model.layers.22.block_sparse_moe.experts.22.w2", "model.layers.22.block_sparse_moe.experts.23.w2", "model.layers.22.block_sparse_moe.experts.24.w2", "model.layers.22.block_sparse_moe.experts.25.w2", "model.layers.22.block_sparse_moe.experts.26.w2", "model.layers.22.block_sparse_moe.experts.27.w2", "model.layers.22.block_sparse_moe.experts.28.w2", "model.layers.22.block_sparse_moe.experts.29.w2", "model.layers.22.block_sparse_moe.experts.30.w2", "model.layers.22.block_sparse_moe.experts.31.w2", "model.layers.22.block_sparse_moe.experts.32.w2", "model.layers.22.block_sparse_moe.experts.33.w2", "model.layers.22.block_sparse_moe.experts.34.w2", "model.layers.22.block_sparse_moe.experts.35.w2", "model.layers.22.block_sparse_moe.experts.36.w2", "model.layers.22.block_sparse_moe.experts.37.w2", "model.layers.22.block_sparse_moe.experts.38.w2", "model.layers.22.block_sparse_moe.experts.39.w2", "model.layers.22.block_sparse_moe.experts.40.w2", "model.layers.22.block_sparse_moe.experts.41.w2", "model.layers.22.block_sparse_moe.experts.42.w2", "model.layers.22.block_sparse_moe.experts.43.w2", "model.layers.22.block_sparse_moe.experts.44.w2", "model.layers.22.block_sparse_moe.experts.45.w2", "model.layers.22.block_sparse_moe.experts.46.w2", "model.layers.22.block_sparse_moe.experts.47.w2", "model.layers.22.block_sparse_moe.experts.48.w2", "model.layers.22.block_sparse_moe.experts.49.w2", "model.layers.22.block_sparse_moe.experts.50.w2", "model.layers.22.block_sparse_moe.experts.51.w2", "model.layers.22.block_sparse_moe.experts.52.w2", "model.layers.22.block_sparse_moe.experts.53.w2", "model.layers.22.block_sparse_moe.experts.54.w2", "model.layers.22.block_sparse_moe.experts.55.w2", "model.layers.22.block_sparse_moe.experts.56.w2", "model.layers.22.block_sparse_moe.experts.57.w2", "model.layers.22.block_sparse_moe.experts.58.w2", "model.layers.22.block_sparse_moe.experts.59.w2", "model.layers.22.block_sparse_moe.experts.60.w2", "model.layers.22.block_sparse_moe.experts.61.w2", "model.layers.22.block_sparse_moe.experts.62.w2", "model.layers.22.block_sparse_moe.experts.63.w2", "model.layers.22.block_sparse_moe.experts.64.w2", "model.layers.22.block_sparse_moe.experts.65.w2", "model.layers.22.block_sparse_moe.experts.66.w2", "model.layers.22.block_sparse_moe.experts.67.w2", "model.layers.22.block_sparse_moe.experts.68.w2", "model.layers.22.block_sparse_moe.experts.69.w2", "model.layers.22.block_sparse_moe.experts.70.w2", "model.layers.22.block_sparse_moe.experts.71.w2", "model.layers.22.block_sparse_moe.experts.72.w2", "model.layers.22.block_sparse_moe.experts.73.w2", "model.layers.22.block_sparse_moe.experts.74.w2", "model.layers.22.block_sparse_moe.experts.75.w2", "model.layers.22.block_sparse_moe.experts.76.w2", "model.layers.22.block_sparse_moe.experts.77.w2", "model.layers.22.block_sparse_moe.experts.78.w2", "model.layers.22.block_sparse_moe.experts.79.w2", "model.layers.22.block_sparse_moe.experts.80.w2", "model.layers.22.block_sparse_moe.experts.81.w2", "model.layers.22.block_sparse_moe.experts.82.w2", "model.layers.22.block_sparse_moe.experts.83.w2", "model.layers.22.block_sparse_moe.experts.84.w2", "model.layers.22.block_sparse_moe.experts.85.w2", "model.layers.22.block_sparse_moe.experts.86.w2", "model.layers.22.block_sparse_moe.experts.87.w2", "model.layers.22.block_sparse_moe.experts.88.w2", "model.layers.22.block_sparse_moe.experts.89.w2", "model.layers.22.block_sparse_moe.experts.90.w2", "model.layers.22.block_sparse_moe.experts.91.w2", "model.layers.22.block_sparse_moe.experts.92.w2", "model.layers.22.block_sparse_moe.experts.93.w2", "model.layers.22.block_sparse_moe.experts.94.w2", "model.layers.22.block_sparse_moe.experts.95.w2", "model.layers.22.block_sparse_moe.experts.96.w2", "model.layers.22.block_sparse_moe.experts.97.w2", "model.layers.22.block_sparse_moe.experts.98.w2", "model.layers.22.block_sparse_moe.experts.99.w2", "model.layers.22.block_sparse_moe.experts.100.w2", "model.layers.22.block_sparse_moe.experts.101.w2", "model.layers.22.block_sparse_moe.experts.102.w2", "model.layers.22.block_sparse_moe.experts.103.w2", "model.layers.22.block_sparse_moe.experts.104.w2", "model.layers.22.block_sparse_moe.experts.105.w2", "model.layers.22.block_sparse_moe.experts.106.w2", "model.layers.22.block_sparse_moe.experts.107.w2", "model.layers.22.block_sparse_moe.experts.108.w2", "model.layers.22.block_sparse_moe.experts.109.w2", "model.layers.22.block_sparse_moe.experts.110.w2", "model.layers.22.block_sparse_moe.experts.111.w2", "model.layers.22.block_sparse_moe.experts.112.w2", "model.layers.22.block_sparse_moe.experts.113.w2", "model.layers.22.block_sparse_moe.experts.114.w2", "model.layers.22.block_sparse_moe.experts.115.w2", "model.layers.22.block_sparse_moe.experts.116.w2", "model.layers.22.block_sparse_moe.experts.117.w2", "model.layers.22.block_sparse_moe.experts.118.w2", "model.layers.22.block_sparse_moe.experts.119.w2", "model.layers.22.block_sparse_moe.experts.120.w2", "model.layers.22.block_sparse_moe.experts.121.w2", "model.layers.22.block_sparse_moe.experts.122.w2", "model.layers.22.block_sparse_moe.experts.123.w2", "model.layers.22.block_sparse_moe.experts.124.w2", "model.layers.22.block_sparse_moe.experts.125.w2", "model.layers.22.block_sparse_moe.experts.126.w2", "model.layers.22.block_sparse_moe.experts.127.w2", "model.layers.22.block_sparse_moe.experts.128.w2", "model.layers.22.block_sparse_moe.experts.129.w2", "model.layers.22.block_sparse_moe.experts.130.w2", "model.layers.22.block_sparse_moe.experts.131.w2", "model.layers.22.block_sparse_moe.experts.132.w2", "model.layers.22.block_sparse_moe.experts.133.w2", "model.layers.22.block_sparse_moe.experts.134.w2", "model.layers.22.block_sparse_moe.experts.135.w2", "model.layers.22.block_sparse_moe.experts.136.w2", "model.layers.22.block_sparse_moe.experts.137.w2", "model.layers.22.block_sparse_moe.experts.138.w2", "model.layers.22.block_sparse_moe.experts.139.w2", "model.layers.22.block_sparse_moe.experts.140.w2", "model.layers.22.block_sparse_moe.experts.141.w2", "model.layers.22.block_sparse_moe.experts.142.w2", "model.layers.22.block_sparse_moe.experts.143.w2", "model.layers.22.block_sparse_moe.experts.144.w2", "model.layers.22.block_sparse_moe.experts.145.w2", "model.layers.22.block_sparse_moe.experts.146.w2", "model.layers.22.block_sparse_moe.experts.147.w2", "model.layers.22.block_sparse_moe.experts.148.w2", "model.layers.22.block_sparse_moe.experts.149.w2", "model.layers.22.block_sparse_moe.experts.150.w2", "model.layers.22.block_sparse_moe.experts.151.w2", "model.layers.22.block_sparse_moe.experts.152.w2", "model.layers.22.block_sparse_moe.experts.153.w2", "model.layers.22.block_sparse_moe.experts.154.w2", "model.layers.22.block_sparse_moe.experts.155.w2", "model.layers.22.block_sparse_moe.experts.156.w2", "model.layers.22.block_sparse_moe.experts.157.w2", "model.layers.22.block_sparse_moe.experts.158.w2", "model.layers.22.block_sparse_moe.experts.159.w2", "model.layers.22.block_sparse_moe.experts.160.w2", "model.layers.22.block_sparse_moe.experts.161.w2", "model.layers.22.block_sparse_moe.experts.162.w2", "model.layers.22.block_sparse_moe.experts.163.w2", "model.layers.22.block_sparse_moe.experts.164.w2", "model.layers.22.block_sparse_moe.experts.165.w2", "model.layers.22.block_sparse_moe.experts.166.w2", "model.layers.22.block_sparse_moe.experts.167.w2", "model.layers.22.block_sparse_moe.experts.168.w2", "model.layers.22.block_sparse_moe.experts.169.w2", "model.layers.22.block_sparse_moe.experts.170.w2", "model.layers.22.block_sparse_moe.experts.171.w2", "model.layers.22.block_sparse_moe.experts.172.w2", "model.layers.22.block_sparse_moe.experts.173.w2", "model.layers.22.block_sparse_moe.experts.174.w2", "model.layers.22.block_sparse_moe.experts.175.w2", "model.layers.22.block_sparse_moe.experts.176.w2", "model.layers.22.block_sparse_moe.experts.177.w2", "model.layers.22.block_sparse_moe.experts.178.w2", "model.layers.22.block_sparse_moe.experts.179.w2", "model.layers.22.block_sparse_moe.experts.180.w2", "model.layers.22.block_sparse_moe.experts.181.w2", "model.layers.22.block_sparse_moe.experts.182.w2", "model.layers.22.block_sparse_moe.experts.183.w2", "model.layers.22.block_sparse_moe.experts.184.w2", "model.layers.22.block_sparse_moe.experts.185.w2", "model.layers.22.block_sparse_moe.experts.186.w2", "model.layers.22.block_sparse_moe.experts.187.w2", "model.layers.22.block_sparse_moe.experts.188.w2", "model.layers.22.block_sparse_moe.experts.189.w2", "model.layers.22.block_sparse_moe.experts.190.w2", "model.layers.22.block_sparse_moe.experts.191.w2", "model.layers.22.block_sparse_moe.experts.192.w2", "model.layers.22.block_sparse_moe.experts.193.w2", "model.layers.22.block_sparse_moe.experts.194.w2", "model.layers.22.block_sparse_moe.experts.195.w2", "model.layers.22.block_sparse_moe.experts.196.w2", "model.layers.22.block_sparse_moe.experts.197.w2", "model.layers.22.block_sparse_moe.experts.198.w2", "model.layers.22.block_sparse_moe.experts.199.w2", "model.layers.22.block_sparse_moe.experts.200.w2", "model.layers.22.block_sparse_moe.experts.201.w2", "model.layers.22.block_sparse_moe.experts.202.w2", "model.layers.22.block_sparse_moe.experts.203.w2", "model.layers.22.block_sparse_moe.experts.204.w2", "model.layers.22.block_sparse_moe.experts.205.w2", "model.layers.22.block_sparse_moe.experts.206.w2", "model.layers.22.block_sparse_moe.experts.207.w2", "model.layers.22.block_sparse_moe.experts.208.w2", "model.layers.22.block_sparse_moe.experts.209.w2", "model.layers.22.block_sparse_moe.experts.210.w2", "model.layers.22.block_sparse_moe.experts.211.w2", "model.layers.22.block_sparse_moe.experts.212.w2", "model.layers.22.block_sparse_moe.experts.213.w2", "model.layers.22.block_sparse_moe.experts.214.w2", "model.layers.22.block_sparse_moe.experts.215.w2", "model.layers.22.block_sparse_moe.experts.216.w2", "model.layers.22.block_sparse_moe.experts.217.w2", "model.layers.22.block_sparse_moe.experts.218.w2", "model.layers.22.block_sparse_moe.experts.219.w2", "model.layers.22.block_sparse_moe.experts.220.w2", "model.layers.22.block_sparse_moe.experts.221.w2", "model.layers.22.block_sparse_moe.experts.222.w2", "model.layers.22.block_sparse_moe.experts.223.w2", "model.layers.22.block_sparse_moe.experts.224.w2", "model.layers.22.block_sparse_moe.experts.225.w2", "model.layers.22.block_sparse_moe.experts.226.w2", "model.layers.22.block_sparse_moe.experts.227.w2", "model.layers.22.block_sparse_moe.experts.228.w2", "model.layers.22.block_sparse_moe.experts.229.w2", "model.layers.22.block_sparse_moe.experts.230.w2", "model.layers.22.block_sparse_moe.experts.231.w2", "model.layers.22.block_sparse_moe.experts.232.w2", "model.layers.22.block_sparse_moe.experts.233.w2", "model.layers.22.block_sparse_moe.experts.234.w2", "model.layers.22.block_sparse_moe.experts.235.w2", "model.layers.22.block_sparse_moe.experts.236.w2", "model.layers.22.block_sparse_moe.experts.237.w2", "model.layers.22.block_sparse_moe.experts.238.w2", "model.layers.22.block_sparse_moe.experts.239.w2", "model.layers.22.block_sparse_moe.experts.240.w2", "model.layers.22.block_sparse_moe.experts.241.w2", "model.layers.22.block_sparse_moe.experts.242.w2", "model.layers.22.block_sparse_moe.experts.243.w2", "model.layers.22.block_sparse_moe.experts.244.w2", "model.layers.22.block_sparse_moe.experts.245.w2", "model.layers.22.block_sparse_moe.experts.246.w2", "model.layers.22.block_sparse_moe.experts.247.w2", "model.layers.22.block_sparse_moe.experts.248.w2", "model.layers.22.block_sparse_moe.experts.249.w2", "model.layers.22.block_sparse_moe.experts.250.w2", "model.layers.22.block_sparse_moe.experts.251.w2", "model.layers.22.block_sparse_moe.experts.252.w2", "model.layers.22.block_sparse_moe.experts.253.w2", "model.layers.22.block_sparse_moe.experts.254.w2", "model.layers.22.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0007182568311691062, "dbits": 1207959552 } ] }, { "idx": 115, "layers": [ "model.layers.23.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00027617812156677246, "dbits": 18874368 } ] }, { "idx": 116, "layers": [ "model.layers.23.self_attn.k_proj", "model.layers.23.self_attn.v_proj" ], "candidates": [ { "dkld": 0.007532492280006409, "dbits": 6291456 } ] }, { "idx": 117, "layers": [ "model.layers.23.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00864773094654081, "dbits": 18874368 } ] }, { "idx": 118, "layers": [ "model.layers.23.block_sparse_moe.experts.0.w1", "model.layers.23.block_sparse_moe.experts.1.w1", "model.layers.23.block_sparse_moe.experts.2.w1", "model.layers.23.block_sparse_moe.experts.3.w1", "model.layers.23.block_sparse_moe.experts.4.w1", "model.layers.23.block_sparse_moe.experts.5.w1", "model.layers.23.block_sparse_moe.experts.6.w1", "model.layers.23.block_sparse_moe.experts.7.w1", "model.layers.23.block_sparse_moe.experts.8.w1", "model.layers.23.block_sparse_moe.experts.9.w1", "model.layers.23.block_sparse_moe.experts.10.w1", "model.layers.23.block_sparse_moe.experts.11.w1", "model.layers.23.block_sparse_moe.experts.12.w1", "model.layers.23.block_sparse_moe.experts.13.w1", "model.layers.23.block_sparse_moe.experts.14.w1", "model.layers.23.block_sparse_moe.experts.15.w1", "model.layers.23.block_sparse_moe.experts.16.w1", "model.layers.23.block_sparse_moe.experts.17.w1", "model.layers.23.block_sparse_moe.experts.18.w1", "model.layers.23.block_sparse_moe.experts.19.w1", "model.layers.23.block_sparse_moe.experts.20.w1", "model.layers.23.block_sparse_moe.experts.21.w1", "model.layers.23.block_sparse_moe.experts.22.w1", "model.layers.23.block_sparse_moe.experts.23.w1", "model.layers.23.block_sparse_moe.experts.24.w1", "model.layers.23.block_sparse_moe.experts.25.w1", "model.layers.23.block_sparse_moe.experts.26.w1", "model.layers.23.block_sparse_moe.experts.27.w1", "model.layers.23.block_sparse_moe.experts.28.w1", "model.layers.23.block_sparse_moe.experts.29.w1", "model.layers.23.block_sparse_moe.experts.30.w1", "model.layers.23.block_sparse_moe.experts.31.w1", "model.layers.23.block_sparse_moe.experts.32.w1", "model.layers.23.block_sparse_moe.experts.33.w1", "model.layers.23.block_sparse_moe.experts.34.w1", "model.layers.23.block_sparse_moe.experts.35.w1", "model.layers.23.block_sparse_moe.experts.36.w1", "model.layers.23.block_sparse_moe.experts.37.w1", "model.layers.23.block_sparse_moe.experts.38.w1", "model.layers.23.block_sparse_moe.experts.39.w1", "model.layers.23.block_sparse_moe.experts.40.w1", "model.layers.23.block_sparse_moe.experts.41.w1", "model.layers.23.block_sparse_moe.experts.42.w1", "model.layers.23.block_sparse_moe.experts.43.w1", "model.layers.23.block_sparse_moe.experts.44.w1", "model.layers.23.block_sparse_moe.experts.45.w1", "model.layers.23.block_sparse_moe.experts.46.w1", "model.layers.23.block_sparse_moe.experts.47.w1", "model.layers.23.block_sparse_moe.experts.48.w1", "model.layers.23.block_sparse_moe.experts.49.w1", "model.layers.23.block_sparse_moe.experts.50.w1", "model.layers.23.block_sparse_moe.experts.51.w1", "model.layers.23.block_sparse_moe.experts.52.w1", "model.layers.23.block_sparse_moe.experts.53.w1", "model.layers.23.block_sparse_moe.experts.54.w1", "model.layers.23.block_sparse_moe.experts.55.w1", "model.layers.23.block_sparse_moe.experts.56.w1", "model.layers.23.block_sparse_moe.experts.57.w1", "model.layers.23.block_sparse_moe.experts.58.w1", "model.layers.23.block_sparse_moe.experts.59.w1", "model.layers.23.block_sparse_moe.experts.60.w1", "model.layers.23.block_sparse_moe.experts.61.w1", "model.layers.23.block_sparse_moe.experts.62.w1", "model.layers.23.block_sparse_moe.experts.63.w1", "model.layers.23.block_sparse_moe.experts.64.w1", "model.layers.23.block_sparse_moe.experts.65.w1", "model.layers.23.block_sparse_moe.experts.66.w1", "model.layers.23.block_sparse_moe.experts.67.w1", "model.layers.23.block_sparse_moe.experts.68.w1", "model.layers.23.block_sparse_moe.experts.69.w1", "model.layers.23.block_sparse_moe.experts.70.w1", "model.layers.23.block_sparse_moe.experts.71.w1", "model.layers.23.block_sparse_moe.experts.72.w1", "model.layers.23.block_sparse_moe.experts.73.w1", "model.layers.23.block_sparse_moe.experts.74.w1", "model.layers.23.block_sparse_moe.experts.75.w1", "model.layers.23.block_sparse_moe.experts.76.w1", "model.layers.23.block_sparse_moe.experts.77.w1", "model.layers.23.block_sparse_moe.experts.78.w1", "model.layers.23.block_sparse_moe.experts.79.w1", "model.layers.23.block_sparse_moe.experts.80.w1", "model.layers.23.block_sparse_moe.experts.81.w1", "model.layers.23.block_sparse_moe.experts.82.w1", "model.layers.23.block_sparse_moe.experts.83.w1", "model.layers.23.block_sparse_moe.experts.84.w1", "model.layers.23.block_sparse_moe.experts.85.w1", "model.layers.23.block_sparse_moe.experts.86.w1", "model.layers.23.block_sparse_moe.experts.87.w1", "model.layers.23.block_sparse_moe.experts.88.w1", "model.layers.23.block_sparse_moe.experts.89.w1", "model.layers.23.block_sparse_moe.experts.90.w1", "model.layers.23.block_sparse_moe.experts.91.w1", "model.layers.23.block_sparse_moe.experts.92.w1", "model.layers.23.block_sparse_moe.experts.93.w1", "model.layers.23.block_sparse_moe.experts.94.w1", "model.layers.23.block_sparse_moe.experts.95.w1", "model.layers.23.block_sparse_moe.experts.96.w1", "model.layers.23.block_sparse_moe.experts.97.w1", "model.layers.23.block_sparse_moe.experts.98.w1", "model.layers.23.block_sparse_moe.experts.99.w1", "model.layers.23.block_sparse_moe.experts.100.w1", "model.layers.23.block_sparse_moe.experts.101.w1", "model.layers.23.block_sparse_moe.experts.102.w1", "model.layers.23.block_sparse_moe.experts.103.w1", "model.layers.23.block_sparse_moe.experts.104.w1", "model.layers.23.block_sparse_moe.experts.105.w1", "model.layers.23.block_sparse_moe.experts.106.w1", "model.layers.23.block_sparse_moe.experts.107.w1", "model.layers.23.block_sparse_moe.experts.108.w1", "model.layers.23.block_sparse_moe.experts.109.w1", "model.layers.23.block_sparse_moe.experts.110.w1", "model.layers.23.block_sparse_moe.experts.111.w1", "model.layers.23.block_sparse_moe.experts.112.w1", "model.layers.23.block_sparse_moe.experts.113.w1", "model.layers.23.block_sparse_moe.experts.114.w1", "model.layers.23.block_sparse_moe.experts.115.w1", "model.layers.23.block_sparse_moe.experts.116.w1", "model.layers.23.block_sparse_moe.experts.117.w1", "model.layers.23.block_sparse_moe.experts.118.w1", "model.layers.23.block_sparse_moe.experts.119.w1", "model.layers.23.block_sparse_moe.experts.120.w1", "model.layers.23.block_sparse_moe.experts.121.w1", "model.layers.23.block_sparse_moe.experts.122.w1", "model.layers.23.block_sparse_moe.experts.123.w1", "model.layers.23.block_sparse_moe.experts.124.w1", "model.layers.23.block_sparse_moe.experts.125.w1", "model.layers.23.block_sparse_moe.experts.126.w1", "model.layers.23.block_sparse_moe.experts.127.w1", "model.layers.23.block_sparse_moe.experts.128.w1", "model.layers.23.block_sparse_moe.experts.129.w1", "model.layers.23.block_sparse_moe.experts.130.w1", "model.layers.23.block_sparse_moe.experts.131.w1", "model.layers.23.block_sparse_moe.experts.132.w1", "model.layers.23.block_sparse_moe.experts.133.w1", "model.layers.23.block_sparse_moe.experts.134.w1", "model.layers.23.block_sparse_moe.experts.135.w1", "model.layers.23.block_sparse_moe.experts.136.w1", "model.layers.23.block_sparse_moe.experts.137.w1", "model.layers.23.block_sparse_moe.experts.138.w1", "model.layers.23.block_sparse_moe.experts.139.w1", "model.layers.23.block_sparse_moe.experts.140.w1", "model.layers.23.block_sparse_moe.experts.141.w1", "model.layers.23.block_sparse_moe.experts.142.w1", "model.layers.23.block_sparse_moe.experts.143.w1", "model.layers.23.block_sparse_moe.experts.144.w1", "model.layers.23.block_sparse_moe.experts.145.w1", "model.layers.23.block_sparse_moe.experts.146.w1", "model.layers.23.block_sparse_moe.experts.147.w1", "model.layers.23.block_sparse_moe.experts.148.w1", "model.layers.23.block_sparse_moe.experts.149.w1", "model.layers.23.block_sparse_moe.experts.150.w1", "model.layers.23.block_sparse_moe.experts.151.w1", "model.layers.23.block_sparse_moe.experts.152.w1", "model.layers.23.block_sparse_moe.experts.153.w1", "model.layers.23.block_sparse_moe.experts.154.w1", "model.layers.23.block_sparse_moe.experts.155.w1", "model.layers.23.block_sparse_moe.experts.156.w1", "model.layers.23.block_sparse_moe.experts.157.w1", "model.layers.23.block_sparse_moe.experts.158.w1", "model.layers.23.block_sparse_moe.experts.159.w1", "model.layers.23.block_sparse_moe.experts.160.w1", "model.layers.23.block_sparse_moe.experts.161.w1", "model.layers.23.block_sparse_moe.experts.162.w1", "model.layers.23.block_sparse_moe.experts.163.w1", "model.layers.23.block_sparse_moe.experts.164.w1", "model.layers.23.block_sparse_moe.experts.165.w1", "model.layers.23.block_sparse_moe.experts.166.w1", "model.layers.23.block_sparse_moe.experts.167.w1", "model.layers.23.block_sparse_moe.experts.168.w1", "model.layers.23.block_sparse_moe.experts.169.w1", "model.layers.23.block_sparse_moe.experts.170.w1", "model.layers.23.block_sparse_moe.experts.171.w1", "model.layers.23.block_sparse_moe.experts.172.w1", "model.layers.23.block_sparse_moe.experts.173.w1", "model.layers.23.block_sparse_moe.experts.174.w1", "model.layers.23.block_sparse_moe.experts.175.w1", "model.layers.23.block_sparse_moe.experts.176.w1", "model.layers.23.block_sparse_moe.experts.177.w1", "model.layers.23.block_sparse_moe.experts.178.w1", "model.layers.23.block_sparse_moe.experts.179.w1", "model.layers.23.block_sparse_moe.experts.180.w1", "model.layers.23.block_sparse_moe.experts.181.w1", "model.layers.23.block_sparse_moe.experts.182.w1", "model.layers.23.block_sparse_moe.experts.183.w1", "model.layers.23.block_sparse_moe.experts.184.w1", "model.layers.23.block_sparse_moe.experts.185.w1", "model.layers.23.block_sparse_moe.experts.186.w1", "model.layers.23.block_sparse_moe.experts.187.w1", "model.layers.23.block_sparse_moe.experts.188.w1", "model.layers.23.block_sparse_moe.experts.189.w1", "model.layers.23.block_sparse_moe.experts.190.w1", "model.layers.23.block_sparse_moe.experts.191.w1", "model.layers.23.block_sparse_moe.experts.192.w1", "model.layers.23.block_sparse_moe.experts.193.w1", "model.layers.23.block_sparse_moe.experts.194.w1", "model.layers.23.block_sparse_moe.experts.195.w1", "model.layers.23.block_sparse_moe.experts.196.w1", "model.layers.23.block_sparse_moe.experts.197.w1", "model.layers.23.block_sparse_moe.experts.198.w1", "model.layers.23.block_sparse_moe.experts.199.w1", "model.layers.23.block_sparse_moe.experts.200.w1", "model.layers.23.block_sparse_moe.experts.201.w1", "model.layers.23.block_sparse_moe.experts.202.w1", "model.layers.23.block_sparse_moe.experts.203.w1", "model.layers.23.block_sparse_moe.experts.204.w1", "model.layers.23.block_sparse_moe.experts.205.w1", "model.layers.23.block_sparse_moe.experts.206.w1", "model.layers.23.block_sparse_moe.experts.207.w1", "model.layers.23.block_sparse_moe.experts.208.w1", "model.layers.23.block_sparse_moe.experts.209.w1", "model.layers.23.block_sparse_moe.experts.210.w1", "model.layers.23.block_sparse_moe.experts.211.w1", "model.layers.23.block_sparse_moe.experts.212.w1", "model.layers.23.block_sparse_moe.experts.213.w1", "model.layers.23.block_sparse_moe.experts.214.w1", "model.layers.23.block_sparse_moe.experts.215.w1", "model.layers.23.block_sparse_moe.experts.216.w1", "model.layers.23.block_sparse_moe.experts.217.w1", "model.layers.23.block_sparse_moe.experts.218.w1", "model.layers.23.block_sparse_moe.experts.219.w1", "model.layers.23.block_sparse_moe.experts.220.w1", "model.layers.23.block_sparse_moe.experts.221.w1", "model.layers.23.block_sparse_moe.experts.222.w1", "model.layers.23.block_sparse_moe.experts.223.w1", "model.layers.23.block_sparse_moe.experts.224.w1", "model.layers.23.block_sparse_moe.experts.225.w1", "model.layers.23.block_sparse_moe.experts.226.w1", "model.layers.23.block_sparse_moe.experts.227.w1", "model.layers.23.block_sparse_moe.experts.228.w1", "model.layers.23.block_sparse_moe.experts.229.w1", "model.layers.23.block_sparse_moe.experts.230.w1", "model.layers.23.block_sparse_moe.experts.231.w1", "model.layers.23.block_sparse_moe.experts.232.w1", "model.layers.23.block_sparse_moe.experts.233.w1", "model.layers.23.block_sparse_moe.experts.234.w1", "model.layers.23.block_sparse_moe.experts.235.w1", "model.layers.23.block_sparse_moe.experts.236.w1", "model.layers.23.block_sparse_moe.experts.237.w1", "model.layers.23.block_sparse_moe.experts.238.w1", "model.layers.23.block_sparse_moe.experts.239.w1", "model.layers.23.block_sparse_moe.experts.240.w1", "model.layers.23.block_sparse_moe.experts.241.w1", "model.layers.23.block_sparse_moe.experts.242.w1", "model.layers.23.block_sparse_moe.experts.243.w1", "model.layers.23.block_sparse_moe.experts.244.w1", "model.layers.23.block_sparse_moe.experts.245.w1", "model.layers.23.block_sparse_moe.experts.246.w1", "model.layers.23.block_sparse_moe.experts.247.w1", "model.layers.23.block_sparse_moe.experts.248.w1", "model.layers.23.block_sparse_moe.experts.249.w1", "model.layers.23.block_sparse_moe.experts.250.w1", "model.layers.23.block_sparse_moe.experts.251.w1", "model.layers.23.block_sparse_moe.experts.252.w1", "model.layers.23.block_sparse_moe.experts.253.w1", "model.layers.23.block_sparse_moe.experts.254.w1", "model.layers.23.block_sparse_moe.experts.255.w1", "model.layers.23.block_sparse_moe.experts.0.w3", "model.layers.23.block_sparse_moe.experts.1.w3", "model.layers.23.block_sparse_moe.experts.2.w3", "model.layers.23.block_sparse_moe.experts.3.w3", "model.layers.23.block_sparse_moe.experts.4.w3", "model.layers.23.block_sparse_moe.experts.5.w3", "model.layers.23.block_sparse_moe.experts.6.w3", "model.layers.23.block_sparse_moe.experts.7.w3", "model.layers.23.block_sparse_moe.experts.8.w3", "model.layers.23.block_sparse_moe.experts.9.w3", "model.layers.23.block_sparse_moe.experts.10.w3", "model.layers.23.block_sparse_moe.experts.11.w3", "model.layers.23.block_sparse_moe.experts.12.w3", "model.layers.23.block_sparse_moe.experts.13.w3", "model.layers.23.block_sparse_moe.experts.14.w3", "model.layers.23.block_sparse_moe.experts.15.w3", "model.layers.23.block_sparse_moe.experts.16.w3", "model.layers.23.block_sparse_moe.experts.17.w3", "model.layers.23.block_sparse_moe.experts.18.w3", "model.layers.23.block_sparse_moe.experts.19.w3", "model.layers.23.block_sparse_moe.experts.20.w3", "model.layers.23.block_sparse_moe.experts.21.w3", "model.layers.23.block_sparse_moe.experts.22.w3", "model.layers.23.block_sparse_moe.experts.23.w3", "model.layers.23.block_sparse_moe.experts.24.w3", "model.layers.23.block_sparse_moe.experts.25.w3", "model.layers.23.block_sparse_moe.experts.26.w3", "model.layers.23.block_sparse_moe.experts.27.w3", "model.layers.23.block_sparse_moe.experts.28.w3", "model.layers.23.block_sparse_moe.experts.29.w3", "model.layers.23.block_sparse_moe.experts.30.w3", "model.layers.23.block_sparse_moe.experts.31.w3", "model.layers.23.block_sparse_moe.experts.32.w3", "model.layers.23.block_sparse_moe.experts.33.w3", "model.layers.23.block_sparse_moe.experts.34.w3", "model.layers.23.block_sparse_moe.experts.35.w3", "model.layers.23.block_sparse_moe.experts.36.w3", "model.layers.23.block_sparse_moe.experts.37.w3", "model.layers.23.block_sparse_moe.experts.38.w3", "model.layers.23.block_sparse_moe.experts.39.w3", "model.layers.23.block_sparse_moe.experts.40.w3", "model.layers.23.block_sparse_moe.experts.41.w3", "model.layers.23.block_sparse_moe.experts.42.w3", "model.layers.23.block_sparse_moe.experts.43.w3", "model.layers.23.block_sparse_moe.experts.44.w3", "model.layers.23.block_sparse_moe.experts.45.w3", "model.layers.23.block_sparse_moe.experts.46.w3", "model.layers.23.block_sparse_moe.experts.47.w3", "model.layers.23.block_sparse_moe.experts.48.w3", "model.layers.23.block_sparse_moe.experts.49.w3", "model.layers.23.block_sparse_moe.experts.50.w3", "model.layers.23.block_sparse_moe.experts.51.w3", "model.layers.23.block_sparse_moe.experts.52.w3", "model.layers.23.block_sparse_moe.experts.53.w3", "model.layers.23.block_sparse_moe.experts.54.w3", "model.layers.23.block_sparse_moe.experts.55.w3", "model.layers.23.block_sparse_moe.experts.56.w3", "model.layers.23.block_sparse_moe.experts.57.w3", "model.layers.23.block_sparse_moe.experts.58.w3", "model.layers.23.block_sparse_moe.experts.59.w3", "model.layers.23.block_sparse_moe.experts.60.w3", "model.layers.23.block_sparse_moe.experts.61.w3", "model.layers.23.block_sparse_moe.experts.62.w3", "model.layers.23.block_sparse_moe.experts.63.w3", "model.layers.23.block_sparse_moe.experts.64.w3", "model.layers.23.block_sparse_moe.experts.65.w3", "model.layers.23.block_sparse_moe.experts.66.w3", "model.layers.23.block_sparse_moe.experts.67.w3", "model.layers.23.block_sparse_moe.experts.68.w3", "model.layers.23.block_sparse_moe.experts.69.w3", "model.layers.23.block_sparse_moe.experts.70.w3", "model.layers.23.block_sparse_moe.experts.71.w3", "model.layers.23.block_sparse_moe.experts.72.w3", "model.layers.23.block_sparse_moe.experts.73.w3", "model.layers.23.block_sparse_moe.experts.74.w3", "model.layers.23.block_sparse_moe.experts.75.w3", "model.layers.23.block_sparse_moe.experts.76.w3", "model.layers.23.block_sparse_moe.experts.77.w3", "model.layers.23.block_sparse_moe.experts.78.w3", "model.layers.23.block_sparse_moe.experts.79.w3", "model.layers.23.block_sparse_moe.experts.80.w3", "model.layers.23.block_sparse_moe.experts.81.w3", "model.layers.23.block_sparse_moe.experts.82.w3", "model.layers.23.block_sparse_moe.experts.83.w3", "model.layers.23.block_sparse_moe.experts.84.w3", "model.layers.23.block_sparse_moe.experts.85.w3", "model.layers.23.block_sparse_moe.experts.86.w3", "model.layers.23.block_sparse_moe.experts.87.w3", "model.layers.23.block_sparse_moe.experts.88.w3", "model.layers.23.block_sparse_moe.experts.89.w3", "model.layers.23.block_sparse_moe.experts.90.w3", "model.layers.23.block_sparse_moe.experts.91.w3", "model.layers.23.block_sparse_moe.experts.92.w3", "model.layers.23.block_sparse_moe.experts.93.w3", "model.layers.23.block_sparse_moe.experts.94.w3", "model.layers.23.block_sparse_moe.experts.95.w3", "model.layers.23.block_sparse_moe.experts.96.w3", "model.layers.23.block_sparse_moe.experts.97.w3", "model.layers.23.block_sparse_moe.experts.98.w3", "model.layers.23.block_sparse_moe.experts.99.w3", "model.layers.23.block_sparse_moe.experts.100.w3", "model.layers.23.block_sparse_moe.experts.101.w3", "model.layers.23.block_sparse_moe.experts.102.w3", "model.layers.23.block_sparse_moe.experts.103.w3", "model.layers.23.block_sparse_moe.experts.104.w3", "model.layers.23.block_sparse_moe.experts.105.w3", "model.layers.23.block_sparse_moe.experts.106.w3", "model.layers.23.block_sparse_moe.experts.107.w3", "model.layers.23.block_sparse_moe.experts.108.w3", "model.layers.23.block_sparse_moe.experts.109.w3", "model.layers.23.block_sparse_moe.experts.110.w3", "model.layers.23.block_sparse_moe.experts.111.w3", "model.layers.23.block_sparse_moe.experts.112.w3", "model.layers.23.block_sparse_moe.experts.113.w3", "model.layers.23.block_sparse_moe.experts.114.w3", "model.layers.23.block_sparse_moe.experts.115.w3", "model.layers.23.block_sparse_moe.experts.116.w3", "model.layers.23.block_sparse_moe.experts.117.w3", "model.layers.23.block_sparse_moe.experts.118.w3", "model.layers.23.block_sparse_moe.experts.119.w3", "model.layers.23.block_sparse_moe.experts.120.w3", "model.layers.23.block_sparse_moe.experts.121.w3", "model.layers.23.block_sparse_moe.experts.122.w3", "model.layers.23.block_sparse_moe.experts.123.w3", "model.layers.23.block_sparse_moe.experts.124.w3", "model.layers.23.block_sparse_moe.experts.125.w3", "model.layers.23.block_sparse_moe.experts.126.w3", "model.layers.23.block_sparse_moe.experts.127.w3", "model.layers.23.block_sparse_moe.experts.128.w3", "model.layers.23.block_sparse_moe.experts.129.w3", "model.layers.23.block_sparse_moe.experts.130.w3", "model.layers.23.block_sparse_moe.experts.131.w3", "model.layers.23.block_sparse_moe.experts.132.w3", "model.layers.23.block_sparse_moe.experts.133.w3", "model.layers.23.block_sparse_moe.experts.134.w3", "model.layers.23.block_sparse_moe.experts.135.w3", "model.layers.23.block_sparse_moe.experts.136.w3", "model.layers.23.block_sparse_moe.experts.137.w3", "model.layers.23.block_sparse_moe.experts.138.w3", "model.layers.23.block_sparse_moe.experts.139.w3", "model.layers.23.block_sparse_moe.experts.140.w3", "model.layers.23.block_sparse_moe.experts.141.w3", "model.layers.23.block_sparse_moe.experts.142.w3", "model.layers.23.block_sparse_moe.experts.143.w3", "model.layers.23.block_sparse_moe.experts.144.w3", "model.layers.23.block_sparse_moe.experts.145.w3", "model.layers.23.block_sparse_moe.experts.146.w3", "model.layers.23.block_sparse_moe.experts.147.w3", "model.layers.23.block_sparse_moe.experts.148.w3", "model.layers.23.block_sparse_moe.experts.149.w3", "model.layers.23.block_sparse_moe.experts.150.w3", "model.layers.23.block_sparse_moe.experts.151.w3", "model.layers.23.block_sparse_moe.experts.152.w3", "model.layers.23.block_sparse_moe.experts.153.w3", "model.layers.23.block_sparse_moe.experts.154.w3", "model.layers.23.block_sparse_moe.experts.155.w3", "model.layers.23.block_sparse_moe.experts.156.w3", "model.layers.23.block_sparse_moe.experts.157.w3", "model.layers.23.block_sparse_moe.experts.158.w3", "model.layers.23.block_sparse_moe.experts.159.w3", "model.layers.23.block_sparse_moe.experts.160.w3", "model.layers.23.block_sparse_moe.experts.161.w3", "model.layers.23.block_sparse_moe.experts.162.w3", "model.layers.23.block_sparse_moe.experts.163.w3", "model.layers.23.block_sparse_moe.experts.164.w3", "model.layers.23.block_sparse_moe.experts.165.w3", "model.layers.23.block_sparse_moe.experts.166.w3", "model.layers.23.block_sparse_moe.experts.167.w3", "model.layers.23.block_sparse_moe.experts.168.w3", "model.layers.23.block_sparse_moe.experts.169.w3", "model.layers.23.block_sparse_moe.experts.170.w3", "model.layers.23.block_sparse_moe.experts.171.w3", "model.layers.23.block_sparse_moe.experts.172.w3", "model.layers.23.block_sparse_moe.experts.173.w3", "model.layers.23.block_sparse_moe.experts.174.w3", "model.layers.23.block_sparse_moe.experts.175.w3", "model.layers.23.block_sparse_moe.experts.176.w3", "model.layers.23.block_sparse_moe.experts.177.w3", "model.layers.23.block_sparse_moe.experts.178.w3", "model.layers.23.block_sparse_moe.experts.179.w3", "model.layers.23.block_sparse_moe.experts.180.w3", "model.layers.23.block_sparse_moe.experts.181.w3", "model.layers.23.block_sparse_moe.experts.182.w3", "model.layers.23.block_sparse_moe.experts.183.w3", "model.layers.23.block_sparse_moe.experts.184.w3", "model.layers.23.block_sparse_moe.experts.185.w3", "model.layers.23.block_sparse_moe.experts.186.w3", "model.layers.23.block_sparse_moe.experts.187.w3", "model.layers.23.block_sparse_moe.experts.188.w3", "model.layers.23.block_sparse_moe.experts.189.w3", "model.layers.23.block_sparse_moe.experts.190.w3", "model.layers.23.block_sparse_moe.experts.191.w3", "model.layers.23.block_sparse_moe.experts.192.w3", "model.layers.23.block_sparse_moe.experts.193.w3", "model.layers.23.block_sparse_moe.experts.194.w3", "model.layers.23.block_sparse_moe.experts.195.w3", "model.layers.23.block_sparse_moe.experts.196.w3", "model.layers.23.block_sparse_moe.experts.197.w3", "model.layers.23.block_sparse_moe.experts.198.w3", "model.layers.23.block_sparse_moe.experts.199.w3", "model.layers.23.block_sparse_moe.experts.200.w3", "model.layers.23.block_sparse_moe.experts.201.w3", "model.layers.23.block_sparse_moe.experts.202.w3", "model.layers.23.block_sparse_moe.experts.203.w3", "model.layers.23.block_sparse_moe.experts.204.w3", "model.layers.23.block_sparse_moe.experts.205.w3", "model.layers.23.block_sparse_moe.experts.206.w3", "model.layers.23.block_sparse_moe.experts.207.w3", "model.layers.23.block_sparse_moe.experts.208.w3", "model.layers.23.block_sparse_moe.experts.209.w3", "model.layers.23.block_sparse_moe.experts.210.w3", "model.layers.23.block_sparse_moe.experts.211.w3", "model.layers.23.block_sparse_moe.experts.212.w3", "model.layers.23.block_sparse_moe.experts.213.w3", "model.layers.23.block_sparse_moe.experts.214.w3", "model.layers.23.block_sparse_moe.experts.215.w3", "model.layers.23.block_sparse_moe.experts.216.w3", "model.layers.23.block_sparse_moe.experts.217.w3", "model.layers.23.block_sparse_moe.experts.218.w3", "model.layers.23.block_sparse_moe.experts.219.w3", "model.layers.23.block_sparse_moe.experts.220.w3", "model.layers.23.block_sparse_moe.experts.221.w3", "model.layers.23.block_sparse_moe.experts.222.w3", "model.layers.23.block_sparse_moe.experts.223.w3", "model.layers.23.block_sparse_moe.experts.224.w3", "model.layers.23.block_sparse_moe.experts.225.w3", "model.layers.23.block_sparse_moe.experts.226.w3", "model.layers.23.block_sparse_moe.experts.227.w3", "model.layers.23.block_sparse_moe.experts.228.w3", "model.layers.23.block_sparse_moe.experts.229.w3", "model.layers.23.block_sparse_moe.experts.230.w3", "model.layers.23.block_sparse_moe.experts.231.w3", "model.layers.23.block_sparse_moe.experts.232.w3", "model.layers.23.block_sparse_moe.experts.233.w3", "model.layers.23.block_sparse_moe.experts.234.w3", "model.layers.23.block_sparse_moe.experts.235.w3", "model.layers.23.block_sparse_moe.experts.236.w3", "model.layers.23.block_sparse_moe.experts.237.w3", "model.layers.23.block_sparse_moe.experts.238.w3", "model.layers.23.block_sparse_moe.experts.239.w3", "model.layers.23.block_sparse_moe.experts.240.w3", "model.layers.23.block_sparse_moe.experts.241.w3", "model.layers.23.block_sparse_moe.experts.242.w3", "model.layers.23.block_sparse_moe.experts.243.w3", "model.layers.23.block_sparse_moe.experts.244.w3", "model.layers.23.block_sparse_moe.experts.245.w3", "model.layers.23.block_sparse_moe.experts.246.w3", "model.layers.23.block_sparse_moe.experts.247.w3", "model.layers.23.block_sparse_moe.experts.248.w3", "model.layers.23.block_sparse_moe.experts.249.w3", "model.layers.23.block_sparse_moe.experts.250.w3", "model.layers.23.block_sparse_moe.experts.251.w3", "model.layers.23.block_sparse_moe.experts.252.w3", "model.layers.23.block_sparse_moe.experts.253.w3", "model.layers.23.block_sparse_moe.experts.254.w3", "model.layers.23.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00044309496879568755, "dbits": 2415919104 } ] }, { "idx": 119, "layers": [ "model.layers.23.block_sparse_moe.experts.0.w2", "model.layers.23.block_sparse_moe.experts.1.w2", "model.layers.23.block_sparse_moe.experts.2.w2", "model.layers.23.block_sparse_moe.experts.3.w2", "model.layers.23.block_sparse_moe.experts.4.w2", "model.layers.23.block_sparse_moe.experts.5.w2", "model.layers.23.block_sparse_moe.experts.6.w2", "model.layers.23.block_sparse_moe.experts.7.w2", "model.layers.23.block_sparse_moe.experts.8.w2", "model.layers.23.block_sparse_moe.experts.9.w2", "model.layers.23.block_sparse_moe.experts.10.w2", "model.layers.23.block_sparse_moe.experts.11.w2", "model.layers.23.block_sparse_moe.experts.12.w2", "model.layers.23.block_sparse_moe.experts.13.w2", "model.layers.23.block_sparse_moe.experts.14.w2", "model.layers.23.block_sparse_moe.experts.15.w2", "model.layers.23.block_sparse_moe.experts.16.w2", "model.layers.23.block_sparse_moe.experts.17.w2", "model.layers.23.block_sparse_moe.experts.18.w2", "model.layers.23.block_sparse_moe.experts.19.w2", "model.layers.23.block_sparse_moe.experts.20.w2", "model.layers.23.block_sparse_moe.experts.21.w2", "model.layers.23.block_sparse_moe.experts.22.w2", "model.layers.23.block_sparse_moe.experts.23.w2", "model.layers.23.block_sparse_moe.experts.24.w2", "model.layers.23.block_sparse_moe.experts.25.w2", "model.layers.23.block_sparse_moe.experts.26.w2", "model.layers.23.block_sparse_moe.experts.27.w2", "model.layers.23.block_sparse_moe.experts.28.w2", "model.layers.23.block_sparse_moe.experts.29.w2", "model.layers.23.block_sparse_moe.experts.30.w2", "model.layers.23.block_sparse_moe.experts.31.w2", "model.layers.23.block_sparse_moe.experts.32.w2", "model.layers.23.block_sparse_moe.experts.33.w2", "model.layers.23.block_sparse_moe.experts.34.w2", "model.layers.23.block_sparse_moe.experts.35.w2", "model.layers.23.block_sparse_moe.experts.36.w2", "model.layers.23.block_sparse_moe.experts.37.w2", "model.layers.23.block_sparse_moe.experts.38.w2", "model.layers.23.block_sparse_moe.experts.39.w2", "model.layers.23.block_sparse_moe.experts.40.w2", "model.layers.23.block_sparse_moe.experts.41.w2", "model.layers.23.block_sparse_moe.experts.42.w2", "model.layers.23.block_sparse_moe.experts.43.w2", "model.layers.23.block_sparse_moe.experts.44.w2", "model.layers.23.block_sparse_moe.experts.45.w2", "model.layers.23.block_sparse_moe.experts.46.w2", "model.layers.23.block_sparse_moe.experts.47.w2", "model.layers.23.block_sparse_moe.experts.48.w2", "model.layers.23.block_sparse_moe.experts.49.w2", "model.layers.23.block_sparse_moe.experts.50.w2", "model.layers.23.block_sparse_moe.experts.51.w2", "model.layers.23.block_sparse_moe.experts.52.w2", "model.layers.23.block_sparse_moe.experts.53.w2", "model.layers.23.block_sparse_moe.experts.54.w2", "model.layers.23.block_sparse_moe.experts.55.w2", "model.layers.23.block_sparse_moe.experts.56.w2", "model.layers.23.block_sparse_moe.experts.57.w2", "model.layers.23.block_sparse_moe.experts.58.w2", "model.layers.23.block_sparse_moe.experts.59.w2", "model.layers.23.block_sparse_moe.experts.60.w2", "model.layers.23.block_sparse_moe.experts.61.w2", "model.layers.23.block_sparse_moe.experts.62.w2", "model.layers.23.block_sparse_moe.experts.63.w2", "model.layers.23.block_sparse_moe.experts.64.w2", "model.layers.23.block_sparse_moe.experts.65.w2", "model.layers.23.block_sparse_moe.experts.66.w2", "model.layers.23.block_sparse_moe.experts.67.w2", "model.layers.23.block_sparse_moe.experts.68.w2", "model.layers.23.block_sparse_moe.experts.69.w2", "model.layers.23.block_sparse_moe.experts.70.w2", "model.layers.23.block_sparse_moe.experts.71.w2", "model.layers.23.block_sparse_moe.experts.72.w2", "model.layers.23.block_sparse_moe.experts.73.w2", "model.layers.23.block_sparse_moe.experts.74.w2", "model.layers.23.block_sparse_moe.experts.75.w2", "model.layers.23.block_sparse_moe.experts.76.w2", "model.layers.23.block_sparse_moe.experts.77.w2", "model.layers.23.block_sparse_moe.experts.78.w2", "model.layers.23.block_sparse_moe.experts.79.w2", "model.layers.23.block_sparse_moe.experts.80.w2", "model.layers.23.block_sparse_moe.experts.81.w2", "model.layers.23.block_sparse_moe.experts.82.w2", "model.layers.23.block_sparse_moe.experts.83.w2", "model.layers.23.block_sparse_moe.experts.84.w2", "model.layers.23.block_sparse_moe.experts.85.w2", "model.layers.23.block_sparse_moe.experts.86.w2", "model.layers.23.block_sparse_moe.experts.87.w2", "model.layers.23.block_sparse_moe.experts.88.w2", "model.layers.23.block_sparse_moe.experts.89.w2", "model.layers.23.block_sparse_moe.experts.90.w2", "model.layers.23.block_sparse_moe.experts.91.w2", "model.layers.23.block_sparse_moe.experts.92.w2", "model.layers.23.block_sparse_moe.experts.93.w2", "model.layers.23.block_sparse_moe.experts.94.w2", "model.layers.23.block_sparse_moe.experts.95.w2", "model.layers.23.block_sparse_moe.experts.96.w2", "model.layers.23.block_sparse_moe.experts.97.w2", "model.layers.23.block_sparse_moe.experts.98.w2", "model.layers.23.block_sparse_moe.experts.99.w2", "model.layers.23.block_sparse_moe.experts.100.w2", "model.layers.23.block_sparse_moe.experts.101.w2", "model.layers.23.block_sparse_moe.experts.102.w2", "model.layers.23.block_sparse_moe.experts.103.w2", "model.layers.23.block_sparse_moe.experts.104.w2", "model.layers.23.block_sparse_moe.experts.105.w2", "model.layers.23.block_sparse_moe.experts.106.w2", "model.layers.23.block_sparse_moe.experts.107.w2", "model.layers.23.block_sparse_moe.experts.108.w2", "model.layers.23.block_sparse_moe.experts.109.w2", "model.layers.23.block_sparse_moe.experts.110.w2", "model.layers.23.block_sparse_moe.experts.111.w2", "model.layers.23.block_sparse_moe.experts.112.w2", "model.layers.23.block_sparse_moe.experts.113.w2", "model.layers.23.block_sparse_moe.experts.114.w2", "model.layers.23.block_sparse_moe.experts.115.w2", "model.layers.23.block_sparse_moe.experts.116.w2", "model.layers.23.block_sparse_moe.experts.117.w2", "model.layers.23.block_sparse_moe.experts.118.w2", "model.layers.23.block_sparse_moe.experts.119.w2", "model.layers.23.block_sparse_moe.experts.120.w2", "model.layers.23.block_sparse_moe.experts.121.w2", "model.layers.23.block_sparse_moe.experts.122.w2", "model.layers.23.block_sparse_moe.experts.123.w2", "model.layers.23.block_sparse_moe.experts.124.w2", "model.layers.23.block_sparse_moe.experts.125.w2", "model.layers.23.block_sparse_moe.experts.126.w2", "model.layers.23.block_sparse_moe.experts.127.w2", "model.layers.23.block_sparse_moe.experts.128.w2", "model.layers.23.block_sparse_moe.experts.129.w2", "model.layers.23.block_sparse_moe.experts.130.w2", "model.layers.23.block_sparse_moe.experts.131.w2", "model.layers.23.block_sparse_moe.experts.132.w2", "model.layers.23.block_sparse_moe.experts.133.w2", "model.layers.23.block_sparse_moe.experts.134.w2", "model.layers.23.block_sparse_moe.experts.135.w2", "model.layers.23.block_sparse_moe.experts.136.w2", "model.layers.23.block_sparse_moe.experts.137.w2", "model.layers.23.block_sparse_moe.experts.138.w2", "model.layers.23.block_sparse_moe.experts.139.w2", "model.layers.23.block_sparse_moe.experts.140.w2", "model.layers.23.block_sparse_moe.experts.141.w2", "model.layers.23.block_sparse_moe.experts.142.w2", "model.layers.23.block_sparse_moe.experts.143.w2", "model.layers.23.block_sparse_moe.experts.144.w2", "model.layers.23.block_sparse_moe.experts.145.w2", "model.layers.23.block_sparse_moe.experts.146.w2", "model.layers.23.block_sparse_moe.experts.147.w2", "model.layers.23.block_sparse_moe.experts.148.w2", "model.layers.23.block_sparse_moe.experts.149.w2", "model.layers.23.block_sparse_moe.experts.150.w2", "model.layers.23.block_sparse_moe.experts.151.w2", "model.layers.23.block_sparse_moe.experts.152.w2", "model.layers.23.block_sparse_moe.experts.153.w2", "model.layers.23.block_sparse_moe.experts.154.w2", "model.layers.23.block_sparse_moe.experts.155.w2", "model.layers.23.block_sparse_moe.experts.156.w2", "model.layers.23.block_sparse_moe.experts.157.w2", "model.layers.23.block_sparse_moe.experts.158.w2", "model.layers.23.block_sparse_moe.experts.159.w2", "model.layers.23.block_sparse_moe.experts.160.w2", "model.layers.23.block_sparse_moe.experts.161.w2", "model.layers.23.block_sparse_moe.experts.162.w2", "model.layers.23.block_sparse_moe.experts.163.w2", "model.layers.23.block_sparse_moe.experts.164.w2", "model.layers.23.block_sparse_moe.experts.165.w2", "model.layers.23.block_sparse_moe.experts.166.w2", "model.layers.23.block_sparse_moe.experts.167.w2", "model.layers.23.block_sparse_moe.experts.168.w2", "model.layers.23.block_sparse_moe.experts.169.w2", "model.layers.23.block_sparse_moe.experts.170.w2", "model.layers.23.block_sparse_moe.experts.171.w2", "model.layers.23.block_sparse_moe.experts.172.w2", "model.layers.23.block_sparse_moe.experts.173.w2", "model.layers.23.block_sparse_moe.experts.174.w2", "model.layers.23.block_sparse_moe.experts.175.w2", "model.layers.23.block_sparse_moe.experts.176.w2", "model.layers.23.block_sparse_moe.experts.177.w2", "model.layers.23.block_sparse_moe.experts.178.w2", "model.layers.23.block_sparse_moe.experts.179.w2", "model.layers.23.block_sparse_moe.experts.180.w2", "model.layers.23.block_sparse_moe.experts.181.w2", "model.layers.23.block_sparse_moe.experts.182.w2", "model.layers.23.block_sparse_moe.experts.183.w2", "model.layers.23.block_sparse_moe.experts.184.w2", "model.layers.23.block_sparse_moe.experts.185.w2", "model.layers.23.block_sparse_moe.experts.186.w2", "model.layers.23.block_sparse_moe.experts.187.w2", "model.layers.23.block_sparse_moe.experts.188.w2", "model.layers.23.block_sparse_moe.experts.189.w2", "model.layers.23.block_sparse_moe.experts.190.w2", "model.layers.23.block_sparse_moe.experts.191.w2", "model.layers.23.block_sparse_moe.experts.192.w2", "model.layers.23.block_sparse_moe.experts.193.w2", "model.layers.23.block_sparse_moe.experts.194.w2", "model.layers.23.block_sparse_moe.experts.195.w2", "model.layers.23.block_sparse_moe.experts.196.w2", "model.layers.23.block_sparse_moe.experts.197.w2", "model.layers.23.block_sparse_moe.experts.198.w2", "model.layers.23.block_sparse_moe.experts.199.w2", "model.layers.23.block_sparse_moe.experts.200.w2", "model.layers.23.block_sparse_moe.experts.201.w2", "model.layers.23.block_sparse_moe.experts.202.w2", "model.layers.23.block_sparse_moe.experts.203.w2", "model.layers.23.block_sparse_moe.experts.204.w2", "model.layers.23.block_sparse_moe.experts.205.w2", "model.layers.23.block_sparse_moe.experts.206.w2", "model.layers.23.block_sparse_moe.experts.207.w2", "model.layers.23.block_sparse_moe.experts.208.w2", "model.layers.23.block_sparse_moe.experts.209.w2", "model.layers.23.block_sparse_moe.experts.210.w2", "model.layers.23.block_sparse_moe.experts.211.w2", "model.layers.23.block_sparse_moe.experts.212.w2", "model.layers.23.block_sparse_moe.experts.213.w2", "model.layers.23.block_sparse_moe.experts.214.w2", "model.layers.23.block_sparse_moe.experts.215.w2", "model.layers.23.block_sparse_moe.experts.216.w2", "model.layers.23.block_sparse_moe.experts.217.w2", "model.layers.23.block_sparse_moe.experts.218.w2", "model.layers.23.block_sparse_moe.experts.219.w2", "model.layers.23.block_sparse_moe.experts.220.w2", "model.layers.23.block_sparse_moe.experts.221.w2", "model.layers.23.block_sparse_moe.experts.222.w2", "model.layers.23.block_sparse_moe.experts.223.w2", "model.layers.23.block_sparse_moe.experts.224.w2", "model.layers.23.block_sparse_moe.experts.225.w2", "model.layers.23.block_sparse_moe.experts.226.w2", "model.layers.23.block_sparse_moe.experts.227.w2", "model.layers.23.block_sparse_moe.experts.228.w2", "model.layers.23.block_sparse_moe.experts.229.w2", "model.layers.23.block_sparse_moe.experts.230.w2", "model.layers.23.block_sparse_moe.experts.231.w2", "model.layers.23.block_sparse_moe.experts.232.w2", "model.layers.23.block_sparse_moe.experts.233.w2", "model.layers.23.block_sparse_moe.experts.234.w2", "model.layers.23.block_sparse_moe.experts.235.w2", "model.layers.23.block_sparse_moe.experts.236.w2", "model.layers.23.block_sparse_moe.experts.237.w2", "model.layers.23.block_sparse_moe.experts.238.w2", "model.layers.23.block_sparse_moe.experts.239.w2", "model.layers.23.block_sparse_moe.experts.240.w2", "model.layers.23.block_sparse_moe.experts.241.w2", "model.layers.23.block_sparse_moe.experts.242.w2", "model.layers.23.block_sparse_moe.experts.243.w2", "model.layers.23.block_sparse_moe.experts.244.w2", "model.layers.23.block_sparse_moe.experts.245.w2", "model.layers.23.block_sparse_moe.experts.246.w2", "model.layers.23.block_sparse_moe.experts.247.w2", "model.layers.23.block_sparse_moe.experts.248.w2", "model.layers.23.block_sparse_moe.experts.249.w2", "model.layers.23.block_sparse_moe.experts.250.w2", "model.layers.23.block_sparse_moe.experts.251.w2", "model.layers.23.block_sparse_moe.experts.252.w2", "model.layers.23.block_sparse_moe.experts.253.w2", "model.layers.23.block_sparse_moe.experts.254.w2", "model.layers.23.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0005365490913391557, "dbits": 1207959552 } ] }, { "idx": 120, "layers": [ "model.layers.24.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0012915015220641424, "dbits": 18874368 } ] }, { "idx": 121, "layers": [ "model.layers.24.self_attn.k_proj", "model.layers.24.self_attn.v_proj" ], "candidates": [ { "dkld": -0.008662542700767495, "dbits": 6291456 } ] }, { "idx": 122, "layers": [ "model.layers.24.self_attn.o_proj" ], "candidates": [ { "dkld": -0.013127663731574923, "dbits": 18874368 } ] }, { "idx": 123, "layers": [ "model.layers.24.block_sparse_moe.experts.0.w1", "model.layers.24.block_sparse_moe.experts.1.w1", "model.layers.24.block_sparse_moe.experts.2.w1", "model.layers.24.block_sparse_moe.experts.3.w1", "model.layers.24.block_sparse_moe.experts.4.w1", "model.layers.24.block_sparse_moe.experts.5.w1", "model.layers.24.block_sparse_moe.experts.6.w1", "model.layers.24.block_sparse_moe.experts.7.w1", "model.layers.24.block_sparse_moe.experts.8.w1", "model.layers.24.block_sparse_moe.experts.9.w1", "model.layers.24.block_sparse_moe.experts.10.w1", "model.layers.24.block_sparse_moe.experts.11.w1", "model.layers.24.block_sparse_moe.experts.12.w1", "model.layers.24.block_sparse_moe.experts.13.w1", "model.layers.24.block_sparse_moe.experts.14.w1", "model.layers.24.block_sparse_moe.experts.15.w1", "model.layers.24.block_sparse_moe.experts.16.w1", "model.layers.24.block_sparse_moe.experts.17.w1", "model.layers.24.block_sparse_moe.experts.18.w1", "model.layers.24.block_sparse_moe.experts.19.w1", "model.layers.24.block_sparse_moe.experts.20.w1", "model.layers.24.block_sparse_moe.experts.21.w1", "model.layers.24.block_sparse_moe.experts.22.w1", "model.layers.24.block_sparse_moe.experts.23.w1", "model.layers.24.block_sparse_moe.experts.24.w1", "model.layers.24.block_sparse_moe.experts.25.w1", "model.layers.24.block_sparse_moe.experts.26.w1", "model.layers.24.block_sparse_moe.experts.27.w1", "model.layers.24.block_sparse_moe.experts.28.w1", "model.layers.24.block_sparse_moe.experts.29.w1", "model.layers.24.block_sparse_moe.experts.30.w1", "model.layers.24.block_sparse_moe.experts.31.w1", "model.layers.24.block_sparse_moe.experts.32.w1", "model.layers.24.block_sparse_moe.experts.33.w1", "model.layers.24.block_sparse_moe.experts.34.w1", "model.layers.24.block_sparse_moe.experts.35.w1", "model.layers.24.block_sparse_moe.experts.36.w1", "model.layers.24.block_sparse_moe.experts.37.w1", "model.layers.24.block_sparse_moe.experts.38.w1", "model.layers.24.block_sparse_moe.experts.39.w1", "model.layers.24.block_sparse_moe.experts.40.w1", "model.layers.24.block_sparse_moe.experts.41.w1", "model.layers.24.block_sparse_moe.experts.42.w1", "model.layers.24.block_sparse_moe.experts.43.w1", "model.layers.24.block_sparse_moe.experts.44.w1", "model.layers.24.block_sparse_moe.experts.45.w1", "model.layers.24.block_sparse_moe.experts.46.w1", "model.layers.24.block_sparse_moe.experts.47.w1", "model.layers.24.block_sparse_moe.experts.48.w1", "model.layers.24.block_sparse_moe.experts.49.w1", "model.layers.24.block_sparse_moe.experts.50.w1", "model.layers.24.block_sparse_moe.experts.51.w1", "model.layers.24.block_sparse_moe.experts.52.w1", "model.layers.24.block_sparse_moe.experts.53.w1", "model.layers.24.block_sparse_moe.experts.54.w1", "model.layers.24.block_sparse_moe.experts.55.w1", "model.layers.24.block_sparse_moe.experts.56.w1", "model.layers.24.block_sparse_moe.experts.57.w1", "model.layers.24.block_sparse_moe.experts.58.w1", "model.layers.24.block_sparse_moe.experts.59.w1", "model.layers.24.block_sparse_moe.experts.60.w1", "model.layers.24.block_sparse_moe.experts.61.w1", "model.layers.24.block_sparse_moe.experts.62.w1", "model.layers.24.block_sparse_moe.experts.63.w1", "model.layers.24.block_sparse_moe.experts.64.w1", "model.layers.24.block_sparse_moe.experts.65.w1", "model.layers.24.block_sparse_moe.experts.66.w1", "model.layers.24.block_sparse_moe.experts.67.w1", "model.layers.24.block_sparse_moe.experts.68.w1", "model.layers.24.block_sparse_moe.experts.69.w1", "model.layers.24.block_sparse_moe.experts.70.w1", "model.layers.24.block_sparse_moe.experts.71.w1", "model.layers.24.block_sparse_moe.experts.72.w1", "model.layers.24.block_sparse_moe.experts.73.w1", "model.layers.24.block_sparse_moe.experts.74.w1", "model.layers.24.block_sparse_moe.experts.75.w1", "model.layers.24.block_sparse_moe.experts.76.w1", "model.layers.24.block_sparse_moe.experts.77.w1", "model.layers.24.block_sparse_moe.experts.78.w1", "model.layers.24.block_sparse_moe.experts.79.w1", "model.layers.24.block_sparse_moe.experts.80.w1", "model.layers.24.block_sparse_moe.experts.81.w1", "model.layers.24.block_sparse_moe.experts.82.w1", "model.layers.24.block_sparse_moe.experts.83.w1", "model.layers.24.block_sparse_moe.experts.84.w1", "model.layers.24.block_sparse_moe.experts.85.w1", "model.layers.24.block_sparse_moe.experts.86.w1", "model.layers.24.block_sparse_moe.experts.87.w1", "model.layers.24.block_sparse_moe.experts.88.w1", "model.layers.24.block_sparse_moe.experts.89.w1", "model.layers.24.block_sparse_moe.experts.90.w1", "model.layers.24.block_sparse_moe.experts.91.w1", "model.layers.24.block_sparse_moe.experts.92.w1", "model.layers.24.block_sparse_moe.experts.93.w1", "model.layers.24.block_sparse_moe.experts.94.w1", "model.layers.24.block_sparse_moe.experts.95.w1", "model.layers.24.block_sparse_moe.experts.96.w1", "model.layers.24.block_sparse_moe.experts.97.w1", "model.layers.24.block_sparse_moe.experts.98.w1", "model.layers.24.block_sparse_moe.experts.99.w1", "model.layers.24.block_sparse_moe.experts.100.w1", "model.layers.24.block_sparse_moe.experts.101.w1", "model.layers.24.block_sparse_moe.experts.102.w1", "model.layers.24.block_sparse_moe.experts.103.w1", "model.layers.24.block_sparse_moe.experts.104.w1", "model.layers.24.block_sparse_moe.experts.105.w1", "model.layers.24.block_sparse_moe.experts.106.w1", "model.layers.24.block_sparse_moe.experts.107.w1", "model.layers.24.block_sparse_moe.experts.108.w1", "model.layers.24.block_sparse_moe.experts.109.w1", "model.layers.24.block_sparse_moe.experts.110.w1", "model.layers.24.block_sparse_moe.experts.111.w1", "model.layers.24.block_sparse_moe.experts.112.w1", "model.layers.24.block_sparse_moe.experts.113.w1", "model.layers.24.block_sparse_moe.experts.114.w1", "model.layers.24.block_sparse_moe.experts.115.w1", "model.layers.24.block_sparse_moe.experts.116.w1", "model.layers.24.block_sparse_moe.experts.117.w1", "model.layers.24.block_sparse_moe.experts.118.w1", "model.layers.24.block_sparse_moe.experts.119.w1", "model.layers.24.block_sparse_moe.experts.120.w1", "model.layers.24.block_sparse_moe.experts.121.w1", "model.layers.24.block_sparse_moe.experts.122.w1", "model.layers.24.block_sparse_moe.experts.123.w1", "model.layers.24.block_sparse_moe.experts.124.w1", "model.layers.24.block_sparse_moe.experts.125.w1", "model.layers.24.block_sparse_moe.experts.126.w1", "model.layers.24.block_sparse_moe.experts.127.w1", "model.layers.24.block_sparse_moe.experts.128.w1", "model.layers.24.block_sparse_moe.experts.129.w1", "model.layers.24.block_sparse_moe.experts.130.w1", "model.layers.24.block_sparse_moe.experts.131.w1", "model.layers.24.block_sparse_moe.experts.132.w1", "model.layers.24.block_sparse_moe.experts.133.w1", "model.layers.24.block_sparse_moe.experts.134.w1", "model.layers.24.block_sparse_moe.experts.135.w1", "model.layers.24.block_sparse_moe.experts.136.w1", "model.layers.24.block_sparse_moe.experts.137.w1", "model.layers.24.block_sparse_moe.experts.138.w1", "model.layers.24.block_sparse_moe.experts.139.w1", "model.layers.24.block_sparse_moe.experts.140.w1", "model.layers.24.block_sparse_moe.experts.141.w1", "model.layers.24.block_sparse_moe.experts.142.w1", "model.layers.24.block_sparse_moe.experts.143.w1", "model.layers.24.block_sparse_moe.experts.144.w1", "model.layers.24.block_sparse_moe.experts.145.w1", "model.layers.24.block_sparse_moe.experts.146.w1", "model.layers.24.block_sparse_moe.experts.147.w1", "model.layers.24.block_sparse_moe.experts.148.w1", "model.layers.24.block_sparse_moe.experts.149.w1", "model.layers.24.block_sparse_moe.experts.150.w1", "model.layers.24.block_sparse_moe.experts.151.w1", "model.layers.24.block_sparse_moe.experts.152.w1", "model.layers.24.block_sparse_moe.experts.153.w1", "model.layers.24.block_sparse_moe.experts.154.w1", "model.layers.24.block_sparse_moe.experts.155.w1", "model.layers.24.block_sparse_moe.experts.156.w1", "model.layers.24.block_sparse_moe.experts.157.w1", "model.layers.24.block_sparse_moe.experts.158.w1", "model.layers.24.block_sparse_moe.experts.159.w1", "model.layers.24.block_sparse_moe.experts.160.w1", "model.layers.24.block_sparse_moe.experts.161.w1", "model.layers.24.block_sparse_moe.experts.162.w1", "model.layers.24.block_sparse_moe.experts.163.w1", "model.layers.24.block_sparse_moe.experts.164.w1", "model.layers.24.block_sparse_moe.experts.165.w1", "model.layers.24.block_sparse_moe.experts.166.w1", "model.layers.24.block_sparse_moe.experts.167.w1", "model.layers.24.block_sparse_moe.experts.168.w1", "model.layers.24.block_sparse_moe.experts.169.w1", "model.layers.24.block_sparse_moe.experts.170.w1", "model.layers.24.block_sparse_moe.experts.171.w1", "model.layers.24.block_sparse_moe.experts.172.w1", "model.layers.24.block_sparse_moe.experts.173.w1", "model.layers.24.block_sparse_moe.experts.174.w1", "model.layers.24.block_sparse_moe.experts.175.w1", "model.layers.24.block_sparse_moe.experts.176.w1", "model.layers.24.block_sparse_moe.experts.177.w1", "model.layers.24.block_sparse_moe.experts.178.w1", "model.layers.24.block_sparse_moe.experts.179.w1", "model.layers.24.block_sparse_moe.experts.180.w1", "model.layers.24.block_sparse_moe.experts.181.w1", "model.layers.24.block_sparse_moe.experts.182.w1", "model.layers.24.block_sparse_moe.experts.183.w1", "model.layers.24.block_sparse_moe.experts.184.w1", "model.layers.24.block_sparse_moe.experts.185.w1", "model.layers.24.block_sparse_moe.experts.186.w1", "model.layers.24.block_sparse_moe.experts.187.w1", "model.layers.24.block_sparse_moe.experts.188.w1", "model.layers.24.block_sparse_moe.experts.189.w1", "model.layers.24.block_sparse_moe.experts.190.w1", "model.layers.24.block_sparse_moe.experts.191.w1", "model.layers.24.block_sparse_moe.experts.192.w1", "model.layers.24.block_sparse_moe.experts.193.w1", "model.layers.24.block_sparse_moe.experts.194.w1", "model.layers.24.block_sparse_moe.experts.195.w1", "model.layers.24.block_sparse_moe.experts.196.w1", "model.layers.24.block_sparse_moe.experts.197.w1", "model.layers.24.block_sparse_moe.experts.198.w1", "model.layers.24.block_sparse_moe.experts.199.w1", "model.layers.24.block_sparse_moe.experts.200.w1", "model.layers.24.block_sparse_moe.experts.201.w1", "model.layers.24.block_sparse_moe.experts.202.w1", "model.layers.24.block_sparse_moe.experts.203.w1", "model.layers.24.block_sparse_moe.experts.204.w1", "model.layers.24.block_sparse_moe.experts.205.w1", "model.layers.24.block_sparse_moe.experts.206.w1", "model.layers.24.block_sparse_moe.experts.207.w1", "model.layers.24.block_sparse_moe.experts.208.w1", "model.layers.24.block_sparse_moe.experts.209.w1", "model.layers.24.block_sparse_moe.experts.210.w1", "model.layers.24.block_sparse_moe.experts.211.w1", "model.layers.24.block_sparse_moe.experts.212.w1", "model.layers.24.block_sparse_moe.experts.213.w1", "model.layers.24.block_sparse_moe.experts.214.w1", "model.layers.24.block_sparse_moe.experts.215.w1", "model.layers.24.block_sparse_moe.experts.216.w1", "model.layers.24.block_sparse_moe.experts.217.w1", "model.layers.24.block_sparse_moe.experts.218.w1", "model.layers.24.block_sparse_moe.experts.219.w1", "model.layers.24.block_sparse_moe.experts.220.w1", "model.layers.24.block_sparse_moe.experts.221.w1", "model.layers.24.block_sparse_moe.experts.222.w1", "model.layers.24.block_sparse_moe.experts.223.w1", "model.layers.24.block_sparse_moe.experts.224.w1", "model.layers.24.block_sparse_moe.experts.225.w1", "model.layers.24.block_sparse_moe.experts.226.w1", "model.layers.24.block_sparse_moe.experts.227.w1", "model.layers.24.block_sparse_moe.experts.228.w1", "model.layers.24.block_sparse_moe.experts.229.w1", "model.layers.24.block_sparse_moe.experts.230.w1", "model.layers.24.block_sparse_moe.experts.231.w1", "model.layers.24.block_sparse_moe.experts.232.w1", "model.layers.24.block_sparse_moe.experts.233.w1", "model.layers.24.block_sparse_moe.experts.234.w1", "model.layers.24.block_sparse_moe.experts.235.w1", "model.layers.24.block_sparse_moe.experts.236.w1", "model.layers.24.block_sparse_moe.experts.237.w1", "model.layers.24.block_sparse_moe.experts.238.w1", "model.layers.24.block_sparse_moe.experts.239.w1", "model.layers.24.block_sparse_moe.experts.240.w1", "model.layers.24.block_sparse_moe.experts.241.w1", "model.layers.24.block_sparse_moe.experts.242.w1", "model.layers.24.block_sparse_moe.experts.243.w1", "model.layers.24.block_sparse_moe.experts.244.w1", "model.layers.24.block_sparse_moe.experts.245.w1", "model.layers.24.block_sparse_moe.experts.246.w1", "model.layers.24.block_sparse_moe.experts.247.w1", "model.layers.24.block_sparse_moe.experts.248.w1", "model.layers.24.block_sparse_moe.experts.249.w1", "model.layers.24.block_sparse_moe.experts.250.w1", "model.layers.24.block_sparse_moe.experts.251.w1", "model.layers.24.block_sparse_moe.experts.252.w1", "model.layers.24.block_sparse_moe.experts.253.w1", "model.layers.24.block_sparse_moe.experts.254.w1", "model.layers.24.block_sparse_moe.experts.255.w1", "model.layers.24.block_sparse_moe.experts.0.w3", "model.layers.24.block_sparse_moe.experts.1.w3", "model.layers.24.block_sparse_moe.experts.2.w3", "model.layers.24.block_sparse_moe.experts.3.w3", "model.layers.24.block_sparse_moe.experts.4.w3", "model.layers.24.block_sparse_moe.experts.5.w3", "model.layers.24.block_sparse_moe.experts.6.w3", "model.layers.24.block_sparse_moe.experts.7.w3", "model.layers.24.block_sparse_moe.experts.8.w3", "model.layers.24.block_sparse_moe.experts.9.w3", "model.layers.24.block_sparse_moe.experts.10.w3", "model.layers.24.block_sparse_moe.experts.11.w3", "model.layers.24.block_sparse_moe.experts.12.w3", "model.layers.24.block_sparse_moe.experts.13.w3", "model.layers.24.block_sparse_moe.experts.14.w3", "model.layers.24.block_sparse_moe.experts.15.w3", "model.layers.24.block_sparse_moe.experts.16.w3", "model.layers.24.block_sparse_moe.experts.17.w3", "model.layers.24.block_sparse_moe.experts.18.w3", "model.layers.24.block_sparse_moe.experts.19.w3", "model.layers.24.block_sparse_moe.experts.20.w3", "model.layers.24.block_sparse_moe.experts.21.w3", "model.layers.24.block_sparse_moe.experts.22.w3", "model.layers.24.block_sparse_moe.experts.23.w3", "model.layers.24.block_sparse_moe.experts.24.w3", "model.layers.24.block_sparse_moe.experts.25.w3", "model.layers.24.block_sparse_moe.experts.26.w3", "model.layers.24.block_sparse_moe.experts.27.w3", "model.layers.24.block_sparse_moe.experts.28.w3", "model.layers.24.block_sparse_moe.experts.29.w3", "model.layers.24.block_sparse_moe.experts.30.w3", "model.layers.24.block_sparse_moe.experts.31.w3", "model.layers.24.block_sparse_moe.experts.32.w3", "model.layers.24.block_sparse_moe.experts.33.w3", "model.layers.24.block_sparse_moe.experts.34.w3", "model.layers.24.block_sparse_moe.experts.35.w3", "model.layers.24.block_sparse_moe.experts.36.w3", "model.layers.24.block_sparse_moe.experts.37.w3", "model.layers.24.block_sparse_moe.experts.38.w3", "model.layers.24.block_sparse_moe.experts.39.w3", "model.layers.24.block_sparse_moe.experts.40.w3", "model.layers.24.block_sparse_moe.experts.41.w3", "model.layers.24.block_sparse_moe.experts.42.w3", "model.layers.24.block_sparse_moe.experts.43.w3", "model.layers.24.block_sparse_moe.experts.44.w3", "model.layers.24.block_sparse_moe.experts.45.w3", "model.layers.24.block_sparse_moe.experts.46.w3", "model.layers.24.block_sparse_moe.experts.47.w3", "model.layers.24.block_sparse_moe.experts.48.w3", "model.layers.24.block_sparse_moe.experts.49.w3", "model.layers.24.block_sparse_moe.experts.50.w3", "model.layers.24.block_sparse_moe.experts.51.w3", "model.layers.24.block_sparse_moe.experts.52.w3", "model.layers.24.block_sparse_moe.experts.53.w3", "model.layers.24.block_sparse_moe.experts.54.w3", "model.layers.24.block_sparse_moe.experts.55.w3", "model.layers.24.block_sparse_moe.experts.56.w3", "model.layers.24.block_sparse_moe.experts.57.w3", "model.layers.24.block_sparse_moe.experts.58.w3", "model.layers.24.block_sparse_moe.experts.59.w3", "model.layers.24.block_sparse_moe.experts.60.w3", "model.layers.24.block_sparse_moe.experts.61.w3", "model.layers.24.block_sparse_moe.experts.62.w3", "model.layers.24.block_sparse_moe.experts.63.w3", "model.layers.24.block_sparse_moe.experts.64.w3", "model.layers.24.block_sparse_moe.experts.65.w3", "model.layers.24.block_sparse_moe.experts.66.w3", "model.layers.24.block_sparse_moe.experts.67.w3", "model.layers.24.block_sparse_moe.experts.68.w3", "model.layers.24.block_sparse_moe.experts.69.w3", "model.layers.24.block_sparse_moe.experts.70.w3", "model.layers.24.block_sparse_moe.experts.71.w3", "model.layers.24.block_sparse_moe.experts.72.w3", "model.layers.24.block_sparse_moe.experts.73.w3", "model.layers.24.block_sparse_moe.experts.74.w3", "model.layers.24.block_sparse_moe.experts.75.w3", "model.layers.24.block_sparse_moe.experts.76.w3", "model.layers.24.block_sparse_moe.experts.77.w3", "model.layers.24.block_sparse_moe.experts.78.w3", "model.layers.24.block_sparse_moe.experts.79.w3", "model.layers.24.block_sparse_moe.experts.80.w3", "model.layers.24.block_sparse_moe.experts.81.w3", "model.layers.24.block_sparse_moe.experts.82.w3", "model.layers.24.block_sparse_moe.experts.83.w3", "model.layers.24.block_sparse_moe.experts.84.w3", "model.layers.24.block_sparse_moe.experts.85.w3", "model.layers.24.block_sparse_moe.experts.86.w3", "model.layers.24.block_sparse_moe.experts.87.w3", "model.layers.24.block_sparse_moe.experts.88.w3", "model.layers.24.block_sparse_moe.experts.89.w3", "model.layers.24.block_sparse_moe.experts.90.w3", "model.layers.24.block_sparse_moe.experts.91.w3", "model.layers.24.block_sparse_moe.experts.92.w3", "model.layers.24.block_sparse_moe.experts.93.w3", "model.layers.24.block_sparse_moe.experts.94.w3", "model.layers.24.block_sparse_moe.experts.95.w3", "model.layers.24.block_sparse_moe.experts.96.w3", "model.layers.24.block_sparse_moe.experts.97.w3", "model.layers.24.block_sparse_moe.experts.98.w3", "model.layers.24.block_sparse_moe.experts.99.w3", "model.layers.24.block_sparse_moe.experts.100.w3", "model.layers.24.block_sparse_moe.experts.101.w3", "model.layers.24.block_sparse_moe.experts.102.w3", "model.layers.24.block_sparse_moe.experts.103.w3", "model.layers.24.block_sparse_moe.experts.104.w3", "model.layers.24.block_sparse_moe.experts.105.w3", "model.layers.24.block_sparse_moe.experts.106.w3", "model.layers.24.block_sparse_moe.experts.107.w3", "model.layers.24.block_sparse_moe.experts.108.w3", "model.layers.24.block_sparse_moe.experts.109.w3", "model.layers.24.block_sparse_moe.experts.110.w3", "model.layers.24.block_sparse_moe.experts.111.w3", "model.layers.24.block_sparse_moe.experts.112.w3", "model.layers.24.block_sparse_moe.experts.113.w3", "model.layers.24.block_sparse_moe.experts.114.w3", "model.layers.24.block_sparse_moe.experts.115.w3", "model.layers.24.block_sparse_moe.experts.116.w3", "model.layers.24.block_sparse_moe.experts.117.w3", "model.layers.24.block_sparse_moe.experts.118.w3", "model.layers.24.block_sparse_moe.experts.119.w3", "model.layers.24.block_sparse_moe.experts.120.w3", "model.layers.24.block_sparse_moe.experts.121.w3", "model.layers.24.block_sparse_moe.experts.122.w3", "model.layers.24.block_sparse_moe.experts.123.w3", "model.layers.24.block_sparse_moe.experts.124.w3", "model.layers.24.block_sparse_moe.experts.125.w3", "model.layers.24.block_sparse_moe.experts.126.w3", "model.layers.24.block_sparse_moe.experts.127.w3", "model.layers.24.block_sparse_moe.experts.128.w3", "model.layers.24.block_sparse_moe.experts.129.w3", "model.layers.24.block_sparse_moe.experts.130.w3", "model.layers.24.block_sparse_moe.experts.131.w3", "model.layers.24.block_sparse_moe.experts.132.w3", "model.layers.24.block_sparse_moe.experts.133.w3", "model.layers.24.block_sparse_moe.experts.134.w3", "model.layers.24.block_sparse_moe.experts.135.w3", "model.layers.24.block_sparse_moe.experts.136.w3", "model.layers.24.block_sparse_moe.experts.137.w3", "model.layers.24.block_sparse_moe.experts.138.w3", "model.layers.24.block_sparse_moe.experts.139.w3", "model.layers.24.block_sparse_moe.experts.140.w3", "model.layers.24.block_sparse_moe.experts.141.w3", "model.layers.24.block_sparse_moe.experts.142.w3", "model.layers.24.block_sparse_moe.experts.143.w3", "model.layers.24.block_sparse_moe.experts.144.w3", "model.layers.24.block_sparse_moe.experts.145.w3", "model.layers.24.block_sparse_moe.experts.146.w3", "model.layers.24.block_sparse_moe.experts.147.w3", "model.layers.24.block_sparse_moe.experts.148.w3", "model.layers.24.block_sparse_moe.experts.149.w3", "model.layers.24.block_sparse_moe.experts.150.w3", "model.layers.24.block_sparse_moe.experts.151.w3", "model.layers.24.block_sparse_moe.experts.152.w3", "model.layers.24.block_sparse_moe.experts.153.w3", "model.layers.24.block_sparse_moe.experts.154.w3", "model.layers.24.block_sparse_moe.experts.155.w3", "model.layers.24.block_sparse_moe.experts.156.w3", "model.layers.24.block_sparse_moe.experts.157.w3", "model.layers.24.block_sparse_moe.experts.158.w3", "model.layers.24.block_sparse_moe.experts.159.w3", "model.layers.24.block_sparse_moe.experts.160.w3", "model.layers.24.block_sparse_moe.experts.161.w3", "model.layers.24.block_sparse_moe.experts.162.w3", "model.layers.24.block_sparse_moe.experts.163.w3", "model.layers.24.block_sparse_moe.experts.164.w3", "model.layers.24.block_sparse_moe.experts.165.w3", "model.layers.24.block_sparse_moe.experts.166.w3", "model.layers.24.block_sparse_moe.experts.167.w3", "model.layers.24.block_sparse_moe.experts.168.w3", "model.layers.24.block_sparse_moe.experts.169.w3", "model.layers.24.block_sparse_moe.experts.170.w3", "model.layers.24.block_sparse_moe.experts.171.w3", "model.layers.24.block_sparse_moe.experts.172.w3", "model.layers.24.block_sparse_moe.experts.173.w3", "model.layers.24.block_sparse_moe.experts.174.w3", "model.layers.24.block_sparse_moe.experts.175.w3", "model.layers.24.block_sparse_moe.experts.176.w3", "model.layers.24.block_sparse_moe.experts.177.w3", "model.layers.24.block_sparse_moe.experts.178.w3", "model.layers.24.block_sparse_moe.experts.179.w3", "model.layers.24.block_sparse_moe.experts.180.w3", "model.layers.24.block_sparse_moe.experts.181.w3", "model.layers.24.block_sparse_moe.experts.182.w3", "model.layers.24.block_sparse_moe.experts.183.w3", "model.layers.24.block_sparse_moe.experts.184.w3", "model.layers.24.block_sparse_moe.experts.185.w3", "model.layers.24.block_sparse_moe.experts.186.w3", "model.layers.24.block_sparse_moe.experts.187.w3", "model.layers.24.block_sparse_moe.experts.188.w3", "model.layers.24.block_sparse_moe.experts.189.w3", "model.layers.24.block_sparse_moe.experts.190.w3", "model.layers.24.block_sparse_moe.experts.191.w3", "model.layers.24.block_sparse_moe.experts.192.w3", "model.layers.24.block_sparse_moe.experts.193.w3", "model.layers.24.block_sparse_moe.experts.194.w3", "model.layers.24.block_sparse_moe.experts.195.w3", "model.layers.24.block_sparse_moe.experts.196.w3", "model.layers.24.block_sparse_moe.experts.197.w3", "model.layers.24.block_sparse_moe.experts.198.w3", "model.layers.24.block_sparse_moe.experts.199.w3", "model.layers.24.block_sparse_moe.experts.200.w3", "model.layers.24.block_sparse_moe.experts.201.w3", "model.layers.24.block_sparse_moe.experts.202.w3", "model.layers.24.block_sparse_moe.experts.203.w3", "model.layers.24.block_sparse_moe.experts.204.w3", "model.layers.24.block_sparse_moe.experts.205.w3", "model.layers.24.block_sparse_moe.experts.206.w3", "model.layers.24.block_sparse_moe.experts.207.w3", "model.layers.24.block_sparse_moe.experts.208.w3", "model.layers.24.block_sparse_moe.experts.209.w3", "model.layers.24.block_sparse_moe.experts.210.w3", "model.layers.24.block_sparse_moe.experts.211.w3", "model.layers.24.block_sparse_moe.experts.212.w3", "model.layers.24.block_sparse_moe.experts.213.w3", "model.layers.24.block_sparse_moe.experts.214.w3", "model.layers.24.block_sparse_moe.experts.215.w3", "model.layers.24.block_sparse_moe.experts.216.w3", "model.layers.24.block_sparse_moe.experts.217.w3", "model.layers.24.block_sparse_moe.experts.218.w3", "model.layers.24.block_sparse_moe.experts.219.w3", "model.layers.24.block_sparse_moe.experts.220.w3", "model.layers.24.block_sparse_moe.experts.221.w3", "model.layers.24.block_sparse_moe.experts.222.w3", "model.layers.24.block_sparse_moe.experts.223.w3", "model.layers.24.block_sparse_moe.experts.224.w3", "model.layers.24.block_sparse_moe.experts.225.w3", "model.layers.24.block_sparse_moe.experts.226.w3", "model.layers.24.block_sparse_moe.experts.227.w3", "model.layers.24.block_sparse_moe.experts.228.w3", "model.layers.24.block_sparse_moe.experts.229.w3", "model.layers.24.block_sparse_moe.experts.230.w3", "model.layers.24.block_sparse_moe.experts.231.w3", "model.layers.24.block_sparse_moe.experts.232.w3", "model.layers.24.block_sparse_moe.experts.233.w3", "model.layers.24.block_sparse_moe.experts.234.w3", "model.layers.24.block_sparse_moe.experts.235.w3", "model.layers.24.block_sparse_moe.experts.236.w3", "model.layers.24.block_sparse_moe.experts.237.w3", "model.layers.24.block_sparse_moe.experts.238.w3", "model.layers.24.block_sparse_moe.experts.239.w3", "model.layers.24.block_sparse_moe.experts.240.w3", "model.layers.24.block_sparse_moe.experts.241.w3", "model.layers.24.block_sparse_moe.experts.242.w3", "model.layers.24.block_sparse_moe.experts.243.w3", "model.layers.24.block_sparse_moe.experts.244.w3", "model.layers.24.block_sparse_moe.experts.245.w3", "model.layers.24.block_sparse_moe.experts.246.w3", "model.layers.24.block_sparse_moe.experts.247.w3", "model.layers.24.block_sparse_moe.experts.248.w3", "model.layers.24.block_sparse_moe.experts.249.w3", "model.layers.24.block_sparse_moe.experts.250.w3", "model.layers.24.block_sparse_moe.experts.251.w3", "model.layers.24.block_sparse_moe.experts.252.w3", "model.layers.24.block_sparse_moe.experts.253.w3", "model.layers.24.block_sparse_moe.experts.254.w3", "model.layers.24.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00010833740234383882, "dbits": 2415919104 } ] }, { "idx": 124, "layers": [ "model.layers.24.block_sparse_moe.experts.0.w2", "model.layers.24.block_sparse_moe.experts.1.w2", "model.layers.24.block_sparse_moe.experts.2.w2", "model.layers.24.block_sparse_moe.experts.3.w2", "model.layers.24.block_sparse_moe.experts.4.w2", "model.layers.24.block_sparse_moe.experts.5.w2", "model.layers.24.block_sparse_moe.experts.6.w2", "model.layers.24.block_sparse_moe.experts.7.w2", "model.layers.24.block_sparse_moe.experts.8.w2", "model.layers.24.block_sparse_moe.experts.9.w2", "model.layers.24.block_sparse_moe.experts.10.w2", "model.layers.24.block_sparse_moe.experts.11.w2", "model.layers.24.block_sparse_moe.experts.12.w2", "model.layers.24.block_sparse_moe.experts.13.w2", "model.layers.24.block_sparse_moe.experts.14.w2", "model.layers.24.block_sparse_moe.experts.15.w2", "model.layers.24.block_sparse_moe.experts.16.w2", "model.layers.24.block_sparse_moe.experts.17.w2", "model.layers.24.block_sparse_moe.experts.18.w2", "model.layers.24.block_sparse_moe.experts.19.w2", "model.layers.24.block_sparse_moe.experts.20.w2", "model.layers.24.block_sparse_moe.experts.21.w2", "model.layers.24.block_sparse_moe.experts.22.w2", "model.layers.24.block_sparse_moe.experts.23.w2", "model.layers.24.block_sparse_moe.experts.24.w2", "model.layers.24.block_sparse_moe.experts.25.w2", "model.layers.24.block_sparse_moe.experts.26.w2", "model.layers.24.block_sparse_moe.experts.27.w2", "model.layers.24.block_sparse_moe.experts.28.w2", "model.layers.24.block_sparse_moe.experts.29.w2", "model.layers.24.block_sparse_moe.experts.30.w2", "model.layers.24.block_sparse_moe.experts.31.w2", "model.layers.24.block_sparse_moe.experts.32.w2", "model.layers.24.block_sparse_moe.experts.33.w2", "model.layers.24.block_sparse_moe.experts.34.w2", "model.layers.24.block_sparse_moe.experts.35.w2", "model.layers.24.block_sparse_moe.experts.36.w2", "model.layers.24.block_sparse_moe.experts.37.w2", "model.layers.24.block_sparse_moe.experts.38.w2", "model.layers.24.block_sparse_moe.experts.39.w2", "model.layers.24.block_sparse_moe.experts.40.w2", "model.layers.24.block_sparse_moe.experts.41.w2", "model.layers.24.block_sparse_moe.experts.42.w2", "model.layers.24.block_sparse_moe.experts.43.w2", "model.layers.24.block_sparse_moe.experts.44.w2", "model.layers.24.block_sparse_moe.experts.45.w2", "model.layers.24.block_sparse_moe.experts.46.w2", "model.layers.24.block_sparse_moe.experts.47.w2", "model.layers.24.block_sparse_moe.experts.48.w2", "model.layers.24.block_sparse_moe.experts.49.w2", "model.layers.24.block_sparse_moe.experts.50.w2", "model.layers.24.block_sparse_moe.experts.51.w2", "model.layers.24.block_sparse_moe.experts.52.w2", "model.layers.24.block_sparse_moe.experts.53.w2", "model.layers.24.block_sparse_moe.experts.54.w2", "model.layers.24.block_sparse_moe.experts.55.w2", "model.layers.24.block_sparse_moe.experts.56.w2", "model.layers.24.block_sparse_moe.experts.57.w2", "model.layers.24.block_sparse_moe.experts.58.w2", "model.layers.24.block_sparse_moe.experts.59.w2", "model.layers.24.block_sparse_moe.experts.60.w2", "model.layers.24.block_sparse_moe.experts.61.w2", "model.layers.24.block_sparse_moe.experts.62.w2", "model.layers.24.block_sparse_moe.experts.63.w2", "model.layers.24.block_sparse_moe.experts.64.w2", "model.layers.24.block_sparse_moe.experts.65.w2", "model.layers.24.block_sparse_moe.experts.66.w2", "model.layers.24.block_sparse_moe.experts.67.w2", "model.layers.24.block_sparse_moe.experts.68.w2", "model.layers.24.block_sparse_moe.experts.69.w2", "model.layers.24.block_sparse_moe.experts.70.w2", "model.layers.24.block_sparse_moe.experts.71.w2", "model.layers.24.block_sparse_moe.experts.72.w2", "model.layers.24.block_sparse_moe.experts.73.w2", "model.layers.24.block_sparse_moe.experts.74.w2", "model.layers.24.block_sparse_moe.experts.75.w2", "model.layers.24.block_sparse_moe.experts.76.w2", "model.layers.24.block_sparse_moe.experts.77.w2", "model.layers.24.block_sparse_moe.experts.78.w2", "model.layers.24.block_sparse_moe.experts.79.w2", "model.layers.24.block_sparse_moe.experts.80.w2", "model.layers.24.block_sparse_moe.experts.81.w2", "model.layers.24.block_sparse_moe.experts.82.w2", "model.layers.24.block_sparse_moe.experts.83.w2", "model.layers.24.block_sparse_moe.experts.84.w2", "model.layers.24.block_sparse_moe.experts.85.w2", "model.layers.24.block_sparse_moe.experts.86.w2", "model.layers.24.block_sparse_moe.experts.87.w2", "model.layers.24.block_sparse_moe.experts.88.w2", "model.layers.24.block_sparse_moe.experts.89.w2", "model.layers.24.block_sparse_moe.experts.90.w2", "model.layers.24.block_sparse_moe.experts.91.w2", "model.layers.24.block_sparse_moe.experts.92.w2", "model.layers.24.block_sparse_moe.experts.93.w2", "model.layers.24.block_sparse_moe.experts.94.w2", "model.layers.24.block_sparse_moe.experts.95.w2", "model.layers.24.block_sparse_moe.experts.96.w2", "model.layers.24.block_sparse_moe.experts.97.w2", "model.layers.24.block_sparse_moe.experts.98.w2", "model.layers.24.block_sparse_moe.experts.99.w2", "model.layers.24.block_sparse_moe.experts.100.w2", "model.layers.24.block_sparse_moe.experts.101.w2", "model.layers.24.block_sparse_moe.experts.102.w2", "model.layers.24.block_sparse_moe.experts.103.w2", "model.layers.24.block_sparse_moe.experts.104.w2", "model.layers.24.block_sparse_moe.experts.105.w2", "model.layers.24.block_sparse_moe.experts.106.w2", "model.layers.24.block_sparse_moe.experts.107.w2", "model.layers.24.block_sparse_moe.experts.108.w2", "model.layers.24.block_sparse_moe.experts.109.w2", "model.layers.24.block_sparse_moe.experts.110.w2", "model.layers.24.block_sparse_moe.experts.111.w2", "model.layers.24.block_sparse_moe.experts.112.w2", "model.layers.24.block_sparse_moe.experts.113.w2", "model.layers.24.block_sparse_moe.experts.114.w2", "model.layers.24.block_sparse_moe.experts.115.w2", "model.layers.24.block_sparse_moe.experts.116.w2", "model.layers.24.block_sparse_moe.experts.117.w2", "model.layers.24.block_sparse_moe.experts.118.w2", "model.layers.24.block_sparse_moe.experts.119.w2", "model.layers.24.block_sparse_moe.experts.120.w2", "model.layers.24.block_sparse_moe.experts.121.w2", "model.layers.24.block_sparse_moe.experts.122.w2", "model.layers.24.block_sparse_moe.experts.123.w2", "model.layers.24.block_sparse_moe.experts.124.w2", "model.layers.24.block_sparse_moe.experts.125.w2", "model.layers.24.block_sparse_moe.experts.126.w2", "model.layers.24.block_sparse_moe.experts.127.w2", "model.layers.24.block_sparse_moe.experts.128.w2", "model.layers.24.block_sparse_moe.experts.129.w2", "model.layers.24.block_sparse_moe.experts.130.w2", "model.layers.24.block_sparse_moe.experts.131.w2", "model.layers.24.block_sparse_moe.experts.132.w2", "model.layers.24.block_sparse_moe.experts.133.w2", "model.layers.24.block_sparse_moe.experts.134.w2", "model.layers.24.block_sparse_moe.experts.135.w2", "model.layers.24.block_sparse_moe.experts.136.w2", "model.layers.24.block_sparse_moe.experts.137.w2", "model.layers.24.block_sparse_moe.experts.138.w2", "model.layers.24.block_sparse_moe.experts.139.w2", "model.layers.24.block_sparse_moe.experts.140.w2", "model.layers.24.block_sparse_moe.experts.141.w2", "model.layers.24.block_sparse_moe.experts.142.w2", "model.layers.24.block_sparse_moe.experts.143.w2", "model.layers.24.block_sparse_moe.experts.144.w2", "model.layers.24.block_sparse_moe.experts.145.w2", "model.layers.24.block_sparse_moe.experts.146.w2", "model.layers.24.block_sparse_moe.experts.147.w2", "model.layers.24.block_sparse_moe.experts.148.w2", "model.layers.24.block_sparse_moe.experts.149.w2", "model.layers.24.block_sparse_moe.experts.150.w2", "model.layers.24.block_sparse_moe.experts.151.w2", "model.layers.24.block_sparse_moe.experts.152.w2", "model.layers.24.block_sparse_moe.experts.153.w2", "model.layers.24.block_sparse_moe.experts.154.w2", "model.layers.24.block_sparse_moe.experts.155.w2", "model.layers.24.block_sparse_moe.experts.156.w2", "model.layers.24.block_sparse_moe.experts.157.w2", "model.layers.24.block_sparse_moe.experts.158.w2", "model.layers.24.block_sparse_moe.experts.159.w2", "model.layers.24.block_sparse_moe.experts.160.w2", "model.layers.24.block_sparse_moe.experts.161.w2", "model.layers.24.block_sparse_moe.experts.162.w2", "model.layers.24.block_sparse_moe.experts.163.w2", "model.layers.24.block_sparse_moe.experts.164.w2", "model.layers.24.block_sparse_moe.experts.165.w2", "model.layers.24.block_sparse_moe.experts.166.w2", "model.layers.24.block_sparse_moe.experts.167.w2", "model.layers.24.block_sparse_moe.experts.168.w2", "model.layers.24.block_sparse_moe.experts.169.w2", "model.layers.24.block_sparse_moe.experts.170.w2", "model.layers.24.block_sparse_moe.experts.171.w2", "model.layers.24.block_sparse_moe.experts.172.w2", "model.layers.24.block_sparse_moe.experts.173.w2", "model.layers.24.block_sparse_moe.experts.174.w2", "model.layers.24.block_sparse_moe.experts.175.w2", "model.layers.24.block_sparse_moe.experts.176.w2", "model.layers.24.block_sparse_moe.experts.177.w2", "model.layers.24.block_sparse_moe.experts.178.w2", "model.layers.24.block_sparse_moe.experts.179.w2", "model.layers.24.block_sparse_moe.experts.180.w2", "model.layers.24.block_sparse_moe.experts.181.w2", "model.layers.24.block_sparse_moe.experts.182.w2", "model.layers.24.block_sparse_moe.experts.183.w2", "model.layers.24.block_sparse_moe.experts.184.w2", "model.layers.24.block_sparse_moe.experts.185.w2", "model.layers.24.block_sparse_moe.experts.186.w2", "model.layers.24.block_sparse_moe.experts.187.w2", "model.layers.24.block_sparse_moe.experts.188.w2", "model.layers.24.block_sparse_moe.experts.189.w2", "model.layers.24.block_sparse_moe.experts.190.w2", "model.layers.24.block_sparse_moe.experts.191.w2", "model.layers.24.block_sparse_moe.experts.192.w2", "model.layers.24.block_sparse_moe.experts.193.w2", "model.layers.24.block_sparse_moe.experts.194.w2", "model.layers.24.block_sparse_moe.experts.195.w2", "model.layers.24.block_sparse_moe.experts.196.w2", "model.layers.24.block_sparse_moe.experts.197.w2", "model.layers.24.block_sparse_moe.experts.198.w2", "model.layers.24.block_sparse_moe.experts.199.w2", "model.layers.24.block_sparse_moe.experts.200.w2", "model.layers.24.block_sparse_moe.experts.201.w2", "model.layers.24.block_sparse_moe.experts.202.w2", "model.layers.24.block_sparse_moe.experts.203.w2", "model.layers.24.block_sparse_moe.experts.204.w2", "model.layers.24.block_sparse_moe.experts.205.w2", "model.layers.24.block_sparse_moe.experts.206.w2", "model.layers.24.block_sparse_moe.experts.207.w2", "model.layers.24.block_sparse_moe.experts.208.w2", "model.layers.24.block_sparse_moe.experts.209.w2", "model.layers.24.block_sparse_moe.experts.210.w2", "model.layers.24.block_sparse_moe.experts.211.w2", "model.layers.24.block_sparse_moe.experts.212.w2", "model.layers.24.block_sparse_moe.experts.213.w2", "model.layers.24.block_sparse_moe.experts.214.w2", "model.layers.24.block_sparse_moe.experts.215.w2", "model.layers.24.block_sparse_moe.experts.216.w2", "model.layers.24.block_sparse_moe.experts.217.w2", "model.layers.24.block_sparse_moe.experts.218.w2", "model.layers.24.block_sparse_moe.experts.219.w2", "model.layers.24.block_sparse_moe.experts.220.w2", "model.layers.24.block_sparse_moe.experts.221.w2", "model.layers.24.block_sparse_moe.experts.222.w2", "model.layers.24.block_sparse_moe.experts.223.w2", "model.layers.24.block_sparse_moe.experts.224.w2", "model.layers.24.block_sparse_moe.experts.225.w2", "model.layers.24.block_sparse_moe.experts.226.w2", "model.layers.24.block_sparse_moe.experts.227.w2", "model.layers.24.block_sparse_moe.experts.228.w2", "model.layers.24.block_sparse_moe.experts.229.w2", "model.layers.24.block_sparse_moe.experts.230.w2", "model.layers.24.block_sparse_moe.experts.231.w2", "model.layers.24.block_sparse_moe.experts.232.w2", "model.layers.24.block_sparse_moe.experts.233.w2", "model.layers.24.block_sparse_moe.experts.234.w2", "model.layers.24.block_sparse_moe.experts.235.w2", "model.layers.24.block_sparse_moe.experts.236.w2", "model.layers.24.block_sparse_moe.experts.237.w2", "model.layers.24.block_sparse_moe.experts.238.w2", "model.layers.24.block_sparse_moe.experts.239.w2", "model.layers.24.block_sparse_moe.experts.240.w2", "model.layers.24.block_sparse_moe.experts.241.w2", "model.layers.24.block_sparse_moe.experts.242.w2", "model.layers.24.block_sparse_moe.experts.243.w2", "model.layers.24.block_sparse_moe.experts.244.w2", "model.layers.24.block_sparse_moe.experts.245.w2", "model.layers.24.block_sparse_moe.experts.246.w2", "model.layers.24.block_sparse_moe.experts.247.w2", "model.layers.24.block_sparse_moe.experts.248.w2", "model.layers.24.block_sparse_moe.experts.249.w2", "model.layers.24.block_sparse_moe.experts.250.w2", "model.layers.24.block_sparse_moe.experts.251.w2", "model.layers.24.block_sparse_moe.experts.252.w2", "model.layers.24.block_sparse_moe.experts.253.w2", "model.layers.24.block_sparse_moe.experts.254.w2", "model.layers.24.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0002593725919723955, "dbits": 1207959552 } ] }, { "idx": 125, "layers": [ "model.layers.25.self_attn.q_proj" ], "candidates": [ { "dkld": -0.008335992693901062, "dbits": 18874368 } ] }, { "idx": 126, "layers": [ "model.layers.25.self_attn.k_proj", "model.layers.25.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0041806280612946, "dbits": 6291456 } ] }, { "idx": 127, "layers": [ "model.layers.25.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0043378651142120805, "dbits": 18874368 } ] }, { "idx": 128, "layers": [ "model.layers.25.block_sparse_moe.experts.0.w1", "model.layers.25.block_sparse_moe.experts.1.w1", "model.layers.25.block_sparse_moe.experts.2.w1", "model.layers.25.block_sparse_moe.experts.3.w1", "model.layers.25.block_sparse_moe.experts.4.w1", "model.layers.25.block_sparse_moe.experts.5.w1", "model.layers.25.block_sparse_moe.experts.6.w1", "model.layers.25.block_sparse_moe.experts.7.w1", "model.layers.25.block_sparse_moe.experts.8.w1", "model.layers.25.block_sparse_moe.experts.9.w1", "model.layers.25.block_sparse_moe.experts.10.w1", "model.layers.25.block_sparse_moe.experts.11.w1", "model.layers.25.block_sparse_moe.experts.12.w1", "model.layers.25.block_sparse_moe.experts.13.w1", "model.layers.25.block_sparse_moe.experts.14.w1", "model.layers.25.block_sparse_moe.experts.15.w1", "model.layers.25.block_sparse_moe.experts.16.w1", "model.layers.25.block_sparse_moe.experts.17.w1", "model.layers.25.block_sparse_moe.experts.18.w1", "model.layers.25.block_sparse_moe.experts.19.w1", "model.layers.25.block_sparse_moe.experts.20.w1", "model.layers.25.block_sparse_moe.experts.21.w1", "model.layers.25.block_sparse_moe.experts.22.w1", "model.layers.25.block_sparse_moe.experts.23.w1", "model.layers.25.block_sparse_moe.experts.24.w1", "model.layers.25.block_sparse_moe.experts.25.w1", "model.layers.25.block_sparse_moe.experts.26.w1", "model.layers.25.block_sparse_moe.experts.27.w1", "model.layers.25.block_sparse_moe.experts.28.w1", "model.layers.25.block_sparse_moe.experts.29.w1", "model.layers.25.block_sparse_moe.experts.30.w1", "model.layers.25.block_sparse_moe.experts.31.w1", "model.layers.25.block_sparse_moe.experts.32.w1", "model.layers.25.block_sparse_moe.experts.33.w1", "model.layers.25.block_sparse_moe.experts.34.w1", "model.layers.25.block_sparse_moe.experts.35.w1", "model.layers.25.block_sparse_moe.experts.36.w1", "model.layers.25.block_sparse_moe.experts.37.w1", "model.layers.25.block_sparse_moe.experts.38.w1", "model.layers.25.block_sparse_moe.experts.39.w1", "model.layers.25.block_sparse_moe.experts.40.w1", "model.layers.25.block_sparse_moe.experts.41.w1", "model.layers.25.block_sparse_moe.experts.42.w1", "model.layers.25.block_sparse_moe.experts.43.w1", "model.layers.25.block_sparse_moe.experts.44.w1", "model.layers.25.block_sparse_moe.experts.45.w1", "model.layers.25.block_sparse_moe.experts.46.w1", "model.layers.25.block_sparse_moe.experts.47.w1", "model.layers.25.block_sparse_moe.experts.48.w1", "model.layers.25.block_sparse_moe.experts.49.w1", "model.layers.25.block_sparse_moe.experts.50.w1", "model.layers.25.block_sparse_moe.experts.51.w1", "model.layers.25.block_sparse_moe.experts.52.w1", "model.layers.25.block_sparse_moe.experts.53.w1", "model.layers.25.block_sparse_moe.experts.54.w1", "model.layers.25.block_sparse_moe.experts.55.w1", "model.layers.25.block_sparse_moe.experts.56.w1", "model.layers.25.block_sparse_moe.experts.57.w1", "model.layers.25.block_sparse_moe.experts.58.w1", "model.layers.25.block_sparse_moe.experts.59.w1", "model.layers.25.block_sparse_moe.experts.60.w1", "model.layers.25.block_sparse_moe.experts.61.w1", "model.layers.25.block_sparse_moe.experts.62.w1", "model.layers.25.block_sparse_moe.experts.63.w1", "model.layers.25.block_sparse_moe.experts.64.w1", "model.layers.25.block_sparse_moe.experts.65.w1", "model.layers.25.block_sparse_moe.experts.66.w1", "model.layers.25.block_sparse_moe.experts.67.w1", "model.layers.25.block_sparse_moe.experts.68.w1", "model.layers.25.block_sparse_moe.experts.69.w1", "model.layers.25.block_sparse_moe.experts.70.w1", "model.layers.25.block_sparse_moe.experts.71.w1", "model.layers.25.block_sparse_moe.experts.72.w1", "model.layers.25.block_sparse_moe.experts.73.w1", "model.layers.25.block_sparse_moe.experts.74.w1", "model.layers.25.block_sparse_moe.experts.75.w1", "model.layers.25.block_sparse_moe.experts.76.w1", "model.layers.25.block_sparse_moe.experts.77.w1", "model.layers.25.block_sparse_moe.experts.78.w1", "model.layers.25.block_sparse_moe.experts.79.w1", "model.layers.25.block_sparse_moe.experts.80.w1", "model.layers.25.block_sparse_moe.experts.81.w1", "model.layers.25.block_sparse_moe.experts.82.w1", "model.layers.25.block_sparse_moe.experts.83.w1", "model.layers.25.block_sparse_moe.experts.84.w1", "model.layers.25.block_sparse_moe.experts.85.w1", "model.layers.25.block_sparse_moe.experts.86.w1", "model.layers.25.block_sparse_moe.experts.87.w1", "model.layers.25.block_sparse_moe.experts.88.w1", "model.layers.25.block_sparse_moe.experts.89.w1", "model.layers.25.block_sparse_moe.experts.90.w1", "model.layers.25.block_sparse_moe.experts.91.w1", "model.layers.25.block_sparse_moe.experts.92.w1", "model.layers.25.block_sparse_moe.experts.93.w1", "model.layers.25.block_sparse_moe.experts.94.w1", "model.layers.25.block_sparse_moe.experts.95.w1", "model.layers.25.block_sparse_moe.experts.96.w1", "model.layers.25.block_sparse_moe.experts.97.w1", "model.layers.25.block_sparse_moe.experts.98.w1", "model.layers.25.block_sparse_moe.experts.99.w1", "model.layers.25.block_sparse_moe.experts.100.w1", "model.layers.25.block_sparse_moe.experts.101.w1", "model.layers.25.block_sparse_moe.experts.102.w1", "model.layers.25.block_sparse_moe.experts.103.w1", "model.layers.25.block_sparse_moe.experts.104.w1", "model.layers.25.block_sparse_moe.experts.105.w1", "model.layers.25.block_sparse_moe.experts.106.w1", "model.layers.25.block_sparse_moe.experts.107.w1", "model.layers.25.block_sparse_moe.experts.108.w1", "model.layers.25.block_sparse_moe.experts.109.w1", "model.layers.25.block_sparse_moe.experts.110.w1", "model.layers.25.block_sparse_moe.experts.111.w1", "model.layers.25.block_sparse_moe.experts.112.w1", "model.layers.25.block_sparse_moe.experts.113.w1", "model.layers.25.block_sparse_moe.experts.114.w1", "model.layers.25.block_sparse_moe.experts.115.w1", "model.layers.25.block_sparse_moe.experts.116.w1", "model.layers.25.block_sparse_moe.experts.117.w1", "model.layers.25.block_sparse_moe.experts.118.w1", "model.layers.25.block_sparse_moe.experts.119.w1", "model.layers.25.block_sparse_moe.experts.120.w1", "model.layers.25.block_sparse_moe.experts.121.w1", "model.layers.25.block_sparse_moe.experts.122.w1", "model.layers.25.block_sparse_moe.experts.123.w1", "model.layers.25.block_sparse_moe.experts.124.w1", "model.layers.25.block_sparse_moe.experts.125.w1", "model.layers.25.block_sparse_moe.experts.126.w1", "model.layers.25.block_sparse_moe.experts.127.w1", "model.layers.25.block_sparse_moe.experts.128.w1", "model.layers.25.block_sparse_moe.experts.129.w1", "model.layers.25.block_sparse_moe.experts.130.w1", "model.layers.25.block_sparse_moe.experts.131.w1", "model.layers.25.block_sparse_moe.experts.132.w1", "model.layers.25.block_sparse_moe.experts.133.w1", "model.layers.25.block_sparse_moe.experts.134.w1", "model.layers.25.block_sparse_moe.experts.135.w1", "model.layers.25.block_sparse_moe.experts.136.w1", "model.layers.25.block_sparse_moe.experts.137.w1", "model.layers.25.block_sparse_moe.experts.138.w1", "model.layers.25.block_sparse_moe.experts.139.w1", "model.layers.25.block_sparse_moe.experts.140.w1", "model.layers.25.block_sparse_moe.experts.141.w1", "model.layers.25.block_sparse_moe.experts.142.w1", "model.layers.25.block_sparse_moe.experts.143.w1", "model.layers.25.block_sparse_moe.experts.144.w1", "model.layers.25.block_sparse_moe.experts.145.w1", "model.layers.25.block_sparse_moe.experts.146.w1", "model.layers.25.block_sparse_moe.experts.147.w1", "model.layers.25.block_sparse_moe.experts.148.w1", "model.layers.25.block_sparse_moe.experts.149.w1", "model.layers.25.block_sparse_moe.experts.150.w1", "model.layers.25.block_sparse_moe.experts.151.w1", "model.layers.25.block_sparse_moe.experts.152.w1", "model.layers.25.block_sparse_moe.experts.153.w1", "model.layers.25.block_sparse_moe.experts.154.w1", "model.layers.25.block_sparse_moe.experts.155.w1", "model.layers.25.block_sparse_moe.experts.156.w1", "model.layers.25.block_sparse_moe.experts.157.w1", "model.layers.25.block_sparse_moe.experts.158.w1", "model.layers.25.block_sparse_moe.experts.159.w1", "model.layers.25.block_sparse_moe.experts.160.w1", "model.layers.25.block_sparse_moe.experts.161.w1", "model.layers.25.block_sparse_moe.experts.162.w1", "model.layers.25.block_sparse_moe.experts.163.w1", "model.layers.25.block_sparse_moe.experts.164.w1", "model.layers.25.block_sparse_moe.experts.165.w1", "model.layers.25.block_sparse_moe.experts.166.w1", "model.layers.25.block_sparse_moe.experts.167.w1", "model.layers.25.block_sparse_moe.experts.168.w1", "model.layers.25.block_sparse_moe.experts.169.w1", "model.layers.25.block_sparse_moe.experts.170.w1", "model.layers.25.block_sparse_moe.experts.171.w1", "model.layers.25.block_sparse_moe.experts.172.w1", "model.layers.25.block_sparse_moe.experts.173.w1", "model.layers.25.block_sparse_moe.experts.174.w1", "model.layers.25.block_sparse_moe.experts.175.w1", "model.layers.25.block_sparse_moe.experts.176.w1", "model.layers.25.block_sparse_moe.experts.177.w1", "model.layers.25.block_sparse_moe.experts.178.w1", "model.layers.25.block_sparse_moe.experts.179.w1", "model.layers.25.block_sparse_moe.experts.180.w1", "model.layers.25.block_sparse_moe.experts.181.w1", "model.layers.25.block_sparse_moe.experts.182.w1", "model.layers.25.block_sparse_moe.experts.183.w1", "model.layers.25.block_sparse_moe.experts.184.w1", "model.layers.25.block_sparse_moe.experts.185.w1", "model.layers.25.block_sparse_moe.experts.186.w1", "model.layers.25.block_sparse_moe.experts.187.w1", "model.layers.25.block_sparse_moe.experts.188.w1", "model.layers.25.block_sparse_moe.experts.189.w1", "model.layers.25.block_sparse_moe.experts.190.w1", "model.layers.25.block_sparse_moe.experts.191.w1", "model.layers.25.block_sparse_moe.experts.192.w1", "model.layers.25.block_sparse_moe.experts.193.w1", "model.layers.25.block_sparse_moe.experts.194.w1", "model.layers.25.block_sparse_moe.experts.195.w1", "model.layers.25.block_sparse_moe.experts.196.w1", "model.layers.25.block_sparse_moe.experts.197.w1", "model.layers.25.block_sparse_moe.experts.198.w1", "model.layers.25.block_sparse_moe.experts.199.w1", "model.layers.25.block_sparse_moe.experts.200.w1", "model.layers.25.block_sparse_moe.experts.201.w1", "model.layers.25.block_sparse_moe.experts.202.w1", "model.layers.25.block_sparse_moe.experts.203.w1", "model.layers.25.block_sparse_moe.experts.204.w1", "model.layers.25.block_sparse_moe.experts.205.w1", "model.layers.25.block_sparse_moe.experts.206.w1", "model.layers.25.block_sparse_moe.experts.207.w1", "model.layers.25.block_sparse_moe.experts.208.w1", "model.layers.25.block_sparse_moe.experts.209.w1", "model.layers.25.block_sparse_moe.experts.210.w1", "model.layers.25.block_sparse_moe.experts.211.w1", "model.layers.25.block_sparse_moe.experts.212.w1", "model.layers.25.block_sparse_moe.experts.213.w1", "model.layers.25.block_sparse_moe.experts.214.w1", "model.layers.25.block_sparse_moe.experts.215.w1", "model.layers.25.block_sparse_moe.experts.216.w1", "model.layers.25.block_sparse_moe.experts.217.w1", "model.layers.25.block_sparse_moe.experts.218.w1", "model.layers.25.block_sparse_moe.experts.219.w1", "model.layers.25.block_sparse_moe.experts.220.w1", "model.layers.25.block_sparse_moe.experts.221.w1", "model.layers.25.block_sparse_moe.experts.222.w1", "model.layers.25.block_sparse_moe.experts.223.w1", "model.layers.25.block_sparse_moe.experts.224.w1", "model.layers.25.block_sparse_moe.experts.225.w1", "model.layers.25.block_sparse_moe.experts.226.w1", "model.layers.25.block_sparse_moe.experts.227.w1", "model.layers.25.block_sparse_moe.experts.228.w1", "model.layers.25.block_sparse_moe.experts.229.w1", "model.layers.25.block_sparse_moe.experts.230.w1", "model.layers.25.block_sparse_moe.experts.231.w1", "model.layers.25.block_sparse_moe.experts.232.w1", "model.layers.25.block_sparse_moe.experts.233.w1", "model.layers.25.block_sparse_moe.experts.234.w1", "model.layers.25.block_sparse_moe.experts.235.w1", "model.layers.25.block_sparse_moe.experts.236.w1", "model.layers.25.block_sparse_moe.experts.237.w1", "model.layers.25.block_sparse_moe.experts.238.w1", "model.layers.25.block_sparse_moe.experts.239.w1", "model.layers.25.block_sparse_moe.experts.240.w1", "model.layers.25.block_sparse_moe.experts.241.w1", "model.layers.25.block_sparse_moe.experts.242.w1", "model.layers.25.block_sparse_moe.experts.243.w1", "model.layers.25.block_sparse_moe.experts.244.w1", "model.layers.25.block_sparse_moe.experts.245.w1", "model.layers.25.block_sparse_moe.experts.246.w1", "model.layers.25.block_sparse_moe.experts.247.w1", "model.layers.25.block_sparse_moe.experts.248.w1", "model.layers.25.block_sparse_moe.experts.249.w1", "model.layers.25.block_sparse_moe.experts.250.w1", "model.layers.25.block_sparse_moe.experts.251.w1", "model.layers.25.block_sparse_moe.experts.252.w1", "model.layers.25.block_sparse_moe.experts.253.w1", "model.layers.25.block_sparse_moe.experts.254.w1", "model.layers.25.block_sparse_moe.experts.255.w1", "model.layers.25.block_sparse_moe.experts.0.w3", "model.layers.25.block_sparse_moe.experts.1.w3", "model.layers.25.block_sparse_moe.experts.2.w3", "model.layers.25.block_sparse_moe.experts.3.w3", "model.layers.25.block_sparse_moe.experts.4.w3", "model.layers.25.block_sparse_moe.experts.5.w3", "model.layers.25.block_sparse_moe.experts.6.w3", "model.layers.25.block_sparse_moe.experts.7.w3", "model.layers.25.block_sparse_moe.experts.8.w3", "model.layers.25.block_sparse_moe.experts.9.w3", "model.layers.25.block_sparse_moe.experts.10.w3", "model.layers.25.block_sparse_moe.experts.11.w3", "model.layers.25.block_sparse_moe.experts.12.w3", "model.layers.25.block_sparse_moe.experts.13.w3", "model.layers.25.block_sparse_moe.experts.14.w3", "model.layers.25.block_sparse_moe.experts.15.w3", "model.layers.25.block_sparse_moe.experts.16.w3", "model.layers.25.block_sparse_moe.experts.17.w3", "model.layers.25.block_sparse_moe.experts.18.w3", "model.layers.25.block_sparse_moe.experts.19.w3", "model.layers.25.block_sparse_moe.experts.20.w3", "model.layers.25.block_sparse_moe.experts.21.w3", "model.layers.25.block_sparse_moe.experts.22.w3", "model.layers.25.block_sparse_moe.experts.23.w3", "model.layers.25.block_sparse_moe.experts.24.w3", "model.layers.25.block_sparse_moe.experts.25.w3", "model.layers.25.block_sparse_moe.experts.26.w3", "model.layers.25.block_sparse_moe.experts.27.w3", "model.layers.25.block_sparse_moe.experts.28.w3", "model.layers.25.block_sparse_moe.experts.29.w3", "model.layers.25.block_sparse_moe.experts.30.w3", "model.layers.25.block_sparse_moe.experts.31.w3", "model.layers.25.block_sparse_moe.experts.32.w3", "model.layers.25.block_sparse_moe.experts.33.w3", "model.layers.25.block_sparse_moe.experts.34.w3", "model.layers.25.block_sparse_moe.experts.35.w3", "model.layers.25.block_sparse_moe.experts.36.w3", "model.layers.25.block_sparse_moe.experts.37.w3", "model.layers.25.block_sparse_moe.experts.38.w3", "model.layers.25.block_sparse_moe.experts.39.w3", "model.layers.25.block_sparse_moe.experts.40.w3", "model.layers.25.block_sparse_moe.experts.41.w3", "model.layers.25.block_sparse_moe.experts.42.w3", "model.layers.25.block_sparse_moe.experts.43.w3", "model.layers.25.block_sparse_moe.experts.44.w3", "model.layers.25.block_sparse_moe.experts.45.w3", "model.layers.25.block_sparse_moe.experts.46.w3", "model.layers.25.block_sparse_moe.experts.47.w3", "model.layers.25.block_sparse_moe.experts.48.w3", "model.layers.25.block_sparse_moe.experts.49.w3", "model.layers.25.block_sparse_moe.experts.50.w3", "model.layers.25.block_sparse_moe.experts.51.w3", "model.layers.25.block_sparse_moe.experts.52.w3", "model.layers.25.block_sparse_moe.experts.53.w3", "model.layers.25.block_sparse_moe.experts.54.w3", "model.layers.25.block_sparse_moe.experts.55.w3", "model.layers.25.block_sparse_moe.experts.56.w3", "model.layers.25.block_sparse_moe.experts.57.w3", "model.layers.25.block_sparse_moe.experts.58.w3", "model.layers.25.block_sparse_moe.experts.59.w3", "model.layers.25.block_sparse_moe.experts.60.w3", "model.layers.25.block_sparse_moe.experts.61.w3", "model.layers.25.block_sparse_moe.experts.62.w3", "model.layers.25.block_sparse_moe.experts.63.w3", "model.layers.25.block_sparse_moe.experts.64.w3", "model.layers.25.block_sparse_moe.experts.65.w3", "model.layers.25.block_sparse_moe.experts.66.w3", "model.layers.25.block_sparse_moe.experts.67.w3", "model.layers.25.block_sparse_moe.experts.68.w3", "model.layers.25.block_sparse_moe.experts.69.w3", "model.layers.25.block_sparse_moe.experts.70.w3", "model.layers.25.block_sparse_moe.experts.71.w3", "model.layers.25.block_sparse_moe.experts.72.w3", "model.layers.25.block_sparse_moe.experts.73.w3", "model.layers.25.block_sparse_moe.experts.74.w3", "model.layers.25.block_sparse_moe.experts.75.w3", "model.layers.25.block_sparse_moe.experts.76.w3", "model.layers.25.block_sparse_moe.experts.77.w3", "model.layers.25.block_sparse_moe.experts.78.w3", "model.layers.25.block_sparse_moe.experts.79.w3", "model.layers.25.block_sparse_moe.experts.80.w3", "model.layers.25.block_sparse_moe.experts.81.w3", "model.layers.25.block_sparse_moe.experts.82.w3", "model.layers.25.block_sparse_moe.experts.83.w3", "model.layers.25.block_sparse_moe.experts.84.w3", "model.layers.25.block_sparse_moe.experts.85.w3", "model.layers.25.block_sparse_moe.experts.86.w3", "model.layers.25.block_sparse_moe.experts.87.w3", "model.layers.25.block_sparse_moe.experts.88.w3", "model.layers.25.block_sparse_moe.experts.89.w3", "model.layers.25.block_sparse_moe.experts.90.w3", "model.layers.25.block_sparse_moe.experts.91.w3", "model.layers.25.block_sparse_moe.experts.92.w3", "model.layers.25.block_sparse_moe.experts.93.w3", "model.layers.25.block_sparse_moe.experts.94.w3", "model.layers.25.block_sparse_moe.experts.95.w3", "model.layers.25.block_sparse_moe.experts.96.w3", "model.layers.25.block_sparse_moe.experts.97.w3", "model.layers.25.block_sparse_moe.experts.98.w3", "model.layers.25.block_sparse_moe.experts.99.w3", "model.layers.25.block_sparse_moe.experts.100.w3", "model.layers.25.block_sparse_moe.experts.101.w3", "model.layers.25.block_sparse_moe.experts.102.w3", "model.layers.25.block_sparse_moe.experts.103.w3", "model.layers.25.block_sparse_moe.experts.104.w3", "model.layers.25.block_sparse_moe.experts.105.w3", "model.layers.25.block_sparse_moe.experts.106.w3", "model.layers.25.block_sparse_moe.experts.107.w3", "model.layers.25.block_sparse_moe.experts.108.w3", "model.layers.25.block_sparse_moe.experts.109.w3", "model.layers.25.block_sparse_moe.experts.110.w3", "model.layers.25.block_sparse_moe.experts.111.w3", "model.layers.25.block_sparse_moe.experts.112.w3", "model.layers.25.block_sparse_moe.experts.113.w3", "model.layers.25.block_sparse_moe.experts.114.w3", "model.layers.25.block_sparse_moe.experts.115.w3", "model.layers.25.block_sparse_moe.experts.116.w3", "model.layers.25.block_sparse_moe.experts.117.w3", "model.layers.25.block_sparse_moe.experts.118.w3", "model.layers.25.block_sparse_moe.experts.119.w3", "model.layers.25.block_sparse_moe.experts.120.w3", "model.layers.25.block_sparse_moe.experts.121.w3", "model.layers.25.block_sparse_moe.experts.122.w3", "model.layers.25.block_sparse_moe.experts.123.w3", "model.layers.25.block_sparse_moe.experts.124.w3", "model.layers.25.block_sparse_moe.experts.125.w3", "model.layers.25.block_sparse_moe.experts.126.w3", "model.layers.25.block_sparse_moe.experts.127.w3", "model.layers.25.block_sparse_moe.experts.128.w3", "model.layers.25.block_sparse_moe.experts.129.w3", "model.layers.25.block_sparse_moe.experts.130.w3", "model.layers.25.block_sparse_moe.experts.131.w3", "model.layers.25.block_sparse_moe.experts.132.w3", "model.layers.25.block_sparse_moe.experts.133.w3", "model.layers.25.block_sparse_moe.experts.134.w3", "model.layers.25.block_sparse_moe.experts.135.w3", "model.layers.25.block_sparse_moe.experts.136.w3", "model.layers.25.block_sparse_moe.experts.137.w3", "model.layers.25.block_sparse_moe.experts.138.w3", "model.layers.25.block_sparse_moe.experts.139.w3", "model.layers.25.block_sparse_moe.experts.140.w3", "model.layers.25.block_sparse_moe.experts.141.w3", "model.layers.25.block_sparse_moe.experts.142.w3", "model.layers.25.block_sparse_moe.experts.143.w3", "model.layers.25.block_sparse_moe.experts.144.w3", "model.layers.25.block_sparse_moe.experts.145.w3", "model.layers.25.block_sparse_moe.experts.146.w3", "model.layers.25.block_sparse_moe.experts.147.w3", "model.layers.25.block_sparse_moe.experts.148.w3", "model.layers.25.block_sparse_moe.experts.149.w3", "model.layers.25.block_sparse_moe.experts.150.w3", "model.layers.25.block_sparse_moe.experts.151.w3", "model.layers.25.block_sparse_moe.experts.152.w3", "model.layers.25.block_sparse_moe.experts.153.w3", "model.layers.25.block_sparse_moe.experts.154.w3", "model.layers.25.block_sparse_moe.experts.155.w3", "model.layers.25.block_sparse_moe.experts.156.w3", "model.layers.25.block_sparse_moe.experts.157.w3", "model.layers.25.block_sparse_moe.experts.158.w3", "model.layers.25.block_sparse_moe.experts.159.w3", "model.layers.25.block_sparse_moe.experts.160.w3", "model.layers.25.block_sparse_moe.experts.161.w3", "model.layers.25.block_sparse_moe.experts.162.w3", "model.layers.25.block_sparse_moe.experts.163.w3", "model.layers.25.block_sparse_moe.experts.164.w3", "model.layers.25.block_sparse_moe.experts.165.w3", "model.layers.25.block_sparse_moe.experts.166.w3", "model.layers.25.block_sparse_moe.experts.167.w3", "model.layers.25.block_sparse_moe.experts.168.w3", "model.layers.25.block_sparse_moe.experts.169.w3", "model.layers.25.block_sparse_moe.experts.170.w3", "model.layers.25.block_sparse_moe.experts.171.w3", "model.layers.25.block_sparse_moe.experts.172.w3", "model.layers.25.block_sparse_moe.experts.173.w3", "model.layers.25.block_sparse_moe.experts.174.w3", "model.layers.25.block_sparse_moe.experts.175.w3", "model.layers.25.block_sparse_moe.experts.176.w3", "model.layers.25.block_sparse_moe.experts.177.w3", "model.layers.25.block_sparse_moe.experts.178.w3", "model.layers.25.block_sparse_moe.experts.179.w3", "model.layers.25.block_sparse_moe.experts.180.w3", "model.layers.25.block_sparse_moe.experts.181.w3", "model.layers.25.block_sparse_moe.experts.182.w3", "model.layers.25.block_sparse_moe.experts.183.w3", "model.layers.25.block_sparse_moe.experts.184.w3", "model.layers.25.block_sparse_moe.experts.185.w3", "model.layers.25.block_sparse_moe.experts.186.w3", "model.layers.25.block_sparse_moe.experts.187.w3", "model.layers.25.block_sparse_moe.experts.188.w3", "model.layers.25.block_sparse_moe.experts.189.w3", "model.layers.25.block_sparse_moe.experts.190.w3", "model.layers.25.block_sparse_moe.experts.191.w3", "model.layers.25.block_sparse_moe.experts.192.w3", "model.layers.25.block_sparse_moe.experts.193.w3", "model.layers.25.block_sparse_moe.experts.194.w3", "model.layers.25.block_sparse_moe.experts.195.w3", "model.layers.25.block_sparse_moe.experts.196.w3", "model.layers.25.block_sparse_moe.experts.197.w3", "model.layers.25.block_sparse_moe.experts.198.w3", "model.layers.25.block_sparse_moe.experts.199.w3", "model.layers.25.block_sparse_moe.experts.200.w3", "model.layers.25.block_sparse_moe.experts.201.w3", "model.layers.25.block_sparse_moe.experts.202.w3", "model.layers.25.block_sparse_moe.experts.203.w3", "model.layers.25.block_sparse_moe.experts.204.w3", "model.layers.25.block_sparse_moe.experts.205.w3", "model.layers.25.block_sparse_moe.experts.206.w3", "model.layers.25.block_sparse_moe.experts.207.w3", "model.layers.25.block_sparse_moe.experts.208.w3", "model.layers.25.block_sparse_moe.experts.209.w3", "model.layers.25.block_sparse_moe.experts.210.w3", "model.layers.25.block_sparse_moe.experts.211.w3", "model.layers.25.block_sparse_moe.experts.212.w3", "model.layers.25.block_sparse_moe.experts.213.w3", "model.layers.25.block_sparse_moe.experts.214.w3", "model.layers.25.block_sparse_moe.experts.215.w3", "model.layers.25.block_sparse_moe.experts.216.w3", "model.layers.25.block_sparse_moe.experts.217.w3", "model.layers.25.block_sparse_moe.experts.218.w3", "model.layers.25.block_sparse_moe.experts.219.w3", "model.layers.25.block_sparse_moe.experts.220.w3", "model.layers.25.block_sparse_moe.experts.221.w3", "model.layers.25.block_sparse_moe.experts.222.w3", "model.layers.25.block_sparse_moe.experts.223.w3", "model.layers.25.block_sparse_moe.experts.224.w3", "model.layers.25.block_sparse_moe.experts.225.w3", "model.layers.25.block_sparse_moe.experts.226.w3", "model.layers.25.block_sparse_moe.experts.227.w3", "model.layers.25.block_sparse_moe.experts.228.w3", "model.layers.25.block_sparse_moe.experts.229.w3", "model.layers.25.block_sparse_moe.experts.230.w3", "model.layers.25.block_sparse_moe.experts.231.w3", "model.layers.25.block_sparse_moe.experts.232.w3", "model.layers.25.block_sparse_moe.experts.233.w3", "model.layers.25.block_sparse_moe.experts.234.w3", "model.layers.25.block_sparse_moe.experts.235.w3", "model.layers.25.block_sparse_moe.experts.236.w3", "model.layers.25.block_sparse_moe.experts.237.w3", "model.layers.25.block_sparse_moe.experts.238.w3", "model.layers.25.block_sparse_moe.experts.239.w3", "model.layers.25.block_sparse_moe.experts.240.w3", "model.layers.25.block_sparse_moe.experts.241.w3", "model.layers.25.block_sparse_moe.experts.242.w3", "model.layers.25.block_sparse_moe.experts.243.w3", "model.layers.25.block_sparse_moe.experts.244.w3", "model.layers.25.block_sparse_moe.experts.245.w3", "model.layers.25.block_sparse_moe.experts.246.w3", "model.layers.25.block_sparse_moe.experts.247.w3", "model.layers.25.block_sparse_moe.experts.248.w3", "model.layers.25.block_sparse_moe.experts.249.w3", "model.layers.25.block_sparse_moe.experts.250.w3", "model.layers.25.block_sparse_moe.experts.251.w3", "model.layers.25.block_sparse_moe.experts.252.w3", "model.layers.25.block_sparse_moe.experts.253.w3", "model.layers.25.block_sparse_moe.experts.254.w3", "model.layers.25.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0062103271484375, "dbits": 2415919104 } ] }, { "idx": 129, "layers": [ "model.layers.25.block_sparse_moe.experts.0.w2", "model.layers.25.block_sparse_moe.experts.1.w2", "model.layers.25.block_sparse_moe.experts.2.w2", "model.layers.25.block_sparse_moe.experts.3.w2", "model.layers.25.block_sparse_moe.experts.4.w2", "model.layers.25.block_sparse_moe.experts.5.w2", "model.layers.25.block_sparse_moe.experts.6.w2", "model.layers.25.block_sparse_moe.experts.7.w2", "model.layers.25.block_sparse_moe.experts.8.w2", "model.layers.25.block_sparse_moe.experts.9.w2", "model.layers.25.block_sparse_moe.experts.10.w2", "model.layers.25.block_sparse_moe.experts.11.w2", "model.layers.25.block_sparse_moe.experts.12.w2", "model.layers.25.block_sparse_moe.experts.13.w2", "model.layers.25.block_sparse_moe.experts.14.w2", "model.layers.25.block_sparse_moe.experts.15.w2", "model.layers.25.block_sparse_moe.experts.16.w2", "model.layers.25.block_sparse_moe.experts.17.w2", "model.layers.25.block_sparse_moe.experts.18.w2", "model.layers.25.block_sparse_moe.experts.19.w2", "model.layers.25.block_sparse_moe.experts.20.w2", "model.layers.25.block_sparse_moe.experts.21.w2", "model.layers.25.block_sparse_moe.experts.22.w2", "model.layers.25.block_sparse_moe.experts.23.w2", "model.layers.25.block_sparse_moe.experts.24.w2", "model.layers.25.block_sparse_moe.experts.25.w2", "model.layers.25.block_sparse_moe.experts.26.w2", "model.layers.25.block_sparse_moe.experts.27.w2", "model.layers.25.block_sparse_moe.experts.28.w2", "model.layers.25.block_sparse_moe.experts.29.w2", "model.layers.25.block_sparse_moe.experts.30.w2", "model.layers.25.block_sparse_moe.experts.31.w2", "model.layers.25.block_sparse_moe.experts.32.w2", "model.layers.25.block_sparse_moe.experts.33.w2", "model.layers.25.block_sparse_moe.experts.34.w2", "model.layers.25.block_sparse_moe.experts.35.w2", "model.layers.25.block_sparse_moe.experts.36.w2", "model.layers.25.block_sparse_moe.experts.37.w2", "model.layers.25.block_sparse_moe.experts.38.w2", "model.layers.25.block_sparse_moe.experts.39.w2", "model.layers.25.block_sparse_moe.experts.40.w2", "model.layers.25.block_sparse_moe.experts.41.w2", "model.layers.25.block_sparse_moe.experts.42.w2", "model.layers.25.block_sparse_moe.experts.43.w2", "model.layers.25.block_sparse_moe.experts.44.w2", "model.layers.25.block_sparse_moe.experts.45.w2", "model.layers.25.block_sparse_moe.experts.46.w2", "model.layers.25.block_sparse_moe.experts.47.w2", "model.layers.25.block_sparse_moe.experts.48.w2", "model.layers.25.block_sparse_moe.experts.49.w2", "model.layers.25.block_sparse_moe.experts.50.w2", "model.layers.25.block_sparse_moe.experts.51.w2", "model.layers.25.block_sparse_moe.experts.52.w2", "model.layers.25.block_sparse_moe.experts.53.w2", "model.layers.25.block_sparse_moe.experts.54.w2", "model.layers.25.block_sparse_moe.experts.55.w2", "model.layers.25.block_sparse_moe.experts.56.w2", "model.layers.25.block_sparse_moe.experts.57.w2", "model.layers.25.block_sparse_moe.experts.58.w2", "model.layers.25.block_sparse_moe.experts.59.w2", "model.layers.25.block_sparse_moe.experts.60.w2", "model.layers.25.block_sparse_moe.experts.61.w2", "model.layers.25.block_sparse_moe.experts.62.w2", "model.layers.25.block_sparse_moe.experts.63.w2", "model.layers.25.block_sparse_moe.experts.64.w2", "model.layers.25.block_sparse_moe.experts.65.w2", "model.layers.25.block_sparse_moe.experts.66.w2", "model.layers.25.block_sparse_moe.experts.67.w2", "model.layers.25.block_sparse_moe.experts.68.w2", "model.layers.25.block_sparse_moe.experts.69.w2", "model.layers.25.block_sparse_moe.experts.70.w2", "model.layers.25.block_sparse_moe.experts.71.w2", "model.layers.25.block_sparse_moe.experts.72.w2", "model.layers.25.block_sparse_moe.experts.73.w2", "model.layers.25.block_sparse_moe.experts.74.w2", "model.layers.25.block_sparse_moe.experts.75.w2", "model.layers.25.block_sparse_moe.experts.76.w2", "model.layers.25.block_sparse_moe.experts.77.w2", "model.layers.25.block_sparse_moe.experts.78.w2", "model.layers.25.block_sparse_moe.experts.79.w2", "model.layers.25.block_sparse_moe.experts.80.w2", "model.layers.25.block_sparse_moe.experts.81.w2", "model.layers.25.block_sparse_moe.experts.82.w2", "model.layers.25.block_sparse_moe.experts.83.w2", "model.layers.25.block_sparse_moe.experts.84.w2", "model.layers.25.block_sparse_moe.experts.85.w2", "model.layers.25.block_sparse_moe.experts.86.w2", "model.layers.25.block_sparse_moe.experts.87.w2", "model.layers.25.block_sparse_moe.experts.88.w2", "model.layers.25.block_sparse_moe.experts.89.w2", "model.layers.25.block_sparse_moe.experts.90.w2", "model.layers.25.block_sparse_moe.experts.91.w2", "model.layers.25.block_sparse_moe.experts.92.w2", "model.layers.25.block_sparse_moe.experts.93.w2", "model.layers.25.block_sparse_moe.experts.94.w2", "model.layers.25.block_sparse_moe.experts.95.w2", "model.layers.25.block_sparse_moe.experts.96.w2", "model.layers.25.block_sparse_moe.experts.97.w2", "model.layers.25.block_sparse_moe.experts.98.w2", "model.layers.25.block_sparse_moe.experts.99.w2", "model.layers.25.block_sparse_moe.experts.100.w2", "model.layers.25.block_sparse_moe.experts.101.w2", "model.layers.25.block_sparse_moe.experts.102.w2", "model.layers.25.block_sparse_moe.experts.103.w2", "model.layers.25.block_sparse_moe.experts.104.w2", "model.layers.25.block_sparse_moe.experts.105.w2", "model.layers.25.block_sparse_moe.experts.106.w2", "model.layers.25.block_sparse_moe.experts.107.w2", "model.layers.25.block_sparse_moe.experts.108.w2", "model.layers.25.block_sparse_moe.experts.109.w2", "model.layers.25.block_sparse_moe.experts.110.w2", "model.layers.25.block_sparse_moe.experts.111.w2", "model.layers.25.block_sparse_moe.experts.112.w2", "model.layers.25.block_sparse_moe.experts.113.w2", "model.layers.25.block_sparse_moe.experts.114.w2", "model.layers.25.block_sparse_moe.experts.115.w2", "model.layers.25.block_sparse_moe.experts.116.w2", "model.layers.25.block_sparse_moe.experts.117.w2", "model.layers.25.block_sparse_moe.experts.118.w2", "model.layers.25.block_sparse_moe.experts.119.w2", "model.layers.25.block_sparse_moe.experts.120.w2", "model.layers.25.block_sparse_moe.experts.121.w2", "model.layers.25.block_sparse_moe.experts.122.w2", "model.layers.25.block_sparse_moe.experts.123.w2", "model.layers.25.block_sparse_moe.experts.124.w2", "model.layers.25.block_sparse_moe.experts.125.w2", "model.layers.25.block_sparse_moe.experts.126.w2", "model.layers.25.block_sparse_moe.experts.127.w2", "model.layers.25.block_sparse_moe.experts.128.w2", "model.layers.25.block_sparse_moe.experts.129.w2", "model.layers.25.block_sparse_moe.experts.130.w2", "model.layers.25.block_sparse_moe.experts.131.w2", "model.layers.25.block_sparse_moe.experts.132.w2", "model.layers.25.block_sparse_moe.experts.133.w2", "model.layers.25.block_sparse_moe.experts.134.w2", "model.layers.25.block_sparse_moe.experts.135.w2", "model.layers.25.block_sparse_moe.experts.136.w2", "model.layers.25.block_sparse_moe.experts.137.w2", "model.layers.25.block_sparse_moe.experts.138.w2", "model.layers.25.block_sparse_moe.experts.139.w2", "model.layers.25.block_sparse_moe.experts.140.w2", "model.layers.25.block_sparse_moe.experts.141.w2", "model.layers.25.block_sparse_moe.experts.142.w2", "model.layers.25.block_sparse_moe.experts.143.w2", "model.layers.25.block_sparse_moe.experts.144.w2", "model.layers.25.block_sparse_moe.experts.145.w2", "model.layers.25.block_sparse_moe.experts.146.w2", "model.layers.25.block_sparse_moe.experts.147.w2", "model.layers.25.block_sparse_moe.experts.148.w2", "model.layers.25.block_sparse_moe.experts.149.w2", "model.layers.25.block_sparse_moe.experts.150.w2", "model.layers.25.block_sparse_moe.experts.151.w2", "model.layers.25.block_sparse_moe.experts.152.w2", "model.layers.25.block_sparse_moe.experts.153.w2", "model.layers.25.block_sparse_moe.experts.154.w2", "model.layers.25.block_sparse_moe.experts.155.w2", "model.layers.25.block_sparse_moe.experts.156.w2", "model.layers.25.block_sparse_moe.experts.157.w2", "model.layers.25.block_sparse_moe.experts.158.w2", "model.layers.25.block_sparse_moe.experts.159.w2", "model.layers.25.block_sparse_moe.experts.160.w2", "model.layers.25.block_sparse_moe.experts.161.w2", "model.layers.25.block_sparse_moe.experts.162.w2", "model.layers.25.block_sparse_moe.experts.163.w2", "model.layers.25.block_sparse_moe.experts.164.w2", "model.layers.25.block_sparse_moe.experts.165.w2", "model.layers.25.block_sparse_moe.experts.166.w2", "model.layers.25.block_sparse_moe.experts.167.w2", "model.layers.25.block_sparse_moe.experts.168.w2", "model.layers.25.block_sparse_moe.experts.169.w2", "model.layers.25.block_sparse_moe.experts.170.w2", "model.layers.25.block_sparse_moe.experts.171.w2", "model.layers.25.block_sparse_moe.experts.172.w2", "model.layers.25.block_sparse_moe.experts.173.w2", "model.layers.25.block_sparse_moe.experts.174.w2", "model.layers.25.block_sparse_moe.experts.175.w2", "model.layers.25.block_sparse_moe.experts.176.w2", "model.layers.25.block_sparse_moe.experts.177.w2", "model.layers.25.block_sparse_moe.experts.178.w2", "model.layers.25.block_sparse_moe.experts.179.w2", "model.layers.25.block_sparse_moe.experts.180.w2", "model.layers.25.block_sparse_moe.experts.181.w2", "model.layers.25.block_sparse_moe.experts.182.w2", "model.layers.25.block_sparse_moe.experts.183.w2", "model.layers.25.block_sparse_moe.experts.184.w2", "model.layers.25.block_sparse_moe.experts.185.w2", "model.layers.25.block_sparse_moe.experts.186.w2", "model.layers.25.block_sparse_moe.experts.187.w2", "model.layers.25.block_sparse_moe.experts.188.w2", "model.layers.25.block_sparse_moe.experts.189.w2", "model.layers.25.block_sparse_moe.experts.190.w2", "model.layers.25.block_sparse_moe.experts.191.w2", "model.layers.25.block_sparse_moe.experts.192.w2", "model.layers.25.block_sparse_moe.experts.193.w2", "model.layers.25.block_sparse_moe.experts.194.w2", "model.layers.25.block_sparse_moe.experts.195.w2", "model.layers.25.block_sparse_moe.experts.196.w2", "model.layers.25.block_sparse_moe.experts.197.w2", "model.layers.25.block_sparse_moe.experts.198.w2", "model.layers.25.block_sparse_moe.experts.199.w2", "model.layers.25.block_sparse_moe.experts.200.w2", "model.layers.25.block_sparse_moe.experts.201.w2", "model.layers.25.block_sparse_moe.experts.202.w2", "model.layers.25.block_sparse_moe.experts.203.w2", "model.layers.25.block_sparse_moe.experts.204.w2", "model.layers.25.block_sparse_moe.experts.205.w2", "model.layers.25.block_sparse_moe.experts.206.w2", "model.layers.25.block_sparse_moe.experts.207.w2", "model.layers.25.block_sparse_moe.experts.208.w2", "model.layers.25.block_sparse_moe.experts.209.w2", "model.layers.25.block_sparse_moe.experts.210.w2", "model.layers.25.block_sparse_moe.experts.211.w2", "model.layers.25.block_sparse_moe.experts.212.w2", "model.layers.25.block_sparse_moe.experts.213.w2", "model.layers.25.block_sparse_moe.experts.214.w2", "model.layers.25.block_sparse_moe.experts.215.w2", "model.layers.25.block_sparse_moe.experts.216.w2", "model.layers.25.block_sparse_moe.experts.217.w2", "model.layers.25.block_sparse_moe.experts.218.w2", "model.layers.25.block_sparse_moe.experts.219.w2", "model.layers.25.block_sparse_moe.experts.220.w2", "model.layers.25.block_sparse_moe.experts.221.w2", "model.layers.25.block_sparse_moe.experts.222.w2", "model.layers.25.block_sparse_moe.experts.223.w2", "model.layers.25.block_sparse_moe.experts.224.w2", "model.layers.25.block_sparse_moe.experts.225.w2", "model.layers.25.block_sparse_moe.experts.226.w2", "model.layers.25.block_sparse_moe.experts.227.w2", "model.layers.25.block_sparse_moe.experts.228.w2", "model.layers.25.block_sparse_moe.experts.229.w2", "model.layers.25.block_sparse_moe.experts.230.w2", "model.layers.25.block_sparse_moe.experts.231.w2", "model.layers.25.block_sparse_moe.experts.232.w2", "model.layers.25.block_sparse_moe.experts.233.w2", "model.layers.25.block_sparse_moe.experts.234.w2", "model.layers.25.block_sparse_moe.experts.235.w2", "model.layers.25.block_sparse_moe.experts.236.w2", "model.layers.25.block_sparse_moe.experts.237.w2", "model.layers.25.block_sparse_moe.experts.238.w2", "model.layers.25.block_sparse_moe.experts.239.w2", "model.layers.25.block_sparse_moe.experts.240.w2", "model.layers.25.block_sparse_moe.experts.241.w2", "model.layers.25.block_sparse_moe.experts.242.w2", "model.layers.25.block_sparse_moe.experts.243.w2", "model.layers.25.block_sparse_moe.experts.244.w2", "model.layers.25.block_sparse_moe.experts.245.w2", "model.layers.25.block_sparse_moe.experts.246.w2", "model.layers.25.block_sparse_moe.experts.247.w2", "model.layers.25.block_sparse_moe.experts.248.w2", "model.layers.25.block_sparse_moe.experts.249.w2", "model.layers.25.block_sparse_moe.experts.250.w2", "model.layers.25.block_sparse_moe.experts.251.w2", "model.layers.25.block_sparse_moe.experts.252.w2", "model.layers.25.block_sparse_moe.experts.253.w2", "model.layers.25.block_sparse_moe.experts.254.w2", "model.layers.25.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0003796339035034846, "dbits": 1207959552 } ] }, { "idx": 130, "layers": [ "model.layers.26.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0034618169069290383, "dbits": 18874368 } ] }, { "idx": 131, "layers": [ "model.layers.26.self_attn.k_proj", "model.layers.26.self_attn.v_proj" ], "candidates": [ { "dkld": -0.019314816594123818, "dbits": 6291456 } ] }, { "idx": 132, "layers": [ "model.layers.26.self_attn.o_proj" ], "candidates": [ { "dkld": 0.000763940811157271, "dbits": 18874368 } ] }, { "idx": 133, "layers": [ "model.layers.26.block_sparse_moe.experts.0.w1", "model.layers.26.block_sparse_moe.experts.1.w1", "model.layers.26.block_sparse_moe.experts.2.w1", "model.layers.26.block_sparse_moe.experts.3.w1", "model.layers.26.block_sparse_moe.experts.4.w1", "model.layers.26.block_sparse_moe.experts.5.w1", "model.layers.26.block_sparse_moe.experts.6.w1", "model.layers.26.block_sparse_moe.experts.7.w1", "model.layers.26.block_sparse_moe.experts.8.w1", "model.layers.26.block_sparse_moe.experts.9.w1", "model.layers.26.block_sparse_moe.experts.10.w1", "model.layers.26.block_sparse_moe.experts.11.w1", "model.layers.26.block_sparse_moe.experts.12.w1", "model.layers.26.block_sparse_moe.experts.13.w1", "model.layers.26.block_sparse_moe.experts.14.w1", "model.layers.26.block_sparse_moe.experts.15.w1", "model.layers.26.block_sparse_moe.experts.16.w1", "model.layers.26.block_sparse_moe.experts.17.w1", "model.layers.26.block_sparse_moe.experts.18.w1", "model.layers.26.block_sparse_moe.experts.19.w1", "model.layers.26.block_sparse_moe.experts.20.w1", "model.layers.26.block_sparse_moe.experts.21.w1", "model.layers.26.block_sparse_moe.experts.22.w1", "model.layers.26.block_sparse_moe.experts.23.w1", "model.layers.26.block_sparse_moe.experts.24.w1", "model.layers.26.block_sparse_moe.experts.25.w1", "model.layers.26.block_sparse_moe.experts.26.w1", "model.layers.26.block_sparse_moe.experts.27.w1", "model.layers.26.block_sparse_moe.experts.28.w1", "model.layers.26.block_sparse_moe.experts.29.w1", "model.layers.26.block_sparse_moe.experts.30.w1", "model.layers.26.block_sparse_moe.experts.31.w1", "model.layers.26.block_sparse_moe.experts.32.w1", "model.layers.26.block_sparse_moe.experts.33.w1", "model.layers.26.block_sparse_moe.experts.34.w1", "model.layers.26.block_sparse_moe.experts.35.w1", "model.layers.26.block_sparse_moe.experts.36.w1", "model.layers.26.block_sparse_moe.experts.37.w1", "model.layers.26.block_sparse_moe.experts.38.w1", "model.layers.26.block_sparse_moe.experts.39.w1", "model.layers.26.block_sparse_moe.experts.40.w1", "model.layers.26.block_sparse_moe.experts.41.w1", "model.layers.26.block_sparse_moe.experts.42.w1", "model.layers.26.block_sparse_moe.experts.43.w1", "model.layers.26.block_sparse_moe.experts.44.w1", "model.layers.26.block_sparse_moe.experts.45.w1", "model.layers.26.block_sparse_moe.experts.46.w1", "model.layers.26.block_sparse_moe.experts.47.w1", "model.layers.26.block_sparse_moe.experts.48.w1", "model.layers.26.block_sparse_moe.experts.49.w1", "model.layers.26.block_sparse_moe.experts.50.w1", "model.layers.26.block_sparse_moe.experts.51.w1", "model.layers.26.block_sparse_moe.experts.52.w1", "model.layers.26.block_sparse_moe.experts.53.w1", "model.layers.26.block_sparse_moe.experts.54.w1", "model.layers.26.block_sparse_moe.experts.55.w1", "model.layers.26.block_sparse_moe.experts.56.w1", "model.layers.26.block_sparse_moe.experts.57.w1", "model.layers.26.block_sparse_moe.experts.58.w1", "model.layers.26.block_sparse_moe.experts.59.w1", "model.layers.26.block_sparse_moe.experts.60.w1", "model.layers.26.block_sparse_moe.experts.61.w1", "model.layers.26.block_sparse_moe.experts.62.w1", "model.layers.26.block_sparse_moe.experts.63.w1", "model.layers.26.block_sparse_moe.experts.64.w1", "model.layers.26.block_sparse_moe.experts.65.w1", "model.layers.26.block_sparse_moe.experts.66.w1", "model.layers.26.block_sparse_moe.experts.67.w1", "model.layers.26.block_sparse_moe.experts.68.w1", "model.layers.26.block_sparse_moe.experts.69.w1", "model.layers.26.block_sparse_moe.experts.70.w1", "model.layers.26.block_sparse_moe.experts.71.w1", "model.layers.26.block_sparse_moe.experts.72.w1", "model.layers.26.block_sparse_moe.experts.73.w1", "model.layers.26.block_sparse_moe.experts.74.w1", "model.layers.26.block_sparse_moe.experts.75.w1", "model.layers.26.block_sparse_moe.experts.76.w1", "model.layers.26.block_sparse_moe.experts.77.w1", "model.layers.26.block_sparse_moe.experts.78.w1", "model.layers.26.block_sparse_moe.experts.79.w1", "model.layers.26.block_sparse_moe.experts.80.w1", "model.layers.26.block_sparse_moe.experts.81.w1", "model.layers.26.block_sparse_moe.experts.82.w1", "model.layers.26.block_sparse_moe.experts.83.w1", "model.layers.26.block_sparse_moe.experts.84.w1", "model.layers.26.block_sparse_moe.experts.85.w1", "model.layers.26.block_sparse_moe.experts.86.w1", "model.layers.26.block_sparse_moe.experts.87.w1", "model.layers.26.block_sparse_moe.experts.88.w1", "model.layers.26.block_sparse_moe.experts.89.w1", "model.layers.26.block_sparse_moe.experts.90.w1", "model.layers.26.block_sparse_moe.experts.91.w1", "model.layers.26.block_sparse_moe.experts.92.w1", "model.layers.26.block_sparse_moe.experts.93.w1", "model.layers.26.block_sparse_moe.experts.94.w1", "model.layers.26.block_sparse_moe.experts.95.w1", "model.layers.26.block_sparse_moe.experts.96.w1", "model.layers.26.block_sparse_moe.experts.97.w1", "model.layers.26.block_sparse_moe.experts.98.w1", "model.layers.26.block_sparse_moe.experts.99.w1", "model.layers.26.block_sparse_moe.experts.100.w1", "model.layers.26.block_sparse_moe.experts.101.w1", "model.layers.26.block_sparse_moe.experts.102.w1", "model.layers.26.block_sparse_moe.experts.103.w1", "model.layers.26.block_sparse_moe.experts.104.w1", "model.layers.26.block_sparse_moe.experts.105.w1", "model.layers.26.block_sparse_moe.experts.106.w1", "model.layers.26.block_sparse_moe.experts.107.w1", "model.layers.26.block_sparse_moe.experts.108.w1", "model.layers.26.block_sparse_moe.experts.109.w1", "model.layers.26.block_sparse_moe.experts.110.w1", "model.layers.26.block_sparse_moe.experts.111.w1", "model.layers.26.block_sparse_moe.experts.112.w1", "model.layers.26.block_sparse_moe.experts.113.w1", "model.layers.26.block_sparse_moe.experts.114.w1", "model.layers.26.block_sparse_moe.experts.115.w1", "model.layers.26.block_sparse_moe.experts.116.w1", "model.layers.26.block_sparse_moe.experts.117.w1", "model.layers.26.block_sparse_moe.experts.118.w1", "model.layers.26.block_sparse_moe.experts.119.w1", "model.layers.26.block_sparse_moe.experts.120.w1", "model.layers.26.block_sparse_moe.experts.121.w1", "model.layers.26.block_sparse_moe.experts.122.w1", "model.layers.26.block_sparse_moe.experts.123.w1", "model.layers.26.block_sparse_moe.experts.124.w1", "model.layers.26.block_sparse_moe.experts.125.w1", "model.layers.26.block_sparse_moe.experts.126.w1", "model.layers.26.block_sparse_moe.experts.127.w1", "model.layers.26.block_sparse_moe.experts.128.w1", "model.layers.26.block_sparse_moe.experts.129.w1", "model.layers.26.block_sparse_moe.experts.130.w1", "model.layers.26.block_sparse_moe.experts.131.w1", "model.layers.26.block_sparse_moe.experts.132.w1", "model.layers.26.block_sparse_moe.experts.133.w1", "model.layers.26.block_sparse_moe.experts.134.w1", "model.layers.26.block_sparse_moe.experts.135.w1", "model.layers.26.block_sparse_moe.experts.136.w1", "model.layers.26.block_sparse_moe.experts.137.w1", "model.layers.26.block_sparse_moe.experts.138.w1", "model.layers.26.block_sparse_moe.experts.139.w1", "model.layers.26.block_sparse_moe.experts.140.w1", "model.layers.26.block_sparse_moe.experts.141.w1", "model.layers.26.block_sparse_moe.experts.142.w1", "model.layers.26.block_sparse_moe.experts.143.w1", "model.layers.26.block_sparse_moe.experts.144.w1", "model.layers.26.block_sparse_moe.experts.145.w1", "model.layers.26.block_sparse_moe.experts.146.w1", "model.layers.26.block_sparse_moe.experts.147.w1", "model.layers.26.block_sparse_moe.experts.148.w1", "model.layers.26.block_sparse_moe.experts.149.w1", "model.layers.26.block_sparse_moe.experts.150.w1", "model.layers.26.block_sparse_moe.experts.151.w1", "model.layers.26.block_sparse_moe.experts.152.w1", "model.layers.26.block_sparse_moe.experts.153.w1", "model.layers.26.block_sparse_moe.experts.154.w1", "model.layers.26.block_sparse_moe.experts.155.w1", "model.layers.26.block_sparse_moe.experts.156.w1", "model.layers.26.block_sparse_moe.experts.157.w1", "model.layers.26.block_sparse_moe.experts.158.w1", "model.layers.26.block_sparse_moe.experts.159.w1", "model.layers.26.block_sparse_moe.experts.160.w1", "model.layers.26.block_sparse_moe.experts.161.w1", "model.layers.26.block_sparse_moe.experts.162.w1", "model.layers.26.block_sparse_moe.experts.163.w1", "model.layers.26.block_sparse_moe.experts.164.w1", "model.layers.26.block_sparse_moe.experts.165.w1", "model.layers.26.block_sparse_moe.experts.166.w1", "model.layers.26.block_sparse_moe.experts.167.w1", "model.layers.26.block_sparse_moe.experts.168.w1", "model.layers.26.block_sparse_moe.experts.169.w1", "model.layers.26.block_sparse_moe.experts.170.w1", "model.layers.26.block_sparse_moe.experts.171.w1", "model.layers.26.block_sparse_moe.experts.172.w1", "model.layers.26.block_sparse_moe.experts.173.w1", "model.layers.26.block_sparse_moe.experts.174.w1", "model.layers.26.block_sparse_moe.experts.175.w1", "model.layers.26.block_sparse_moe.experts.176.w1", "model.layers.26.block_sparse_moe.experts.177.w1", "model.layers.26.block_sparse_moe.experts.178.w1", "model.layers.26.block_sparse_moe.experts.179.w1", "model.layers.26.block_sparse_moe.experts.180.w1", "model.layers.26.block_sparse_moe.experts.181.w1", "model.layers.26.block_sparse_moe.experts.182.w1", "model.layers.26.block_sparse_moe.experts.183.w1", "model.layers.26.block_sparse_moe.experts.184.w1", "model.layers.26.block_sparse_moe.experts.185.w1", "model.layers.26.block_sparse_moe.experts.186.w1", "model.layers.26.block_sparse_moe.experts.187.w1", "model.layers.26.block_sparse_moe.experts.188.w1", "model.layers.26.block_sparse_moe.experts.189.w1", "model.layers.26.block_sparse_moe.experts.190.w1", "model.layers.26.block_sparse_moe.experts.191.w1", "model.layers.26.block_sparse_moe.experts.192.w1", "model.layers.26.block_sparse_moe.experts.193.w1", "model.layers.26.block_sparse_moe.experts.194.w1", "model.layers.26.block_sparse_moe.experts.195.w1", "model.layers.26.block_sparse_moe.experts.196.w1", "model.layers.26.block_sparse_moe.experts.197.w1", "model.layers.26.block_sparse_moe.experts.198.w1", "model.layers.26.block_sparse_moe.experts.199.w1", "model.layers.26.block_sparse_moe.experts.200.w1", "model.layers.26.block_sparse_moe.experts.201.w1", "model.layers.26.block_sparse_moe.experts.202.w1", "model.layers.26.block_sparse_moe.experts.203.w1", "model.layers.26.block_sparse_moe.experts.204.w1", "model.layers.26.block_sparse_moe.experts.205.w1", "model.layers.26.block_sparse_moe.experts.206.w1", "model.layers.26.block_sparse_moe.experts.207.w1", "model.layers.26.block_sparse_moe.experts.208.w1", "model.layers.26.block_sparse_moe.experts.209.w1", "model.layers.26.block_sparse_moe.experts.210.w1", "model.layers.26.block_sparse_moe.experts.211.w1", "model.layers.26.block_sparse_moe.experts.212.w1", "model.layers.26.block_sparse_moe.experts.213.w1", "model.layers.26.block_sparse_moe.experts.214.w1", "model.layers.26.block_sparse_moe.experts.215.w1", "model.layers.26.block_sparse_moe.experts.216.w1", "model.layers.26.block_sparse_moe.experts.217.w1", "model.layers.26.block_sparse_moe.experts.218.w1", "model.layers.26.block_sparse_moe.experts.219.w1", "model.layers.26.block_sparse_moe.experts.220.w1", "model.layers.26.block_sparse_moe.experts.221.w1", "model.layers.26.block_sparse_moe.experts.222.w1", "model.layers.26.block_sparse_moe.experts.223.w1", "model.layers.26.block_sparse_moe.experts.224.w1", "model.layers.26.block_sparse_moe.experts.225.w1", "model.layers.26.block_sparse_moe.experts.226.w1", "model.layers.26.block_sparse_moe.experts.227.w1", "model.layers.26.block_sparse_moe.experts.228.w1", "model.layers.26.block_sparse_moe.experts.229.w1", "model.layers.26.block_sparse_moe.experts.230.w1", "model.layers.26.block_sparse_moe.experts.231.w1", "model.layers.26.block_sparse_moe.experts.232.w1", "model.layers.26.block_sparse_moe.experts.233.w1", "model.layers.26.block_sparse_moe.experts.234.w1", "model.layers.26.block_sparse_moe.experts.235.w1", "model.layers.26.block_sparse_moe.experts.236.w1", "model.layers.26.block_sparse_moe.experts.237.w1", "model.layers.26.block_sparse_moe.experts.238.w1", "model.layers.26.block_sparse_moe.experts.239.w1", "model.layers.26.block_sparse_moe.experts.240.w1", "model.layers.26.block_sparse_moe.experts.241.w1", "model.layers.26.block_sparse_moe.experts.242.w1", "model.layers.26.block_sparse_moe.experts.243.w1", "model.layers.26.block_sparse_moe.experts.244.w1", "model.layers.26.block_sparse_moe.experts.245.w1", "model.layers.26.block_sparse_moe.experts.246.w1", "model.layers.26.block_sparse_moe.experts.247.w1", "model.layers.26.block_sparse_moe.experts.248.w1", "model.layers.26.block_sparse_moe.experts.249.w1", "model.layers.26.block_sparse_moe.experts.250.w1", "model.layers.26.block_sparse_moe.experts.251.w1", "model.layers.26.block_sparse_moe.experts.252.w1", "model.layers.26.block_sparse_moe.experts.253.w1", "model.layers.26.block_sparse_moe.experts.254.w1", "model.layers.26.block_sparse_moe.experts.255.w1", "model.layers.26.block_sparse_moe.experts.0.w3", "model.layers.26.block_sparse_moe.experts.1.w3", "model.layers.26.block_sparse_moe.experts.2.w3", "model.layers.26.block_sparse_moe.experts.3.w3", "model.layers.26.block_sparse_moe.experts.4.w3", "model.layers.26.block_sparse_moe.experts.5.w3", "model.layers.26.block_sparse_moe.experts.6.w3", "model.layers.26.block_sparse_moe.experts.7.w3", "model.layers.26.block_sparse_moe.experts.8.w3", "model.layers.26.block_sparse_moe.experts.9.w3", "model.layers.26.block_sparse_moe.experts.10.w3", "model.layers.26.block_sparse_moe.experts.11.w3", "model.layers.26.block_sparse_moe.experts.12.w3", "model.layers.26.block_sparse_moe.experts.13.w3", "model.layers.26.block_sparse_moe.experts.14.w3", "model.layers.26.block_sparse_moe.experts.15.w3", "model.layers.26.block_sparse_moe.experts.16.w3", "model.layers.26.block_sparse_moe.experts.17.w3", "model.layers.26.block_sparse_moe.experts.18.w3", "model.layers.26.block_sparse_moe.experts.19.w3", "model.layers.26.block_sparse_moe.experts.20.w3", "model.layers.26.block_sparse_moe.experts.21.w3", "model.layers.26.block_sparse_moe.experts.22.w3", "model.layers.26.block_sparse_moe.experts.23.w3", "model.layers.26.block_sparse_moe.experts.24.w3", "model.layers.26.block_sparse_moe.experts.25.w3", "model.layers.26.block_sparse_moe.experts.26.w3", "model.layers.26.block_sparse_moe.experts.27.w3", "model.layers.26.block_sparse_moe.experts.28.w3", "model.layers.26.block_sparse_moe.experts.29.w3", "model.layers.26.block_sparse_moe.experts.30.w3", "model.layers.26.block_sparse_moe.experts.31.w3", "model.layers.26.block_sparse_moe.experts.32.w3", "model.layers.26.block_sparse_moe.experts.33.w3", "model.layers.26.block_sparse_moe.experts.34.w3", "model.layers.26.block_sparse_moe.experts.35.w3", "model.layers.26.block_sparse_moe.experts.36.w3", "model.layers.26.block_sparse_moe.experts.37.w3", "model.layers.26.block_sparse_moe.experts.38.w3", "model.layers.26.block_sparse_moe.experts.39.w3", "model.layers.26.block_sparse_moe.experts.40.w3", "model.layers.26.block_sparse_moe.experts.41.w3", "model.layers.26.block_sparse_moe.experts.42.w3", "model.layers.26.block_sparse_moe.experts.43.w3", "model.layers.26.block_sparse_moe.experts.44.w3", "model.layers.26.block_sparse_moe.experts.45.w3", "model.layers.26.block_sparse_moe.experts.46.w3", "model.layers.26.block_sparse_moe.experts.47.w3", "model.layers.26.block_sparse_moe.experts.48.w3", "model.layers.26.block_sparse_moe.experts.49.w3", "model.layers.26.block_sparse_moe.experts.50.w3", "model.layers.26.block_sparse_moe.experts.51.w3", "model.layers.26.block_sparse_moe.experts.52.w3", "model.layers.26.block_sparse_moe.experts.53.w3", "model.layers.26.block_sparse_moe.experts.54.w3", "model.layers.26.block_sparse_moe.experts.55.w3", "model.layers.26.block_sparse_moe.experts.56.w3", "model.layers.26.block_sparse_moe.experts.57.w3", "model.layers.26.block_sparse_moe.experts.58.w3", "model.layers.26.block_sparse_moe.experts.59.w3", "model.layers.26.block_sparse_moe.experts.60.w3", "model.layers.26.block_sparse_moe.experts.61.w3", "model.layers.26.block_sparse_moe.experts.62.w3", "model.layers.26.block_sparse_moe.experts.63.w3", "model.layers.26.block_sparse_moe.experts.64.w3", "model.layers.26.block_sparse_moe.experts.65.w3", "model.layers.26.block_sparse_moe.experts.66.w3", "model.layers.26.block_sparse_moe.experts.67.w3", "model.layers.26.block_sparse_moe.experts.68.w3", "model.layers.26.block_sparse_moe.experts.69.w3", "model.layers.26.block_sparse_moe.experts.70.w3", "model.layers.26.block_sparse_moe.experts.71.w3", "model.layers.26.block_sparse_moe.experts.72.w3", "model.layers.26.block_sparse_moe.experts.73.w3", "model.layers.26.block_sparse_moe.experts.74.w3", "model.layers.26.block_sparse_moe.experts.75.w3", "model.layers.26.block_sparse_moe.experts.76.w3", "model.layers.26.block_sparse_moe.experts.77.w3", "model.layers.26.block_sparse_moe.experts.78.w3", "model.layers.26.block_sparse_moe.experts.79.w3", "model.layers.26.block_sparse_moe.experts.80.w3", "model.layers.26.block_sparse_moe.experts.81.w3", "model.layers.26.block_sparse_moe.experts.82.w3", "model.layers.26.block_sparse_moe.experts.83.w3", "model.layers.26.block_sparse_moe.experts.84.w3", "model.layers.26.block_sparse_moe.experts.85.w3", "model.layers.26.block_sparse_moe.experts.86.w3", "model.layers.26.block_sparse_moe.experts.87.w3", "model.layers.26.block_sparse_moe.experts.88.w3", "model.layers.26.block_sparse_moe.experts.89.w3", "model.layers.26.block_sparse_moe.experts.90.w3", "model.layers.26.block_sparse_moe.experts.91.w3", "model.layers.26.block_sparse_moe.experts.92.w3", "model.layers.26.block_sparse_moe.experts.93.w3", "model.layers.26.block_sparse_moe.experts.94.w3", "model.layers.26.block_sparse_moe.experts.95.w3", "model.layers.26.block_sparse_moe.experts.96.w3", "model.layers.26.block_sparse_moe.experts.97.w3", "model.layers.26.block_sparse_moe.experts.98.w3", "model.layers.26.block_sparse_moe.experts.99.w3", "model.layers.26.block_sparse_moe.experts.100.w3", "model.layers.26.block_sparse_moe.experts.101.w3", "model.layers.26.block_sparse_moe.experts.102.w3", "model.layers.26.block_sparse_moe.experts.103.w3", "model.layers.26.block_sparse_moe.experts.104.w3", "model.layers.26.block_sparse_moe.experts.105.w3", "model.layers.26.block_sparse_moe.experts.106.w3", "model.layers.26.block_sparse_moe.experts.107.w3", "model.layers.26.block_sparse_moe.experts.108.w3", "model.layers.26.block_sparse_moe.experts.109.w3", "model.layers.26.block_sparse_moe.experts.110.w3", "model.layers.26.block_sparse_moe.experts.111.w3", "model.layers.26.block_sparse_moe.experts.112.w3", "model.layers.26.block_sparse_moe.experts.113.w3", "model.layers.26.block_sparse_moe.experts.114.w3", "model.layers.26.block_sparse_moe.experts.115.w3", "model.layers.26.block_sparse_moe.experts.116.w3", "model.layers.26.block_sparse_moe.experts.117.w3", "model.layers.26.block_sparse_moe.experts.118.w3", "model.layers.26.block_sparse_moe.experts.119.w3", "model.layers.26.block_sparse_moe.experts.120.w3", "model.layers.26.block_sparse_moe.experts.121.w3", "model.layers.26.block_sparse_moe.experts.122.w3", "model.layers.26.block_sparse_moe.experts.123.w3", "model.layers.26.block_sparse_moe.experts.124.w3", "model.layers.26.block_sparse_moe.experts.125.w3", "model.layers.26.block_sparse_moe.experts.126.w3", "model.layers.26.block_sparse_moe.experts.127.w3", "model.layers.26.block_sparse_moe.experts.128.w3", "model.layers.26.block_sparse_moe.experts.129.w3", "model.layers.26.block_sparse_moe.experts.130.w3", "model.layers.26.block_sparse_moe.experts.131.w3", "model.layers.26.block_sparse_moe.experts.132.w3", "model.layers.26.block_sparse_moe.experts.133.w3", "model.layers.26.block_sparse_moe.experts.134.w3", "model.layers.26.block_sparse_moe.experts.135.w3", "model.layers.26.block_sparse_moe.experts.136.w3", "model.layers.26.block_sparse_moe.experts.137.w3", "model.layers.26.block_sparse_moe.experts.138.w3", "model.layers.26.block_sparse_moe.experts.139.w3", "model.layers.26.block_sparse_moe.experts.140.w3", "model.layers.26.block_sparse_moe.experts.141.w3", "model.layers.26.block_sparse_moe.experts.142.w3", "model.layers.26.block_sparse_moe.experts.143.w3", "model.layers.26.block_sparse_moe.experts.144.w3", "model.layers.26.block_sparse_moe.experts.145.w3", "model.layers.26.block_sparse_moe.experts.146.w3", "model.layers.26.block_sparse_moe.experts.147.w3", "model.layers.26.block_sparse_moe.experts.148.w3", "model.layers.26.block_sparse_moe.experts.149.w3", "model.layers.26.block_sparse_moe.experts.150.w3", "model.layers.26.block_sparse_moe.experts.151.w3", "model.layers.26.block_sparse_moe.experts.152.w3", "model.layers.26.block_sparse_moe.experts.153.w3", "model.layers.26.block_sparse_moe.experts.154.w3", "model.layers.26.block_sparse_moe.experts.155.w3", "model.layers.26.block_sparse_moe.experts.156.w3", "model.layers.26.block_sparse_moe.experts.157.w3", "model.layers.26.block_sparse_moe.experts.158.w3", "model.layers.26.block_sparse_moe.experts.159.w3", "model.layers.26.block_sparse_moe.experts.160.w3", "model.layers.26.block_sparse_moe.experts.161.w3", "model.layers.26.block_sparse_moe.experts.162.w3", "model.layers.26.block_sparse_moe.experts.163.w3", "model.layers.26.block_sparse_moe.experts.164.w3", "model.layers.26.block_sparse_moe.experts.165.w3", "model.layers.26.block_sparse_moe.experts.166.w3", "model.layers.26.block_sparse_moe.experts.167.w3", "model.layers.26.block_sparse_moe.experts.168.w3", "model.layers.26.block_sparse_moe.experts.169.w3", "model.layers.26.block_sparse_moe.experts.170.w3", "model.layers.26.block_sparse_moe.experts.171.w3", "model.layers.26.block_sparse_moe.experts.172.w3", "model.layers.26.block_sparse_moe.experts.173.w3", "model.layers.26.block_sparse_moe.experts.174.w3", "model.layers.26.block_sparse_moe.experts.175.w3", "model.layers.26.block_sparse_moe.experts.176.w3", "model.layers.26.block_sparse_moe.experts.177.w3", "model.layers.26.block_sparse_moe.experts.178.w3", "model.layers.26.block_sparse_moe.experts.179.w3", "model.layers.26.block_sparse_moe.experts.180.w3", "model.layers.26.block_sparse_moe.experts.181.w3", "model.layers.26.block_sparse_moe.experts.182.w3", "model.layers.26.block_sparse_moe.experts.183.w3", "model.layers.26.block_sparse_moe.experts.184.w3", "model.layers.26.block_sparse_moe.experts.185.w3", "model.layers.26.block_sparse_moe.experts.186.w3", "model.layers.26.block_sparse_moe.experts.187.w3", "model.layers.26.block_sparse_moe.experts.188.w3", "model.layers.26.block_sparse_moe.experts.189.w3", "model.layers.26.block_sparse_moe.experts.190.w3", "model.layers.26.block_sparse_moe.experts.191.w3", "model.layers.26.block_sparse_moe.experts.192.w3", "model.layers.26.block_sparse_moe.experts.193.w3", "model.layers.26.block_sparse_moe.experts.194.w3", "model.layers.26.block_sparse_moe.experts.195.w3", "model.layers.26.block_sparse_moe.experts.196.w3", "model.layers.26.block_sparse_moe.experts.197.w3", "model.layers.26.block_sparse_moe.experts.198.w3", "model.layers.26.block_sparse_moe.experts.199.w3", "model.layers.26.block_sparse_moe.experts.200.w3", "model.layers.26.block_sparse_moe.experts.201.w3", "model.layers.26.block_sparse_moe.experts.202.w3", "model.layers.26.block_sparse_moe.experts.203.w3", "model.layers.26.block_sparse_moe.experts.204.w3", "model.layers.26.block_sparse_moe.experts.205.w3", "model.layers.26.block_sparse_moe.experts.206.w3", "model.layers.26.block_sparse_moe.experts.207.w3", "model.layers.26.block_sparse_moe.experts.208.w3", "model.layers.26.block_sparse_moe.experts.209.w3", "model.layers.26.block_sparse_moe.experts.210.w3", "model.layers.26.block_sparse_moe.experts.211.w3", "model.layers.26.block_sparse_moe.experts.212.w3", "model.layers.26.block_sparse_moe.experts.213.w3", "model.layers.26.block_sparse_moe.experts.214.w3", "model.layers.26.block_sparse_moe.experts.215.w3", "model.layers.26.block_sparse_moe.experts.216.w3", "model.layers.26.block_sparse_moe.experts.217.w3", "model.layers.26.block_sparse_moe.experts.218.w3", "model.layers.26.block_sparse_moe.experts.219.w3", "model.layers.26.block_sparse_moe.experts.220.w3", "model.layers.26.block_sparse_moe.experts.221.w3", "model.layers.26.block_sparse_moe.experts.222.w3", "model.layers.26.block_sparse_moe.experts.223.w3", "model.layers.26.block_sparse_moe.experts.224.w3", "model.layers.26.block_sparse_moe.experts.225.w3", "model.layers.26.block_sparse_moe.experts.226.w3", "model.layers.26.block_sparse_moe.experts.227.w3", "model.layers.26.block_sparse_moe.experts.228.w3", "model.layers.26.block_sparse_moe.experts.229.w3", "model.layers.26.block_sparse_moe.experts.230.w3", "model.layers.26.block_sparse_moe.experts.231.w3", "model.layers.26.block_sparse_moe.experts.232.w3", "model.layers.26.block_sparse_moe.experts.233.w3", "model.layers.26.block_sparse_moe.experts.234.w3", "model.layers.26.block_sparse_moe.experts.235.w3", "model.layers.26.block_sparse_moe.experts.236.w3", "model.layers.26.block_sparse_moe.experts.237.w3", "model.layers.26.block_sparse_moe.experts.238.w3", "model.layers.26.block_sparse_moe.experts.239.w3", "model.layers.26.block_sparse_moe.experts.240.w3", "model.layers.26.block_sparse_moe.experts.241.w3", "model.layers.26.block_sparse_moe.experts.242.w3", "model.layers.26.block_sparse_moe.experts.243.w3", "model.layers.26.block_sparse_moe.experts.244.w3", "model.layers.26.block_sparse_moe.experts.245.w3", "model.layers.26.block_sparse_moe.experts.246.w3", "model.layers.26.block_sparse_moe.experts.247.w3", "model.layers.26.block_sparse_moe.experts.248.w3", "model.layers.26.block_sparse_moe.experts.249.w3", "model.layers.26.block_sparse_moe.experts.250.w3", "model.layers.26.block_sparse_moe.experts.251.w3", "model.layers.26.block_sparse_moe.experts.252.w3", "model.layers.26.block_sparse_moe.experts.253.w3", "model.layers.26.block_sparse_moe.experts.254.w3", "model.layers.26.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0015811681747436301, "dbits": 2415919104 } ] }, { "idx": 134, "layers": [ "model.layers.26.block_sparse_moe.experts.0.w2", "model.layers.26.block_sparse_moe.experts.1.w2", "model.layers.26.block_sparse_moe.experts.2.w2", "model.layers.26.block_sparse_moe.experts.3.w2", "model.layers.26.block_sparse_moe.experts.4.w2", "model.layers.26.block_sparse_moe.experts.5.w2", "model.layers.26.block_sparse_moe.experts.6.w2", "model.layers.26.block_sparse_moe.experts.7.w2", "model.layers.26.block_sparse_moe.experts.8.w2", "model.layers.26.block_sparse_moe.experts.9.w2", "model.layers.26.block_sparse_moe.experts.10.w2", "model.layers.26.block_sparse_moe.experts.11.w2", "model.layers.26.block_sparse_moe.experts.12.w2", "model.layers.26.block_sparse_moe.experts.13.w2", "model.layers.26.block_sparse_moe.experts.14.w2", "model.layers.26.block_sparse_moe.experts.15.w2", "model.layers.26.block_sparse_moe.experts.16.w2", "model.layers.26.block_sparse_moe.experts.17.w2", "model.layers.26.block_sparse_moe.experts.18.w2", "model.layers.26.block_sparse_moe.experts.19.w2", "model.layers.26.block_sparse_moe.experts.20.w2", "model.layers.26.block_sparse_moe.experts.21.w2", "model.layers.26.block_sparse_moe.experts.22.w2", "model.layers.26.block_sparse_moe.experts.23.w2", "model.layers.26.block_sparse_moe.experts.24.w2", "model.layers.26.block_sparse_moe.experts.25.w2", "model.layers.26.block_sparse_moe.experts.26.w2", "model.layers.26.block_sparse_moe.experts.27.w2", "model.layers.26.block_sparse_moe.experts.28.w2", "model.layers.26.block_sparse_moe.experts.29.w2", "model.layers.26.block_sparse_moe.experts.30.w2", "model.layers.26.block_sparse_moe.experts.31.w2", "model.layers.26.block_sparse_moe.experts.32.w2", "model.layers.26.block_sparse_moe.experts.33.w2", "model.layers.26.block_sparse_moe.experts.34.w2", "model.layers.26.block_sparse_moe.experts.35.w2", "model.layers.26.block_sparse_moe.experts.36.w2", "model.layers.26.block_sparse_moe.experts.37.w2", "model.layers.26.block_sparse_moe.experts.38.w2", "model.layers.26.block_sparse_moe.experts.39.w2", "model.layers.26.block_sparse_moe.experts.40.w2", "model.layers.26.block_sparse_moe.experts.41.w2", "model.layers.26.block_sparse_moe.experts.42.w2", "model.layers.26.block_sparse_moe.experts.43.w2", "model.layers.26.block_sparse_moe.experts.44.w2", "model.layers.26.block_sparse_moe.experts.45.w2", "model.layers.26.block_sparse_moe.experts.46.w2", "model.layers.26.block_sparse_moe.experts.47.w2", "model.layers.26.block_sparse_moe.experts.48.w2", "model.layers.26.block_sparse_moe.experts.49.w2", "model.layers.26.block_sparse_moe.experts.50.w2", "model.layers.26.block_sparse_moe.experts.51.w2", "model.layers.26.block_sparse_moe.experts.52.w2", "model.layers.26.block_sparse_moe.experts.53.w2", "model.layers.26.block_sparse_moe.experts.54.w2", "model.layers.26.block_sparse_moe.experts.55.w2", "model.layers.26.block_sparse_moe.experts.56.w2", "model.layers.26.block_sparse_moe.experts.57.w2", "model.layers.26.block_sparse_moe.experts.58.w2", "model.layers.26.block_sparse_moe.experts.59.w2", "model.layers.26.block_sparse_moe.experts.60.w2", "model.layers.26.block_sparse_moe.experts.61.w2", "model.layers.26.block_sparse_moe.experts.62.w2", "model.layers.26.block_sparse_moe.experts.63.w2", "model.layers.26.block_sparse_moe.experts.64.w2", "model.layers.26.block_sparse_moe.experts.65.w2", "model.layers.26.block_sparse_moe.experts.66.w2", "model.layers.26.block_sparse_moe.experts.67.w2", "model.layers.26.block_sparse_moe.experts.68.w2", "model.layers.26.block_sparse_moe.experts.69.w2", "model.layers.26.block_sparse_moe.experts.70.w2", "model.layers.26.block_sparse_moe.experts.71.w2", "model.layers.26.block_sparse_moe.experts.72.w2", "model.layers.26.block_sparse_moe.experts.73.w2", "model.layers.26.block_sparse_moe.experts.74.w2", "model.layers.26.block_sparse_moe.experts.75.w2", "model.layers.26.block_sparse_moe.experts.76.w2", "model.layers.26.block_sparse_moe.experts.77.w2", "model.layers.26.block_sparse_moe.experts.78.w2", "model.layers.26.block_sparse_moe.experts.79.w2", "model.layers.26.block_sparse_moe.experts.80.w2", "model.layers.26.block_sparse_moe.experts.81.w2", "model.layers.26.block_sparse_moe.experts.82.w2", "model.layers.26.block_sparse_moe.experts.83.w2", "model.layers.26.block_sparse_moe.experts.84.w2", "model.layers.26.block_sparse_moe.experts.85.w2", "model.layers.26.block_sparse_moe.experts.86.w2", "model.layers.26.block_sparse_moe.experts.87.w2", "model.layers.26.block_sparse_moe.experts.88.w2", "model.layers.26.block_sparse_moe.experts.89.w2", "model.layers.26.block_sparse_moe.experts.90.w2", "model.layers.26.block_sparse_moe.experts.91.w2", "model.layers.26.block_sparse_moe.experts.92.w2", "model.layers.26.block_sparse_moe.experts.93.w2", "model.layers.26.block_sparse_moe.experts.94.w2", "model.layers.26.block_sparse_moe.experts.95.w2", "model.layers.26.block_sparse_moe.experts.96.w2", "model.layers.26.block_sparse_moe.experts.97.w2", "model.layers.26.block_sparse_moe.experts.98.w2", "model.layers.26.block_sparse_moe.experts.99.w2", "model.layers.26.block_sparse_moe.experts.100.w2", "model.layers.26.block_sparse_moe.experts.101.w2", "model.layers.26.block_sparse_moe.experts.102.w2", "model.layers.26.block_sparse_moe.experts.103.w2", "model.layers.26.block_sparse_moe.experts.104.w2", "model.layers.26.block_sparse_moe.experts.105.w2", "model.layers.26.block_sparse_moe.experts.106.w2", "model.layers.26.block_sparse_moe.experts.107.w2", "model.layers.26.block_sparse_moe.experts.108.w2", "model.layers.26.block_sparse_moe.experts.109.w2", "model.layers.26.block_sparse_moe.experts.110.w2", "model.layers.26.block_sparse_moe.experts.111.w2", "model.layers.26.block_sparse_moe.experts.112.w2", "model.layers.26.block_sparse_moe.experts.113.w2", "model.layers.26.block_sparse_moe.experts.114.w2", "model.layers.26.block_sparse_moe.experts.115.w2", "model.layers.26.block_sparse_moe.experts.116.w2", "model.layers.26.block_sparse_moe.experts.117.w2", "model.layers.26.block_sparse_moe.experts.118.w2", "model.layers.26.block_sparse_moe.experts.119.w2", "model.layers.26.block_sparse_moe.experts.120.w2", "model.layers.26.block_sparse_moe.experts.121.w2", "model.layers.26.block_sparse_moe.experts.122.w2", "model.layers.26.block_sparse_moe.experts.123.w2", "model.layers.26.block_sparse_moe.experts.124.w2", "model.layers.26.block_sparse_moe.experts.125.w2", "model.layers.26.block_sparse_moe.experts.126.w2", "model.layers.26.block_sparse_moe.experts.127.w2", "model.layers.26.block_sparse_moe.experts.128.w2", "model.layers.26.block_sparse_moe.experts.129.w2", "model.layers.26.block_sparse_moe.experts.130.w2", "model.layers.26.block_sparse_moe.experts.131.w2", "model.layers.26.block_sparse_moe.experts.132.w2", "model.layers.26.block_sparse_moe.experts.133.w2", "model.layers.26.block_sparse_moe.experts.134.w2", "model.layers.26.block_sparse_moe.experts.135.w2", "model.layers.26.block_sparse_moe.experts.136.w2", "model.layers.26.block_sparse_moe.experts.137.w2", "model.layers.26.block_sparse_moe.experts.138.w2", "model.layers.26.block_sparse_moe.experts.139.w2", "model.layers.26.block_sparse_moe.experts.140.w2", "model.layers.26.block_sparse_moe.experts.141.w2", "model.layers.26.block_sparse_moe.experts.142.w2", "model.layers.26.block_sparse_moe.experts.143.w2", "model.layers.26.block_sparse_moe.experts.144.w2", "model.layers.26.block_sparse_moe.experts.145.w2", "model.layers.26.block_sparse_moe.experts.146.w2", "model.layers.26.block_sparse_moe.experts.147.w2", "model.layers.26.block_sparse_moe.experts.148.w2", "model.layers.26.block_sparse_moe.experts.149.w2", "model.layers.26.block_sparse_moe.experts.150.w2", "model.layers.26.block_sparse_moe.experts.151.w2", "model.layers.26.block_sparse_moe.experts.152.w2", "model.layers.26.block_sparse_moe.experts.153.w2", "model.layers.26.block_sparse_moe.experts.154.w2", "model.layers.26.block_sparse_moe.experts.155.w2", "model.layers.26.block_sparse_moe.experts.156.w2", "model.layers.26.block_sparse_moe.experts.157.w2", "model.layers.26.block_sparse_moe.experts.158.w2", "model.layers.26.block_sparse_moe.experts.159.w2", "model.layers.26.block_sparse_moe.experts.160.w2", "model.layers.26.block_sparse_moe.experts.161.w2", "model.layers.26.block_sparse_moe.experts.162.w2", "model.layers.26.block_sparse_moe.experts.163.w2", "model.layers.26.block_sparse_moe.experts.164.w2", "model.layers.26.block_sparse_moe.experts.165.w2", "model.layers.26.block_sparse_moe.experts.166.w2", "model.layers.26.block_sparse_moe.experts.167.w2", "model.layers.26.block_sparse_moe.experts.168.w2", "model.layers.26.block_sparse_moe.experts.169.w2", "model.layers.26.block_sparse_moe.experts.170.w2", "model.layers.26.block_sparse_moe.experts.171.w2", "model.layers.26.block_sparse_moe.experts.172.w2", "model.layers.26.block_sparse_moe.experts.173.w2", "model.layers.26.block_sparse_moe.experts.174.w2", "model.layers.26.block_sparse_moe.experts.175.w2", "model.layers.26.block_sparse_moe.experts.176.w2", "model.layers.26.block_sparse_moe.experts.177.w2", "model.layers.26.block_sparse_moe.experts.178.w2", "model.layers.26.block_sparse_moe.experts.179.w2", "model.layers.26.block_sparse_moe.experts.180.w2", "model.layers.26.block_sparse_moe.experts.181.w2", "model.layers.26.block_sparse_moe.experts.182.w2", "model.layers.26.block_sparse_moe.experts.183.w2", "model.layers.26.block_sparse_moe.experts.184.w2", "model.layers.26.block_sparse_moe.experts.185.w2", "model.layers.26.block_sparse_moe.experts.186.w2", "model.layers.26.block_sparse_moe.experts.187.w2", "model.layers.26.block_sparse_moe.experts.188.w2", "model.layers.26.block_sparse_moe.experts.189.w2", "model.layers.26.block_sparse_moe.experts.190.w2", "model.layers.26.block_sparse_moe.experts.191.w2", "model.layers.26.block_sparse_moe.experts.192.w2", "model.layers.26.block_sparse_moe.experts.193.w2", "model.layers.26.block_sparse_moe.experts.194.w2", "model.layers.26.block_sparse_moe.experts.195.w2", "model.layers.26.block_sparse_moe.experts.196.w2", "model.layers.26.block_sparse_moe.experts.197.w2", "model.layers.26.block_sparse_moe.experts.198.w2", "model.layers.26.block_sparse_moe.experts.199.w2", "model.layers.26.block_sparse_moe.experts.200.w2", "model.layers.26.block_sparse_moe.experts.201.w2", "model.layers.26.block_sparse_moe.experts.202.w2", "model.layers.26.block_sparse_moe.experts.203.w2", "model.layers.26.block_sparse_moe.experts.204.w2", "model.layers.26.block_sparse_moe.experts.205.w2", "model.layers.26.block_sparse_moe.experts.206.w2", "model.layers.26.block_sparse_moe.experts.207.w2", "model.layers.26.block_sparse_moe.experts.208.w2", "model.layers.26.block_sparse_moe.experts.209.w2", "model.layers.26.block_sparse_moe.experts.210.w2", "model.layers.26.block_sparse_moe.experts.211.w2", "model.layers.26.block_sparse_moe.experts.212.w2", "model.layers.26.block_sparse_moe.experts.213.w2", "model.layers.26.block_sparse_moe.experts.214.w2", "model.layers.26.block_sparse_moe.experts.215.w2", "model.layers.26.block_sparse_moe.experts.216.w2", "model.layers.26.block_sparse_moe.experts.217.w2", "model.layers.26.block_sparse_moe.experts.218.w2", "model.layers.26.block_sparse_moe.experts.219.w2", "model.layers.26.block_sparse_moe.experts.220.w2", "model.layers.26.block_sparse_moe.experts.221.w2", "model.layers.26.block_sparse_moe.experts.222.w2", "model.layers.26.block_sparse_moe.experts.223.w2", "model.layers.26.block_sparse_moe.experts.224.w2", "model.layers.26.block_sparse_moe.experts.225.w2", "model.layers.26.block_sparse_moe.experts.226.w2", "model.layers.26.block_sparse_moe.experts.227.w2", "model.layers.26.block_sparse_moe.experts.228.w2", "model.layers.26.block_sparse_moe.experts.229.w2", "model.layers.26.block_sparse_moe.experts.230.w2", "model.layers.26.block_sparse_moe.experts.231.w2", "model.layers.26.block_sparse_moe.experts.232.w2", "model.layers.26.block_sparse_moe.experts.233.w2", "model.layers.26.block_sparse_moe.experts.234.w2", "model.layers.26.block_sparse_moe.experts.235.w2", "model.layers.26.block_sparse_moe.experts.236.w2", "model.layers.26.block_sparse_moe.experts.237.w2", "model.layers.26.block_sparse_moe.experts.238.w2", "model.layers.26.block_sparse_moe.experts.239.w2", "model.layers.26.block_sparse_moe.experts.240.w2", "model.layers.26.block_sparse_moe.experts.241.w2", "model.layers.26.block_sparse_moe.experts.242.w2", "model.layers.26.block_sparse_moe.experts.243.w2", "model.layers.26.block_sparse_moe.experts.244.w2", "model.layers.26.block_sparse_moe.experts.245.w2", "model.layers.26.block_sparse_moe.experts.246.w2", "model.layers.26.block_sparse_moe.experts.247.w2", "model.layers.26.block_sparse_moe.experts.248.w2", "model.layers.26.block_sparse_moe.experts.249.w2", "model.layers.26.block_sparse_moe.experts.250.w2", "model.layers.26.block_sparse_moe.experts.251.w2", "model.layers.26.block_sparse_moe.experts.252.w2", "model.layers.26.block_sparse_moe.experts.253.w2", "model.layers.26.block_sparse_moe.experts.254.w2", "model.layers.26.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006703764200209905, "dbits": 1207959552 } ] }, { "idx": 135, "layers": [ "model.layers.27.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00027313530445094436, "dbits": 18874368 } ] }, { "idx": 136, "layers": [ "model.layers.27.self_attn.k_proj", "model.layers.27.self_attn.v_proj" ], "candidates": [ { "dkld": 0.004342371225357078, "dbits": 6291456 } ] }, { "idx": 137, "layers": [ "model.layers.27.self_attn.o_proj" ], "candidates": [ { "dkld": -0.014569425582885653, "dbits": 18874368 } ] }, { "idx": 138, "layers": [ "model.layers.27.block_sparse_moe.experts.0.w1", "model.layers.27.block_sparse_moe.experts.1.w1", "model.layers.27.block_sparse_moe.experts.2.w1", "model.layers.27.block_sparse_moe.experts.3.w1", "model.layers.27.block_sparse_moe.experts.4.w1", "model.layers.27.block_sparse_moe.experts.5.w1", "model.layers.27.block_sparse_moe.experts.6.w1", "model.layers.27.block_sparse_moe.experts.7.w1", "model.layers.27.block_sparse_moe.experts.8.w1", "model.layers.27.block_sparse_moe.experts.9.w1", "model.layers.27.block_sparse_moe.experts.10.w1", "model.layers.27.block_sparse_moe.experts.11.w1", "model.layers.27.block_sparse_moe.experts.12.w1", "model.layers.27.block_sparse_moe.experts.13.w1", "model.layers.27.block_sparse_moe.experts.14.w1", "model.layers.27.block_sparse_moe.experts.15.w1", "model.layers.27.block_sparse_moe.experts.16.w1", "model.layers.27.block_sparse_moe.experts.17.w1", "model.layers.27.block_sparse_moe.experts.18.w1", "model.layers.27.block_sparse_moe.experts.19.w1", "model.layers.27.block_sparse_moe.experts.20.w1", "model.layers.27.block_sparse_moe.experts.21.w1", "model.layers.27.block_sparse_moe.experts.22.w1", "model.layers.27.block_sparse_moe.experts.23.w1", "model.layers.27.block_sparse_moe.experts.24.w1", "model.layers.27.block_sparse_moe.experts.25.w1", "model.layers.27.block_sparse_moe.experts.26.w1", "model.layers.27.block_sparse_moe.experts.27.w1", "model.layers.27.block_sparse_moe.experts.28.w1", "model.layers.27.block_sparse_moe.experts.29.w1", "model.layers.27.block_sparse_moe.experts.30.w1", "model.layers.27.block_sparse_moe.experts.31.w1", "model.layers.27.block_sparse_moe.experts.32.w1", "model.layers.27.block_sparse_moe.experts.33.w1", "model.layers.27.block_sparse_moe.experts.34.w1", "model.layers.27.block_sparse_moe.experts.35.w1", "model.layers.27.block_sparse_moe.experts.36.w1", "model.layers.27.block_sparse_moe.experts.37.w1", "model.layers.27.block_sparse_moe.experts.38.w1", "model.layers.27.block_sparse_moe.experts.39.w1", "model.layers.27.block_sparse_moe.experts.40.w1", "model.layers.27.block_sparse_moe.experts.41.w1", "model.layers.27.block_sparse_moe.experts.42.w1", "model.layers.27.block_sparse_moe.experts.43.w1", "model.layers.27.block_sparse_moe.experts.44.w1", "model.layers.27.block_sparse_moe.experts.45.w1", "model.layers.27.block_sparse_moe.experts.46.w1", "model.layers.27.block_sparse_moe.experts.47.w1", "model.layers.27.block_sparse_moe.experts.48.w1", "model.layers.27.block_sparse_moe.experts.49.w1", "model.layers.27.block_sparse_moe.experts.50.w1", "model.layers.27.block_sparse_moe.experts.51.w1", "model.layers.27.block_sparse_moe.experts.52.w1", "model.layers.27.block_sparse_moe.experts.53.w1", "model.layers.27.block_sparse_moe.experts.54.w1", "model.layers.27.block_sparse_moe.experts.55.w1", "model.layers.27.block_sparse_moe.experts.56.w1", "model.layers.27.block_sparse_moe.experts.57.w1", "model.layers.27.block_sparse_moe.experts.58.w1", "model.layers.27.block_sparse_moe.experts.59.w1", "model.layers.27.block_sparse_moe.experts.60.w1", "model.layers.27.block_sparse_moe.experts.61.w1", "model.layers.27.block_sparse_moe.experts.62.w1", "model.layers.27.block_sparse_moe.experts.63.w1", "model.layers.27.block_sparse_moe.experts.64.w1", "model.layers.27.block_sparse_moe.experts.65.w1", "model.layers.27.block_sparse_moe.experts.66.w1", "model.layers.27.block_sparse_moe.experts.67.w1", "model.layers.27.block_sparse_moe.experts.68.w1", "model.layers.27.block_sparse_moe.experts.69.w1", "model.layers.27.block_sparse_moe.experts.70.w1", "model.layers.27.block_sparse_moe.experts.71.w1", "model.layers.27.block_sparse_moe.experts.72.w1", "model.layers.27.block_sparse_moe.experts.73.w1", "model.layers.27.block_sparse_moe.experts.74.w1", "model.layers.27.block_sparse_moe.experts.75.w1", "model.layers.27.block_sparse_moe.experts.76.w1", "model.layers.27.block_sparse_moe.experts.77.w1", "model.layers.27.block_sparse_moe.experts.78.w1", "model.layers.27.block_sparse_moe.experts.79.w1", "model.layers.27.block_sparse_moe.experts.80.w1", "model.layers.27.block_sparse_moe.experts.81.w1", "model.layers.27.block_sparse_moe.experts.82.w1", "model.layers.27.block_sparse_moe.experts.83.w1", "model.layers.27.block_sparse_moe.experts.84.w1", "model.layers.27.block_sparse_moe.experts.85.w1", "model.layers.27.block_sparse_moe.experts.86.w1", "model.layers.27.block_sparse_moe.experts.87.w1", "model.layers.27.block_sparse_moe.experts.88.w1", "model.layers.27.block_sparse_moe.experts.89.w1", "model.layers.27.block_sparse_moe.experts.90.w1", "model.layers.27.block_sparse_moe.experts.91.w1", "model.layers.27.block_sparse_moe.experts.92.w1", "model.layers.27.block_sparse_moe.experts.93.w1", "model.layers.27.block_sparse_moe.experts.94.w1", "model.layers.27.block_sparse_moe.experts.95.w1", "model.layers.27.block_sparse_moe.experts.96.w1", "model.layers.27.block_sparse_moe.experts.97.w1", "model.layers.27.block_sparse_moe.experts.98.w1", "model.layers.27.block_sparse_moe.experts.99.w1", "model.layers.27.block_sparse_moe.experts.100.w1", "model.layers.27.block_sparse_moe.experts.101.w1", "model.layers.27.block_sparse_moe.experts.102.w1", "model.layers.27.block_sparse_moe.experts.103.w1", "model.layers.27.block_sparse_moe.experts.104.w1", "model.layers.27.block_sparse_moe.experts.105.w1", "model.layers.27.block_sparse_moe.experts.106.w1", "model.layers.27.block_sparse_moe.experts.107.w1", "model.layers.27.block_sparse_moe.experts.108.w1", "model.layers.27.block_sparse_moe.experts.109.w1", "model.layers.27.block_sparse_moe.experts.110.w1", "model.layers.27.block_sparse_moe.experts.111.w1", "model.layers.27.block_sparse_moe.experts.112.w1", "model.layers.27.block_sparse_moe.experts.113.w1", "model.layers.27.block_sparse_moe.experts.114.w1", "model.layers.27.block_sparse_moe.experts.115.w1", "model.layers.27.block_sparse_moe.experts.116.w1", "model.layers.27.block_sparse_moe.experts.117.w1", "model.layers.27.block_sparse_moe.experts.118.w1", "model.layers.27.block_sparse_moe.experts.119.w1", "model.layers.27.block_sparse_moe.experts.120.w1", "model.layers.27.block_sparse_moe.experts.121.w1", "model.layers.27.block_sparse_moe.experts.122.w1", "model.layers.27.block_sparse_moe.experts.123.w1", "model.layers.27.block_sparse_moe.experts.124.w1", "model.layers.27.block_sparse_moe.experts.125.w1", "model.layers.27.block_sparse_moe.experts.126.w1", "model.layers.27.block_sparse_moe.experts.127.w1", "model.layers.27.block_sparse_moe.experts.128.w1", "model.layers.27.block_sparse_moe.experts.129.w1", "model.layers.27.block_sparse_moe.experts.130.w1", "model.layers.27.block_sparse_moe.experts.131.w1", "model.layers.27.block_sparse_moe.experts.132.w1", "model.layers.27.block_sparse_moe.experts.133.w1", "model.layers.27.block_sparse_moe.experts.134.w1", "model.layers.27.block_sparse_moe.experts.135.w1", "model.layers.27.block_sparse_moe.experts.136.w1", "model.layers.27.block_sparse_moe.experts.137.w1", "model.layers.27.block_sparse_moe.experts.138.w1", "model.layers.27.block_sparse_moe.experts.139.w1", "model.layers.27.block_sparse_moe.experts.140.w1", "model.layers.27.block_sparse_moe.experts.141.w1", "model.layers.27.block_sparse_moe.experts.142.w1", "model.layers.27.block_sparse_moe.experts.143.w1", "model.layers.27.block_sparse_moe.experts.144.w1", "model.layers.27.block_sparse_moe.experts.145.w1", "model.layers.27.block_sparse_moe.experts.146.w1", "model.layers.27.block_sparse_moe.experts.147.w1", "model.layers.27.block_sparse_moe.experts.148.w1", "model.layers.27.block_sparse_moe.experts.149.w1", "model.layers.27.block_sparse_moe.experts.150.w1", "model.layers.27.block_sparse_moe.experts.151.w1", "model.layers.27.block_sparse_moe.experts.152.w1", "model.layers.27.block_sparse_moe.experts.153.w1", "model.layers.27.block_sparse_moe.experts.154.w1", "model.layers.27.block_sparse_moe.experts.155.w1", "model.layers.27.block_sparse_moe.experts.156.w1", "model.layers.27.block_sparse_moe.experts.157.w1", "model.layers.27.block_sparse_moe.experts.158.w1", "model.layers.27.block_sparse_moe.experts.159.w1", "model.layers.27.block_sparse_moe.experts.160.w1", "model.layers.27.block_sparse_moe.experts.161.w1", "model.layers.27.block_sparse_moe.experts.162.w1", "model.layers.27.block_sparse_moe.experts.163.w1", "model.layers.27.block_sparse_moe.experts.164.w1", "model.layers.27.block_sparse_moe.experts.165.w1", "model.layers.27.block_sparse_moe.experts.166.w1", "model.layers.27.block_sparse_moe.experts.167.w1", "model.layers.27.block_sparse_moe.experts.168.w1", "model.layers.27.block_sparse_moe.experts.169.w1", "model.layers.27.block_sparse_moe.experts.170.w1", "model.layers.27.block_sparse_moe.experts.171.w1", "model.layers.27.block_sparse_moe.experts.172.w1", "model.layers.27.block_sparse_moe.experts.173.w1", "model.layers.27.block_sparse_moe.experts.174.w1", "model.layers.27.block_sparse_moe.experts.175.w1", "model.layers.27.block_sparse_moe.experts.176.w1", "model.layers.27.block_sparse_moe.experts.177.w1", "model.layers.27.block_sparse_moe.experts.178.w1", "model.layers.27.block_sparse_moe.experts.179.w1", "model.layers.27.block_sparse_moe.experts.180.w1", "model.layers.27.block_sparse_moe.experts.181.w1", "model.layers.27.block_sparse_moe.experts.182.w1", "model.layers.27.block_sparse_moe.experts.183.w1", "model.layers.27.block_sparse_moe.experts.184.w1", "model.layers.27.block_sparse_moe.experts.185.w1", "model.layers.27.block_sparse_moe.experts.186.w1", "model.layers.27.block_sparse_moe.experts.187.w1", "model.layers.27.block_sparse_moe.experts.188.w1", "model.layers.27.block_sparse_moe.experts.189.w1", "model.layers.27.block_sparse_moe.experts.190.w1", "model.layers.27.block_sparse_moe.experts.191.w1", "model.layers.27.block_sparse_moe.experts.192.w1", "model.layers.27.block_sparse_moe.experts.193.w1", "model.layers.27.block_sparse_moe.experts.194.w1", "model.layers.27.block_sparse_moe.experts.195.w1", "model.layers.27.block_sparse_moe.experts.196.w1", "model.layers.27.block_sparse_moe.experts.197.w1", "model.layers.27.block_sparse_moe.experts.198.w1", "model.layers.27.block_sparse_moe.experts.199.w1", "model.layers.27.block_sparse_moe.experts.200.w1", "model.layers.27.block_sparse_moe.experts.201.w1", "model.layers.27.block_sparse_moe.experts.202.w1", "model.layers.27.block_sparse_moe.experts.203.w1", "model.layers.27.block_sparse_moe.experts.204.w1", "model.layers.27.block_sparse_moe.experts.205.w1", "model.layers.27.block_sparse_moe.experts.206.w1", "model.layers.27.block_sparse_moe.experts.207.w1", "model.layers.27.block_sparse_moe.experts.208.w1", "model.layers.27.block_sparse_moe.experts.209.w1", "model.layers.27.block_sparse_moe.experts.210.w1", "model.layers.27.block_sparse_moe.experts.211.w1", "model.layers.27.block_sparse_moe.experts.212.w1", "model.layers.27.block_sparse_moe.experts.213.w1", "model.layers.27.block_sparse_moe.experts.214.w1", "model.layers.27.block_sparse_moe.experts.215.w1", "model.layers.27.block_sparse_moe.experts.216.w1", "model.layers.27.block_sparse_moe.experts.217.w1", "model.layers.27.block_sparse_moe.experts.218.w1", "model.layers.27.block_sparse_moe.experts.219.w1", "model.layers.27.block_sparse_moe.experts.220.w1", "model.layers.27.block_sparse_moe.experts.221.w1", "model.layers.27.block_sparse_moe.experts.222.w1", "model.layers.27.block_sparse_moe.experts.223.w1", "model.layers.27.block_sparse_moe.experts.224.w1", "model.layers.27.block_sparse_moe.experts.225.w1", "model.layers.27.block_sparse_moe.experts.226.w1", "model.layers.27.block_sparse_moe.experts.227.w1", "model.layers.27.block_sparse_moe.experts.228.w1", "model.layers.27.block_sparse_moe.experts.229.w1", "model.layers.27.block_sparse_moe.experts.230.w1", "model.layers.27.block_sparse_moe.experts.231.w1", "model.layers.27.block_sparse_moe.experts.232.w1", "model.layers.27.block_sparse_moe.experts.233.w1", "model.layers.27.block_sparse_moe.experts.234.w1", "model.layers.27.block_sparse_moe.experts.235.w1", "model.layers.27.block_sparse_moe.experts.236.w1", "model.layers.27.block_sparse_moe.experts.237.w1", "model.layers.27.block_sparse_moe.experts.238.w1", "model.layers.27.block_sparse_moe.experts.239.w1", "model.layers.27.block_sparse_moe.experts.240.w1", "model.layers.27.block_sparse_moe.experts.241.w1", "model.layers.27.block_sparse_moe.experts.242.w1", "model.layers.27.block_sparse_moe.experts.243.w1", "model.layers.27.block_sparse_moe.experts.244.w1", "model.layers.27.block_sparse_moe.experts.245.w1", "model.layers.27.block_sparse_moe.experts.246.w1", "model.layers.27.block_sparse_moe.experts.247.w1", "model.layers.27.block_sparse_moe.experts.248.w1", "model.layers.27.block_sparse_moe.experts.249.w1", "model.layers.27.block_sparse_moe.experts.250.w1", "model.layers.27.block_sparse_moe.experts.251.w1", "model.layers.27.block_sparse_moe.experts.252.w1", "model.layers.27.block_sparse_moe.experts.253.w1", "model.layers.27.block_sparse_moe.experts.254.w1", "model.layers.27.block_sparse_moe.experts.255.w1", "model.layers.27.block_sparse_moe.experts.0.w3", "model.layers.27.block_sparse_moe.experts.1.w3", "model.layers.27.block_sparse_moe.experts.2.w3", "model.layers.27.block_sparse_moe.experts.3.w3", "model.layers.27.block_sparse_moe.experts.4.w3", "model.layers.27.block_sparse_moe.experts.5.w3", "model.layers.27.block_sparse_moe.experts.6.w3", "model.layers.27.block_sparse_moe.experts.7.w3", "model.layers.27.block_sparse_moe.experts.8.w3", "model.layers.27.block_sparse_moe.experts.9.w3", "model.layers.27.block_sparse_moe.experts.10.w3", "model.layers.27.block_sparse_moe.experts.11.w3", "model.layers.27.block_sparse_moe.experts.12.w3", "model.layers.27.block_sparse_moe.experts.13.w3", "model.layers.27.block_sparse_moe.experts.14.w3", "model.layers.27.block_sparse_moe.experts.15.w3", "model.layers.27.block_sparse_moe.experts.16.w3", "model.layers.27.block_sparse_moe.experts.17.w3", "model.layers.27.block_sparse_moe.experts.18.w3", "model.layers.27.block_sparse_moe.experts.19.w3", "model.layers.27.block_sparse_moe.experts.20.w3", "model.layers.27.block_sparse_moe.experts.21.w3", "model.layers.27.block_sparse_moe.experts.22.w3", "model.layers.27.block_sparse_moe.experts.23.w3", "model.layers.27.block_sparse_moe.experts.24.w3", "model.layers.27.block_sparse_moe.experts.25.w3", "model.layers.27.block_sparse_moe.experts.26.w3", "model.layers.27.block_sparse_moe.experts.27.w3", "model.layers.27.block_sparse_moe.experts.28.w3", "model.layers.27.block_sparse_moe.experts.29.w3", "model.layers.27.block_sparse_moe.experts.30.w3", "model.layers.27.block_sparse_moe.experts.31.w3", "model.layers.27.block_sparse_moe.experts.32.w3", "model.layers.27.block_sparse_moe.experts.33.w3", "model.layers.27.block_sparse_moe.experts.34.w3", "model.layers.27.block_sparse_moe.experts.35.w3", "model.layers.27.block_sparse_moe.experts.36.w3", "model.layers.27.block_sparse_moe.experts.37.w3", "model.layers.27.block_sparse_moe.experts.38.w3", "model.layers.27.block_sparse_moe.experts.39.w3", "model.layers.27.block_sparse_moe.experts.40.w3", "model.layers.27.block_sparse_moe.experts.41.w3", "model.layers.27.block_sparse_moe.experts.42.w3", "model.layers.27.block_sparse_moe.experts.43.w3", "model.layers.27.block_sparse_moe.experts.44.w3", "model.layers.27.block_sparse_moe.experts.45.w3", "model.layers.27.block_sparse_moe.experts.46.w3", "model.layers.27.block_sparse_moe.experts.47.w3", "model.layers.27.block_sparse_moe.experts.48.w3", "model.layers.27.block_sparse_moe.experts.49.w3", "model.layers.27.block_sparse_moe.experts.50.w3", "model.layers.27.block_sparse_moe.experts.51.w3", "model.layers.27.block_sparse_moe.experts.52.w3", "model.layers.27.block_sparse_moe.experts.53.w3", "model.layers.27.block_sparse_moe.experts.54.w3", "model.layers.27.block_sparse_moe.experts.55.w3", "model.layers.27.block_sparse_moe.experts.56.w3", "model.layers.27.block_sparse_moe.experts.57.w3", "model.layers.27.block_sparse_moe.experts.58.w3", "model.layers.27.block_sparse_moe.experts.59.w3", "model.layers.27.block_sparse_moe.experts.60.w3", "model.layers.27.block_sparse_moe.experts.61.w3", "model.layers.27.block_sparse_moe.experts.62.w3", "model.layers.27.block_sparse_moe.experts.63.w3", "model.layers.27.block_sparse_moe.experts.64.w3", "model.layers.27.block_sparse_moe.experts.65.w3", "model.layers.27.block_sparse_moe.experts.66.w3", "model.layers.27.block_sparse_moe.experts.67.w3", "model.layers.27.block_sparse_moe.experts.68.w3", "model.layers.27.block_sparse_moe.experts.69.w3", "model.layers.27.block_sparse_moe.experts.70.w3", "model.layers.27.block_sparse_moe.experts.71.w3", "model.layers.27.block_sparse_moe.experts.72.w3", "model.layers.27.block_sparse_moe.experts.73.w3", "model.layers.27.block_sparse_moe.experts.74.w3", "model.layers.27.block_sparse_moe.experts.75.w3", "model.layers.27.block_sparse_moe.experts.76.w3", "model.layers.27.block_sparse_moe.experts.77.w3", "model.layers.27.block_sparse_moe.experts.78.w3", "model.layers.27.block_sparse_moe.experts.79.w3", "model.layers.27.block_sparse_moe.experts.80.w3", "model.layers.27.block_sparse_moe.experts.81.w3", "model.layers.27.block_sparse_moe.experts.82.w3", "model.layers.27.block_sparse_moe.experts.83.w3", "model.layers.27.block_sparse_moe.experts.84.w3", "model.layers.27.block_sparse_moe.experts.85.w3", "model.layers.27.block_sparse_moe.experts.86.w3", "model.layers.27.block_sparse_moe.experts.87.w3", "model.layers.27.block_sparse_moe.experts.88.w3", "model.layers.27.block_sparse_moe.experts.89.w3", "model.layers.27.block_sparse_moe.experts.90.w3", "model.layers.27.block_sparse_moe.experts.91.w3", "model.layers.27.block_sparse_moe.experts.92.w3", "model.layers.27.block_sparse_moe.experts.93.w3", "model.layers.27.block_sparse_moe.experts.94.w3", "model.layers.27.block_sparse_moe.experts.95.w3", "model.layers.27.block_sparse_moe.experts.96.w3", "model.layers.27.block_sparse_moe.experts.97.w3", "model.layers.27.block_sparse_moe.experts.98.w3", "model.layers.27.block_sparse_moe.experts.99.w3", "model.layers.27.block_sparse_moe.experts.100.w3", "model.layers.27.block_sparse_moe.experts.101.w3", "model.layers.27.block_sparse_moe.experts.102.w3", "model.layers.27.block_sparse_moe.experts.103.w3", "model.layers.27.block_sparse_moe.experts.104.w3", "model.layers.27.block_sparse_moe.experts.105.w3", "model.layers.27.block_sparse_moe.experts.106.w3", "model.layers.27.block_sparse_moe.experts.107.w3", "model.layers.27.block_sparse_moe.experts.108.w3", "model.layers.27.block_sparse_moe.experts.109.w3", "model.layers.27.block_sparse_moe.experts.110.w3", "model.layers.27.block_sparse_moe.experts.111.w3", "model.layers.27.block_sparse_moe.experts.112.w3", "model.layers.27.block_sparse_moe.experts.113.w3", "model.layers.27.block_sparse_moe.experts.114.w3", "model.layers.27.block_sparse_moe.experts.115.w3", "model.layers.27.block_sparse_moe.experts.116.w3", "model.layers.27.block_sparse_moe.experts.117.w3", "model.layers.27.block_sparse_moe.experts.118.w3", "model.layers.27.block_sparse_moe.experts.119.w3", "model.layers.27.block_sparse_moe.experts.120.w3", "model.layers.27.block_sparse_moe.experts.121.w3", "model.layers.27.block_sparse_moe.experts.122.w3", "model.layers.27.block_sparse_moe.experts.123.w3", "model.layers.27.block_sparse_moe.experts.124.w3", "model.layers.27.block_sparse_moe.experts.125.w3", "model.layers.27.block_sparse_moe.experts.126.w3", "model.layers.27.block_sparse_moe.experts.127.w3", "model.layers.27.block_sparse_moe.experts.128.w3", "model.layers.27.block_sparse_moe.experts.129.w3", "model.layers.27.block_sparse_moe.experts.130.w3", "model.layers.27.block_sparse_moe.experts.131.w3", "model.layers.27.block_sparse_moe.experts.132.w3", "model.layers.27.block_sparse_moe.experts.133.w3", "model.layers.27.block_sparse_moe.experts.134.w3", "model.layers.27.block_sparse_moe.experts.135.w3", "model.layers.27.block_sparse_moe.experts.136.w3", "model.layers.27.block_sparse_moe.experts.137.w3", "model.layers.27.block_sparse_moe.experts.138.w3", "model.layers.27.block_sparse_moe.experts.139.w3", "model.layers.27.block_sparse_moe.experts.140.w3", "model.layers.27.block_sparse_moe.experts.141.w3", "model.layers.27.block_sparse_moe.experts.142.w3", "model.layers.27.block_sparse_moe.experts.143.w3", "model.layers.27.block_sparse_moe.experts.144.w3", "model.layers.27.block_sparse_moe.experts.145.w3", "model.layers.27.block_sparse_moe.experts.146.w3", "model.layers.27.block_sparse_moe.experts.147.w3", "model.layers.27.block_sparse_moe.experts.148.w3", "model.layers.27.block_sparse_moe.experts.149.w3", "model.layers.27.block_sparse_moe.experts.150.w3", "model.layers.27.block_sparse_moe.experts.151.w3", "model.layers.27.block_sparse_moe.experts.152.w3", "model.layers.27.block_sparse_moe.experts.153.w3", "model.layers.27.block_sparse_moe.experts.154.w3", "model.layers.27.block_sparse_moe.experts.155.w3", "model.layers.27.block_sparse_moe.experts.156.w3", "model.layers.27.block_sparse_moe.experts.157.w3", "model.layers.27.block_sparse_moe.experts.158.w3", "model.layers.27.block_sparse_moe.experts.159.w3", "model.layers.27.block_sparse_moe.experts.160.w3", "model.layers.27.block_sparse_moe.experts.161.w3", "model.layers.27.block_sparse_moe.experts.162.w3", "model.layers.27.block_sparse_moe.experts.163.w3", "model.layers.27.block_sparse_moe.experts.164.w3", "model.layers.27.block_sparse_moe.experts.165.w3", "model.layers.27.block_sparse_moe.experts.166.w3", "model.layers.27.block_sparse_moe.experts.167.w3", "model.layers.27.block_sparse_moe.experts.168.w3", "model.layers.27.block_sparse_moe.experts.169.w3", "model.layers.27.block_sparse_moe.experts.170.w3", "model.layers.27.block_sparse_moe.experts.171.w3", "model.layers.27.block_sparse_moe.experts.172.w3", "model.layers.27.block_sparse_moe.experts.173.w3", "model.layers.27.block_sparse_moe.experts.174.w3", "model.layers.27.block_sparse_moe.experts.175.w3", "model.layers.27.block_sparse_moe.experts.176.w3", "model.layers.27.block_sparse_moe.experts.177.w3", "model.layers.27.block_sparse_moe.experts.178.w3", "model.layers.27.block_sparse_moe.experts.179.w3", "model.layers.27.block_sparse_moe.experts.180.w3", "model.layers.27.block_sparse_moe.experts.181.w3", "model.layers.27.block_sparse_moe.experts.182.w3", "model.layers.27.block_sparse_moe.experts.183.w3", "model.layers.27.block_sparse_moe.experts.184.w3", "model.layers.27.block_sparse_moe.experts.185.w3", "model.layers.27.block_sparse_moe.experts.186.w3", "model.layers.27.block_sparse_moe.experts.187.w3", "model.layers.27.block_sparse_moe.experts.188.w3", "model.layers.27.block_sparse_moe.experts.189.w3", "model.layers.27.block_sparse_moe.experts.190.w3", "model.layers.27.block_sparse_moe.experts.191.w3", "model.layers.27.block_sparse_moe.experts.192.w3", "model.layers.27.block_sparse_moe.experts.193.w3", "model.layers.27.block_sparse_moe.experts.194.w3", "model.layers.27.block_sparse_moe.experts.195.w3", "model.layers.27.block_sparse_moe.experts.196.w3", "model.layers.27.block_sparse_moe.experts.197.w3", "model.layers.27.block_sparse_moe.experts.198.w3", "model.layers.27.block_sparse_moe.experts.199.w3", "model.layers.27.block_sparse_moe.experts.200.w3", "model.layers.27.block_sparse_moe.experts.201.w3", "model.layers.27.block_sparse_moe.experts.202.w3", "model.layers.27.block_sparse_moe.experts.203.w3", "model.layers.27.block_sparse_moe.experts.204.w3", "model.layers.27.block_sparse_moe.experts.205.w3", "model.layers.27.block_sparse_moe.experts.206.w3", "model.layers.27.block_sparse_moe.experts.207.w3", "model.layers.27.block_sparse_moe.experts.208.w3", "model.layers.27.block_sparse_moe.experts.209.w3", "model.layers.27.block_sparse_moe.experts.210.w3", "model.layers.27.block_sparse_moe.experts.211.w3", "model.layers.27.block_sparse_moe.experts.212.w3", "model.layers.27.block_sparse_moe.experts.213.w3", "model.layers.27.block_sparse_moe.experts.214.w3", "model.layers.27.block_sparse_moe.experts.215.w3", "model.layers.27.block_sparse_moe.experts.216.w3", "model.layers.27.block_sparse_moe.experts.217.w3", "model.layers.27.block_sparse_moe.experts.218.w3", "model.layers.27.block_sparse_moe.experts.219.w3", "model.layers.27.block_sparse_moe.experts.220.w3", "model.layers.27.block_sparse_moe.experts.221.w3", "model.layers.27.block_sparse_moe.experts.222.w3", "model.layers.27.block_sparse_moe.experts.223.w3", "model.layers.27.block_sparse_moe.experts.224.w3", "model.layers.27.block_sparse_moe.experts.225.w3", "model.layers.27.block_sparse_moe.experts.226.w3", "model.layers.27.block_sparse_moe.experts.227.w3", "model.layers.27.block_sparse_moe.experts.228.w3", "model.layers.27.block_sparse_moe.experts.229.w3", "model.layers.27.block_sparse_moe.experts.230.w3", "model.layers.27.block_sparse_moe.experts.231.w3", "model.layers.27.block_sparse_moe.experts.232.w3", "model.layers.27.block_sparse_moe.experts.233.w3", "model.layers.27.block_sparse_moe.experts.234.w3", "model.layers.27.block_sparse_moe.experts.235.w3", "model.layers.27.block_sparse_moe.experts.236.w3", "model.layers.27.block_sparse_moe.experts.237.w3", "model.layers.27.block_sparse_moe.experts.238.w3", "model.layers.27.block_sparse_moe.experts.239.w3", "model.layers.27.block_sparse_moe.experts.240.w3", "model.layers.27.block_sparse_moe.experts.241.w3", "model.layers.27.block_sparse_moe.experts.242.w3", "model.layers.27.block_sparse_moe.experts.243.w3", "model.layers.27.block_sparse_moe.experts.244.w3", "model.layers.27.block_sparse_moe.experts.245.w3", "model.layers.27.block_sparse_moe.experts.246.w3", "model.layers.27.block_sparse_moe.experts.247.w3", "model.layers.27.block_sparse_moe.experts.248.w3", "model.layers.27.block_sparse_moe.experts.249.w3", "model.layers.27.block_sparse_moe.experts.250.w3", "model.layers.27.block_sparse_moe.experts.251.w3", "model.layers.27.block_sparse_moe.experts.252.w3", "model.layers.27.block_sparse_moe.experts.253.w3", "model.layers.27.block_sparse_moe.experts.254.w3", "model.layers.27.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.004596450924873263, "dbits": 2415919104 } ] }, { "idx": 139, "layers": [ "model.layers.27.block_sparse_moe.experts.0.w2", "model.layers.27.block_sparse_moe.experts.1.w2", "model.layers.27.block_sparse_moe.experts.2.w2", "model.layers.27.block_sparse_moe.experts.3.w2", "model.layers.27.block_sparse_moe.experts.4.w2", "model.layers.27.block_sparse_moe.experts.5.w2", "model.layers.27.block_sparse_moe.experts.6.w2", "model.layers.27.block_sparse_moe.experts.7.w2", "model.layers.27.block_sparse_moe.experts.8.w2", "model.layers.27.block_sparse_moe.experts.9.w2", "model.layers.27.block_sparse_moe.experts.10.w2", "model.layers.27.block_sparse_moe.experts.11.w2", "model.layers.27.block_sparse_moe.experts.12.w2", "model.layers.27.block_sparse_moe.experts.13.w2", "model.layers.27.block_sparse_moe.experts.14.w2", "model.layers.27.block_sparse_moe.experts.15.w2", "model.layers.27.block_sparse_moe.experts.16.w2", "model.layers.27.block_sparse_moe.experts.17.w2", "model.layers.27.block_sparse_moe.experts.18.w2", "model.layers.27.block_sparse_moe.experts.19.w2", "model.layers.27.block_sparse_moe.experts.20.w2", "model.layers.27.block_sparse_moe.experts.21.w2", "model.layers.27.block_sparse_moe.experts.22.w2", "model.layers.27.block_sparse_moe.experts.23.w2", "model.layers.27.block_sparse_moe.experts.24.w2", "model.layers.27.block_sparse_moe.experts.25.w2", "model.layers.27.block_sparse_moe.experts.26.w2", "model.layers.27.block_sparse_moe.experts.27.w2", "model.layers.27.block_sparse_moe.experts.28.w2", "model.layers.27.block_sparse_moe.experts.29.w2", "model.layers.27.block_sparse_moe.experts.30.w2", "model.layers.27.block_sparse_moe.experts.31.w2", "model.layers.27.block_sparse_moe.experts.32.w2", "model.layers.27.block_sparse_moe.experts.33.w2", "model.layers.27.block_sparse_moe.experts.34.w2", "model.layers.27.block_sparse_moe.experts.35.w2", "model.layers.27.block_sparse_moe.experts.36.w2", "model.layers.27.block_sparse_moe.experts.37.w2", "model.layers.27.block_sparse_moe.experts.38.w2", "model.layers.27.block_sparse_moe.experts.39.w2", "model.layers.27.block_sparse_moe.experts.40.w2", "model.layers.27.block_sparse_moe.experts.41.w2", "model.layers.27.block_sparse_moe.experts.42.w2", "model.layers.27.block_sparse_moe.experts.43.w2", "model.layers.27.block_sparse_moe.experts.44.w2", "model.layers.27.block_sparse_moe.experts.45.w2", "model.layers.27.block_sparse_moe.experts.46.w2", "model.layers.27.block_sparse_moe.experts.47.w2", "model.layers.27.block_sparse_moe.experts.48.w2", "model.layers.27.block_sparse_moe.experts.49.w2", "model.layers.27.block_sparse_moe.experts.50.w2", "model.layers.27.block_sparse_moe.experts.51.w2", "model.layers.27.block_sparse_moe.experts.52.w2", "model.layers.27.block_sparse_moe.experts.53.w2", "model.layers.27.block_sparse_moe.experts.54.w2", "model.layers.27.block_sparse_moe.experts.55.w2", "model.layers.27.block_sparse_moe.experts.56.w2", "model.layers.27.block_sparse_moe.experts.57.w2", "model.layers.27.block_sparse_moe.experts.58.w2", "model.layers.27.block_sparse_moe.experts.59.w2", "model.layers.27.block_sparse_moe.experts.60.w2", "model.layers.27.block_sparse_moe.experts.61.w2", "model.layers.27.block_sparse_moe.experts.62.w2", "model.layers.27.block_sparse_moe.experts.63.w2", "model.layers.27.block_sparse_moe.experts.64.w2", "model.layers.27.block_sparse_moe.experts.65.w2", "model.layers.27.block_sparse_moe.experts.66.w2", "model.layers.27.block_sparse_moe.experts.67.w2", "model.layers.27.block_sparse_moe.experts.68.w2", "model.layers.27.block_sparse_moe.experts.69.w2", "model.layers.27.block_sparse_moe.experts.70.w2", "model.layers.27.block_sparse_moe.experts.71.w2", "model.layers.27.block_sparse_moe.experts.72.w2", "model.layers.27.block_sparse_moe.experts.73.w2", "model.layers.27.block_sparse_moe.experts.74.w2", "model.layers.27.block_sparse_moe.experts.75.w2", "model.layers.27.block_sparse_moe.experts.76.w2", "model.layers.27.block_sparse_moe.experts.77.w2", "model.layers.27.block_sparse_moe.experts.78.w2", "model.layers.27.block_sparse_moe.experts.79.w2", "model.layers.27.block_sparse_moe.experts.80.w2", "model.layers.27.block_sparse_moe.experts.81.w2", "model.layers.27.block_sparse_moe.experts.82.w2", "model.layers.27.block_sparse_moe.experts.83.w2", "model.layers.27.block_sparse_moe.experts.84.w2", "model.layers.27.block_sparse_moe.experts.85.w2", "model.layers.27.block_sparse_moe.experts.86.w2", "model.layers.27.block_sparse_moe.experts.87.w2", "model.layers.27.block_sparse_moe.experts.88.w2", "model.layers.27.block_sparse_moe.experts.89.w2", "model.layers.27.block_sparse_moe.experts.90.w2", "model.layers.27.block_sparse_moe.experts.91.w2", "model.layers.27.block_sparse_moe.experts.92.w2", "model.layers.27.block_sparse_moe.experts.93.w2", "model.layers.27.block_sparse_moe.experts.94.w2", "model.layers.27.block_sparse_moe.experts.95.w2", "model.layers.27.block_sparse_moe.experts.96.w2", "model.layers.27.block_sparse_moe.experts.97.w2", "model.layers.27.block_sparse_moe.experts.98.w2", "model.layers.27.block_sparse_moe.experts.99.w2", "model.layers.27.block_sparse_moe.experts.100.w2", "model.layers.27.block_sparse_moe.experts.101.w2", "model.layers.27.block_sparse_moe.experts.102.w2", "model.layers.27.block_sparse_moe.experts.103.w2", "model.layers.27.block_sparse_moe.experts.104.w2", "model.layers.27.block_sparse_moe.experts.105.w2", "model.layers.27.block_sparse_moe.experts.106.w2", "model.layers.27.block_sparse_moe.experts.107.w2", "model.layers.27.block_sparse_moe.experts.108.w2", "model.layers.27.block_sparse_moe.experts.109.w2", "model.layers.27.block_sparse_moe.experts.110.w2", "model.layers.27.block_sparse_moe.experts.111.w2", "model.layers.27.block_sparse_moe.experts.112.w2", "model.layers.27.block_sparse_moe.experts.113.w2", "model.layers.27.block_sparse_moe.experts.114.w2", "model.layers.27.block_sparse_moe.experts.115.w2", "model.layers.27.block_sparse_moe.experts.116.w2", "model.layers.27.block_sparse_moe.experts.117.w2", "model.layers.27.block_sparse_moe.experts.118.w2", "model.layers.27.block_sparse_moe.experts.119.w2", "model.layers.27.block_sparse_moe.experts.120.w2", "model.layers.27.block_sparse_moe.experts.121.w2", "model.layers.27.block_sparse_moe.experts.122.w2", "model.layers.27.block_sparse_moe.experts.123.w2", "model.layers.27.block_sparse_moe.experts.124.w2", "model.layers.27.block_sparse_moe.experts.125.w2", "model.layers.27.block_sparse_moe.experts.126.w2", "model.layers.27.block_sparse_moe.experts.127.w2", "model.layers.27.block_sparse_moe.experts.128.w2", "model.layers.27.block_sparse_moe.experts.129.w2", "model.layers.27.block_sparse_moe.experts.130.w2", "model.layers.27.block_sparse_moe.experts.131.w2", "model.layers.27.block_sparse_moe.experts.132.w2", "model.layers.27.block_sparse_moe.experts.133.w2", "model.layers.27.block_sparse_moe.experts.134.w2", "model.layers.27.block_sparse_moe.experts.135.w2", "model.layers.27.block_sparse_moe.experts.136.w2", "model.layers.27.block_sparse_moe.experts.137.w2", "model.layers.27.block_sparse_moe.experts.138.w2", "model.layers.27.block_sparse_moe.experts.139.w2", "model.layers.27.block_sparse_moe.experts.140.w2", "model.layers.27.block_sparse_moe.experts.141.w2", "model.layers.27.block_sparse_moe.experts.142.w2", "model.layers.27.block_sparse_moe.experts.143.w2", "model.layers.27.block_sparse_moe.experts.144.w2", "model.layers.27.block_sparse_moe.experts.145.w2", "model.layers.27.block_sparse_moe.experts.146.w2", "model.layers.27.block_sparse_moe.experts.147.w2", "model.layers.27.block_sparse_moe.experts.148.w2", "model.layers.27.block_sparse_moe.experts.149.w2", "model.layers.27.block_sparse_moe.experts.150.w2", "model.layers.27.block_sparse_moe.experts.151.w2", "model.layers.27.block_sparse_moe.experts.152.w2", "model.layers.27.block_sparse_moe.experts.153.w2", "model.layers.27.block_sparse_moe.experts.154.w2", "model.layers.27.block_sparse_moe.experts.155.w2", "model.layers.27.block_sparse_moe.experts.156.w2", "model.layers.27.block_sparse_moe.experts.157.w2", "model.layers.27.block_sparse_moe.experts.158.w2", "model.layers.27.block_sparse_moe.experts.159.w2", "model.layers.27.block_sparse_moe.experts.160.w2", "model.layers.27.block_sparse_moe.experts.161.w2", "model.layers.27.block_sparse_moe.experts.162.w2", "model.layers.27.block_sparse_moe.experts.163.w2", "model.layers.27.block_sparse_moe.experts.164.w2", "model.layers.27.block_sparse_moe.experts.165.w2", "model.layers.27.block_sparse_moe.experts.166.w2", "model.layers.27.block_sparse_moe.experts.167.w2", "model.layers.27.block_sparse_moe.experts.168.w2", "model.layers.27.block_sparse_moe.experts.169.w2", "model.layers.27.block_sparse_moe.experts.170.w2", "model.layers.27.block_sparse_moe.experts.171.w2", "model.layers.27.block_sparse_moe.experts.172.w2", "model.layers.27.block_sparse_moe.experts.173.w2", "model.layers.27.block_sparse_moe.experts.174.w2", "model.layers.27.block_sparse_moe.experts.175.w2", "model.layers.27.block_sparse_moe.experts.176.w2", "model.layers.27.block_sparse_moe.experts.177.w2", "model.layers.27.block_sparse_moe.experts.178.w2", "model.layers.27.block_sparse_moe.experts.179.w2", "model.layers.27.block_sparse_moe.experts.180.w2", "model.layers.27.block_sparse_moe.experts.181.w2", "model.layers.27.block_sparse_moe.experts.182.w2", "model.layers.27.block_sparse_moe.experts.183.w2", "model.layers.27.block_sparse_moe.experts.184.w2", "model.layers.27.block_sparse_moe.experts.185.w2", "model.layers.27.block_sparse_moe.experts.186.w2", "model.layers.27.block_sparse_moe.experts.187.w2", "model.layers.27.block_sparse_moe.experts.188.w2", "model.layers.27.block_sparse_moe.experts.189.w2", "model.layers.27.block_sparse_moe.experts.190.w2", "model.layers.27.block_sparse_moe.experts.191.w2", "model.layers.27.block_sparse_moe.experts.192.w2", "model.layers.27.block_sparse_moe.experts.193.w2", "model.layers.27.block_sparse_moe.experts.194.w2", "model.layers.27.block_sparse_moe.experts.195.w2", "model.layers.27.block_sparse_moe.experts.196.w2", "model.layers.27.block_sparse_moe.experts.197.w2", "model.layers.27.block_sparse_moe.experts.198.w2", "model.layers.27.block_sparse_moe.experts.199.w2", "model.layers.27.block_sparse_moe.experts.200.w2", "model.layers.27.block_sparse_moe.experts.201.w2", "model.layers.27.block_sparse_moe.experts.202.w2", "model.layers.27.block_sparse_moe.experts.203.w2", "model.layers.27.block_sparse_moe.experts.204.w2", "model.layers.27.block_sparse_moe.experts.205.w2", "model.layers.27.block_sparse_moe.experts.206.w2", "model.layers.27.block_sparse_moe.experts.207.w2", "model.layers.27.block_sparse_moe.experts.208.w2", "model.layers.27.block_sparse_moe.experts.209.w2", "model.layers.27.block_sparse_moe.experts.210.w2", "model.layers.27.block_sparse_moe.experts.211.w2", "model.layers.27.block_sparse_moe.experts.212.w2", "model.layers.27.block_sparse_moe.experts.213.w2", "model.layers.27.block_sparse_moe.experts.214.w2", "model.layers.27.block_sparse_moe.experts.215.w2", "model.layers.27.block_sparse_moe.experts.216.w2", "model.layers.27.block_sparse_moe.experts.217.w2", "model.layers.27.block_sparse_moe.experts.218.w2", "model.layers.27.block_sparse_moe.experts.219.w2", "model.layers.27.block_sparse_moe.experts.220.w2", "model.layers.27.block_sparse_moe.experts.221.w2", "model.layers.27.block_sparse_moe.experts.222.w2", "model.layers.27.block_sparse_moe.experts.223.w2", "model.layers.27.block_sparse_moe.experts.224.w2", "model.layers.27.block_sparse_moe.experts.225.w2", "model.layers.27.block_sparse_moe.experts.226.w2", "model.layers.27.block_sparse_moe.experts.227.w2", "model.layers.27.block_sparse_moe.experts.228.w2", "model.layers.27.block_sparse_moe.experts.229.w2", "model.layers.27.block_sparse_moe.experts.230.w2", "model.layers.27.block_sparse_moe.experts.231.w2", "model.layers.27.block_sparse_moe.experts.232.w2", "model.layers.27.block_sparse_moe.experts.233.w2", "model.layers.27.block_sparse_moe.experts.234.w2", "model.layers.27.block_sparse_moe.experts.235.w2", "model.layers.27.block_sparse_moe.experts.236.w2", "model.layers.27.block_sparse_moe.experts.237.w2", "model.layers.27.block_sparse_moe.experts.238.w2", "model.layers.27.block_sparse_moe.experts.239.w2", "model.layers.27.block_sparse_moe.experts.240.w2", "model.layers.27.block_sparse_moe.experts.241.w2", "model.layers.27.block_sparse_moe.experts.242.w2", "model.layers.27.block_sparse_moe.experts.243.w2", "model.layers.27.block_sparse_moe.experts.244.w2", "model.layers.27.block_sparse_moe.experts.245.w2", "model.layers.27.block_sparse_moe.experts.246.w2", "model.layers.27.block_sparse_moe.experts.247.w2", "model.layers.27.block_sparse_moe.experts.248.w2", "model.layers.27.block_sparse_moe.experts.249.w2", "model.layers.27.block_sparse_moe.experts.250.w2", "model.layers.27.block_sparse_moe.experts.251.w2", "model.layers.27.block_sparse_moe.experts.252.w2", "model.layers.27.block_sparse_moe.experts.253.w2", "model.layers.27.block_sparse_moe.experts.254.w2", "model.layers.27.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0007415592670440008, "dbits": 1207959552 } ] }, { "idx": 140, "layers": [ "model.layers.28.self_attn.q_proj" ], "candidates": [ { "dkld": -0.003289148211479187, "dbits": 18874368 } ] }, { "idx": 141, "layers": [ "model.layers.28.self_attn.k_proj", "model.layers.28.self_attn.v_proj" ], "candidates": [ { "dkld": 0.005977320671081587, "dbits": 6291456 } ] }, { "idx": 142, "layers": [ "model.layers.28.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00246637165546415, "dbits": 18874368 } ] }, { "idx": 143, "layers": [ "model.layers.28.block_sparse_moe.experts.0.w1", "model.layers.28.block_sparse_moe.experts.1.w1", "model.layers.28.block_sparse_moe.experts.2.w1", "model.layers.28.block_sparse_moe.experts.3.w1", "model.layers.28.block_sparse_moe.experts.4.w1", "model.layers.28.block_sparse_moe.experts.5.w1", "model.layers.28.block_sparse_moe.experts.6.w1", "model.layers.28.block_sparse_moe.experts.7.w1", "model.layers.28.block_sparse_moe.experts.8.w1", "model.layers.28.block_sparse_moe.experts.9.w1", "model.layers.28.block_sparse_moe.experts.10.w1", "model.layers.28.block_sparse_moe.experts.11.w1", "model.layers.28.block_sparse_moe.experts.12.w1", "model.layers.28.block_sparse_moe.experts.13.w1", "model.layers.28.block_sparse_moe.experts.14.w1", "model.layers.28.block_sparse_moe.experts.15.w1", "model.layers.28.block_sparse_moe.experts.16.w1", "model.layers.28.block_sparse_moe.experts.17.w1", "model.layers.28.block_sparse_moe.experts.18.w1", "model.layers.28.block_sparse_moe.experts.19.w1", "model.layers.28.block_sparse_moe.experts.20.w1", "model.layers.28.block_sparse_moe.experts.21.w1", "model.layers.28.block_sparse_moe.experts.22.w1", "model.layers.28.block_sparse_moe.experts.23.w1", "model.layers.28.block_sparse_moe.experts.24.w1", "model.layers.28.block_sparse_moe.experts.25.w1", "model.layers.28.block_sparse_moe.experts.26.w1", "model.layers.28.block_sparse_moe.experts.27.w1", "model.layers.28.block_sparse_moe.experts.28.w1", "model.layers.28.block_sparse_moe.experts.29.w1", "model.layers.28.block_sparse_moe.experts.30.w1", "model.layers.28.block_sparse_moe.experts.31.w1", "model.layers.28.block_sparse_moe.experts.32.w1", "model.layers.28.block_sparse_moe.experts.33.w1", "model.layers.28.block_sparse_moe.experts.34.w1", "model.layers.28.block_sparse_moe.experts.35.w1", "model.layers.28.block_sparse_moe.experts.36.w1", "model.layers.28.block_sparse_moe.experts.37.w1", "model.layers.28.block_sparse_moe.experts.38.w1", "model.layers.28.block_sparse_moe.experts.39.w1", "model.layers.28.block_sparse_moe.experts.40.w1", "model.layers.28.block_sparse_moe.experts.41.w1", "model.layers.28.block_sparse_moe.experts.42.w1", "model.layers.28.block_sparse_moe.experts.43.w1", "model.layers.28.block_sparse_moe.experts.44.w1", "model.layers.28.block_sparse_moe.experts.45.w1", "model.layers.28.block_sparse_moe.experts.46.w1", "model.layers.28.block_sparse_moe.experts.47.w1", "model.layers.28.block_sparse_moe.experts.48.w1", "model.layers.28.block_sparse_moe.experts.49.w1", "model.layers.28.block_sparse_moe.experts.50.w1", "model.layers.28.block_sparse_moe.experts.51.w1", "model.layers.28.block_sparse_moe.experts.52.w1", "model.layers.28.block_sparse_moe.experts.53.w1", "model.layers.28.block_sparse_moe.experts.54.w1", "model.layers.28.block_sparse_moe.experts.55.w1", "model.layers.28.block_sparse_moe.experts.56.w1", "model.layers.28.block_sparse_moe.experts.57.w1", "model.layers.28.block_sparse_moe.experts.58.w1", "model.layers.28.block_sparse_moe.experts.59.w1", "model.layers.28.block_sparse_moe.experts.60.w1", "model.layers.28.block_sparse_moe.experts.61.w1", "model.layers.28.block_sparse_moe.experts.62.w1", "model.layers.28.block_sparse_moe.experts.63.w1", "model.layers.28.block_sparse_moe.experts.64.w1", "model.layers.28.block_sparse_moe.experts.65.w1", "model.layers.28.block_sparse_moe.experts.66.w1", "model.layers.28.block_sparse_moe.experts.67.w1", "model.layers.28.block_sparse_moe.experts.68.w1", "model.layers.28.block_sparse_moe.experts.69.w1", "model.layers.28.block_sparse_moe.experts.70.w1", "model.layers.28.block_sparse_moe.experts.71.w1", "model.layers.28.block_sparse_moe.experts.72.w1", "model.layers.28.block_sparse_moe.experts.73.w1", "model.layers.28.block_sparse_moe.experts.74.w1", "model.layers.28.block_sparse_moe.experts.75.w1", "model.layers.28.block_sparse_moe.experts.76.w1", "model.layers.28.block_sparse_moe.experts.77.w1", "model.layers.28.block_sparse_moe.experts.78.w1", "model.layers.28.block_sparse_moe.experts.79.w1", "model.layers.28.block_sparse_moe.experts.80.w1", "model.layers.28.block_sparse_moe.experts.81.w1", "model.layers.28.block_sparse_moe.experts.82.w1", "model.layers.28.block_sparse_moe.experts.83.w1", "model.layers.28.block_sparse_moe.experts.84.w1", "model.layers.28.block_sparse_moe.experts.85.w1", "model.layers.28.block_sparse_moe.experts.86.w1", "model.layers.28.block_sparse_moe.experts.87.w1", "model.layers.28.block_sparse_moe.experts.88.w1", "model.layers.28.block_sparse_moe.experts.89.w1", "model.layers.28.block_sparse_moe.experts.90.w1", "model.layers.28.block_sparse_moe.experts.91.w1", "model.layers.28.block_sparse_moe.experts.92.w1", "model.layers.28.block_sparse_moe.experts.93.w1", "model.layers.28.block_sparse_moe.experts.94.w1", "model.layers.28.block_sparse_moe.experts.95.w1", "model.layers.28.block_sparse_moe.experts.96.w1", "model.layers.28.block_sparse_moe.experts.97.w1", "model.layers.28.block_sparse_moe.experts.98.w1", "model.layers.28.block_sparse_moe.experts.99.w1", "model.layers.28.block_sparse_moe.experts.100.w1", "model.layers.28.block_sparse_moe.experts.101.w1", "model.layers.28.block_sparse_moe.experts.102.w1", "model.layers.28.block_sparse_moe.experts.103.w1", "model.layers.28.block_sparse_moe.experts.104.w1", "model.layers.28.block_sparse_moe.experts.105.w1", "model.layers.28.block_sparse_moe.experts.106.w1", "model.layers.28.block_sparse_moe.experts.107.w1", "model.layers.28.block_sparse_moe.experts.108.w1", "model.layers.28.block_sparse_moe.experts.109.w1", "model.layers.28.block_sparse_moe.experts.110.w1", "model.layers.28.block_sparse_moe.experts.111.w1", "model.layers.28.block_sparse_moe.experts.112.w1", "model.layers.28.block_sparse_moe.experts.113.w1", "model.layers.28.block_sparse_moe.experts.114.w1", "model.layers.28.block_sparse_moe.experts.115.w1", "model.layers.28.block_sparse_moe.experts.116.w1", "model.layers.28.block_sparse_moe.experts.117.w1", "model.layers.28.block_sparse_moe.experts.118.w1", "model.layers.28.block_sparse_moe.experts.119.w1", "model.layers.28.block_sparse_moe.experts.120.w1", "model.layers.28.block_sparse_moe.experts.121.w1", "model.layers.28.block_sparse_moe.experts.122.w1", "model.layers.28.block_sparse_moe.experts.123.w1", "model.layers.28.block_sparse_moe.experts.124.w1", "model.layers.28.block_sparse_moe.experts.125.w1", "model.layers.28.block_sparse_moe.experts.126.w1", "model.layers.28.block_sparse_moe.experts.127.w1", "model.layers.28.block_sparse_moe.experts.128.w1", "model.layers.28.block_sparse_moe.experts.129.w1", "model.layers.28.block_sparse_moe.experts.130.w1", "model.layers.28.block_sparse_moe.experts.131.w1", "model.layers.28.block_sparse_moe.experts.132.w1", "model.layers.28.block_sparse_moe.experts.133.w1", "model.layers.28.block_sparse_moe.experts.134.w1", "model.layers.28.block_sparse_moe.experts.135.w1", "model.layers.28.block_sparse_moe.experts.136.w1", "model.layers.28.block_sparse_moe.experts.137.w1", "model.layers.28.block_sparse_moe.experts.138.w1", "model.layers.28.block_sparse_moe.experts.139.w1", "model.layers.28.block_sparse_moe.experts.140.w1", "model.layers.28.block_sparse_moe.experts.141.w1", "model.layers.28.block_sparse_moe.experts.142.w1", "model.layers.28.block_sparse_moe.experts.143.w1", "model.layers.28.block_sparse_moe.experts.144.w1", "model.layers.28.block_sparse_moe.experts.145.w1", "model.layers.28.block_sparse_moe.experts.146.w1", "model.layers.28.block_sparse_moe.experts.147.w1", "model.layers.28.block_sparse_moe.experts.148.w1", "model.layers.28.block_sparse_moe.experts.149.w1", "model.layers.28.block_sparse_moe.experts.150.w1", "model.layers.28.block_sparse_moe.experts.151.w1", "model.layers.28.block_sparse_moe.experts.152.w1", "model.layers.28.block_sparse_moe.experts.153.w1", "model.layers.28.block_sparse_moe.experts.154.w1", "model.layers.28.block_sparse_moe.experts.155.w1", "model.layers.28.block_sparse_moe.experts.156.w1", "model.layers.28.block_sparse_moe.experts.157.w1", "model.layers.28.block_sparse_moe.experts.158.w1", "model.layers.28.block_sparse_moe.experts.159.w1", "model.layers.28.block_sparse_moe.experts.160.w1", "model.layers.28.block_sparse_moe.experts.161.w1", "model.layers.28.block_sparse_moe.experts.162.w1", "model.layers.28.block_sparse_moe.experts.163.w1", "model.layers.28.block_sparse_moe.experts.164.w1", "model.layers.28.block_sparse_moe.experts.165.w1", "model.layers.28.block_sparse_moe.experts.166.w1", "model.layers.28.block_sparse_moe.experts.167.w1", "model.layers.28.block_sparse_moe.experts.168.w1", "model.layers.28.block_sparse_moe.experts.169.w1", "model.layers.28.block_sparse_moe.experts.170.w1", "model.layers.28.block_sparse_moe.experts.171.w1", "model.layers.28.block_sparse_moe.experts.172.w1", "model.layers.28.block_sparse_moe.experts.173.w1", "model.layers.28.block_sparse_moe.experts.174.w1", "model.layers.28.block_sparse_moe.experts.175.w1", "model.layers.28.block_sparse_moe.experts.176.w1", "model.layers.28.block_sparse_moe.experts.177.w1", "model.layers.28.block_sparse_moe.experts.178.w1", "model.layers.28.block_sparse_moe.experts.179.w1", "model.layers.28.block_sparse_moe.experts.180.w1", "model.layers.28.block_sparse_moe.experts.181.w1", "model.layers.28.block_sparse_moe.experts.182.w1", "model.layers.28.block_sparse_moe.experts.183.w1", "model.layers.28.block_sparse_moe.experts.184.w1", "model.layers.28.block_sparse_moe.experts.185.w1", "model.layers.28.block_sparse_moe.experts.186.w1", "model.layers.28.block_sparse_moe.experts.187.w1", "model.layers.28.block_sparse_moe.experts.188.w1", "model.layers.28.block_sparse_moe.experts.189.w1", "model.layers.28.block_sparse_moe.experts.190.w1", "model.layers.28.block_sparse_moe.experts.191.w1", "model.layers.28.block_sparse_moe.experts.192.w1", "model.layers.28.block_sparse_moe.experts.193.w1", "model.layers.28.block_sparse_moe.experts.194.w1", "model.layers.28.block_sparse_moe.experts.195.w1", "model.layers.28.block_sparse_moe.experts.196.w1", "model.layers.28.block_sparse_moe.experts.197.w1", "model.layers.28.block_sparse_moe.experts.198.w1", "model.layers.28.block_sparse_moe.experts.199.w1", "model.layers.28.block_sparse_moe.experts.200.w1", "model.layers.28.block_sparse_moe.experts.201.w1", "model.layers.28.block_sparse_moe.experts.202.w1", "model.layers.28.block_sparse_moe.experts.203.w1", "model.layers.28.block_sparse_moe.experts.204.w1", "model.layers.28.block_sparse_moe.experts.205.w1", "model.layers.28.block_sparse_moe.experts.206.w1", "model.layers.28.block_sparse_moe.experts.207.w1", "model.layers.28.block_sparse_moe.experts.208.w1", "model.layers.28.block_sparse_moe.experts.209.w1", "model.layers.28.block_sparse_moe.experts.210.w1", "model.layers.28.block_sparse_moe.experts.211.w1", "model.layers.28.block_sparse_moe.experts.212.w1", "model.layers.28.block_sparse_moe.experts.213.w1", "model.layers.28.block_sparse_moe.experts.214.w1", "model.layers.28.block_sparse_moe.experts.215.w1", "model.layers.28.block_sparse_moe.experts.216.w1", "model.layers.28.block_sparse_moe.experts.217.w1", "model.layers.28.block_sparse_moe.experts.218.w1", "model.layers.28.block_sparse_moe.experts.219.w1", "model.layers.28.block_sparse_moe.experts.220.w1", "model.layers.28.block_sparse_moe.experts.221.w1", "model.layers.28.block_sparse_moe.experts.222.w1", "model.layers.28.block_sparse_moe.experts.223.w1", "model.layers.28.block_sparse_moe.experts.224.w1", "model.layers.28.block_sparse_moe.experts.225.w1", "model.layers.28.block_sparse_moe.experts.226.w1", "model.layers.28.block_sparse_moe.experts.227.w1", "model.layers.28.block_sparse_moe.experts.228.w1", "model.layers.28.block_sparse_moe.experts.229.w1", "model.layers.28.block_sparse_moe.experts.230.w1", "model.layers.28.block_sparse_moe.experts.231.w1", "model.layers.28.block_sparse_moe.experts.232.w1", "model.layers.28.block_sparse_moe.experts.233.w1", "model.layers.28.block_sparse_moe.experts.234.w1", "model.layers.28.block_sparse_moe.experts.235.w1", "model.layers.28.block_sparse_moe.experts.236.w1", "model.layers.28.block_sparse_moe.experts.237.w1", "model.layers.28.block_sparse_moe.experts.238.w1", "model.layers.28.block_sparse_moe.experts.239.w1", "model.layers.28.block_sparse_moe.experts.240.w1", "model.layers.28.block_sparse_moe.experts.241.w1", "model.layers.28.block_sparse_moe.experts.242.w1", "model.layers.28.block_sparse_moe.experts.243.w1", "model.layers.28.block_sparse_moe.experts.244.w1", "model.layers.28.block_sparse_moe.experts.245.w1", "model.layers.28.block_sparse_moe.experts.246.w1", "model.layers.28.block_sparse_moe.experts.247.w1", "model.layers.28.block_sparse_moe.experts.248.w1", "model.layers.28.block_sparse_moe.experts.249.w1", "model.layers.28.block_sparse_moe.experts.250.w1", "model.layers.28.block_sparse_moe.experts.251.w1", "model.layers.28.block_sparse_moe.experts.252.w1", "model.layers.28.block_sparse_moe.experts.253.w1", "model.layers.28.block_sparse_moe.experts.254.w1", "model.layers.28.block_sparse_moe.experts.255.w1", "model.layers.28.block_sparse_moe.experts.0.w3", "model.layers.28.block_sparse_moe.experts.1.w3", "model.layers.28.block_sparse_moe.experts.2.w3", "model.layers.28.block_sparse_moe.experts.3.w3", "model.layers.28.block_sparse_moe.experts.4.w3", "model.layers.28.block_sparse_moe.experts.5.w3", "model.layers.28.block_sparse_moe.experts.6.w3", "model.layers.28.block_sparse_moe.experts.7.w3", "model.layers.28.block_sparse_moe.experts.8.w3", "model.layers.28.block_sparse_moe.experts.9.w3", "model.layers.28.block_sparse_moe.experts.10.w3", "model.layers.28.block_sparse_moe.experts.11.w3", "model.layers.28.block_sparse_moe.experts.12.w3", "model.layers.28.block_sparse_moe.experts.13.w3", "model.layers.28.block_sparse_moe.experts.14.w3", "model.layers.28.block_sparse_moe.experts.15.w3", "model.layers.28.block_sparse_moe.experts.16.w3", "model.layers.28.block_sparse_moe.experts.17.w3", "model.layers.28.block_sparse_moe.experts.18.w3", "model.layers.28.block_sparse_moe.experts.19.w3", "model.layers.28.block_sparse_moe.experts.20.w3", "model.layers.28.block_sparse_moe.experts.21.w3", "model.layers.28.block_sparse_moe.experts.22.w3", "model.layers.28.block_sparse_moe.experts.23.w3", "model.layers.28.block_sparse_moe.experts.24.w3", "model.layers.28.block_sparse_moe.experts.25.w3", "model.layers.28.block_sparse_moe.experts.26.w3", "model.layers.28.block_sparse_moe.experts.27.w3", "model.layers.28.block_sparse_moe.experts.28.w3", "model.layers.28.block_sparse_moe.experts.29.w3", "model.layers.28.block_sparse_moe.experts.30.w3", "model.layers.28.block_sparse_moe.experts.31.w3", "model.layers.28.block_sparse_moe.experts.32.w3", "model.layers.28.block_sparse_moe.experts.33.w3", "model.layers.28.block_sparse_moe.experts.34.w3", "model.layers.28.block_sparse_moe.experts.35.w3", "model.layers.28.block_sparse_moe.experts.36.w3", "model.layers.28.block_sparse_moe.experts.37.w3", "model.layers.28.block_sparse_moe.experts.38.w3", "model.layers.28.block_sparse_moe.experts.39.w3", "model.layers.28.block_sparse_moe.experts.40.w3", "model.layers.28.block_sparse_moe.experts.41.w3", "model.layers.28.block_sparse_moe.experts.42.w3", "model.layers.28.block_sparse_moe.experts.43.w3", "model.layers.28.block_sparse_moe.experts.44.w3", "model.layers.28.block_sparse_moe.experts.45.w3", "model.layers.28.block_sparse_moe.experts.46.w3", "model.layers.28.block_sparse_moe.experts.47.w3", "model.layers.28.block_sparse_moe.experts.48.w3", "model.layers.28.block_sparse_moe.experts.49.w3", "model.layers.28.block_sparse_moe.experts.50.w3", "model.layers.28.block_sparse_moe.experts.51.w3", "model.layers.28.block_sparse_moe.experts.52.w3", "model.layers.28.block_sparse_moe.experts.53.w3", "model.layers.28.block_sparse_moe.experts.54.w3", "model.layers.28.block_sparse_moe.experts.55.w3", "model.layers.28.block_sparse_moe.experts.56.w3", "model.layers.28.block_sparse_moe.experts.57.w3", "model.layers.28.block_sparse_moe.experts.58.w3", "model.layers.28.block_sparse_moe.experts.59.w3", "model.layers.28.block_sparse_moe.experts.60.w3", "model.layers.28.block_sparse_moe.experts.61.w3", "model.layers.28.block_sparse_moe.experts.62.w3", "model.layers.28.block_sparse_moe.experts.63.w3", "model.layers.28.block_sparse_moe.experts.64.w3", "model.layers.28.block_sparse_moe.experts.65.w3", "model.layers.28.block_sparse_moe.experts.66.w3", "model.layers.28.block_sparse_moe.experts.67.w3", "model.layers.28.block_sparse_moe.experts.68.w3", "model.layers.28.block_sparse_moe.experts.69.w3", "model.layers.28.block_sparse_moe.experts.70.w3", "model.layers.28.block_sparse_moe.experts.71.w3", "model.layers.28.block_sparse_moe.experts.72.w3", "model.layers.28.block_sparse_moe.experts.73.w3", "model.layers.28.block_sparse_moe.experts.74.w3", "model.layers.28.block_sparse_moe.experts.75.w3", "model.layers.28.block_sparse_moe.experts.76.w3", "model.layers.28.block_sparse_moe.experts.77.w3", "model.layers.28.block_sparse_moe.experts.78.w3", "model.layers.28.block_sparse_moe.experts.79.w3", "model.layers.28.block_sparse_moe.experts.80.w3", "model.layers.28.block_sparse_moe.experts.81.w3", "model.layers.28.block_sparse_moe.experts.82.w3", "model.layers.28.block_sparse_moe.experts.83.w3", "model.layers.28.block_sparse_moe.experts.84.w3", "model.layers.28.block_sparse_moe.experts.85.w3", "model.layers.28.block_sparse_moe.experts.86.w3", "model.layers.28.block_sparse_moe.experts.87.w3", "model.layers.28.block_sparse_moe.experts.88.w3", "model.layers.28.block_sparse_moe.experts.89.w3", "model.layers.28.block_sparse_moe.experts.90.w3", "model.layers.28.block_sparse_moe.experts.91.w3", "model.layers.28.block_sparse_moe.experts.92.w3", "model.layers.28.block_sparse_moe.experts.93.w3", "model.layers.28.block_sparse_moe.experts.94.w3", "model.layers.28.block_sparse_moe.experts.95.w3", "model.layers.28.block_sparse_moe.experts.96.w3", "model.layers.28.block_sparse_moe.experts.97.w3", "model.layers.28.block_sparse_moe.experts.98.w3", "model.layers.28.block_sparse_moe.experts.99.w3", "model.layers.28.block_sparse_moe.experts.100.w3", "model.layers.28.block_sparse_moe.experts.101.w3", "model.layers.28.block_sparse_moe.experts.102.w3", "model.layers.28.block_sparse_moe.experts.103.w3", "model.layers.28.block_sparse_moe.experts.104.w3", "model.layers.28.block_sparse_moe.experts.105.w3", "model.layers.28.block_sparse_moe.experts.106.w3", "model.layers.28.block_sparse_moe.experts.107.w3", "model.layers.28.block_sparse_moe.experts.108.w3", "model.layers.28.block_sparse_moe.experts.109.w3", "model.layers.28.block_sparse_moe.experts.110.w3", "model.layers.28.block_sparse_moe.experts.111.w3", "model.layers.28.block_sparse_moe.experts.112.w3", "model.layers.28.block_sparse_moe.experts.113.w3", "model.layers.28.block_sparse_moe.experts.114.w3", "model.layers.28.block_sparse_moe.experts.115.w3", "model.layers.28.block_sparse_moe.experts.116.w3", "model.layers.28.block_sparse_moe.experts.117.w3", "model.layers.28.block_sparse_moe.experts.118.w3", "model.layers.28.block_sparse_moe.experts.119.w3", "model.layers.28.block_sparse_moe.experts.120.w3", "model.layers.28.block_sparse_moe.experts.121.w3", "model.layers.28.block_sparse_moe.experts.122.w3", "model.layers.28.block_sparse_moe.experts.123.w3", "model.layers.28.block_sparse_moe.experts.124.w3", "model.layers.28.block_sparse_moe.experts.125.w3", "model.layers.28.block_sparse_moe.experts.126.w3", "model.layers.28.block_sparse_moe.experts.127.w3", "model.layers.28.block_sparse_moe.experts.128.w3", "model.layers.28.block_sparse_moe.experts.129.w3", "model.layers.28.block_sparse_moe.experts.130.w3", "model.layers.28.block_sparse_moe.experts.131.w3", "model.layers.28.block_sparse_moe.experts.132.w3", "model.layers.28.block_sparse_moe.experts.133.w3", "model.layers.28.block_sparse_moe.experts.134.w3", "model.layers.28.block_sparse_moe.experts.135.w3", "model.layers.28.block_sparse_moe.experts.136.w3", "model.layers.28.block_sparse_moe.experts.137.w3", "model.layers.28.block_sparse_moe.experts.138.w3", "model.layers.28.block_sparse_moe.experts.139.w3", "model.layers.28.block_sparse_moe.experts.140.w3", "model.layers.28.block_sparse_moe.experts.141.w3", "model.layers.28.block_sparse_moe.experts.142.w3", "model.layers.28.block_sparse_moe.experts.143.w3", "model.layers.28.block_sparse_moe.experts.144.w3", "model.layers.28.block_sparse_moe.experts.145.w3", "model.layers.28.block_sparse_moe.experts.146.w3", "model.layers.28.block_sparse_moe.experts.147.w3", "model.layers.28.block_sparse_moe.experts.148.w3", "model.layers.28.block_sparse_moe.experts.149.w3", "model.layers.28.block_sparse_moe.experts.150.w3", "model.layers.28.block_sparse_moe.experts.151.w3", "model.layers.28.block_sparse_moe.experts.152.w3", "model.layers.28.block_sparse_moe.experts.153.w3", "model.layers.28.block_sparse_moe.experts.154.w3", "model.layers.28.block_sparse_moe.experts.155.w3", "model.layers.28.block_sparse_moe.experts.156.w3", "model.layers.28.block_sparse_moe.experts.157.w3", "model.layers.28.block_sparse_moe.experts.158.w3", "model.layers.28.block_sparse_moe.experts.159.w3", "model.layers.28.block_sparse_moe.experts.160.w3", "model.layers.28.block_sparse_moe.experts.161.w3", "model.layers.28.block_sparse_moe.experts.162.w3", "model.layers.28.block_sparse_moe.experts.163.w3", "model.layers.28.block_sparse_moe.experts.164.w3", "model.layers.28.block_sparse_moe.experts.165.w3", "model.layers.28.block_sparse_moe.experts.166.w3", "model.layers.28.block_sparse_moe.experts.167.w3", "model.layers.28.block_sparse_moe.experts.168.w3", "model.layers.28.block_sparse_moe.experts.169.w3", "model.layers.28.block_sparse_moe.experts.170.w3", "model.layers.28.block_sparse_moe.experts.171.w3", "model.layers.28.block_sparse_moe.experts.172.w3", "model.layers.28.block_sparse_moe.experts.173.w3", "model.layers.28.block_sparse_moe.experts.174.w3", "model.layers.28.block_sparse_moe.experts.175.w3", "model.layers.28.block_sparse_moe.experts.176.w3", "model.layers.28.block_sparse_moe.experts.177.w3", "model.layers.28.block_sparse_moe.experts.178.w3", "model.layers.28.block_sparse_moe.experts.179.w3", "model.layers.28.block_sparse_moe.experts.180.w3", "model.layers.28.block_sparse_moe.experts.181.w3", "model.layers.28.block_sparse_moe.experts.182.w3", "model.layers.28.block_sparse_moe.experts.183.w3", "model.layers.28.block_sparse_moe.experts.184.w3", "model.layers.28.block_sparse_moe.experts.185.w3", "model.layers.28.block_sparse_moe.experts.186.w3", "model.layers.28.block_sparse_moe.experts.187.w3", "model.layers.28.block_sparse_moe.experts.188.w3", "model.layers.28.block_sparse_moe.experts.189.w3", "model.layers.28.block_sparse_moe.experts.190.w3", "model.layers.28.block_sparse_moe.experts.191.w3", "model.layers.28.block_sparse_moe.experts.192.w3", "model.layers.28.block_sparse_moe.experts.193.w3", "model.layers.28.block_sparse_moe.experts.194.w3", "model.layers.28.block_sparse_moe.experts.195.w3", "model.layers.28.block_sparse_moe.experts.196.w3", "model.layers.28.block_sparse_moe.experts.197.w3", "model.layers.28.block_sparse_moe.experts.198.w3", "model.layers.28.block_sparse_moe.experts.199.w3", "model.layers.28.block_sparse_moe.experts.200.w3", "model.layers.28.block_sparse_moe.experts.201.w3", "model.layers.28.block_sparse_moe.experts.202.w3", "model.layers.28.block_sparse_moe.experts.203.w3", "model.layers.28.block_sparse_moe.experts.204.w3", "model.layers.28.block_sparse_moe.experts.205.w3", "model.layers.28.block_sparse_moe.experts.206.w3", "model.layers.28.block_sparse_moe.experts.207.w3", "model.layers.28.block_sparse_moe.experts.208.w3", "model.layers.28.block_sparse_moe.experts.209.w3", "model.layers.28.block_sparse_moe.experts.210.w3", "model.layers.28.block_sparse_moe.experts.211.w3", "model.layers.28.block_sparse_moe.experts.212.w3", "model.layers.28.block_sparse_moe.experts.213.w3", "model.layers.28.block_sparse_moe.experts.214.w3", "model.layers.28.block_sparse_moe.experts.215.w3", "model.layers.28.block_sparse_moe.experts.216.w3", "model.layers.28.block_sparse_moe.experts.217.w3", "model.layers.28.block_sparse_moe.experts.218.w3", "model.layers.28.block_sparse_moe.experts.219.w3", "model.layers.28.block_sparse_moe.experts.220.w3", "model.layers.28.block_sparse_moe.experts.221.w3", "model.layers.28.block_sparse_moe.experts.222.w3", "model.layers.28.block_sparse_moe.experts.223.w3", "model.layers.28.block_sparse_moe.experts.224.w3", "model.layers.28.block_sparse_moe.experts.225.w3", "model.layers.28.block_sparse_moe.experts.226.w3", "model.layers.28.block_sparse_moe.experts.227.w3", "model.layers.28.block_sparse_moe.experts.228.w3", "model.layers.28.block_sparse_moe.experts.229.w3", "model.layers.28.block_sparse_moe.experts.230.w3", "model.layers.28.block_sparse_moe.experts.231.w3", "model.layers.28.block_sparse_moe.experts.232.w3", "model.layers.28.block_sparse_moe.experts.233.w3", "model.layers.28.block_sparse_moe.experts.234.w3", "model.layers.28.block_sparse_moe.experts.235.w3", "model.layers.28.block_sparse_moe.experts.236.w3", "model.layers.28.block_sparse_moe.experts.237.w3", "model.layers.28.block_sparse_moe.experts.238.w3", "model.layers.28.block_sparse_moe.experts.239.w3", "model.layers.28.block_sparse_moe.experts.240.w3", "model.layers.28.block_sparse_moe.experts.241.w3", "model.layers.28.block_sparse_moe.experts.242.w3", "model.layers.28.block_sparse_moe.experts.243.w3", "model.layers.28.block_sparse_moe.experts.244.w3", "model.layers.28.block_sparse_moe.experts.245.w3", "model.layers.28.block_sparse_moe.experts.246.w3", "model.layers.28.block_sparse_moe.experts.247.w3", "model.layers.28.block_sparse_moe.experts.248.w3", "model.layers.28.block_sparse_moe.experts.249.w3", "model.layers.28.block_sparse_moe.experts.250.w3", "model.layers.28.block_sparse_moe.experts.251.w3", "model.layers.28.block_sparse_moe.experts.252.w3", "model.layers.28.block_sparse_moe.experts.253.w3", "model.layers.28.block_sparse_moe.experts.254.w3", "model.layers.28.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.001868394017219499, "dbits": 2415919104 } ] }, { "idx": 144, "layers": [ "model.layers.28.block_sparse_moe.experts.0.w2", "model.layers.28.block_sparse_moe.experts.1.w2", "model.layers.28.block_sparse_moe.experts.2.w2", "model.layers.28.block_sparse_moe.experts.3.w2", "model.layers.28.block_sparse_moe.experts.4.w2", "model.layers.28.block_sparse_moe.experts.5.w2", "model.layers.28.block_sparse_moe.experts.6.w2", "model.layers.28.block_sparse_moe.experts.7.w2", "model.layers.28.block_sparse_moe.experts.8.w2", "model.layers.28.block_sparse_moe.experts.9.w2", "model.layers.28.block_sparse_moe.experts.10.w2", "model.layers.28.block_sparse_moe.experts.11.w2", "model.layers.28.block_sparse_moe.experts.12.w2", "model.layers.28.block_sparse_moe.experts.13.w2", "model.layers.28.block_sparse_moe.experts.14.w2", "model.layers.28.block_sparse_moe.experts.15.w2", "model.layers.28.block_sparse_moe.experts.16.w2", "model.layers.28.block_sparse_moe.experts.17.w2", "model.layers.28.block_sparse_moe.experts.18.w2", "model.layers.28.block_sparse_moe.experts.19.w2", "model.layers.28.block_sparse_moe.experts.20.w2", "model.layers.28.block_sparse_moe.experts.21.w2", "model.layers.28.block_sparse_moe.experts.22.w2", "model.layers.28.block_sparse_moe.experts.23.w2", "model.layers.28.block_sparse_moe.experts.24.w2", "model.layers.28.block_sparse_moe.experts.25.w2", "model.layers.28.block_sparse_moe.experts.26.w2", "model.layers.28.block_sparse_moe.experts.27.w2", "model.layers.28.block_sparse_moe.experts.28.w2", "model.layers.28.block_sparse_moe.experts.29.w2", "model.layers.28.block_sparse_moe.experts.30.w2", "model.layers.28.block_sparse_moe.experts.31.w2", "model.layers.28.block_sparse_moe.experts.32.w2", "model.layers.28.block_sparse_moe.experts.33.w2", "model.layers.28.block_sparse_moe.experts.34.w2", "model.layers.28.block_sparse_moe.experts.35.w2", "model.layers.28.block_sparse_moe.experts.36.w2", "model.layers.28.block_sparse_moe.experts.37.w2", "model.layers.28.block_sparse_moe.experts.38.w2", "model.layers.28.block_sparse_moe.experts.39.w2", "model.layers.28.block_sparse_moe.experts.40.w2", "model.layers.28.block_sparse_moe.experts.41.w2", "model.layers.28.block_sparse_moe.experts.42.w2", "model.layers.28.block_sparse_moe.experts.43.w2", "model.layers.28.block_sparse_moe.experts.44.w2", "model.layers.28.block_sparse_moe.experts.45.w2", "model.layers.28.block_sparse_moe.experts.46.w2", "model.layers.28.block_sparse_moe.experts.47.w2", "model.layers.28.block_sparse_moe.experts.48.w2", "model.layers.28.block_sparse_moe.experts.49.w2", "model.layers.28.block_sparse_moe.experts.50.w2", "model.layers.28.block_sparse_moe.experts.51.w2", "model.layers.28.block_sparse_moe.experts.52.w2", "model.layers.28.block_sparse_moe.experts.53.w2", "model.layers.28.block_sparse_moe.experts.54.w2", "model.layers.28.block_sparse_moe.experts.55.w2", "model.layers.28.block_sparse_moe.experts.56.w2", "model.layers.28.block_sparse_moe.experts.57.w2", "model.layers.28.block_sparse_moe.experts.58.w2", "model.layers.28.block_sparse_moe.experts.59.w2", "model.layers.28.block_sparse_moe.experts.60.w2", "model.layers.28.block_sparse_moe.experts.61.w2", "model.layers.28.block_sparse_moe.experts.62.w2", "model.layers.28.block_sparse_moe.experts.63.w2", "model.layers.28.block_sparse_moe.experts.64.w2", "model.layers.28.block_sparse_moe.experts.65.w2", "model.layers.28.block_sparse_moe.experts.66.w2", "model.layers.28.block_sparse_moe.experts.67.w2", "model.layers.28.block_sparse_moe.experts.68.w2", "model.layers.28.block_sparse_moe.experts.69.w2", "model.layers.28.block_sparse_moe.experts.70.w2", "model.layers.28.block_sparse_moe.experts.71.w2", "model.layers.28.block_sparse_moe.experts.72.w2", "model.layers.28.block_sparse_moe.experts.73.w2", "model.layers.28.block_sparse_moe.experts.74.w2", "model.layers.28.block_sparse_moe.experts.75.w2", "model.layers.28.block_sparse_moe.experts.76.w2", "model.layers.28.block_sparse_moe.experts.77.w2", "model.layers.28.block_sparse_moe.experts.78.w2", "model.layers.28.block_sparse_moe.experts.79.w2", "model.layers.28.block_sparse_moe.experts.80.w2", "model.layers.28.block_sparse_moe.experts.81.w2", "model.layers.28.block_sparse_moe.experts.82.w2", "model.layers.28.block_sparse_moe.experts.83.w2", "model.layers.28.block_sparse_moe.experts.84.w2", "model.layers.28.block_sparse_moe.experts.85.w2", "model.layers.28.block_sparse_moe.experts.86.w2", "model.layers.28.block_sparse_moe.experts.87.w2", "model.layers.28.block_sparse_moe.experts.88.w2", "model.layers.28.block_sparse_moe.experts.89.w2", "model.layers.28.block_sparse_moe.experts.90.w2", "model.layers.28.block_sparse_moe.experts.91.w2", "model.layers.28.block_sparse_moe.experts.92.w2", "model.layers.28.block_sparse_moe.experts.93.w2", "model.layers.28.block_sparse_moe.experts.94.w2", "model.layers.28.block_sparse_moe.experts.95.w2", "model.layers.28.block_sparse_moe.experts.96.w2", "model.layers.28.block_sparse_moe.experts.97.w2", "model.layers.28.block_sparse_moe.experts.98.w2", "model.layers.28.block_sparse_moe.experts.99.w2", "model.layers.28.block_sparse_moe.experts.100.w2", "model.layers.28.block_sparse_moe.experts.101.w2", "model.layers.28.block_sparse_moe.experts.102.w2", "model.layers.28.block_sparse_moe.experts.103.w2", "model.layers.28.block_sparse_moe.experts.104.w2", "model.layers.28.block_sparse_moe.experts.105.w2", "model.layers.28.block_sparse_moe.experts.106.w2", "model.layers.28.block_sparse_moe.experts.107.w2", "model.layers.28.block_sparse_moe.experts.108.w2", "model.layers.28.block_sparse_moe.experts.109.w2", "model.layers.28.block_sparse_moe.experts.110.w2", "model.layers.28.block_sparse_moe.experts.111.w2", "model.layers.28.block_sparse_moe.experts.112.w2", "model.layers.28.block_sparse_moe.experts.113.w2", "model.layers.28.block_sparse_moe.experts.114.w2", "model.layers.28.block_sparse_moe.experts.115.w2", "model.layers.28.block_sparse_moe.experts.116.w2", "model.layers.28.block_sparse_moe.experts.117.w2", "model.layers.28.block_sparse_moe.experts.118.w2", "model.layers.28.block_sparse_moe.experts.119.w2", "model.layers.28.block_sparse_moe.experts.120.w2", "model.layers.28.block_sparse_moe.experts.121.w2", "model.layers.28.block_sparse_moe.experts.122.w2", "model.layers.28.block_sparse_moe.experts.123.w2", "model.layers.28.block_sparse_moe.experts.124.w2", "model.layers.28.block_sparse_moe.experts.125.w2", "model.layers.28.block_sparse_moe.experts.126.w2", "model.layers.28.block_sparse_moe.experts.127.w2", "model.layers.28.block_sparse_moe.experts.128.w2", "model.layers.28.block_sparse_moe.experts.129.w2", "model.layers.28.block_sparse_moe.experts.130.w2", "model.layers.28.block_sparse_moe.experts.131.w2", "model.layers.28.block_sparse_moe.experts.132.w2", "model.layers.28.block_sparse_moe.experts.133.w2", "model.layers.28.block_sparse_moe.experts.134.w2", "model.layers.28.block_sparse_moe.experts.135.w2", "model.layers.28.block_sparse_moe.experts.136.w2", "model.layers.28.block_sparse_moe.experts.137.w2", "model.layers.28.block_sparse_moe.experts.138.w2", "model.layers.28.block_sparse_moe.experts.139.w2", "model.layers.28.block_sparse_moe.experts.140.w2", "model.layers.28.block_sparse_moe.experts.141.w2", "model.layers.28.block_sparse_moe.experts.142.w2", "model.layers.28.block_sparse_moe.experts.143.w2", "model.layers.28.block_sparse_moe.experts.144.w2", "model.layers.28.block_sparse_moe.experts.145.w2", "model.layers.28.block_sparse_moe.experts.146.w2", "model.layers.28.block_sparse_moe.experts.147.w2", "model.layers.28.block_sparse_moe.experts.148.w2", "model.layers.28.block_sparse_moe.experts.149.w2", "model.layers.28.block_sparse_moe.experts.150.w2", "model.layers.28.block_sparse_moe.experts.151.w2", "model.layers.28.block_sparse_moe.experts.152.w2", "model.layers.28.block_sparse_moe.experts.153.w2", "model.layers.28.block_sparse_moe.experts.154.w2", "model.layers.28.block_sparse_moe.experts.155.w2", "model.layers.28.block_sparse_moe.experts.156.w2", "model.layers.28.block_sparse_moe.experts.157.w2", "model.layers.28.block_sparse_moe.experts.158.w2", "model.layers.28.block_sparse_moe.experts.159.w2", "model.layers.28.block_sparse_moe.experts.160.w2", "model.layers.28.block_sparse_moe.experts.161.w2", "model.layers.28.block_sparse_moe.experts.162.w2", "model.layers.28.block_sparse_moe.experts.163.w2", "model.layers.28.block_sparse_moe.experts.164.w2", "model.layers.28.block_sparse_moe.experts.165.w2", "model.layers.28.block_sparse_moe.experts.166.w2", "model.layers.28.block_sparse_moe.experts.167.w2", "model.layers.28.block_sparse_moe.experts.168.w2", "model.layers.28.block_sparse_moe.experts.169.w2", "model.layers.28.block_sparse_moe.experts.170.w2", "model.layers.28.block_sparse_moe.experts.171.w2", "model.layers.28.block_sparse_moe.experts.172.w2", "model.layers.28.block_sparse_moe.experts.173.w2", "model.layers.28.block_sparse_moe.experts.174.w2", "model.layers.28.block_sparse_moe.experts.175.w2", "model.layers.28.block_sparse_moe.experts.176.w2", "model.layers.28.block_sparse_moe.experts.177.w2", "model.layers.28.block_sparse_moe.experts.178.w2", "model.layers.28.block_sparse_moe.experts.179.w2", "model.layers.28.block_sparse_moe.experts.180.w2", "model.layers.28.block_sparse_moe.experts.181.w2", "model.layers.28.block_sparse_moe.experts.182.w2", "model.layers.28.block_sparse_moe.experts.183.w2", "model.layers.28.block_sparse_moe.experts.184.w2", "model.layers.28.block_sparse_moe.experts.185.w2", "model.layers.28.block_sparse_moe.experts.186.w2", "model.layers.28.block_sparse_moe.experts.187.w2", "model.layers.28.block_sparse_moe.experts.188.w2", "model.layers.28.block_sparse_moe.experts.189.w2", "model.layers.28.block_sparse_moe.experts.190.w2", "model.layers.28.block_sparse_moe.experts.191.w2", "model.layers.28.block_sparse_moe.experts.192.w2", "model.layers.28.block_sparse_moe.experts.193.w2", "model.layers.28.block_sparse_moe.experts.194.w2", "model.layers.28.block_sparse_moe.experts.195.w2", "model.layers.28.block_sparse_moe.experts.196.w2", "model.layers.28.block_sparse_moe.experts.197.w2", "model.layers.28.block_sparse_moe.experts.198.w2", "model.layers.28.block_sparse_moe.experts.199.w2", "model.layers.28.block_sparse_moe.experts.200.w2", "model.layers.28.block_sparse_moe.experts.201.w2", "model.layers.28.block_sparse_moe.experts.202.w2", "model.layers.28.block_sparse_moe.experts.203.w2", "model.layers.28.block_sparse_moe.experts.204.w2", "model.layers.28.block_sparse_moe.experts.205.w2", "model.layers.28.block_sparse_moe.experts.206.w2", "model.layers.28.block_sparse_moe.experts.207.w2", "model.layers.28.block_sparse_moe.experts.208.w2", "model.layers.28.block_sparse_moe.experts.209.w2", "model.layers.28.block_sparse_moe.experts.210.w2", "model.layers.28.block_sparse_moe.experts.211.w2", "model.layers.28.block_sparse_moe.experts.212.w2", "model.layers.28.block_sparse_moe.experts.213.w2", "model.layers.28.block_sparse_moe.experts.214.w2", "model.layers.28.block_sparse_moe.experts.215.w2", "model.layers.28.block_sparse_moe.experts.216.w2", "model.layers.28.block_sparse_moe.experts.217.w2", "model.layers.28.block_sparse_moe.experts.218.w2", "model.layers.28.block_sparse_moe.experts.219.w2", "model.layers.28.block_sparse_moe.experts.220.w2", "model.layers.28.block_sparse_moe.experts.221.w2", "model.layers.28.block_sparse_moe.experts.222.w2", "model.layers.28.block_sparse_moe.experts.223.w2", "model.layers.28.block_sparse_moe.experts.224.w2", "model.layers.28.block_sparse_moe.experts.225.w2", "model.layers.28.block_sparse_moe.experts.226.w2", "model.layers.28.block_sparse_moe.experts.227.w2", "model.layers.28.block_sparse_moe.experts.228.w2", "model.layers.28.block_sparse_moe.experts.229.w2", "model.layers.28.block_sparse_moe.experts.230.w2", "model.layers.28.block_sparse_moe.experts.231.w2", "model.layers.28.block_sparse_moe.experts.232.w2", "model.layers.28.block_sparse_moe.experts.233.w2", "model.layers.28.block_sparse_moe.experts.234.w2", "model.layers.28.block_sparse_moe.experts.235.w2", "model.layers.28.block_sparse_moe.experts.236.w2", "model.layers.28.block_sparse_moe.experts.237.w2", "model.layers.28.block_sparse_moe.experts.238.w2", "model.layers.28.block_sparse_moe.experts.239.w2", "model.layers.28.block_sparse_moe.experts.240.w2", "model.layers.28.block_sparse_moe.experts.241.w2", "model.layers.28.block_sparse_moe.experts.242.w2", "model.layers.28.block_sparse_moe.experts.243.w2", "model.layers.28.block_sparse_moe.experts.244.w2", "model.layers.28.block_sparse_moe.experts.245.w2", "model.layers.28.block_sparse_moe.experts.246.w2", "model.layers.28.block_sparse_moe.experts.247.w2", "model.layers.28.block_sparse_moe.experts.248.w2", "model.layers.28.block_sparse_moe.experts.249.w2", "model.layers.28.block_sparse_moe.experts.250.w2", "model.layers.28.block_sparse_moe.experts.251.w2", "model.layers.28.block_sparse_moe.experts.252.w2", "model.layers.28.block_sparse_moe.experts.253.w2", "model.layers.28.block_sparse_moe.experts.254.w2", "model.layers.28.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0009291052818297674, "dbits": 1207959552 } ] }, { "idx": 145, "layers": [ "model.layers.29.self_attn.q_proj" ], "candidates": [ { "dkld": 0.007633039355278037, "dbits": 18874368 } ] }, { "idx": 146, "layers": [ "model.layers.29.self_attn.k_proj", "model.layers.29.self_attn.v_proj" ], "candidates": [ { "dkld": 0.009351199865341275, "dbits": 6291456 } ] }, { "idx": 147, "layers": [ "model.layers.29.self_attn.o_proj" ], "candidates": [ { "dkld": 0.003286302089691162, "dbits": 18874368 } ] }, { "idx": 148, "layers": [ "model.layers.29.block_sparse_moe.experts.0.w1", "model.layers.29.block_sparse_moe.experts.1.w1", "model.layers.29.block_sparse_moe.experts.2.w1", "model.layers.29.block_sparse_moe.experts.3.w1", "model.layers.29.block_sparse_moe.experts.4.w1", "model.layers.29.block_sparse_moe.experts.5.w1", "model.layers.29.block_sparse_moe.experts.6.w1", "model.layers.29.block_sparse_moe.experts.7.w1", "model.layers.29.block_sparse_moe.experts.8.w1", "model.layers.29.block_sparse_moe.experts.9.w1", "model.layers.29.block_sparse_moe.experts.10.w1", "model.layers.29.block_sparse_moe.experts.11.w1", "model.layers.29.block_sparse_moe.experts.12.w1", "model.layers.29.block_sparse_moe.experts.13.w1", "model.layers.29.block_sparse_moe.experts.14.w1", "model.layers.29.block_sparse_moe.experts.15.w1", "model.layers.29.block_sparse_moe.experts.16.w1", "model.layers.29.block_sparse_moe.experts.17.w1", "model.layers.29.block_sparse_moe.experts.18.w1", "model.layers.29.block_sparse_moe.experts.19.w1", "model.layers.29.block_sparse_moe.experts.20.w1", "model.layers.29.block_sparse_moe.experts.21.w1", "model.layers.29.block_sparse_moe.experts.22.w1", "model.layers.29.block_sparse_moe.experts.23.w1", "model.layers.29.block_sparse_moe.experts.24.w1", "model.layers.29.block_sparse_moe.experts.25.w1", "model.layers.29.block_sparse_moe.experts.26.w1", "model.layers.29.block_sparse_moe.experts.27.w1", "model.layers.29.block_sparse_moe.experts.28.w1", "model.layers.29.block_sparse_moe.experts.29.w1", "model.layers.29.block_sparse_moe.experts.30.w1", "model.layers.29.block_sparse_moe.experts.31.w1", "model.layers.29.block_sparse_moe.experts.32.w1", "model.layers.29.block_sparse_moe.experts.33.w1", "model.layers.29.block_sparse_moe.experts.34.w1", "model.layers.29.block_sparse_moe.experts.35.w1", "model.layers.29.block_sparse_moe.experts.36.w1", "model.layers.29.block_sparse_moe.experts.37.w1", "model.layers.29.block_sparse_moe.experts.38.w1", "model.layers.29.block_sparse_moe.experts.39.w1", "model.layers.29.block_sparse_moe.experts.40.w1", "model.layers.29.block_sparse_moe.experts.41.w1", "model.layers.29.block_sparse_moe.experts.42.w1", "model.layers.29.block_sparse_moe.experts.43.w1", "model.layers.29.block_sparse_moe.experts.44.w1", "model.layers.29.block_sparse_moe.experts.45.w1", "model.layers.29.block_sparse_moe.experts.46.w1", "model.layers.29.block_sparse_moe.experts.47.w1", "model.layers.29.block_sparse_moe.experts.48.w1", "model.layers.29.block_sparse_moe.experts.49.w1", "model.layers.29.block_sparse_moe.experts.50.w1", "model.layers.29.block_sparse_moe.experts.51.w1", "model.layers.29.block_sparse_moe.experts.52.w1", "model.layers.29.block_sparse_moe.experts.53.w1", "model.layers.29.block_sparse_moe.experts.54.w1", "model.layers.29.block_sparse_moe.experts.55.w1", "model.layers.29.block_sparse_moe.experts.56.w1", "model.layers.29.block_sparse_moe.experts.57.w1", "model.layers.29.block_sparse_moe.experts.58.w1", "model.layers.29.block_sparse_moe.experts.59.w1", "model.layers.29.block_sparse_moe.experts.60.w1", "model.layers.29.block_sparse_moe.experts.61.w1", "model.layers.29.block_sparse_moe.experts.62.w1", "model.layers.29.block_sparse_moe.experts.63.w1", "model.layers.29.block_sparse_moe.experts.64.w1", "model.layers.29.block_sparse_moe.experts.65.w1", "model.layers.29.block_sparse_moe.experts.66.w1", "model.layers.29.block_sparse_moe.experts.67.w1", "model.layers.29.block_sparse_moe.experts.68.w1", "model.layers.29.block_sparse_moe.experts.69.w1", "model.layers.29.block_sparse_moe.experts.70.w1", "model.layers.29.block_sparse_moe.experts.71.w1", "model.layers.29.block_sparse_moe.experts.72.w1", "model.layers.29.block_sparse_moe.experts.73.w1", "model.layers.29.block_sparse_moe.experts.74.w1", "model.layers.29.block_sparse_moe.experts.75.w1", "model.layers.29.block_sparse_moe.experts.76.w1", "model.layers.29.block_sparse_moe.experts.77.w1", "model.layers.29.block_sparse_moe.experts.78.w1", "model.layers.29.block_sparse_moe.experts.79.w1", "model.layers.29.block_sparse_moe.experts.80.w1", "model.layers.29.block_sparse_moe.experts.81.w1", "model.layers.29.block_sparse_moe.experts.82.w1", "model.layers.29.block_sparse_moe.experts.83.w1", "model.layers.29.block_sparse_moe.experts.84.w1", "model.layers.29.block_sparse_moe.experts.85.w1", "model.layers.29.block_sparse_moe.experts.86.w1", "model.layers.29.block_sparse_moe.experts.87.w1", "model.layers.29.block_sparse_moe.experts.88.w1", "model.layers.29.block_sparse_moe.experts.89.w1", "model.layers.29.block_sparse_moe.experts.90.w1", "model.layers.29.block_sparse_moe.experts.91.w1", "model.layers.29.block_sparse_moe.experts.92.w1", "model.layers.29.block_sparse_moe.experts.93.w1", "model.layers.29.block_sparse_moe.experts.94.w1", "model.layers.29.block_sparse_moe.experts.95.w1", "model.layers.29.block_sparse_moe.experts.96.w1", "model.layers.29.block_sparse_moe.experts.97.w1", "model.layers.29.block_sparse_moe.experts.98.w1", "model.layers.29.block_sparse_moe.experts.99.w1", "model.layers.29.block_sparse_moe.experts.100.w1", "model.layers.29.block_sparse_moe.experts.101.w1", "model.layers.29.block_sparse_moe.experts.102.w1", "model.layers.29.block_sparse_moe.experts.103.w1", "model.layers.29.block_sparse_moe.experts.104.w1", "model.layers.29.block_sparse_moe.experts.105.w1", "model.layers.29.block_sparse_moe.experts.106.w1", "model.layers.29.block_sparse_moe.experts.107.w1", "model.layers.29.block_sparse_moe.experts.108.w1", "model.layers.29.block_sparse_moe.experts.109.w1", "model.layers.29.block_sparse_moe.experts.110.w1", "model.layers.29.block_sparse_moe.experts.111.w1", "model.layers.29.block_sparse_moe.experts.112.w1", "model.layers.29.block_sparse_moe.experts.113.w1", "model.layers.29.block_sparse_moe.experts.114.w1", "model.layers.29.block_sparse_moe.experts.115.w1", "model.layers.29.block_sparse_moe.experts.116.w1", "model.layers.29.block_sparse_moe.experts.117.w1", "model.layers.29.block_sparse_moe.experts.118.w1", "model.layers.29.block_sparse_moe.experts.119.w1", "model.layers.29.block_sparse_moe.experts.120.w1", "model.layers.29.block_sparse_moe.experts.121.w1", "model.layers.29.block_sparse_moe.experts.122.w1", "model.layers.29.block_sparse_moe.experts.123.w1", "model.layers.29.block_sparse_moe.experts.124.w1", "model.layers.29.block_sparse_moe.experts.125.w1", "model.layers.29.block_sparse_moe.experts.126.w1", "model.layers.29.block_sparse_moe.experts.127.w1", "model.layers.29.block_sparse_moe.experts.128.w1", "model.layers.29.block_sparse_moe.experts.129.w1", "model.layers.29.block_sparse_moe.experts.130.w1", "model.layers.29.block_sparse_moe.experts.131.w1", "model.layers.29.block_sparse_moe.experts.132.w1", "model.layers.29.block_sparse_moe.experts.133.w1", "model.layers.29.block_sparse_moe.experts.134.w1", "model.layers.29.block_sparse_moe.experts.135.w1", "model.layers.29.block_sparse_moe.experts.136.w1", "model.layers.29.block_sparse_moe.experts.137.w1", "model.layers.29.block_sparse_moe.experts.138.w1", "model.layers.29.block_sparse_moe.experts.139.w1", "model.layers.29.block_sparse_moe.experts.140.w1", "model.layers.29.block_sparse_moe.experts.141.w1", "model.layers.29.block_sparse_moe.experts.142.w1", "model.layers.29.block_sparse_moe.experts.143.w1", "model.layers.29.block_sparse_moe.experts.144.w1", "model.layers.29.block_sparse_moe.experts.145.w1", "model.layers.29.block_sparse_moe.experts.146.w1", "model.layers.29.block_sparse_moe.experts.147.w1", "model.layers.29.block_sparse_moe.experts.148.w1", "model.layers.29.block_sparse_moe.experts.149.w1", "model.layers.29.block_sparse_moe.experts.150.w1", "model.layers.29.block_sparse_moe.experts.151.w1", "model.layers.29.block_sparse_moe.experts.152.w1", "model.layers.29.block_sparse_moe.experts.153.w1", "model.layers.29.block_sparse_moe.experts.154.w1", "model.layers.29.block_sparse_moe.experts.155.w1", "model.layers.29.block_sparse_moe.experts.156.w1", "model.layers.29.block_sparse_moe.experts.157.w1", "model.layers.29.block_sparse_moe.experts.158.w1", "model.layers.29.block_sparse_moe.experts.159.w1", "model.layers.29.block_sparse_moe.experts.160.w1", "model.layers.29.block_sparse_moe.experts.161.w1", "model.layers.29.block_sparse_moe.experts.162.w1", "model.layers.29.block_sparse_moe.experts.163.w1", "model.layers.29.block_sparse_moe.experts.164.w1", "model.layers.29.block_sparse_moe.experts.165.w1", "model.layers.29.block_sparse_moe.experts.166.w1", "model.layers.29.block_sparse_moe.experts.167.w1", "model.layers.29.block_sparse_moe.experts.168.w1", "model.layers.29.block_sparse_moe.experts.169.w1", "model.layers.29.block_sparse_moe.experts.170.w1", "model.layers.29.block_sparse_moe.experts.171.w1", "model.layers.29.block_sparse_moe.experts.172.w1", "model.layers.29.block_sparse_moe.experts.173.w1", "model.layers.29.block_sparse_moe.experts.174.w1", "model.layers.29.block_sparse_moe.experts.175.w1", "model.layers.29.block_sparse_moe.experts.176.w1", "model.layers.29.block_sparse_moe.experts.177.w1", "model.layers.29.block_sparse_moe.experts.178.w1", "model.layers.29.block_sparse_moe.experts.179.w1", "model.layers.29.block_sparse_moe.experts.180.w1", "model.layers.29.block_sparse_moe.experts.181.w1", "model.layers.29.block_sparse_moe.experts.182.w1", "model.layers.29.block_sparse_moe.experts.183.w1", "model.layers.29.block_sparse_moe.experts.184.w1", "model.layers.29.block_sparse_moe.experts.185.w1", "model.layers.29.block_sparse_moe.experts.186.w1", "model.layers.29.block_sparse_moe.experts.187.w1", "model.layers.29.block_sparse_moe.experts.188.w1", "model.layers.29.block_sparse_moe.experts.189.w1", "model.layers.29.block_sparse_moe.experts.190.w1", "model.layers.29.block_sparse_moe.experts.191.w1", "model.layers.29.block_sparse_moe.experts.192.w1", "model.layers.29.block_sparse_moe.experts.193.w1", "model.layers.29.block_sparse_moe.experts.194.w1", "model.layers.29.block_sparse_moe.experts.195.w1", "model.layers.29.block_sparse_moe.experts.196.w1", "model.layers.29.block_sparse_moe.experts.197.w1", "model.layers.29.block_sparse_moe.experts.198.w1", "model.layers.29.block_sparse_moe.experts.199.w1", "model.layers.29.block_sparse_moe.experts.200.w1", "model.layers.29.block_sparse_moe.experts.201.w1", "model.layers.29.block_sparse_moe.experts.202.w1", "model.layers.29.block_sparse_moe.experts.203.w1", "model.layers.29.block_sparse_moe.experts.204.w1", "model.layers.29.block_sparse_moe.experts.205.w1", "model.layers.29.block_sparse_moe.experts.206.w1", "model.layers.29.block_sparse_moe.experts.207.w1", "model.layers.29.block_sparse_moe.experts.208.w1", "model.layers.29.block_sparse_moe.experts.209.w1", "model.layers.29.block_sparse_moe.experts.210.w1", "model.layers.29.block_sparse_moe.experts.211.w1", "model.layers.29.block_sparse_moe.experts.212.w1", "model.layers.29.block_sparse_moe.experts.213.w1", "model.layers.29.block_sparse_moe.experts.214.w1", "model.layers.29.block_sparse_moe.experts.215.w1", "model.layers.29.block_sparse_moe.experts.216.w1", "model.layers.29.block_sparse_moe.experts.217.w1", "model.layers.29.block_sparse_moe.experts.218.w1", "model.layers.29.block_sparse_moe.experts.219.w1", "model.layers.29.block_sparse_moe.experts.220.w1", "model.layers.29.block_sparse_moe.experts.221.w1", "model.layers.29.block_sparse_moe.experts.222.w1", "model.layers.29.block_sparse_moe.experts.223.w1", "model.layers.29.block_sparse_moe.experts.224.w1", "model.layers.29.block_sparse_moe.experts.225.w1", "model.layers.29.block_sparse_moe.experts.226.w1", "model.layers.29.block_sparse_moe.experts.227.w1", "model.layers.29.block_sparse_moe.experts.228.w1", "model.layers.29.block_sparse_moe.experts.229.w1", "model.layers.29.block_sparse_moe.experts.230.w1", "model.layers.29.block_sparse_moe.experts.231.w1", "model.layers.29.block_sparse_moe.experts.232.w1", "model.layers.29.block_sparse_moe.experts.233.w1", "model.layers.29.block_sparse_moe.experts.234.w1", "model.layers.29.block_sparse_moe.experts.235.w1", "model.layers.29.block_sparse_moe.experts.236.w1", "model.layers.29.block_sparse_moe.experts.237.w1", "model.layers.29.block_sparse_moe.experts.238.w1", "model.layers.29.block_sparse_moe.experts.239.w1", "model.layers.29.block_sparse_moe.experts.240.w1", "model.layers.29.block_sparse_moe.experts.241.w1", "model.layers.29.block_sparse_moe.experts.242.w1", "model.layers.29.block_sparse_moe.experts.243.w1", "model.layers.29.block_sparse_moe.experts.244.w1", "model.layers.29.block_sparse_moe.experts.245.w1", "model.layers.29.block_sparse_moe.experts.246.w1", "model.layers.29.block_sparse_moe.experts.247.w1", "model.layers.29.block_sparse_moe.experts.248.w1", "model.layers.29.block_sparse_moe.experts.249.w1", "model.layers.29.block_sparse_moe.experts.250.w1", "model.layers.29.block_sparse_moe.experts.251.w1", "model.layers.29.block_sparse_moe.experts.252.w1", "model.layers.29.block_sparse_moe.experts.253.w1", "model.layers.29.block_sparse_moe.experts.254.w1", "model.layers.29.block_sparse_moe.experts.255.w1", "model.layers.29.block_sparse_moe.experts.0.w3", "model.layers.29.block_sparse_moe.experts.1.w3", "model.layers.29.block_sparse_moe.experts.2.w3", "model.layers.29.block_sparse_moe.experts.3.w3", "model.layers.29.block_sparse_moe.experts.4.w3", "model.layers.29.block_sparse_moe.experts.5.w3", "model.layers.29.block_sparse_moe.experts.6.w3", "model.layers.29.block_sparse_moe.experts.7.w3", "model.layers.29.block_sparse_moe.experts.8.w3", "model.layers.29.block_sparse_moe.experts.9.w3", "model.layers.29.block_sparse_moe.experts.10.w3", "model.layers.29.block_sparse_moe.experts.11.w3", "model.layers.29.block_sparse_moe.experts.12.w3", "model.layers.29.block_sparse_moe.experts.13.w3", "model.layers.29.block_sparse_moe.experts.14.w3", "model.layers.29.block_sparse_moe.experts.15.w3", "model.layers.29.block_sparse_moe.experts.16.w3", "model.layers.29.block_sparse_moe.experts.17.w3", "model.layers.29.block_sparse_moe.experts.18.w3", "model.layers.29.block_sparse_moe.experts.19.w3", "model.layers.29.block_sparse_moe.experts.20.w3", "model.layers.29.block_sparse_moe.experts.21.w3", "model.layers.29.block_sparse_moe.experts.22.w3", "model.layers.29.block_sparse_moe.experts.23.w3", "model.layers.29.block_sparse_moe.experts.24.w3", "model.layers.29.block_sparse_moe.experts.25.w3", "model.layers.29.block_sparse_moe.experts.26.w3", "model.layers.29.block_sparse_moe.experts.27.w3", "model.layers.29.block_sparse_moe.experts.28.w3", "model.layers.29.block_sparse_moe.experts.29.w3", "model.layers.29.block_sparse_moe.experts.30.w3", "model.layers.29.block_sparse_moe.experts.31.w3", "model.layers.29.block_sparse_moe.experts.32.w3", "model.layers.29.block_sparse_moe.experts.33.w3", "model.layers.29.block_sparse_moe.experts.34.w3", "model.layers.29.block_sparse_moe.experts.35.w3", "model.layers.29.block_sparse_moe.experts.36.w3", "model.layers.29.block_sparse_moe.experts.37.w3", "model.layers.29.block_sparse_moe.experts.38.w3", "model.layers.29.block_sparse_moe.experts.39.w3", "model.layers.29.block_sparse_moe.experts.40.w3", "model.layers.29.block_sparse_moe.experts.41.w3", "model.layers.29.block_sparse_moe.experts.42.w3", "model.layers.29.block_sparse_moe.experts.43.w3", "model.layers.29.block_sparse_moe.experts.44.w3", "model.layers.29.block_sparse_moe.experts.45.w3", "model.layers.29.block_sparse_moe.experts.46.w3", "model.layers.29.block_sparse_moe.experts.47.w3", "model.layers.29.block_sparse_moe.experts.48.w3", "model.layers.29.block_sparse_moe.experts.49.w3", "model.layers.29.block_sparse_moe.experts.50.w3", "model.layers.29.block_sparse_moe.experts.51.w3", "model.layers.29.block_sparse_moe.experts.52.w3", "model.layers.29.block_sparse_moe.experts.53.w3", "model.layers.29.block_sparse_moe.experts.54.w3", "model.layers.29.block_sparse_moe.experts.55.w3", "model.layers.29.block_sparse_moe.experts.56.w3", "model.layers.29.block_sparse_moe.experts.57.w3", "model.layers.29.block_sparse_moe.experts.58.w3", "model.layers.29.block_sparse_moe.experts.59.w3", "model.layers.29.block_sparse_moe.experts.60.w3", "model.layers.29.block_sparse_moe.experts.61.w3", "model.layers.29.block_sparse_moe.experts.62.w3", "model.layers.29.block_sparse_moe.experts.63.w3", "model.layers.29.block_sparse_moe.experts.64.w3", "model.layers.29.block_sparse_moe.experts.65.w3", "model.layers.29.block_sparse_moe.experts.66.w3", "model.layers.29.block_sparse_moe.experts.67.w3", "model.layers.29.block_sparse_moe.experts.68.w3", "model.layers.29.block_sparse_moe.experts.69.w3", "model.layers.29.block_sparse_moe.experts.70.w3", "model.layers.29.block_sparse_moe.experts.71.w3", "model.layers.29.block_sparse_moe.experts.72.w3", "model.layers.29.block_sparse_moe.experts.73.w3", "model.layers.29.block_sparse_moe.experts.74.w3", "model.layers.29.block_sparse_moe.experts.75.w3", "model.layers.29.block_sparse_moe.experts.76.w3", "model.layers.29.block_sparse_moe.experts.77.w3", "model.layers.29.block_sparse_moe.experts.78.w3", "model.layers.29.block_sparse_moe.experts.79.w3", "model.layers.29.block_sparse_moe.experts.80.w3", "model.layers.29.block_sparse_moe.experts.81.w3", "model.layers.29.block_sparse_moe.experts.82.w3", "model.layers.29.block_sparse_moe.experts.83.w3", "model.layers.29.block_sparse_moe.experts.84.w3", "model.layers.29.block_sparse_moe.experts.85.w3", "model.layers.29.block_sparse_moe.experts.86.w3", "model.layers.29.block_sparse_moe.experts.87.w3", "model.layers.29.block_sparse_moe.experts.88.w3", "model.layers.29.block_sparse_moe.experts.89.w3", "model.layers.29.block_sparse_moe.experts.90.w3", "model.layers.29.block_sparse_moe.experts.91.w3", "model.layers.29.block_sparse_moe.experts.92.w3", "model.layers.29.block_sparse_moe.experts.93.w3", "model.layers.29.block_sparse_moe.experts.94.w3", "model.layers.29.block_sparse_moe.experts.95.w3", "model.layers.29.block_sparse_moe.experts.96.w3", "model.layers.29.block_sparse_moe.experts.97.w3", "model.layers.29.block_sparse_moe.experts.98.w3", "model.layers.29.block_sparse_moe.experts.99.w3", "model.layers.29.block_sparse_moe.experts.100.w3", "model.layers.29.block_sparse_moe.experts.101.w3", "model.layers.29.block_sparse_moe.experts.102.w3", "model.layers.29.block_sparse_moe.experts.103.w3", "model.layers.29.block_sparse_moe.experts.104.w3", "model.layers.29.block_sparse_moe.experts.105.w3", "model.layers.29.block_sparse_moe.experts.106.w3", "model.layers.29.block_sparse_moe.experts.107.w3", "model.layers.29.block_sparse_moe.experts.108.w3", "model.layers.29.block_sparse_moe.experts.109.w3", "model.layers.29.block_sparse_moe.experts.110.w3", "model.layers.29.block_sparse_moe.experts.111.w3", "model.layers.29.block_sparse_moe.experts.112.w3", "model.layers.29.block_sparse_moe.experts.113.w3", "model.layers.29.block_sparse_moe.experts.114.w3", "model.layers.29.block_sparse_moe.experts.115.w3", "model.layers.29.block_sparse_moe.experts.116.w3", "model.layers.29.block_sparse_moe.experts.117.w3", "model.layers.29.block_sparse_moe.experts.118.w3", "model.layers.29.block_sparse_moe.experts.119.w3", "model.layers.29.block_sparse_moe.experts.120.w3", "model.layers.29.block_sparse_moe.experts.121.w3", "model.layers.29.block_sparse_moe.experts.122.w3", "model.layers.29.block_sparse_moe.experts.123.w3", "model.layers.29.block_sparse_moe.experts.124.w3", "model.layers.29.block_sparse_moe.experts.125.w3", "model.layers.29.block_sparse_moe.experts.126.w3", "model.layers.29.block_sparse_moe.experts.127.w3", "model.layers.29.block_sparse_moe.experts.128.w3", "model.layers.29.block_sparse_moe.experts.129.w3", "model.layers.29.block_sparse_moe.experts.130.w3", "model.layers.29.block_sparse_moe.experts.131.w3", "model.layers.29.block_sparse_moe.experts.132.w3", "model.layers.29.block_sparse_moe.experts.133.w3", "model.layers.29.block_sparse_moe.experts.134.w3", "model.layers.29.block_sparse_moe.experts.135.w3", "model.layers.29.block_sparse_moe.experts.136.w3", "model.layers.29.block_sparse_moe.experts.137.w3", "model.layers.29.block_sparse_moe.experts.138.w3", "model.layers.29.block_sparse_moe.experts.139.w3", "model.layers.29.block_sparse_moe.experts.140.w3", "model.layers.29.block_sparse_moe.experts.141.w3", "model.layers.29.block_sparse_moe.experts.142.w3", "model.layers.29.block_sparse_moe.experts.143.w3", "model.layers.29.block_sparse_moe.experts.144.w3", "model.layers.29.block_sparse_moe.experts.145.w3", "model.layers.29.block_sparse_moe.experts.146.w3", "model.layers.29.block_sparse_moe.experts.147.w3", "model.layers.29.block_sparse_moe.experts.148.w3", "model.layers.29.block_sparse_moe.experts.149.w3", "model.layers.29.block_sparse_moe.experts.150.w3", "model.layers.29.block_sparse_moe.experts.151.w3", "model.layers.29.block_sparse_moe.experts.152.w3", "model.layers.29.block_sparse_moe.experts.153.w3", "model.layers.29.block_sparse_moe.experts.154.w3", "model.layers.29.block_sparse_moe.experts.155.w3", "model.layers.29.block_sparse_moe.experts.156.w3", "model.layers.29.block_sparse_moe.experts.157.w3", "model.layers.29.block_sparse_moe.experts.158.w3", "model.layers.29.block_sparse_moe.experts.159.w3", "model.layers.29.block_sparse_moe.experts.160.w3", "model.layers.29.block_sparse_moe.experts.161.w3", "model.layers.29.block_sparse_moe.experts.162.w3", "model.layers.29.block_sparse_moe.experts.163.w3", "model.layers.29.block_sparse_moe.experts.164.w3", "model.layers.29.block_sparse_moe.experts.165.w3", "model.layers.29.block_sparse_moe.experts.166.w3", "model.layers.29.block_sparse_moe.experts.167.w3", "model.layers.29.block_sparse_moe.experts.168.w3", "model.layers.29.block_sparse_moe.experts.169.w3", "model.layers.29.block_sparse_moe.experts.170.w3", "model.layers.29.block_sparse_moe.experts.171.w3", "model.layers.29.block_sparse_moe.experts.172.w3", "model.layers.29.block_sparse_moe.experts.173.w3", "model.layers.29.block_sparse_moe.experts.174.w3", "model.layers.29.block_sparse_moe.experts.175.w3", "model.layers.29.block_sparse_moe.experts.176.w3", "model.layers.29.block_sparse_moe.experts.177.w3", "model.layers.29.block_sparse_moe.experts.178.w3", "model.layers.29.block_sparse_moe.experts.179.w3", "model.layers.29.block_sparse_moe.experts.180.w3", "model.layers.29.block_sparse_moe.experts.181.w3", "model.layers.29.block_sparse_moe.experts.182.w3", "model.layers.29.block_sparse_moe.experts.183.w3", "model.layers.29.block_sparse_moe.experts.184.w3", "model.layers.29.block_sparse_moe.experts.185.w3", "model.layers.29.block_sparse_moe.experts.186.w3", "model.layers.29.block_sparse_moe.experts.187.w3", "model.layers.29.block_sparse_moe.experts.188.w3", "model.layers.29.block_sparse_moe.experts.189.w3", "model.layers.29.block_sparse_moe.experts.190.w3", "model.layers.29.block_sparse_moe.experts.191.w3", "model.layers.29.block_sparse_moe.experts.192.w3", "model.layers.29.block_sparse_moe.experts.193.w3", "model.layers.29.block_sparse_moe.experts.194.w3", "model.layers.29.block_sparse_moe.experts.195.w3", "model.layers.29.block_sparse_moe.experts.196.w3", "model.layers.29.block_sparse_moe.experts.197.w3", "model.layers.29.block_sparse_moe.experts.198.w3", "model.layers.29.block_sparse_moe.experts.199.w3", "model.layers.29.block_sparse_moe.experts.200.w3", "model.layers.29.block_sparse_moe.experts.201.w3", "model.layers.29.block_sparse_moe.experts.202.w3", "model.layers.29.block_sparse_moe.experts.203.w3", "model.layers.29.block_sparse_moe.experts.204.w3", "model.layers.29.block_sparse_moe.experts.205.w3", "model.layers.29.block_sparse_moe.experts.206.w3", "model.layers.29.block_sparse_moe.experts.207.w3", "model.layers.29.block_sparse_moe.experts.208.w3", "model.layers.29.block_sparse_moe.experts.209.w3", "model.layers.29.block_sparse_moe.experts.210.w3", "model.layers.29.block_sparse_moe.experts.211.w3", "model.layers.29.block_sparse_moe.experts.212.w3", "model.layers.29.block_sparse_moe.experts.213.w3", "model.layers.29.block_sparse_moe.experts.214.w3", "model.layers.29.block_sparse_moe.experts.215.w3", "model.layers.29.block_sparse_moe.experts.216.w3", "model.layers.29.block_sparse_moe.experts.217.w3", "model.layers.29.block_sparse_moe.experts.218.w3", "model.layers.29.block_sparse_moe.experts.219.w3", "model.layers.29.block_sparse_moe.experts.220.w3", "model.layers.29.block_sparse_moe.experts.221.w3", "model.layers.29.block_sparse_moe.experts.222.w3", "model.layers.29.block_sparse_moe.experts.223.w3", "model.layers.29.block_sparse_moe.experts.224.w3", "model.layers.29.block_sparse_moe.experts.225.w3", "model.layers.29.block_sparse_moe.experts.226.w3", "model.layers.29.block_sparse_moe.experts.227.w3", "model.layers.29.block_sparse_moe.experts.228.w3", "model.layers.29.block_sparse_moe.experts.229.w3", "model.layers.29.block_sparse_moe.experts.230.w3", "model.layers.29.block_sparse_moe.experts.231.w3", "model.layers.29.block_sparse_moe.experts.232.w3", "model.layers.29.block_sparse_moe.experts.233.w3", "model.layers.29.block_sparse_moe.experts.234.w3", "model.layers.29.block_sparse_moe.experts.235.w3", "model.layers.29.block_sparse_moe.experts.236.w3", "model.layers.29.block_sparse_moe.experts.237.w3", "model.layers.29.block_sparse_moe.experts.238.w3", "model.layers.29.block_sparse_moe.experts.239.w3", "model.layers.29.block_sparse_moe.experts.240.w3", "model.layers.29.block_sparse_moe.experts.241.w3", "model.layers.29.block_sparse_moe.experts.242.w3", "model.layers.29.block_sparse_moe.experts.243.w3", "model.layers.29.block_sparse_moe.experts.244.w3", "model.layers.29.block_sparse_moe.experts.245.w3", "model.layers.29.block_sparse_moe.experts.246.w3", "model.layers.29.block_sparse_moe.experts.247.w3", "model.layers.29.block_sparse_moe.experts.248.w3", "model.layers.29.block_sparse_moe.experts.249.w3", "model.layers.29.block_sparse_moe.experts.250.w3", "model.layers.29.block_sparse_moe.experts.251.w3", "model.layers.29.block_sparse_moe.experts.252.w3", "model.layers.29.block_sparse_moe.experts.253.w3", "model.layers.29.block_sparse_moe.experts.254.w3", "model.layers.29.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.007537439465522766, "dbits": 2415919104 } ] }, { "idx": 149, "layers": [ "model.layers.29.block_sparse_moe.experts.0.w2", "model.layers.29.block_sparse_moe.experts.1.w2", "model.layers.29.block_sparse_moe.experts.2.w2", "model.layers.29.block_sparse_moe.experts.3.w2", "model.layers.29.block_sparse_moe.experts.4.w2", "model.layers.29.block_sparse_moe.experts.5.w2", "model.layers.29.block_sparse_moe.experts.6.w2", "model.layers.29.block_sparse_moe.experts.7.w2", "model.layers.29.block_sparse_moe.experts.8.w2", "model.layers.29.block_sparse_moe.experts.9.w2", "model.layers.29.block_sparse_moe.experts.10.w2", "model.layers.29.block_sparse_moe.experts.11.w2", "model.layers.29.block_sparse_moe.experts.12.w2", "model.layers.29.block_sparse_moe.experts.13.w2", "model.layers.29.block_sparse_moe.experts.14.w2", "model.layers.29.block_sparse_moe.experts.15.w2", "model.layers.29.block_sparse_moe.experts.16.w2", "model.layers.29.block_sparse_moe.experts.17.w2", "model.layers.29.block_sparse_moe.experts.18.w2", "model.layers.29.block_sparse_moe.experts.19.w2", "model.layers.29.block_sparse_moe.experts.20.w2", "model.layers.29.block_sparse_moe.experts.21.w2", "model.layers.29.block_sparse_moe.experts.22.w2", "model.layers.29.block_sparse_moe.experts.23.w2", "model.layers.29.block_sparse_moe.experts.24.w2", "model.layers.29.block_sparse_moe.experts.25.w2", "model.layers.29.block_sparse_moe.experts.26.w2", "model.layers.29.block_sparse_moe.experts.27.w2", "model.layers.29.block_sparse_moe.experts.28.w2", "model.layers.29.block_sparse_moe.experts.29.w2", "model.layers.29.block_sparse_moe.experts.30.w2", "model.layers.29.block_sparse_moe.experts.31.w2", "model.layers.29.block_sparse_moe.experts.32.w2", "model.layers.29.block_sparse_moe.experts.33.w2", "model.layers.29.block_sparse_moe.experts.34.w2", "model.layers.29.block_sparse_moe.experts.35.w2", "model.layers.29.block_sparse_moe.experts.36.w2", "model.layers.29.block_sparse_moe.experts.37.w2", "model.layers.29.block_sparse_moe.experts.38.w2", "model.layers.29.block_sparse_moe.experts.39.w2", "model.layers.29.block_sparse_moe.experts.40.w2", "model.layers.29.block_sparse_moe.experts.41.w2", "model.layers.29.block_sparse_moe.experts.42.w2", "model.layers.29.block_sparse_moe.experts.43.w2", "model.layers.29.block_sparse_moe.experts.44.w2", "model.layers.29.block_sparse_moe.experts.45.w2", "model.layers.29.block_sparse_moe.experts.46.w2", "model.layers.29.block_sparse_moe.experts.47.w2", "model.layers.29.block_sparse_moe.experts.48.w2", "model.layers.29.block_sparse_moe.experts.49.w2", "model.layers.29.block_sparse_moe.experts.50.w2", "model.layers.29.block_sparse_moe.experts.51.w2", "model.layers.29.block_sparse_moe.experts.52.w2", "model.layers.29.block_sparse_moe.experts.53.w2", "model.layers.29.block_sparse_moe.experts.54.w2", "model.layers.29.block_sparse_moe.experts.55.w2", "model.layers.29.block_sparse_moe.experts.56.w2", "model.layers.29.block_sparse_moe.experts.57.w2", "model.layers.29.block_sparse_moe.experts.58.w2", "model.layers.29.block_sparse_moe.experts.59.w2", "model.layers.29.block_sparse_moe.experts.60.w2", "model.layers.29.block_sparse_moe.experts.61.w2", "model.layers.29.block_sparse_moe.experts.62.w2", "model.layers.29.block_sparse_moe.experts.63.w2", "model.layers.29.block_sparse_moe.experts.64.w2", "model.layers.29.block_sparse_moe.experts.65.w2", "model.layers.29.block_sparse_moe.experts.66.w2", "model.layers.29.block_sparse_moe.experts.67.w2", "model.layers.29.block_sparse_moe.experts.68.w2", "model.layers.29.block_sparse_moe.experts.69.w2", "model.layers.29.block_sparse_moe.experts.70.w2", "model.layers.29.block_sparse_moe.experts.71.w2", "model.layers.29.block_sparse_moe.experts.72.w2", "model.layers.29.block_sparse_moe.experts.73.w2", "model.layers.29.block_sparse_moe.experts.74.w2", "model.layers.29.block_sparse_moe.experts.75.w2", "model.layers.29.block_sparse_moe.experts.76.w2", "model.layers.29.block_sparse_moe.experts.77.w2", "model.layers.29.block_sparse_moe.experts.78.w2", "model.layers.29.block_sparse_moe.experts.79.w2", "model.layers.29.block_sparse_moe.experts.80.w2", "model.layers.29.block_sparse_moe.experts.81.w2", "model.layers.29.block_sparse_moe.experts.82.w2", "model.layers.29.block_sparse_moe.experts.83.w2", "model.layers.29.block_sparse_moe.experts.84.w2", "model.layers.29.block_sparse_moe.experts.85.w2", "model.layers.29.block_sparse_moe.experts.86.w2", "model.layers.29.block_sparse_moe.experts.87.w2", "model.layers.29.block_sparse_moe.experts.88.w2", "model.layers.29.block_sparse_moe.experts.89.w2", "model.layers.29.block_sparse_moe.experts.90.w2", "model.layers.29.block_sparse_moe.experts.91.w2", "model.layers.29.block_sparse_moe.experts.92.w2", "model.layers.29.block_sparse_moe.experts.93.w2", "model.layers.29.block_sparse_moe.experts.94.w2", "model.layers.29.block_sparse_moe.experts.95.w2", "model.layers.29.block_sparse_moe.experts.96.w2", "model.layers.29.block_sparse_moe.experts.97.w2", "model.layers.29.block_sparse_moe.experts.98.w2", "model.layers.29.block_sparse_moe.experts.99.w2", "model.layers.29.block_sparse_moe.experts.100.w2", "model.layers.29.block_sparse_moe.experts.101.w2", "model.layers.29.block_sparse_moe.experts.102.w2", "model.layers.29.block_sparse_moe.experts.103.w2", "model.layers.29.block_sparse_moe.experts.104.w2", "model.layers.29.block_sparse_moe.experts.105.w2", "model.layers.29.block_sparse_moe.experts.106.w2", "model.layers.29.block_sparse_moe.experts.107.w2", "model.layers.29.block_sparse_moe.experts.108.w2", "model.layers.29.block_sparse_moe.experts.109.w2", "model.layers.29.block_sparse_moe.experts.110.w2", "model.layers.29.block_sparse_moe.experts.111.w2", "model.layers.29.block_sparse_moe.experts.112.w2", "model.layers.29.block_sparse_moe.experts.113.w2", "model.layers.29.block_sparse_moe.experts.114.w2", "model.layers.29.block_sparse_moe.experts.115.w2", "model.layers.29.block_sparse_moe.experts.116.w2", "model.layers.29.block_sparse_moe.experts.117.w2", "model.layers.29.block_sparse_moe.experts.118.w2", "model.layers.29.block_sparse_moe.experts.119.w2", "model.layers.29.block_sparse_moe.experts.120.w2", "model.layers.29.block_sparse_moe.experts.121.w2", "model.layers.29.block_sparse_moe.experts.122.w2", "model.layers.29.block_sparse_moe.experts.123.w2", "model.layers.29.block_sparse_moe.experts.124.w2", "model.layers.29.block_sparse_moe.experts.125.w2", "model.layers.29.block_sparse_moe.experts.126.w2", "model.layers.29.block_sparse_moe.experts.127.w2", "model.layers.29.block_sparse_moe.experts.128.w2", "model.layers.29.block_sparse_moe.experts.129.w2", "model.layers.29.block_sparse_moe.experts.130.w2", "model.layers.29.block_sparse_moe.experts.131.w2", "model.layers.29.block_sparse_moe.experts.132.w2", "model.layers.29.block_sparse_moe.experts.133.w2", "model.layers.29.block_sparse_moe.experts.134.w2", "model.layers.29.block_sparse_moe.experts.135.w2", "model.layers.29.block_sparse_moe.experts.136.w2", "model.layers.29.block_sparse_moe.experts.137.w2", "model.layers.29.block_sparse_moe.experts.138.w2", "model.layers.29.block_sparse_moe.experts.139.w2", "model.layers.29.block_sparse_moe.experts.140.w2", "model.layers.29.block_sparse_moe.experts.141.w2", "model.layers.29.block_sparse_moe.experts.142.w2", "model.layers.29.block_sparse_moe.experts.143.w2", "model.layers.29.block_sparse_moe.experts.144.w2", "model.layers.29.block_sparse_moe.experts.145.w2", "model.layers.29.block_sparse_moe.experts.146.w2", "model.layers.29.block_sparse_moe.experts.147.w2", "model.layers.29.block_sparse_moe.experts.148.w2", "model.layers.29.block_sparse_moe.experts.149.w2", "model.layers.29.block_sparse_moe.experts.150.w2", "model.layers.29.block_sparse_moe.experts.151.w2", "model.layers.29.block_sparse_moe.experts.152.w2", "model.layers.29.block_sparse_moe.experts.153.w2", "model.layers.29.block_sparse_moe.experts.154.w2", "model.layers.29.block_sparse_moe.experts.155.w2", "model.layers.29.block_sparse_moe.experts.156.w2", "model.layers.29.block_sparse_moe.experts.157.w2", "model.layers.29.block_sparse_moe.experts.158.w2", "model.layers.29.block_sparse_moe.experts.159.w2", "model.layers.29.block_sparse_moe.experts.160.w2", "model.layers.29.block_sparse_moe.experts.161.w2", "model.layers.29.block_sparse_moe.experts.162.w2", "model.layers.29.block_sparse_moe.experts.163.w2", "model.layers.29.block_sparse_moe.experts.164.w2", "model.layers.29.block_sparse_moe.experts.165.w2", "model.layers.29.block_sparse_moe.experts.166.w2", "model.layers.29.block_sparse_moe.experts.167.w2", "model.layers.29.block_sparse_moe.experts.168.w2", "model.layers.29.block_sparse_moe.experts.169.w2", "model.layers.29.block_sparse_moe.experts.170.w2", "model.layers.29.block_sparse_moe.experts.171.w2", "model.layers.29.block_sparse_moe.experts.172.w2", "model.layers.29.block_sparse_moe.experts.173.w2", "model.layers.29.block_sparse_moe.experts.174.w2", "model.layers.29.block_sparse_moe.experts.175.w2", "model.layers.29.block_sparse_moe.experts.176.w2", "model.layers.29.block_sparse_moe.experts.177.w2", "model.layers.29.block_sparse_moe.experts.178.w2", "model.layers.29.block_sparse_moe.experts.179.w2", "model.layers.29.block_sparse_moe.experts.180.w2", "model.layers.29.block_sparse_moe.experts.181.w2", "model.layers.29.block_sparse_moe.experts.182.w2", "model.layers.29.block_sparse_moe.experts.183.w2", "model.layers.29.block_sparse_moe.experts.184.w2", "model.layers.29.block_sparse_moe.experts.185.w2", "model.layers.29.block_sparse_moe.experts.186.w2", "model.layers.29.block_sparse_moe.experts.187.w2", "model.layers.29.block_sparse_moe.experts.188.w2", "model.layers.29.block_sparse_moe.experts.189.w2", "model.layers.29.block_sparse_moe.experts.190.w2", "model.layers.29.block_sparse_moe.experts.191.w2", "model.layers.29.block_sparse_moe.experts.192.w2", "model.layers.29.block_sparse_moe.experts.193.w2", "model.layers.29.block_sparse_moe.experts.194.w2", "model.layers.29.block_sparse_moe.experts.195.w2", "model.layers.29.block_sparse_moe.experts.196.w2", "model.layers.29.block_sparse_moe.experts.197.w2", "model.layers.29.block_sparse_moe.experts.198.w2", "model.layers.29.block_sparse_moe.experts.199.w2", "model.layers.29.block_sparse_moe.experts.200.w2", "model.layers.29.block_sparse_moe.experts.201.w2", "model.layers.29.block_sparse_moe.experts.202.w2", "model.layers.29.block_sparse_moe.experts.203.w2", "model.layers.29.block_sparse_moe.experts.204.w2", "model.layers.29.block_sparse_moe.experts.205.w2", "model.layers.29.block_sparse_moe.experts.206.w2", "model.layers.29.block_sparse_moe.experts.207.w2", "model.layers.29.block_sparse_moe.experts.208.w2", "model.layers.29.block_sparse_moe.experts.209.w2", "model.layers.29.block_sparse_moe.experts.210.w2", "model.layers.29.block_sparse_moe.experts.211.w2", "model.layers.29.block_sparse_moe.experts.212.w2", "model.layers.29.block_sparse_moe.experts.213.w2", "model.layers.29.block_sparse_moe.experts.214.w2", "model.layers.29.block_sparse_moe.experts.215.w2", "model.layers.29.block_sparse_moe.experts.216.w2", "model.layers.29.block_sparse_moe.experts.217.w2", "model.layers.29.block_sparse_moe.experts.218.w2", "model.layers.29.block_sparse_moe.experts.219.w2", "model.layers.29.block_sparse_moe.experts.220.w2", "model.layers.29.block_sparse_moe.experts.221.w2", "model.layers.29.block_sparse_moe.experts.222.w2", "model.layers.29.block_sparse_moe.experts.223.w2", "model.layers.29.block_sparse_moe.experts.224.w2", "model.layers.29.block_sparse_moe.experts.225.w2", "model.layers.29.block_sparse_moe.experts.226.w2", "model.layers.29.block_sparse_moe.experts.227.w2", "model.layers.29.block_sparse_moe.experts.228.w2", "model.layers.29.block_sparse_moe.experts.229.w2", "model.layers.29.block_sparse_moe.experts.230.w2", "model.layers.29.block_sparse_moe.experts.231.w2", "model.layers.29.block_sparse_moe.experts.232.w2", "model.layers.29.block_sparse_moe.experts.233.w2", "model.layers.29.block_sparse_moe.experts.234.w2", "model.layers.29.block_sparse_moe.experts.235.w2", "model.layers.29.block_sparse_moe.experts.236.w2", "model.layers.29.block_sparse_moe.experts.237.w2", "model.layers.29.block_sparse_moe.experts.238.w2", "model.layers.29.block_sparse_moe.experts.239.w2", "model.layers.29.block_sparse_moe.experts.240.w2", "model.layers.29.block_sparse_moe.experts.241.w2", "model.layers.29.block_sparse_moe.experts.242.w2", "model.layers.29.block_sparse_moe.experts.243.w2", "model.layers.29.block_sparse_moe.experts.244.w2", "model.layers.29.block_sparse_moe.experts.245.w2", "model.layers.29.block_sparse_moe.experts.246.w2", "model.layers.29.block_sparse_moe.experts.247.w2", "model.layers.29.block_sparse_moe.experts.248.w2", "model.layers.29.block_sparse_moe.experts.249.w2", "model.layers.29.block_sparse_moe.experts.250.w2", "model.layers.29.block_sparse_moe.experts.251.w2", "model.layers.29.block_sparse_moe.experts.252.w2", "model.layers.29.block_sparse_moe.experts.253.w2", "model.layers.29.block_sparse_moe.experts.254.w2", "model.layers.29.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0010919123888015747, "dbits": 1207959552 } ] }, { "idx": 150, "layers": [ "model.layers.30.self_attn.q_proj" ], "candidates": [ { "dkld": -0.009118014574050815, "dbits": 18874368 } ] }, { "idx": 151, "layers": [ "model.layers.30.self_attn.k_proj", "model.layers.30.self_attn.v_proj" ], "candidates": [ { "dkld": -0.010835880041122348, "dbits": 6291456 } ] }, { "idx": 152, "layers": [ "model.layers.30.self_attn.o_proj" ], "candidates": [ { "dkld": -0.015108813345432237, "dbits": 18874368 } ] }, { "idx": 153, "layers": [ "model.layers.30.block_sparse_moe.experts.0.w1", "model.layers.30.block_sparse_moe.experts.1.w1", "model.layers.30.block_sparse_moe.experts.2.w1", "model.layers.30.block_sparse_moe.experts.3.w1", "model.layers.30.block_sparse_moe.experts.4.w1", "model.layers.30.block_sparse_moe.experts.5.w1", "model.layers.30.block_sparse_moe.experts.6.w1", "model.layers.30.block_sparse_moe.experts.7.w1", "model.layers.30.block_sparse_moe.experts.8.w1", "model.layers.30.block_sparse_moe.experts.9.w1", "model.layers.30.block_sparse_moe.experts.10.w1", "model.layers.30.block_sparse_moe.experts.11.w1", "model.layers.30.block_sparse_moe.experts.12.w1", "model.layers.30.block_sparse_moe.experts.13.w1", "model.layers.30.block_sparse_moe.experts.14.w1", "model.layers.30.block_sparse_moe.experts.15.w1", "model.layers.30.block_sparse_moe.experts.16.w1", "model.layers.30.block_sparse_moe.experts.17.w1", "model.layers.30.block_sparse_moe.experts.18.w1", "model.layers.30.block_sparse_moe.experts.19.w1", "model.layers.30.block_sparse_moe.experts.20.w1", "model.layers.30.block_sparse_moe.experts.21.w1", "model.layers.30.block_sparse_moe.experts.22.w1", "model.layers.30.block_sparse_moe.experts.23.w1", "model.layers.30.block_sparse_moe.experts.24.w1", "model.layers.30.block_sparse_moe.experts.25.w1", "model.layers.30.block_sparse_moe.experts.26.w1", "model.layers.30.block_sparse_moe.experts.27.w1", "model.layers.30.block_sparse_moe.experts.28.w1", "model.layers.30.block_sparse_moe.experts.29.w1", "model.layers.30.block_sparse_moe.experts.30.w1", "model.layers.30.block_sparse_moe.experts.31.w1", "model.layers.30.block_sparse_moe.experts.32.w1", "model.layers.30.block_sparse_moe.experts.33.w1", "model.layers.30.block_sparse_moe.experts.34.w1", "model.layers.30.block_sparse_moe.experts.35.w1", "model.layers.30.block_sparse_moe.experts.36.w1", "model.layers.30.block_sparse_moe.experts.37.w1", "model.layers.30.block_sparse_moe.experts.38.w1", "model.layers.30.block_sparse_moe.experts.39.w1", "model.layers.30.block_sparse_moe.experts.40.w1", "model.layers.30.block_sparse_moe.experts.41.w1", "model.layers.30.block_sparse_moe.experts.42.w1", "model.layers.30.block_sparse_moe.experts.43.w1", "model.layers.30.block_sparse_moe.experts.44.w1", "model.layers.30.block_sparse_moe.experts.45.w1", "model.layers.30.block_sparse_moe.experts.46.w1", "model.layers.30.block_sparse_moe.experts.47.w1", "model.layers.30.block_sparse_moe.experts.48.w1", "model.layers.30.block_sparse_moe.experts.49.w1", "model.layers.30.block_sparse_moe.experts.50.w1", "model.layers.30.block_sparse_moe.experts.51.w1", "model.layers.30.block_sparse_moe.experts.52.w1", "model.layers.30.block_sparse_moe.experts.53.w1", "model.layers.30.block_sparse_moe.experts.54.w1", "model.layers.30.block_sparse_moe.experts.55.w1", "model.layers.30.block_sparse_moe.experts.56.w1", "model.layers.30.block_sparse_moe.experts.57.w1", "model.layers.30.block_sparse_moe.experts.58.w1", "model.layers.30.block_sparse_moe.experts.59.w1", "model.layers.30.block_sparse_moe.experts.60.w1", "model.layers.30.block_sparse_moe.experts.61.w1", "model.layers.30.block_sparse_moe.experts.62.w1", "model.layers.30.block_sparse_moe.experts.63.w1", "model.layers.30.block_sparse_moe.experts.64.w1", "model.layers.30.block_sparse_moe.experts.65.w1", "model.layers.30.block_sparse_moe.experts.66.w1", "model.layers.30.block_sparse_moe.experts.67.w1", "model.layers.30.block_sparse_moe.experts.68.w1", "model.layers.30.block_sparse_moe.experts.69.w1", "model.layers.30.block_sparse_moe.experts.70.w1", "model.layers.30.block_sparse_moe.experts.71.w1", "model.layers.30.block_sparse_moe.experts.72.w1", "model.layers.30.block_sparse_moe.experts.73.w1", "model.layers.30.block_sparse_moe.experts.74.w1", "model.layers.30.block_sparse_moe.experts.75.w1", "model.layers.30.block_sparse_moe.experts.76.w1", "model.layers.30.block_sparse_moe.experts.77.w1", "model.layers.30.block_sparse_moe.experts.78.w1", "model.layers.30.block_sparse_moe.experts.79.w1", "model.layers.30.block_sparse_moe.experts.80.w1", "model.layers.30.block_sparse_moe.experts.81.w1", "model.layers.30.block_sparse_moe.experts.82.w1", "model.layers.30.block_sparse_moe.experts.83.w1", "model.layers.30.block_sparse_moe.experts.84.w1", "model.layers.30.block_sparse_moe.experts.85.w1", "model.layers.30.block_sparse_moe.experts.86.w1", "model.layers.30.block_sparse_moe.experts.87.w1", "model.layers.30.block_sparse_moe.experts.88.w1", "model.layers.30.block_sparse_moe.experts.89.w1", "model.layers.30.block_sparse_moe.experts.90.w1", "model.layers.30.block_sparse_moe.experts.91.w1", "model.layers.30.block_sparse_moe.experts.92.w1", "model.layers.30.block_sparse_moe.experts.93.w1", "model.layers.30.block_sparse_moe.experts.94.w1", "model.layers.30.block_sparse_moe.experts.95.w1", "model.layers.30.block_sparse_moe.experts.96.w1", "model.layers.30.block_sparse_moe.experts.97.w1", "model.layers.30.block_sparse_moe.experts.98.w1", "model.layers.30.block_sparse_moe.experts.99.w1", "model.layers.30.block_sparse_moe.experts.100.w1", "model.layers.30.block_sparse_moe.experts.101.w1", "model.layers.30.block_sparse_moe.experts.102.w1", "model.layers.30.block_sparse_moe.experts.103.w1", "model.layers.30.block_sparse_moe.experts.104.w1", "model.layers.30.block_sparse_moe.experts.105.w1", "model.layers.30.block_sparse_moe.experts.106.w1", "model.layers.30.block_sparse_moe.experts.107.w1", "model.layers.30.block_sparse_moe.experts.108.w1", "model.layers.30.block_sparse_moe.experts.109.w1", "model.layers.30.block_sparse_moe.experts.110.w1", "model.layers.30.block_sparse_moe.experts.111.w1", "model.layers.30.block_sparse_moe.experts.112.w1", "model.layers.30.block_sparse_moe.experts.113.w1", "model.layers.30.block_sparse_moe.experts.114.w1", "model.layers.30.block_sparse_moe.experts.115.w1", "model.layers.30.block_sparse_moe.experts.116.w1", "model.layers.30.block_sparse_moe.experts.117.w1", "model.layers.30.block_sparse_moe.experts.118.w1", "model.layers.30.block_sparse_moe.experts.119.w1", "model.layers.30.block_sparse_moe.experts.120.w1", "model.layers.30.block_sparse_moe.experts.121.w1", "model.layers.30.block_sparse_moe.experts.122.w1", "model.layers.30.block_sparse_moe.experts.123.w1", "model.layers.30.block_sparse_moe.experts.124.w1", "model.layers.30.block_sparse_moe.experts.125.w1", "model.layers.30.block_sparse_moe.experts.126.w1", "model.layers.30.block_sparse_moe.experts.127.w1", "model.layers.30.block_sparse_moe.experts.128.w1", "model.layers.30.block_sparse_moe.experts.129.w1", "model.layers.30.block_sparse_moe.experts.130.w1", "model.layers.30.block_sparse_moe.experts.131.w1", "model.layers.30.block_sparse_moe.experts.132.w1", "model.layers.30.block_sparse_moe.experts.133.w1", "model.layers.30.block_sparse_moe.experts.134.w1", "model.layers.30.block_sparse_moe.experts.135.w1", "model.layers.30.block_sparse_moe.experts.136.w1", "model.layers.30.block_sparse_moe.experts.137.w1", "model.layers.30.block_sparse_moe.experts.138.w1", "model.layers.30.block_sparse_moe.experts.139.w1", "model.layers.30.block_sparse_moe.experts.140.w1", "model.layers.30.block_sparse_moe.experts.141.w1", "model.layers.30.block_sparse_moe.experts.142.w1", "model.layers.30.block_sparse_moe.experts.143.w1", "model.layers.30.block_sparse_moe.experts.144.w1", "model.layers.30.block_sparse_moe.experts.145.w1", "model.layers.30.block_sparse_moe.experts.146.w1", "model.layers.30.block_sparse_moe.experts.147.w1", "model.layers.30.block_sparse_moe.experts.148.w1", "model.layers.30.block_sparse_moe.experts.149.w1", "model.layers.30.block_sparse_moe.experts.150.w1", "model.layers.30.block_sparse_moe.experts.151.w1", "model.layers.30.block_sparse_moe.experts.152.w1", "model.layers.30.block_sparse_moe.experts.153.w1", "model.layers.30.block_sparse_moe.experts.154.w1", "model.layers.30.block_sparse_moe.experts.155.w1", "model.layers.30.block_sparse_moe.experts.156.w1", "model.layers.30.block_sparse_moe.experts.157.w1", "model.layers.30.block_sparse_moe.experts.158.w1", "model.layers.30.block_sparse_moe.experts.159.w1", "model.layers.30.block_sparse_moe.experts.160.w1", "model.layers.30.block_sparse_moe.experts.161.w1", "model.layers.30.block_sparse_moe.experts.162.w1", "model.layers.30.block_sparse_moe.experts.163.w1", "model.layers.30.block_sparse_moe.experts.164.w1", "model.layers.30.block_sparse_moe.experts.165.w1", "model.layers.30.block_sparse_moe.experts.166.w1", "model.layers.30.block_sparse_moe.experts.167.w1", "model.layers.30.block_sparse_moe.experts.168.w1", "model.layers.30.block_sparse_moe.experts.169.w1", "model.layers.30.block_sparse_moe.experts.170.w1", "model.layers.30.block_sparse_moe.experts.171.w1", "model.layers.30.block_sparse_moe.experts.172.w1", "model.layers.30.block_sparse_moe.experts.173.w1", "model.layers.30.block_sparse_moe.experts.174.w1", "model.layers.30.block_sparse_moe.experts.175.w1", "model.layers.30.block_sparse_moe.experts.176.w1", "model.layers.30.block_sparse_moe.experts.177.w1", "model.layers.30.block_sparse_moe.experts.178.w1", "model.layers.30.block_sparse_moe.experts.179.w1", "model.layers.30.block_sparse_moe.experts.180.w1", "model.layers.30.block_sparse_moe.experts.181.w1", "model.layers.30.block_sparse_moe.experts.182.w1", "model.layers.30.block_sparse_moe.experts.183.w1", "model.layers.30.block_sparse_moe.experts.184.w1", "model.layers.30.block_sparse_moe.experts.185.w1", "model.layers.30.block_sparse_moe.experts.186.w1", "model.layers.30.block_sparse_moe.experts.187.w1", "model.layers.30.block_sparse_moe.experts.188.w1", "model.layers.30.block_sparse_moe.experts.189.w1", "model.layers.30.block_sparse_moe.experts.190.w1", "model.layers.30.block_sparse_moe.experts.191.w1", "model.layers.30.block_sparse_moe.experts.192.w1", "model.layers.30.block_sparse_moe.experts.193.w1", "model.layers.30.block_sparse_moe.experts.194.w1", "model.layers.30.block_sparse_moe.experts.195.w1", "model.layers.30.block_sparse_moe.experts.196.w1", "model.layers.30.block_sparse_moe.experts.197.w1", "model.layers.30.block_sparse_moe.experts.198.w1", "model.layers.30.block_sparse_moe.experts.199.w1", "model.layers.30.block_sparse_moe.experts.200.w1", "model.layers.30.block_sparse_moe.experts.201.w1", "model.layers.30.block_sparse_moe.experts.202.w1", "model.layers.30.block_sparse_moe.experts.203.w1", "model.layers.30.block_sparse_moe.experts.204.w1", "model.layers.30.block_sparse_moe.experts.205.w1", "model.layers.30.block_sparse_moe.experts.206.w1", "model.layers.30.block_sparse_moe.experts.207.w1", "model.layers.30.block_sparse_moe.experts.208.w1", "model.layers.30.block_sparse_moe.experts.209.w1", "model.layers.30.block_sparse_moe.experts.210.w1", "model.layers.30.block_sparse_moe.experts.211.w1", "model.layers.30.block_sparse_moe.experts.212.w1", "model.layers.30.block_sparse_moe.experts.213.w1", "model.layers.30.block_sparse_moe.experts.214.w1", "model.layers.30.block_sparse_moe.experts.215.w1", "model.layers.30.block_sparse_moe.experts.216.w1", "model.layers.30.block_sparse_moe.experts.217.w1", "model.layers.30.block_sparse_moe.experts.218.w1", "model.layers.30.block_sparse_moe.experts.219.w1", "model.layers.30.block_sparse_moe.experts.220.w1", "model.layers.30.block_sparse_moe.experts.221.w1", "model.layers.30.block_sparse_moe.experts.222.w1", "model.layers.30.block_sparse_moe.experts.223.w1", "model.layers.30.block_sparse_moe.experts.224.w1", "model.layers.30.block_sparse_moe.experts.225.w1", "model.layers.30.block_sparse_moe.experts.226.w1", "model.layers.30.block_sparse_moe.experts.227.w1", "model.layers.30.block_sparse_moe.experts.228.w1", "model.layers.30.block_sparse_moe.experts.229.w1", "model.layers.30.block_sparse_moe.experts.230.w1", "model.layers.30.block_sparse_moe.experts.231.w1", "model.layers.30.block_sparse_moe.experts.232.w1", "model.layers.30.block_sparse_moe.experts.233.w1", "model.layers.30.block_sparse_moe.experts.234.w1", "model.layers.30.block_sparse_moe.experts.235.w1", "model.layers.30.block_sparse_moe.experts.236.w1", "model.layers.30.block_sparse_moe.experts.237.w1", "model.layers.30.block_sparse_moe.experts.238.w1", "model.layers.30.block_sparse_moe.experts.239.w1", "model.layers.30.block_sparse_moe.experts.240.w1", "model.layers.30.block_sparse_moe.experts.241.w1", "model.layers.30.block_sparse_moe.experts.242.w1", "model.layers.30.block_sparse_moe.experts.243.w1", "model.layers.30.block_sparse_moe.experts.244.w1", "model.layers.30.block_sparse_moe.experts.245.w1", "model.layers.30.block_sparse_moe.experts.246.w1", "model.layers.30.block_sparse_moe.experts.247.w1", "model.layers.30.block_sparse_moe.experts.248.w1", "model.layers.30.block_sparse_moe.experts.249.w1", "model.layers.30.block_sparse_moe.experts.250.w1", "model.layers.30.block_sparse_moe.experts.251.w1", "model.layers.30.block_sparse_moe.experts.252.w1", "model.layers.30.block_sparse_moe.experts.253.w1", "model.layers.30.block_sparse_moe.experts.254.w1", "model.layers.30.block_sparse_moe.experts.255.w1", "model.layers.30.block_sparse_moe.experts.0.w3", "model.layers.30.block_sparse_moe.experts.1.w3", "model.layers.30.block_sparse_moe.experts.2.w3", "model.layers.30.block_sparse_moe.experts.3.w3", "model.layers.30.block_sparse_moe.experts.4.w3", "model.layers.30.block_sparse_moe.experts.5.w3", "model.layers.30.block_sparse_moe.experts.6.w3", "model.layers.30.block_sparse_moe.experts.7.w3", "model.layers.30.block_sparse_moe.experts.8.w3", "model.layers.30.block_sparse_moe.experts.9.w3", "model.layers.30.block_sparse_moe.experts.10.w3", "model.layers.30.block_sparse_moe.experts.11.w3", "model.layers.30.block_sparse_moe.experts.12.w3", "model.layers.30.block_sparse_moe.experts.13.w3", "model.layers.30.block_sparse_moe.experts.14.w3", "model.layers.30.block_sparse_moe.experts.15.w3", "model.layers.30.block_sparse_moe.experts.16.w3", "model.layers.30.block_sparse_moe.experts.17.w3", "model.layers.30.block_sparse_moe.experts.18.w3", "model.layers.30.block_sparse_moe.experts.19.w3", "model.layers.30.block_sparse_moe.experts.20.w3", "model.layers.30.block_sparse_moe.experts.21.w3", "model.layers.30.block_sparse_moe.experts.22.w3", "model.layers.30.block_sparse_moe.experts.23.w3", "model.layers.30.block_sparse_moe.experts.24.w3", "model.layers.30.block_sparse_moe.experts.25.w3", "model.layers.30.block_sparse_moe.experts.26.w3", "model.layers.30.block_sparse_moe.experts.27.w3", "model.layers.30.block_sparse_moe.experts.28.w3", "model.layers.30.block_sparse_moe.experts.29.w3", "model.layers.30.block_sparse_moe.experts.30.w3", "model.layers.30.block_sparse_moe.experts.31.w3", "model.layers.30.block_sparse_moe.experts.32.w3", "model.layers.30.block_sparse_moe.experts.33.w3", "model.layers.30.block_sparse_moe.experts.34.w3", "model.layers.30.block_sparse_moe.experts.35.w3", "model.layers.30.block_sparse_moe.experts.36.w3", "model.layers.30.block_sparse_moe.experts.37.w3", "model.layers.30.block_sparse_moe.experts.38.w3", "model.layers.30.block_sparse_moe.experts.39.w3", "model.layers.30.block_sparse_moe.experts.40.w3", "model.layers.30.block_sparse_moe.experts.41.w3", "model.layers.30.block_sparse_moe.experts.42.w3", "model.layers.30.block_sparse_moe.experts.43.w3", "model.layers.30.block_sparse_moe.experts.44.w3", "model.layers.30.block_sparse_moe.experts.45.w3", "model.layers.30.block_sparse_moe.experts.46.w3", "model.layers.30.block_sparse_moe.experts.47.w3", "model.layers.30.block_sparse_moe.experts.48.w3", "model.layers.30.block_sparse_moe.experts.49.w3", "model.layers.30.block_sparse_moe.experts.50.w3", "model.layers.30.block_sparse_moe.experts.51.w3", "model.layers.30.block_sparse_moe.experts.52.w3", "model.layers.30.block_sparse_moe.experts.53.w3", "model.layers.30.block_sparse_moe.experts.54.w3", "model.layers.30.block_sparse_moe.experts.55.w3", "model.layers.30.block_sparse_moe.experts.56.w3", "model.layers.30.block_sparse_moe.experts.57.w3", "model.layers.30.block_sparse_moe.experts.58.w3", "model.layers.30.block_sparse_moe.experts.59.w3", "model.layers.30.block_sparse_moe.experts.60.w3", "model.layers.30.block_sparse_moe.experts.61.w3", "model.layers.30.block_sparse_moe.experts.62.w3", "model.layers.30.block_sparse_moe.experts.63.w3", "model.layers.30.block_sparse_moe.experts.64.w3", "model.layers.30.block_sparse_moe.experts.65.w3", "model.layers.30.block_sparse_moe.experts.66.w3", "model.layers.30.block_sparse_moe.experts.67.w3", "model.layers.30.block_sparse_moe.experts.68.w3", "model.layers.30.block_sparse_moe.experts.69.w3", "model.layers.30.block_sparse_moe.experts.70.w3", "model.layers.30.block_sparse_moe.experts.71.w3", "model.layers.30.block_sparse_moe.experts.72.w3", "model.layers.30.block_sparse_moe.experts.73.w3", "model.layers.30.block_sparse_moe.experts.74.w3", "model.layers.30.block_sparse_moe.experts.75.w3", "model.layers.30.block_sparse_moe.experts.76.w3", "model.layers.30.block_sparse_moe.experts.77.w3", "model.layers.30.block_sparse_moe.experts.78.w3", "model.layers.30.block_sparse_moe.experts.79.w3", "model.layers.30.block_sparse_moe.experts.80.w3", "model.layers.30.block_sparse_moe.experts.81.w3", "model.layers.30.block_sparse_moe.experts.82.w3", "model.layers.30.block_sparse_moe.experts.83.w3", "model.layers.30.block_sparse_moe.experts.84.w3", "model.layers.30.block_sparse_moe.experts.85.w3", "model.layers.30.block_sparse_moe.experts.86.w3", "model.layers.30.block_sparse_moe.experts.87.w3", "model.layers.30.block_sparse_moe.experts.88.w3", "model.layers.30.block_sparse_moe.experts.89.w3", "model.layers.30.block_sparse_moe.experts.90.w3", "model.layers.30.block_sparse_moe.experts.91.w3", "model.layers.30.block_sparse_moe.experts.92.w3", "model.layers.30.block_sparse_moe.experts.93.w3", "model.layers.30.block_sparse_moe.experts.94.w3", "model.layers.30.block_sparse_moe.experts.95.w3", "model.layers.30.block_sparse_moe.experts.96.w3", "model.layers.30.block_sparse_moe.experts.97.w3", "model.layers.30.block_sparse_moe.experts.98.w3", "model.layers.30.block_sparse_moe.experts.99.w3", "model.layers.30.block_sparse_moe.experts.100.w3", "model.layers.30.block_sparse_moe.experts.101.w3", "model.layers.30.block_sparse_moe.experts.102.w3", "model.layers.30.block_sparse_moe.experts.103.w3", "model.layers.30.block_sparse_moe.experts.104.w3", "model.layers.30.block_sparse_moe.experts.105.w3", "model.layers.30.block_sparse_moe.experts.106.w3", "model.layers.30.block_sparse_moe.experts.107.w3", "model.layers.30.block_sparse_moe.experts.108.w3", "model.layers.30.block_sparse_moe.experts.109.w3", "model.layers.30.block_sparse_moe.experts.110.w3", "model.layers.30.block_sparse_moe.experts.111.w3", "model.layers.30.block_sparse_moe.experts.112.w3", "model.layers.30.block_sparse_moe.experts.113.w3", "model.layers.30.block_sparse_moe.experts.114.w3", "model.layers.30.block_sparse_moe.experts.115.w3", "model.layers.30.block_sparse_moe.experts.116.w3", "model.layers.30.block_sparse_moe.experts.117.w3", "model.layers.30.block_sparse_moe.experts.118.w3", "model.layers.30.block_sparse_moe.experts.119.w3", "model.layers.30.block_sparse_moe.experts.120.w3", "model.layers.30.block_sparse_moe.experts.121.w3", "model.layers.30.block_sparse_moe.experts.122.w3", "model.layers.30.block_sparse_moe.experts.123.w3", "model.layers.30.block_sparse_moe.experts.124.w3", "model.layers.30.block_sparse_moe.experts.125.w3", "model.layers.30.block_sparse_moe.experts.126.w3", "model.layers.30.block_sparse_moe.experts.127.w3", "model.layers.30.block_sparse_moe.experts.128.w3", "model.layers.30.block_sparse_moe.experts.129.w3", "model.layers.30.block_sparse_moe.experts.130.w3", "model.layers.30.block_sparse_moe.experts.131.w3", "model.layers.30.block_sparse_moe.experts.132.w3", "model.layers.30.block_sparse_moe.experts.133.w3", "model.layers.30.block_sparse_moe.experts.134.w3", "model.layers.30.block_sparse_moe.experts.135.w3", "model.layers.30.block_sparse_moe.experts.136.w3", "model.layers.30.block_sparse_moe.experts.137.w3", "model.layers.30.block_sparse_moe.experts.138.w3", "model.layers.30.block_sparse_moe.experts.139.w3", "model.layers.30.block_sparse_moe.experts.140.w3", "model.layers.30.block_sparse_moe.experts.141.w3", "model.layers.30.block_sparse_moe.experts.142.w3", "model.layers.30.block_sparse_moe.experts.143.w3", "model.layers.30.block_sparse_moe.experts.144.w3", "model.layers.30.block_sparse_moe.experts.145.w3", "model.layers.30.block_sparse_moe.experts.146.w3", "model.layers.30.block_sparse_moe.experts.147.w3", "model.layers.30.block_sparse_moe.experts.148.w3", "model.layers.30.block_sparse_moe.experts.149.w3", "model.layers.30.block_sparse_moe.experts.150.w3", "model.layers.30.block_sparse_moe.experts.151.w3", "model.layers.30.block_sparse_moe.experts.152.w3", "model.layers.30.block_sparse_moe.experts.153.w3", "model.layers.30.block_sparse_moe.experts.154.w3", "model.layers.30.block_sparse_moe.experts.155.w3", "model.layers.30.block_sparse_moe.experts.156.w3", "model.layers.30.block_sparse_moe.experts.157.w3", "model.layers.30.block_sparse_moe.experts.158.w3", "model.layers.30.block_sparse_moe.experts.159.w3", "model.layers.30.block_sparse_moe.experts.160.w3", "model.layers.30.block_sparse_moe.experts.161.w3", "model.layers.30.block_sparse_moe.experts.162.w3", "model.layers.30.block_sparse_moe.experts.163.w3", "model.layers.30.block_sparse_moe.experts.164.w3", "model.layers.30.block_sparse_moe.experts.165.w3", "model.layers.30.block_sparse_moe.experts.166.w3", "model.layers.30.block_sparse_moe.experts.167.w3", "model.layers.30.block_sparse_moe.experts.168.w3", "model.layers.30.block_sparse_moe.experts.169.w3", "model.layers.30.block_sparse_moe.experts.170.w3", "model.layers.30.block_sparse_moe.experts.171.w3", "model.layers.30.block_sparse_moe.experts.172.w3", "model.layers.30.block_sparse_moe.experts.173.w3", "model.layers.30.block_sparse_moe.experts.174.w3", "model.layers.30.block_sparse_moe.experts.175.w3", "model.layers.30.block_sparse_moe.experts.176.w3", "model.layers.30.block_sparse_moe.experts.177.w3", "model.layers.30.block_sparse_moe.experts.178.w3", "model.layers.30.block_sparse_moe.experts.179.w3", "model.layers.30.block_sparse_moe.experts.180.w3", "model.layers.30.block_sparse_moe.experts.181.w3", "model.layers.30.block_sparse_moe.experts.182.w3", "model.layers.30.block_sparse_moe.experts.183.w3", "model.layers.30.block_sparse_moe.experts.184.w3", "model.layers.30.block_sparse_moe.experts.185.w3", "model.layers.30.block_sparse_moe.experts.186.w3", "model.layers.30.block_sparse_moe.experts.187.w3", "model.layers.30.block_sparse_moe.experts.188.w3", "model.layers.30.block_sparse_moe.experts.189.w3", "model.layers.30.block_sparse_moe.experts.190.w3", "model.layers.30.block_sparse_moe.experts.191.w3", "model.layers.30.block_sparse_moe.experts.192.w3", "model.layers.30.block_sparse_moe.experts.193.w3", "model.layers.30.block_sparse_moe.experts.194.w3", "model.layers.30.block_sparse_moe.experts.195.w3", "model.layers.30.block_sparse_moe.experts.196.w3", "model.layers.30.block_sparse_moe.experts.197.w3", "model.layers.30.block_sparse_moe.experts.198.w3", "model.layers.30.block_sparse_moe.experts.199.w3", "model.layers.30.block_sparse_moe.experts.200.w3", "model.layers.30.block_sparse_moe.experts.201.w3", "model.layers.30.block_sparse_moe.experts.202.w3", "model.layers.30.block_sparse_moe.experts.203.w3", "model.layers.30.block_sparse_moe.experts.204.w3", "model.layers.30.block_sparse_moe.experts.205.w3", "model.layers.30.block_sparse_moe.experts.206.w3", "model.layers.30.block_sparse_moe.experts.207.w3", "model.layers.30.block_sparse_moe.experts.208.w3", "model.layers.30.block_sparse_moe.experts.209.w3", "model.layers.30.block_sparse_moe.experts.210.w3", "model.layers.30.block_sparse_moe.experts.211.w3", "model.layers.30.block_sparse_moe.experts.212.w3", "model.layers.30.block_sparse_moe.experts.213.w3", "model.layers.30.block_sparse_moe.experts.214.w3", "model.layers.30.block_sparse_moe.experts.215.w3", "model.layers.30.block_sparse_moe.experts.216.w3", "model.layers.30.block_sparse_moe.experts.217.w3", "model.layers.30.block_sparse_moe.experts.218.w3", "model.layers.30.block_sparse_moe.experts.219.w3", "model.layers.30.block_sparse_moe.experts.220.w3", "model.layers.30.block_sparse_moe.experts.221.w3", "model.layers.30.block_sparse_moe.experts.222.w3", "model.layers.30.block_sparse_moe.experts.223.w3", "model.layers.30.block_sparse_moe.experts.224.w3", "model.layers.30.block_sparse_moe.experts.225.w3", "model.layers.30.block_sparse_moe.experts.226.w3", "model.layers.30.block_sparse_moe.experts.227.w3", "model.layers.30.block_sparse_moe.experts.228.w3", "model.layers.30.block_sparse_moe.experts.229.w3", "model.layers.30.block_sparse_moe.experts.230.w3", "model.layers.30.block_sparse_moe.experts.231.w3", "model.layers.30.block_sparse_moe.experts.232.w3", "model.layers.30.block_sparse_moe.experts.233.w3", "model.layers.30.block_sparse_moe.experts.234.w3", "model.layers.30.block_sparse_moe.experts.235.w3", "model.layers.30.block_sparse_moe.experts.236.w3", "model.layers.30.block_sparse_moe.experts.237.w3", "model.layers.30.block_sparse_moe.experts.238.w3", "model.layers.30.block_sparse_moe.experts.239.w3", "model.layers.30.block_sparse_moe.experts.240.w3", "model.layers.30.block_sparse_moe.experts.241.w3", "model.layers.30.block_sparse_moe.experts.242.w3", "model.layers.30.block_sparse_moe.experts.243.w3", "model.layers.30.block_sparse_moe.experts.244.w3", "model.layers.30.block_sparse_moe.experts.245.w3", "model.layers.30.block_sparse_moe.experts.246.w3", "model.layers.30.block_sparse_moe.experts.247.w3", "model.layers.30.block_sparse_moe.experts.248.w3", "model.layers.30.block_sparse_moe.experts.249.w3", "model.layers.30.block_sparse_moe.experts.250.w3", "model.layers.30.block_sparse_moe.experts.251.w3", "model.layers.30.block_sparse_moe.experts.252.w3", "model.layers.30.block_sparse_moe.experts.253.w3", "model.layers.30.block_sparse_moe.experts.254.w3", "model.layers.30.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.002587348222732544, "dbits": 2415919104 } ] }, { "idx": 154, "layers": [ "model.layers.30.block_sparse_moe.experts.0.w2", "model.layers.30.block_sparse_moe.experts.1.w2", "model.layers.30.block_sparse_moe.experts.2.w2", "model.layers.30.block_sparse_moe.experts.3.w2", "model.layers.30.block_sparse_moe.experts.4.w2", "model.layers.30.block_sparse_moe.experts.5.w2", "model.layers.30.block_sparse_moe.experts.6.w2", "model.layers.30.block_sparse_moe.experts.7.w2", "model.layers.30.block_sparse_moe.experts.8.w2", "model.layers.30.block_sparse_moe.experts.9.w2", "model.layers.30.block_sparse_moe.experts.10.w2", "model.layers.30.block_sparse_moe.experts.11.w2", "model.layers.30.block_sparse_moe.experts.12.w2", "model.layers.30.block_sparse_moe.experts.13.w2", "model.layers.30.block_sparse_moe.experts.14.w2", "model.layers.30.block_sparse_moe.experts.15.w2", "model.layers.30.block_sparse_moe.experts.16.w2", "model.layers.30.block_sparse_moe.experts.17.w2", "model.layers.30.block_sparse_moe.experts.18.w2", "model.layers.30.block_sparse_moe.experts.19.w2", "model.layers.30.block_sparse_moe.experts.20.w2", "model.layers.30.block_sparse_moe.experts.21.w2", "model.layers.30.block_sparse_moe.experts.22.w2", "model.layers.30.block_sparse_moe.experts.23.w2", "model.layers.30.block_sparse_moe.experts.24.w2", "model.layers.30.block_sparse_moe.experts.25.w2", "model.layers.30.block_sparse_moe.experts.26.w2", "model.layers.30.block_sparse_moe.experts.27.w2", "model.layers.30.block_sparse_moe.experts.28.w2", "model.layers.30.block_sparse_moe.experts.29.w2", "model.layers.30.block_sparse_moe.experts.30.w2", "model.layers.30.block_sparse_moe.experts.31.w2", "model.layers.30.block_sparse_moe.experts.32.w2", "model.layers.30.block_sparse_moe.experts.33.w2", "model.layers.30.block_sparse_moe.experts.34.w2", "model.layers.30.block_sparse_moe.experts.35.w2", "model.layers.30.block_sparse_moe.experts.36.w2", "model.layers.30.block_sparse_moe.experts.37.w2", "model.layers.30.block_sparse_moe.experts.38.w2", "model.layers.30.block_sparse_moe.experts.39.w2", "model.layers.30.block_sparse_moe.experts.40.w2", "model.layers.30.block_sparse_moe.experts.41.w2", "model.layers.30.block_sparse_moe.experts.42.w2", "model.layers.30.block_sparse_moe.experts.43.w2", "model.layers.30.block_sparse_moe.experts.44.w2", "model.layers.30.block_sparse_moe.experts.45.w2", "model.layers.30.block_sparse_moe.experts.46.w2", "model.layers.30.block_sparse_moe.experts.47.w2", "model.layers.30.block_sparse_moe.experts.48.w2", "model.layers.30.block_sparse_moe.experts.49.w2", "model.layers.30.block_sparse_moe.experts.50.w2", "model.layers.30.block_sparse_moe.experts.51.w2", "model.layers.30.block_sparse_moe.experts.52.w2", "model.layers.30.block_sparse_moe.experts.53.w2", "model.layers.30.block_sparse_moe.experts.54.w2", "model.layers.30.block_sparse_moe.experts.55.w2", "model.layers.30.block_sparse_moe.experts.56.w2", "model.layers.30.block_sparse_moe.experts.57.w2", "model.layers.30.block_sparse_moe.experts.58.w2", "model.layers.30.block_sparse_moe.experts.59.w2", "model.layers.30.block_sparse_moe.experts.60.w2", "model.layers.30.block_sparse_moe.experts.61.w2", "model.layers.30.block_sparse_moe.experts.62.w2", "model.layers.30.block_sparse_moe.experts.63.w2", "model.layers.30.block_sparse_moe.experts.64.w2", "model.layers.30.block_sparse_moe.experts.65.w2", "model.layers.30.block_sparse_moe.experts.66.w2", "model.layers.30.block_sparse_moe.experts.67.w2", "model.layers.30.block_sparse_moe.experts.68.w2", "model.layers.30.block_sparse_moe.experts.69.w2", "model.layers.30.block_sparse_moe.experts.70.w2", "model.layers.30.block_sparse_moe.experts.71.w2", "model.layers.30.block_sparse_moe.experts.72.w2", "model.layers.30.block_sparse_moe.experts.73.w2", "model.layers.30.block_sparse_moe.experts.74.w2", "model.layers.30.block_sparse_moe.experts.75.w2", "model.layers.30.block_sparse_moe.experts.76.w2", "model.layers.30.block_sparse_moe.experts.77.w2", "model.layers.30.block_sparse_moe.experts.78.w2", "model.layers.30.block_sparse_moe.experts.79.w2", "model.layers.30.block_sparse_moe.experts.80.w2", "model.layers.30.block_sparse_moe.experts.81.w2", "model.layers.30.block_sparse_moe.experts.82.w2", "model.layers.30.block_sparse_moe.experts.83.w2", "model.layers.30.block_sparse_moe.experts.84.w2", "model.layers.30.block_sparse_moe.experts.85.w2", "model.layers.30.block_sparse_moe.experts.86.w2", "model.layers.30.block_sparse_moe.experts.87.w2", "model.layers.30.block_sparse_moe.experts.88.w2", "model.layers.30.block_sparse_moe.experts.89.w2", "model.layers.30.block_sparse_moe.experts.90.w2", "model.layers.30.block_sparse_moe.experts.91.w2", "model.layers.30.block_sparse_moe.experts.92.w2", "model.layers.30.block_sparse_moe.experts.93.w2", "model.layers.30.block_sparse_moe.experts.94.w2", "model.layers.30.block_sparse_moe.experts.95.w2", "model.layers.30.block_sparse_moe.experts.96.w2", "model.layers.30.block_sparse_moe.experts.97.w2", "model.layers.30.block_sparse_moe.experts.98.w2", "model.layers.30.block_sparse_moe.experts.99.w2", "model.layers.30.block_sparse_moe.experts.100.w2", "model.layers.30.block_sparse_moe.experts.101.w2", "model.layers.30.block_sparse_moe.experts.102.w2", "model.layers.30.block_sparse_moe.experts.103.w2", "model.layers.30.block_sparse_moe.experts.104.w2", "model.layers.30.block_sparse_moe.experts.105.w2", "model.layers.30.block_sparse_moe.experts.106.w2", "model.layers.30.block_sparse_moe.experts.107.w2", "model.layers.30.block_sparse_moe.experts.108.w2", "model.layers.30.block_sparse_moe.experts.109.w2", "model.layers.30.block_sparse_moe.experts.110.w2", "model.layers.30.block_sparse_moe.experts.111.w2", "model.layers.30.block_sparse_moe.experts.112.w2", "model.layers.30.block_sparse_moe.experts.113.w2", "model.layers.30.block_sparse_moe.experts.114.w2", "model.layers.30.block_sparse_moe.experts.115.w2", "model.layers.30.block_sparse_moe.experts.116.w2", "model.layers.30.block_sparse_moe.experts.117.w2", "model.layers.30.block_sparse_moe.experts.118.w2", "model.layers.30.block_sparse_moe.experts.119.w2", "model.layers.30.block_sparse_moe.experts.120.w2", "model.layers.30.block_sparse_moe.experts.121.w2", "model.layers.30.block_sparse_moe.experts.122.w2", "model.layers.30.block_sparse_moe.experts.123.w2", "model.layers.30.block_sparse_moe.experts.124.w2", "model.layers.30.block_sparse_moe.experts.125.w2", "model.layers.30.block_sparse_moe.experts.126.w2", "model.layers.30.block_sparse_moe.experts.127.w2", "model.layers.30.block_sparse_moe.experts.128.w2", "model.layers.30.block_sparse_moe.experts.129.w2", "model.layers.30.block_sparse_moe.experts.130.w2", "model.layers.30.block_sparse_moe.experts.131.w2", "model.layers.30.block_sparse_moe.experts.132.w2", "model.layers.30.block_sparse_moe.experts.133.w2", "model.layers.30.block_sparse_moe.experts.134.w2", "model.layers.30.block_sparse_moe.experts.135.w2", "model.layers.30.block_sparse_moe.experts.136.w2", "model.layers.30.block_sparse_moe.experts.137.w2", "model.layers.30.block_sparse_moe.experts.138.w2", "model.layers.30.block_sparse_moe.experts.139.w2", "model.layers.30.block_sparse_moe.experts.140.w2", "model.layers.30.block_sparse_moe.experts.141.w2", "model.layers.30.block_sparse_moe.experts.142.w2", "model.layers.30.block_sparse_moe.experts.143.w2", "model.layers.30.block_sparse_moe.experts.144.w2", "model.layers.30.block_sparse_moe.experts.145.w2", "model.layers.30.block_sparse_moe.experts.146.w2", "model.layers.30.block_sparse_moe.experts.147.w2", "model.layers.30.block_sparse_moe.experts.148.w2", "model.layers.30.block_sparse_moe.experts.149.w2", "model.layers.30.block_sparse_moe.experts.150.w2", "model.layers.30.block_sparse_moe.experts.151.w2", "model.layers.30.block_sparse_moe.experts.152.w2", "model.layers.30.block_sparse_moe.experts.153.w2", "model.layers.30.block_sparse_moe.experts.154.w2", "model.layers.30.block_sparse_moe.experts.155.w2", "model.layers.30.block_sparse_moe.experts.156.w2", "model.layers.30.block_sparse_moe.experts.157.w2", "model.layers.30.block_sparse_moe.experts.158.w2", "model.layers.30.block_sparse_moe.experts.159.w2", "model.layers.30.block_sparse_moe.experts.160.w2", "model.layers.30.block_sparse_moe.experts.161.w2", "model.layers.30.block_sparse_moe.experts.162.w2", "model.layers.30.block_sparse_moe.experts.163.w2", "model.layers.30.block_sparse_moe.experts.164.w2", "model.layers.30.block_sparse_moe.experts.165.w2", "model.layers.30.block_sparse_moe.experts.166.w2", "model.layers.30.block_sparse_moe.experts.167.w2", "model.layers.30.block_sparse_moe.experts.168.w2", "model.layers.30.block_sparse_moe.experts.169.w2", "model.layers.30.block_sparse_moe.experts.170.w2", "model.layers.30.block_sparse_moe.experts.171.w2", "model.layers.30.block_sparse_moe.experts.172.w2", "model.layers.30.block_sparse_moe.experts.173.w2", "model.layers.30.block_sparse_moe.experts.174.w2", "model.layers.30.block_sparse_moe.experts.175.w2", "model.layers.30.block_sparse_moe.experts.176.w2", "model.layers.30.block_sparse_moe.experts.177.w2", "model.layers.30.block_sparse_moe.experts.178.w2", "model.layers.30.block_sparse_moe.experts.179.w2", "model.layers.30.block_sparse_moe.experts.180.w2", "model.layers.30.block_sparse_moe.experts.181.w2", "model.layers.30.block_sparse_moe.experts.182.w2", "model.layers.30.block_sparse_moe.experts.183.w2", "model.layers.30.block_sparse_moe.experts.184.w2", "model.layers.30.block_sparse_moe.experts.185.w2", "model.layers.30.block_sparse_moe.experts.186.w2", "model.layers.30.block_sparse_moe.experts.187.w2", "model.layers.30.block_sparse_moe.experts.188.w2", "model.layers.30.block_sparse_moe.experts.189.w2", "model.layers.30.block_sparse_moe.experts.190.w2", "model.layers.30.block_sparse_moe.experts.191.w2", "model.layers.30.block_sparse_moe.experts.192.w2", "model.layers.30.block_sparse_moe.experts.193.w2", "model.layers.30.block_sparse_moe.experts.194.w2", "model.layers.30.block_sparse_moe.experts.195.w2", "model.layers.30.block_sparse_moe.experts.196.w2", "model.layers.30.block_sparse_moe.experts.197.w2", "model.layers.30.block_sparse_moe.experts.198.w2", "model.layers.30.block_sparse_moe.experts.199.w2", "model.layers.30.block_sparse_moe.experts.200.w2", "model.layers.30.block_sparse_moe.experts.201.w2", "model.layers.30.block_sparse_moe.experts.202.w2", "model.layers.30.block_sparse_moe.experts.203.w2", "model.layers.30.block_sparse_moe.experts.204.w2", "model.layers.30.block_sparse_moe.experts.205.w2", "model.layers.30.block_sparse_moe.experts.206.w2", "model.layers.30.block_sparse_moe.experts.207.w2", "model.layers.30.block_sparse_moe.experts.208.w2", "model.layers.30.block_sparse_moe.experts.209.w2", "model.layers.30.block_sparse_moe.experts.210.w2", "model.layers.30.block_sparse_moe.experts.211.w2", "model.layers.30.block_sparse_moe.experts.212.w2", "model.layers.30.block_sparse_moe.experts.213.w2", "model.layers.30.block_sparse_moe.experts.214.w2", "model.layers.30.block_sparse_moe.experts.215.w2", "model.layers.30.block_sparse_moe.experts.216.w2", "model.layers.30.block_sparse_moe.experts.217.w2", "model.layers.30.block_sparse_moe.experts.218.w2", "model.layers.30.block_sparse_moe.experts.219.w2", "model.layers.30.block_sparse_moe.experts.220.w2", "model.layers.30.block_sparse_moe.experts.221.w2", "model.layers.30.block_sparse_moe.experts.222.w2", "model.layers.30.block_sparse_moe.experts.223.w2", "model.layers.30.block_sparse_moe.experts.224.w2", "model.layers.30.block_sparse_moe.experts.225.w2", "model.layers.30.block_sparse_moe.experts.226.w2", "model.layers.30.block_sparse_moe.experts.227.w2", "model.layers.30.block_sparse_moe.experts.228.w2", "model.layers.30.block_sparse_moe.experts.229.w2", "model.layers.30.block_sparse_moe.experts.230.w2", "model.layers.30.block_sparse_moe.experts.231.w2", "model.layers.30.block_sparse_moe.experts.232.w2", "model.layers.30.block_sparse_moe.experts.233.w2", "model.layers.30.block_sparse_moe.experts.234.w2", "model.layers.30.block_sparse_moe.experts.235.w2", "model.layers.30.block_sparse_moe.experts.236.w2", "model.layers.30.block_sparse_moe.experts.237.w2", "model.layers.30.block_sparse_moe.experts.238.w2", "model.layers.30.block_sparse_moe.experts.239.w2", "model.layers.30.block_sparse_moe.experts.240.w2", "model.layers.30.block_sparse_moe.experts.241.w2", "model.layers.30.block_sparse_moe.experts.242.w2", "model.layers.30.block_sparse_moe.experts.243.w2", "model.layers.30.block_sparse_moe.experts.244.w2", "model.layers.30.block_sparse_moe.experts.245.w2", "model.layers.30.block_sparse_moe.experts.246.w2", "model.layers.30.block_sparse_moe.experts.247.w2", "model.layers.30.block_sparse_moe.experts.248.w2", "model.layers.30.block_sparse_moe.experts.249.w2", "model.layers.30.block_sparse_moe.experts.250.w2", "model.layers.30.block_sparse_moe.experts.251.w2", "model.layers.30.block_sparse_moe.experts.252.w2", "model.layers.30.block_sparse_moe.experts.253.w2", "model.layers.30.block_sparse_moe.experts.254.w2", "model.layers.30.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0009196519851684126, "dbits": 1207959552 } ] }, { "idx": 155, "layers": [ "model.layers.31.self_attn.q_proj" ], "candidates": [ { "dkld": -0.004894536733627275, "dbits": 18874368 } ] }, { "idx": 156, "layers": [ "model.layers.31.self_attn.k_proj", "model.layers.31.self_attn.v_proj" ], "candidates": [ { "dkld": -0.01476204991340635, "dbits": 6291456 } ] }, { "idx": 157, "layers": [ "model.layers.31.self_attn.o_proj" ], "candidates": [ { "dkld": -0.017787644267082126, "dbits": 18874368 } ] }, { "idx": 158, "layers": [ "model.layers.31.block_sparse_moe.experts.0.w1", "model.layers.31.block_sparse_moe.experts.1.w1", "model.layers.31.block_sparse_moe.experts.2.w1", "model.layers.31.block_sparse_moe.experts.3.w1", "model.layers.31.block_sparse_moe.experts.4.w1", "model.layers.31.block_sparse_moe.experts.5.w1", "model.layers.31.block_sparse_moe.experts.6.w1", "model.layers.31.block_sparse_moe.experts.7.w1", "model.layers.31.block_sparse_moe.experts.8.w1", "model.layers.31.block_sparse_moe.experts.9.w1", "model.layers.31.block_sparse_moe.experts.10.w1", "model.layers.31.block_sparse_moe.experts.11.w1", "model.layers.31.block_sparse_moe.experts.12.w1", "model.layers.31.block_sparse_moe.experts.13.w1", "model.layers.31.block_sparse_moe.experts.14.w1", "model.layers.31.block_sparse_moe.experts.15.w1", "model.layers.31.block_sparse_moe.experts.16.w1", "model.layers.31.block_sparse_moe.experts.17.w1", "model.layers.31.block_sparse_moe.experts.18.w1", "model.layers.31.block_sparse_moe.experts.19.w1", "model.layers.31.block_sparse_moe.experts.20.w1", "model.layers.31.block_sparse_moe.experts.21.w1", "model.layers.31.block_sparse_moe.experts.22.w1", "model.layers.31.block_sparse_moe.experts.23.w1", "model.layers.31.block_sparse_moe.experts.24.w1", "model.layers.31.block_sparse_moe.experts.25.w1", "model.layers.31.block_sparse_moe.experts.26.w1", "model.layers.31.block_sparse_moe.experts.27.w1", "model.layers.31.block_sparse_moe.experts.28.w1", "model.layers.31.block_sparse_moe.experts.29.w1", "model.layers.31.block_sparse_moe.experts.30.w1", "model.layers.31.block_sparse_moe.experts.31.w1", "model.layers.31.block_sparse_moe.experts.32.w1", "model.layers.31.block_sparse_moe.experts.33.w1", "model.layers.31.block_sparse_moe.experts.34.w1", "model.layers.31.block_sparse_moe.experts.35.w1", "model.layers.31.block_sparse_moe.experts.36.w1", "model.layers.31.block_sparse_moe.experts.37.w1", "model.layers.31.block_sparse_moe.experts.38.w1", "model.layers.31.block_sparse_moe.experts.39.w1", "model.layers.31.block_sparse_moe.experts.40.w1", "model.layers.31.block_sparse_moe.experts.41.w1", "model.layers.31.block_sparse_moe.experts.42.w1", "model.layers.31.block_sparse_moe.experts.43.w1", "model.layers.31.block_sparse_moe.experts.44.w1", "model.layers.31.block_sparse_moe.experts.45.w1", "model.layers.31.block_sparse_moe.experts.46.w1", "model.layers.31.block_sparse_moe.experts.47.w1", "model.layers.31.block_sparse_moe.experts.48.w1", "model.layers.31.block_sparse_moe.experts.49.w1", "model.layers.31.block_sparse_moe.experts.50.w1", "model.layers.31.block_sparse_moe.experts.51.w1", "model.layers.31.block_sparse_moe.experts.52.w1", "model.layers.31.block_sparse_moe.experts.53.w1", "model.layers.31.block_sparse_moe.experts.54.w1", "model.layers.31.block_sparse_moe.experts.55.w1", "model.layers.31.block_sparse_moe.experts.56.w1", "model.layers.31.block_sparse_moe.experts.57.w1", "model.layers.31.block_sparse_moe.experts.58.w1", "model.layers.31.block_sparse_moe.experts.59.w1", "model.layers.31.block_sparse_moe.experts.60.w1", "model.layers.31.block_sparse_moe.experts.61.w1", "model.layers.31.block_sparse_moe.experts.62.w1", "model.layers.31.block_sparse_moe.experts.63.w1", "model.layers.31.block_sparse_moe.experts.64.w1", "model.layers.31.block_sparse_moe.experts.65.w1", "model.layers.31.block_sparse_moe.experts.66.w1", "model.layers.31.block_sparse_moe.experts.67.w1", "model.layers.31.block_sparse_moe.experts.68.w1", "model.layers.31.block_sparse_moe.experts.69.w1", "model.layers.31.block_sparse_moe.experts.70.w1", "model.layers.31.block_sparse_moe.experts.71.w1", "model.layers.31.block_sparse_moe.experts.72.w1", "model.layers.31.block_sparse_moe.experts.73.w1", "model.layers.31.block_sparse_moe.experts.74.w1", "model.layers.31.block_sparse_moe.experts.75.w1", "model.layers.31.block_sparse_moe.experts.76.w1", "model.layers.31.block_sparse_moe.experts.77.w1", "model.layers.31.block_sparse_moe.experts.78.w1", "model.layers.31.block_sparse_moe.experts.79.w1", "model.layers.31.block_sparse_moe.experts.80.w1", "model.layers.31.block_sparse_moe.experts.81.w1", "model.layers.31.block_sparse_moe.experts.82.w1", "model.layers.31.block_sparse_moe.experts.83.w1", "model.layers.31.block_sparse_moe.experts.84.w1", "model.layers.31.block_sparse_moe.experts.85.w1", "model.layers.31.block_sparse_moe.experts.86.w1", "model.layers.31.block_sparse_moe.experts.87.w1", "model.layers.31.block_sparse_moe.experts.88.w1", "model.layers.31.block_sparse_moe.experts.89.w1", "model.layers.31.block_sparse_moe.experts.90.w1", "model.layers.31.block_sparse_moe.experts.91.w1", "model.layers.31.block_sparse_moe.experts.92.w1", "model.layers.31.block_sparse_moe.experts.93.w1", "model.layers.31.block_sparse_moe.experts.94.w1", "model.layers.31.block_sparse_moe.experts.95.w1", "model.layers.31.block_sparse_moe.experts.96.w1", "model.layers.31.block_sparse_moe.experts.97.w1", "model.layers.31.block_sparse_moe.experts.98.w1", "model.layers.31.block_sparse_moe.experts.99.w1", "model.layers.31.block_sparse_moe.experts.100.w1", "model.layers.31.block_sparse_moe.experts.101.w1", "model.layers.31.block_sparse_moe.experts.102.w1", "model.layers.31.block_sparse_moe.experts.103.w1", "model.layers.31.block_sparse_moe.experts.104.w1", "model.layers.31.block_sparse_moe.experts.105.w1", "model.layers.31.block_sparse_moe.experts.106.w1", "model.layers.31.block_sparse_moe.experts.107.w1", "model.layers.31.block_sparse_moe.experts.108.w1", "model.layers.31.block_sparse_moe.experts.109.w1", "model.layers.31.block_sparse_moe.experts.110.w1", "model.layers.31.block_sparse_moe.experts.111.w1", "model.layers.31.block_sparse_moe.experts.112.w1", "model.layers.31.block_sparse_moe.experts.113.w1", "model.layers.31.block_sparse_moe.experts.114.w1", "model.layers.31.block_sparse_moe.experts.115.w1", "model.layers.31.block_sparse_moe.experts.116.w1", "model.layers.31.block_sparse_moe.experts.117.w1", "model.layers.31.block_sparse_moe.experts.118.w1", "model.layers.31.block_sparse_moe.experts.119.w1", "model.layers.31.block_sparse_moe.experts.120.w1", "model.layers.31.block_sparse_moe.experts.121.w1", "model.layers.31.block_sparse_moe.experts.122.w1", "model.layers.31.block_sparse_moe.experts.123.w1", "model.layers.31.block_sparse_moe.experts.124.w1", "model.layers.31.block_sparse_moe.experts.125.w1", "model.layers.31.block_sparse_moe.experts.126.w1", "model.layers.31.block_sparse_moe.experts.127.w1", "model.layers.31.block_sparse_moe.experts.128.w1", "model.layers.31.block_sparse_moe.experts.129.w1", "model.layers.31.block_sparse_moe.experts.130.w1", "model.layers.31.block_sparse_moe.experts.131.w1", "model.layers.31.block_sparse_moe.experts.132.w1", "model.layers.31.block_sparse_moe.experts.133.w1", "model.layers.31.block_sparse_moe.experts.134.w1", "model.layers.31.block_sparse_moe.experts.135.w1", "model.layers.31.block_sparse_moe.experts.136.w1", "model.layers.31.block_sparse_moe.experts.137.w1", "model.layers.31.block_sparse_moe.experts.138.w1", "model.layers.31.block_sparse_moe.experts.139.w1", "model.layers.31.block_sparse_moe.experts.140.w1", "model.layers.31.block_sparse_moe.experts.141.w1", "model.layers.31.block_sparse_moe.experts.142.w1", "model.layers.31.block_sparse_moe.experts.143.w1", "model.layers.31.block_sparse_moe.experts.144.w1", "model.layers.31.block_sparse_moe.experts.145.w1", "model.layers.31.block_sparse_moe.experts.146.w1", "model.layers.31.block_sparse_moe.experts.147.w1", "model.layers.31.block_sparse_moe.experts.148.w1", "model.layers.31.block_sparse_moe.experts.149.w1", "model.layers.31.block_sparse_moe.experts.150.w1", "model.layers.31.block_sparse_moe.experts.151.w1", "model.layers.31.block_sparse_moe.experts.152.w1", "model.layers.31.block_sparse_moe.experts.153.w1", "model.layers.31.block_sparse_moe.experts.154.w1", "model.layers.31.block_sparse_moe.experts.155.w1", "model.layers.31.block_sparse_moe.experts.156.w1", "model.layers.31.block_sparse_moe.experts.157.w1", "model.layers.31.block_sparse_moe.experts.158.w1", "model.layers.31.block_sparse_moe.experts.159.w1", "model.layers.31.block_sparse_moe.experts.160.w1", "model.layers.31.block_sparse_moe.experts.161.w1", "model.layers.31.block_sparse_moe.experts.162.w1", "model.layers.31.block_sparse_moe.experts.163.w1", "model.layers.31.block_sparse_moe.experts.164.w1", "model.layers.31.block_sparse_moe.experts.165.w1", "model.layers.31.block_sparse_moe.experts.166.w1", "model.layers.31.block_sparse_moe.experts.167.w1", "model.layers.31.block_sparse_moe.experts.168.w1", "model.layers.31.block_sparse_moe.experts.169.w1", "model.layers.31.block_sparse_moe.experts.170.w1", "model.layers.31.block_sparse_moe.experts.171.w1", "model.layers.31.block_sparse_moe.experts.172.w1", "model.layers.31.block_sparse_moe.experts.173.w1", "model.layers.31.block_sparse_moe.experts.174.w1", "model.layers.31.block_sparse_moe.experts.175.w1", "model.layers.31.block_sparse_moe.experts.176.w1", "model.layers.31.block_sparse_moe.experts.177.w1", "model.layers.31.block_sparse_moe.experts.178.w1", "model.layers.31.block_sparse_moe.experts.179.w1", "model.layers.31.block_sparse_moe.experts.180.w1", "model.layers.31.block_sparse_moe.experts.181.w1", "model.layers.31.block_sparse_moe.experts.182.w1", "model.layers.31.block_sparse_moe.experts.183.w1", "model.layers.31.block_sparse_moe.experts.184.w1", "model.layers.31.block_sparse_moe.experts.185.w1", "model.layers.31.block_sparse_moe.experts.186.w1", "model.layers.31.block_sparse_moe.experts.187.w1", "model.layers.31.block_sparse_moe.experts.188.w1", "model.layers.31.block_sparse_moe.experts.189.w1", "model.layers.31.block_sparse_moe.experts.190.w1", "model.layers.31.block_sparse_moe.experts.191.w1", "model.layers.31.block_sparse_moe.experts.192.w1", "model.layers.31.block_sparse_moe.experts.193.w1", "model.layers.31.block_sparse_moe.experts.194.w1", "model.layers.31.block_sparse_moe.experts.195.w1", "model.layers.31.block_sparse_moe.experts.196.w1", "model.layers.31.block_sparse_moe.experts.197.w1", "model.layers.31.block_sparse_moe.experts.198.w1", "model.layers.31.block_sparse_moe.experts.199.w1", "model.layers.31.block_sparse_moe.experts.200.w1", "model.layers.31.block_sparse_moe.experts.201.w1", "model.layers.31.block_sparse_moe.experts.202.w1", "model.layers.31.block_sparse_moe.experts.203.w1", "model.layers.31.block_sparse_moe.experts.204.w1", "model.layers.31.block_sparse_moe.experts.205.w1", "model.layers.31.block_sparse_moe.experts.206.w1", "model.layers.31.block_sparse_moe.experts.207.w1", "model.layers.31.block_sparse_moe.experts.208.w1", "model.layers.31.block_sparse_moe.experts.209.w1", "model.layers.31.block_sparse_moe.experts.210.w1", "model.layers.31.block_sparse_moe.experts.211.w1", "model.layers.31.block_sparse_moe.experts.212.w1", "model.layers.31.block_sparse_moe.experts.213.w1", "model.layers.31.block_sparse_moe.experts.214.w1", "model.layers.31.block_sparse_moe.experts.215.w1", "model.layers.31.block_sparse_moe.experts.216.w1", "model.layers.31.block_sparse_moe.experts.217.w1", "model.layers.31.block_sparse_moe.experts.218.w1", "model.layers.31.block_sparse_moe.experts.219.w1", "model.layers.31.block_sparse_moe.experts.220.w1", "model.layers.31.block_sparse_moe.experts.221.w1", "model.layers.31.block_sparse_moe.experts.222.w1", "model.layers.31.block_sparse_moe.experts.223.w1", "model.layers.31.block_sparse_moe.experts.224.w1", "model.layers.31.block_sparse_moe.experts.225.w1", "model.layers.31.block_sparse_moe.experts.226.w1", "model.layers.31.block_sparse_moe.experts.227.w1", "model.layers.31.block_sparse_moe.experts.228.w1", "model.layers.31.block_sparse_moe.experts.229.w1", "model.layers.31.block_sparse_moe.experts.230.w1", "model.layers.31.block_sparse_moe.experts.231.w1", "model.layers.31.block_sparse_moe.experts.232.w1", "model.layers.31.block_sparse_moe.experts.233.w1", "model.layers.31.block_sparse_moe.experts.234.w1", "model.layers.31.block_sparse_moe.experts.235.w1", "model.layers.31.block_sparse_moe.experts.236.w1", "model.layers.31.block_sparse_moe.experts.237.w1", "model.layers.31.block_sparse_moe.experts.238.w1", "model.layers.31.block_sparse_moe.experts.239.w1", "model.layers.31.block_sparse_moe.experts.240.w1", "model.layers.31.block_sparse_moe.experts.241.w1", "model.layers.31.block_sparse_moe.experts.242.w1", "model.layers.31.block_sparse_moe.experts.243.w1", "model.layers.31.block_sparse_moe.experts.244.w1", "model.layers.31.block_sparse_moe.experts.245.w1", "model.layers.31.block_sparse_moe.experts.246.w1", "model.layers.31.block_sparse_moe.experts.247.w1", "model.layers.31.block_sparse_moe.experts.248.w1", "model.layers.31.block_sparse_moe.experts.249.w1", "model.layers.31.block_sparse_moe.experts.250.w1", "model.layers.31.block_sparse_moe.experts.251.w1", "model.layers.31.block_sparse_moe.experts.252.w1", "model.layers.31.block_sparse_moe.experts.253.w1", "model.layers.31.block_sparse_moe.experts.254.w1", "model.layers.31.block_sparse_moe.experts.255.w1", "model.layers.31.block_sparse_moe.experts.0.w3", "model.layers.31.block_sparse_moe.experts.1.w3", "model.layers.31.block_sparse_moe.experts.2.w3", "model.layers.31.block_sparse_moe.experts.3.w3", "model.layers.31.block_sparse_moe.experts.4.w3", "model.layers.31.block_sparse_moe.experts.5.w3", "model.layers.31.block_sparse_moe.experts.6.w3", "model.layers.31.block_sparse_moe.experts.7.w3", "model.layers.31.block_sparse_moe.experts.8.w3", "model.layers.31.block_sparse_moe.experts.9.w3", "model.layers.31.block_sparse_moe.experts.10.w3", "model.layers.31.block_sparse_moe.experts.11.w3", "model.layers.31.block_sparse_moe.experts.12.w3", "model.layers.31.block_sparse_moe.experts.13.w3", "model.layers.31.block_sparse_moe.experts.14.w3", "model.layers.31.block_sparse_moe.experts.15.w3", "model.layers.31.block_sparse_moe.experts.16.w3", "model.layers.31.block_sparse_moe.experts.17.w3", "model.layers.31.block_sparse_moe.experts.18.w3", "model.layers.31.block_sparse_moe.experts.19.w3", "model.layers.31.block_sparse_moe.experts.20.w3", "model.layers.31.block_sparse_moe.experts.21.w3", "model.layers.31.block_sparse_moe.experts.22.w3", "model.layers.31.block_sparse_moe.experts.23.w3", "model.layers.31.block_sparse_moe.experts.24.w3", "model.layers.31.block_sparse_moe.experts.25.w3", "model.layers.31.block_sparse_moe.experts.26.w3", "model.layers.31.block_sparse_moe.experts.27.w3", "model.layers.31.block_sparse_moe.experts.28.w3", "model.layers.31.block_sparse_moe.experts.29.w3", "model.layers.31.block_sparse_moe.experts.30.w3", "model.layers.31.block_sparse_moe.experts.31.w3", "model.layers.31.block_sparse_moe.experts.32.w3", "model.layers.31.block_sparse_moe.experts.33.w3", "model.layers.31.block_sparse_moe.experts.34.w3", "model.layers.31.block_sparse_moe.experts.35.w3", "model.layers.31.block_sparse_moe.experts.36.w3", "model.layers.31.block_sparse_moe.experts.37.w3", "model.layers.31.block_sparse_moe.experts.38.w3", "model.layers.31.block_sparse_moe.experts.39.w3", "model.layers.31.block_sparse_moe.experts.40.w3", "model.layers.31.block_sparse_moe.experts.41.w3", "model.layers.31.block_sparse_moe.experts.42.w3", "model.layers.31.block_sparse_moe.experts.43.w3", "model.layers.31.block_sparse_moe.experts.44.w3", "model.layers.31.block_sparse_moe.experts.45.w3", "model.layers.31.block_sparse_moe.experts.46.w3", "model.layers.31.block_sparse_moe.experts.47.w3", "model.layers.31.block_sparse_moe.experts.48.w3", "model.layers.31.block_sparse_moe.experts.49.w3", "model.layers.31.block_sparse_moe.experts.50.w3", "model.layers.31.block_sparse_moe.experts.51.w3", "model.layers.31.block_sparse_moe.experts.52.w3", "model.layers.31.block_sparse_moe.experts.53.w3", "model.layers.31.block_sparse_moe.experts.54.w3", "model.layers.31.block_sparse_moe.experts.55.w3", "model.layers.31.block_sparse_moe.experts.56.w3", "model.layers.31.block_sparse_moe.experts.57.w3", "model.layers.31.block_sparse_moe.experts.58.w3", "model.layers.31.block_sparse_moe.experts.59.w3", "model.layers.31.block_sparse_moe.experts.60.w3", "model.layers.31.block_sparse_moe.experts.61.w3", "model.layers.31.block_sparse_moe.experts.62.w3", "model.layers.31.block_sparse_moe.experts.63.w3", "model.layers.31.block_sparse_moe.experts.64.w3", "model.layers.31.block_sparse_moe.experts.65.w3", "model.layers.31.block_sparse_moe.experts.66.w3", "model.layers.31.block_sparse_moe.experts.67.w3", "model.layers.31.block_sparse_moe.experts.68.w3", "model.layers.31.block_sparse_moe.experts.69.w3", "model.layers.31.block_sparse_moe.experts.70.w3", "model.layers.31.block_sparse_moe.experts.71.w3", "model.layers.31.block_sparse_moe.experts.72.w3", "model.layers.31.block_sparse_moe.experts.73.w3", "model.layers.31.block_sparse_moe.experts.74.w3", "model.layers.31.block_sparse_moe.experts.75.w3", "model.layers.31.block_sparse_moe.experts.76.w3", "model.layers.31.block_sparse_moe.experts.77.w3", "model.layers.31.block_sparse_moe.experts.78.w3", "model.layers.31.block_sparse_moe.experts.79.w3", "model.layers.31.block_sparse_moe.experts.80.w3", "model.layers.31.block_sparse_moe.experts.81.w3", "model.layers.31.block_sparse_moe.experts.82.w3", "model.layers.31.block_sparse_moe.experts.83.w3", "model.layers.31.block_sparse_moe.experts.84.w3", "model.layers.31.block_sparse_moe.experts.85.w3", "model.layers.31.block_sparse_moe.experts.86.w3", "model.layers.31.block_sparse_moe.experts.87.w3", "model.layers.31.block_sparse_moe.experts.88.w3", "model.layers.31.block_sparse_moe.experts.89.w3", "model.layers.31.block_sparse_moe.experts.90.w3", "model.layers.31.block_sparse_moe.experts.91.w3", "model.layers.31.block_sparse_moe.experts.92.w3", "model.layers.31.block_sparse_moe.experts.93.w3", "model.layers.31.block_sparse_moe.experts.94.w3", "model.layers.31.block_sparse_moe.experts.95.w3", "model.layers.31.block_sparse_moe.experts.96.w3", "model.layers.31.block_sparse_moe.experts.97.w3", "model.layers.31.block_sparse_moe.experts.98.w3", "model.layers.31.block_sparse_moe.experts.99.w3", "model.layers.31.block_sparse_moe.experts.100.w3", "model.layers.31.block_sparse_moe.experts.101.w3", "model.layers.31.block_sparse_moe.experts.102.w3", "model.layers.31.block_sparse_moe.experts.103.w3", "model.layers.31.block_sparse_moe.experts.104.w3", "model.layers.31.block_sparse_moe.experts.105.w3", "model.layers.31.block_sparse_moe.experts.106.w3", "model.layers.31.block_sparse_moe.experts.107.w3", "model.layers.31.block_sparse_moe.experts.108.w3", "model.layers.31.block_sparse_moe.experts.109.w3", "model.layers.31.block_sparse_moe.experts.110.w3", "model.layers.31.block_sparse_moe.experts.111.w3", "model.layers.31.block_sparse_moe.experts.112.w3", "model.layers.31.block_sparse_moe.experts.113.w3", "model.layers.31.block_sparse_moe.experts.114.w3", "model.layers.31.block_sparse_moe.experts.115.w3", "model.layers.31.block_sparse_moe.experts.116.w3", "model.layers.31.block_sparse_moe.experts.117.w3", "model.layers.31.block_sparse_moe.experts.118.w3", "model.layers.31.block_sparse_moe.experts.119.w3", "model.layers.31.block_sparse_moe.experts.120.w3", "model.layers.31.block_sparse_moe.experts.121.w3", "model.layers.31.block_sparse_moe.experts.122.w3", "model.layers.31.block_sparse_moe.experts.123.w3", "model.layers.31.block_sparse_moe.experts.124.w3", "model.layers.31.block_sparse_moe.experts.125.w3", "model.layers.31.block_sparse_moe.experts.126.w3", "model.layers.31.block_sparse_moe.experts.127.w3", "model.layers.31.block_sparse_moe.experts.128.w3", "model.layers.31.block_sparse_moe.experts.129.w3", "model.layers.31.block_sparse_moe.experts.130.w3", "model.layers.31.block_sparse_moe.experts.131.w3", "model.layers.31.block_sparse_moe.experts.132.w3", "model.layers.31.block_sparse_moe.experts.133.w3", "model.layers.31.block_sparse_moe.experts.134.w3", "model.layers.31.block_sparse_moe.experts.135.w3", "model.layers.31.block_sparse_moe.experts.136.w3", "model.layers.31.block_sparse_moe.experts.137.w3", "model.layers.31.block_sparse_moe.experts.138.w3", "model.layers.31.block_sparse_moe.experts.139.w3", "model.layers.31.block_sparse_moe.experts.140.w3", "model.layers.31.block_sparse_moe.experts.141.w3", "model.layers.31.block_sparse_moe.experts.142.w3", "model.layers.31.block_sparse_moe.experts.143.w3", "model.layers.31.block_sparse_moe.experts.144.w3", "model.layers.31.block_sparse_moe.experts.145.w3", "model.layers.31.block_sparse_moe.experts.146.w3", "model.layers.31.block_sparse_moe.experts.147.w3", "model.layers.31.block_sparse_moe.experts.148.w3", "model.layers.31.block_sparse_moe.experts.149.w3", "model.layers.31.block_sparse_moe.experts.150.w3", "model.layers.31.block_sparse_moe.experts.151.w3", "model.layers.31.block_sparse_moe.experts.152.w3", "model.layers.31.block_sparse_moe.experts.153.w3", "model.layers.31.block_sparse_moe.experts.154.w3", "model.layers.31.block_sparse_moe.experts.155.w3", "model.layers.31.block_sparse_moe.experts.156.w3", "model.layers.31.block_sparse_moe.experts.157.w3", "model.layers.31.block_sparse_moe.experts.158.w3", "model.layers.31.block_sparse_moe.experts.159.w3", "model.layers.31.block_sparse_moe.experts.160.w3", "model.layers.31.block_sparse_moe.experts.161.w3", "model.layers.31.block_sparse_moe.experts.162.w3", "model.layers.31.block_sparse_moe.experts.163.w3", "model.layers.31.block_sparse_moe.experts.164.w3", "model.layers.31.block_sparse_moe.experts.165.w3", "model.layers.31.block_sparse_moe.experts.166.w3", "model.layers.31.block_sparse_moe.experts.167.w3", "model.layers.31.block_sparse_moe.experts.168.w3", "model.layers.31.block_sparse_moe.experts.169.w3", "model.layers.31.block_sparse_moe.experts.170.w3", "model.layers.31.block_sparse_moe.experts.171.w3", "model.layers.31.block_sparse_moe.experts.172.w3", "model.layers.31.block_sparse_moe.experts.173.w3", "model.layers.31.block_sparse_moe.experts.174.w3", "model.layers.31.block_sparse_moe.experts.175.w3", "model.layers.31.block_sparse_moe.experts.176.w3", "model.layers.31.block_sparse_moe.experts.177.w3", "model.layers.31.block_sparse_moe.experts.178.w3", "model.layers.31.block_sparse_moe.experts.179.w3", "model.layers.31.block_sparse_moe.experts.180.w3", "model.layers.31.block_sparse_moe.experts.181.w3", "model.layers.31.block_sparse_moe.experts.182.w3", "model.layers.31.block_sparse_moe.experts.183.w3", "model.layers.31.block_sparse_moe.experts.184.w3", "model.layers.31.block_sparse_moe.experts.185.w3", "model.layers.31.block_sparse_moe.experts.186.w3", "model.layers.31.block_sparse_moe.experts.187.w3", "model.layers.31.block_sparse_moe.experts.188.w3", "model.layers.31.block_sparse_moe.experts.189.w3", "model.layers.31.block_sparse_moe.experts.190.w3", "model.layers.31.block_sparse_moe.experts.191.w3", "model.layers.31.block_sparse_moe.experts.192.w3", "model.layers.31.block_sparse_moe.experts.193.w3", "model.layers.31.block_sparse_moe.experts.194.w3", "model.layers.31.block_sparse_moe.experts.195.w3", "model.layers.31.block_sparse_moe.experts.196.w3", "model.layers.31.block_sparse_moe.experts.197.w3", "model.layers.31.block_sparse_moe.experts.198.w3", "model.layers.31.block_sparse_moe.experts.199.w3", "model.layers.31.block_sparse_moe.experts.200.w3", "model.layers.31.block_sparse_moe.experts.201.w3", "model.layers.31.block_sparse_moe.experts.202.w3", "model.layers.31.block_sparse_moe.experts.203.w3", "model.layers.31.block_sparse_moe.experts.204.w3", "model.layers.31.block_sparse_moe.experts.205.w3", "model.layers.31.block_sparse_moe.experts.206.w3", "model.layers.31.block_sparse_moe.experts.207.w3", "model.layers.31.block_sparse_moe.experts.208.w3", "model.layers.31.block_sparse_moe.experts.209.w3", "model.layers.31.block_sparse_moe.experts.210.w3", "model.layers.31.block_sparse_moe.experts.211.w3", "model.layers.31.block_sparse_moe.experts.212.w3", "model.layers.31.block_sparse_moe.experts.213.w3", "model.layers.31.block_sparse_moe.experts.214.w3", "model.layers.31.block_sparse_moe.experts.215.w3", "model.layers.31.block_sparse_moe.experts.216.w3", "model.layers.31.block_sparse_moe.experts.217.w3", "model.layers.31.block_sparse_moe.experts.218.w3", "model.layers.31.block_sparse_moe.experts.219.w3", "model.layers.31.block_sparse_moe.experts.220.w3", "model.layers.31.block_sparse_moe.experts.221.w3", "model.layers.31.block_sparse_moe.experts.222.w3", "model.layers.31.block_sparse_moe.experts.223.w3", "model.layers.31.block_sparse_moe.experts.224.w3", "model.layers.31.block_sparse_moe.experts.225.w3", "model.layers.31.block_sparse_moe.experts.226.w3", "model.layers.31.block_sparse_moe.experts.227.w3", "model.layers.31.block_sparse_moe.experts.228.w3", "model.layers.31.block_sparse_moe.experts.229.w3", "model.layers.31.block_sparse_moe.experts.230.w3", "model.layers.31.block_sparse_moe.experts.231.w3", "model.layers.31.block_sparse_moe.experts.232.w3", "model.layers.31.block_sparse_moe.experts.233.w3", "model.layers.31.block_sparse_moe.experts.234.w3", "model.layers.31.block_sparse_moe.experts.235.w3", "model.layers.31.block_sparse_moe.experts.236.w3", "model.layers.31.block_sparse_moe.experts.237.w3", "model.layers.31.block_sparse_moe.experts.238.w3", "model.layers.31.block_sparse_moe.experts.239.w3", "model.layers.31.block_sparse_moe.experts.240.w3", "model.layers.31.block_sparse_moe.experts.241.w3", "model.layers.31.block_sparse_moe.experts.242.w3", "model.layers.31.block_sparse_moe.experts.243.w3", "model.layers.31.block_sparse_moe.experts.244.w3", "model.layers.31.block_sparse_moe.experts.245.w3", "model.layers.31.block_sparse_moe.experts.246.w3", "model.layers.31.block_sparse_moe.experts.247.w3", "model.layers.31.block_sparse_moe.experts.248.w3", "model.layers.31.block_sparse_moe.experts.249.w3", "model.layers.31.block_sparse_moe.experts.250.w3", "model.layers.31.block_sparse_moe.experts.251.w3", "model.layers.31.block_sparse_moe.experts.252.w3", "model.layers.31.block_sparse_moe.experts.253.w3", "model.layers.31.block_sparse_moe.experts.254.w3", "model.layers.31.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.003323236107826144, "dbits": 2415919104 } ] }, { "idx": 159, "layers": [ "model.layers.31.block_sparse_moe.experts.0.w2", "model.layers.31.block_sparse_moe.experts.1.w2", "model.layers.31.block_sparse_moe.experts.2.w2", "model.layers.31.block_sparse_moe.experts.3.w2", "model.layers.31.block_sparse_moe.experts.4.w2", "model.layers.31.block_sparse_moe.experts.5.w2", "model.layers.31.block_sparse_moe.experts.6.w2", "model.layers.31.block_sparse_moe.experts.7.w2", "model.layers.31.block_sparse_moe.experts.8.w2", "model.layers.31.block_sparse_moe.experts.9.w2", "model.layers.31.block_sparse_moe.experts.10.w2", "model.layers.31.block_sparse_moe.experts.11.w2", "model.layers.31.block_sparse_moe.experts.12.w2", "model.layers.31.block_sparse_moe.experts.13.w2", "model.layers.31.block_sparse_moe.experts.14.w2", "model.layers.31.block_sparse_moe.experts.15.w2", "model.layers.31.block_sparse_moe.experts.16.w2", "model.layers.31.block_sparse_moe.experts.17.w2", "model.layers.31.block_sparse_moe.experts.18.w2", "model.layers.31.block_sparse_moe.experts.19.w2", "model.layers.31.block_sparse_moe.experts.20.w2", "model.layers.31.block_sparse_moe.experts.21.w2", "model.layers.31.block_sparse_moe.experts.22.w2", "model.layers.31.block_sparse_moe.experts.23.w2", "model.layers.31.block_sparse_moe.experts.24.w2", "model.layers.31.block_sparse_moe.experts.25.w2", "model.layers.31.block_sparse_moe.experts.26.w2", "model.layers.31.block_sparse_moe.experts.27.w2", "model.layers.31.block_sparse_moe.experts.28.w2", "model.layers.31.block_sparse_moe.experts.29.w2", "model.layers.31.block_sparse_moe.experts.30.w2", "model.layers.31.block_sparse_moe.experts.31.w2", "model.layers.31.block_sparse_moe.experts.32.w2", "model.layers.31.block_sparse_moe.experts.33.w2", "model.layers.31.block_sparse_moe.experts.34.w2", "model.layers.31.block_sparse_moe.experts.35.w2", "model.layers.31.block_sparse_moe.experts.36.w2", "model.layers.31.block_sparse_moe.experts.37.w2", "model.layers.31.block_sparse_moe.experts.38.w2", "model.layers.31.block_sparse_moe.experts.39.w2", "model.layers.31.block_sparse_moe.experts.40.w2", "model.layers.31.block_sparse_moe.experts.41.w2", "model.layers.31.block_sparse_moe.experts.42.w2", "model.layers.31.block_sparse_moe.experts.43.w2", "model.layers.31.block_sparse_moe.experts.44.w2", "model.layers.31.block_sparse_moe.experts.45.w2", "model.layers.31.block_sparse_moe.experts.46.w2", "model.layers.31.block_sparse_moe.experts.47.w2", "model.layers.31.block_sparse_moe.experts.48.w2", "model.layers.31.block_sparse_moe.experts.49.w2", "model.layers.31.block_sparse_moe.experts.50.w2", "model.layers.31.block_sparse_moe.experts.51.w2", "model.layers.31.block_sparse_moe.experts.52.w2", "model.layers.31.block_sparse_moe.experts.53.w2", "model.layers.31.block_sparse_moe.experts.54.w2", "model.layers.31.block_sparse_moe.experts.55.w2", "model.layers.31.block_sparse_moe.experts.56.w2", "model.layers.31.block_sparse_moe.experts.57.w2", "model.layers.31.block_sparse_moe.experts.58.w2", "model.layers.31.block_sparse_moe.experts.59.w2", "model.layers.31.block_sparse_moe.experts.60.w2", "model.layers.31.block_sparse_moe.experts.61.w2", "model.layers.31.block_sparse_moe.experts.62.w2", "model.layers.31.block_sparse_moe.experts.63.w2", "model.layers.31.block_sparse_moe.experts.64.w2", "model.layers.31.block_sparse_moe.experts.65.w2", "model.layers.31.block_sparse_moe.experts.66.w2", "model.layers.31.block_sparse_moe.experts.67.w2", "model.layers.31.block_sparse_moe.experts.68.w2", "model.layers.31.block_sparse_moe.experts.69.w2", "model.layers.31.block_sparse_moe.experts.70.w2", "model.layers.31.block_sparse_moe.experts.71.w2", "model.layers.31.block_sparse_moe.experts.72.w2", "model.layers.31.block_sparse_moe.experts.73.w2", "model.layers.31.block_sparse_moe.experts.74.w2", "model.layers.31.block_sparse_moe.experts.75.w2", "model.layers.31.block_sparse_moe.experts.76.w2", "model.layers.31.block_sparse_moe.experts.77.w2", "model.layers.31.block_sparse_moe.experts.78.w2", "model.layers.31.block_sparse_moe.experts.79.w2", "model.layers.31.block_sparse_moe.experts.80.w2", "model.layers.31.block_sparse_moe.experts.81.w2", "model.layers.31.block_sparse_moe.experts.82.w2", "model.layers.31.block_sparse_moe.experts.83.w2", "model.layers.31.block_sparse_moe.experts.84.w2", "model.layers.31.block_sparse_moe.experts.85.w2", "model.layers.31.block_sparse_moe.experts.86.w2", "model.layers.31.block_sparse_moe.experts.87.w2", "model.layers.31.block_sparse_moe.experts.88.w2", "model.layers.31.block_sparse_moe.experts.89.w2", "model.layers.31.block_sparse_moe.experts.90.w2", "model.layers.31.block_sparse_moe.experts.91.w2", "model.layers.31.block_sparse_moe.experts.92.w2", "model.layers.31.block_sparse_moe.experts.93.w2", "model.layers.31.block_sparse_moe.experts.94.w2", "model.layers.31.block_sparse_moe.experts.95.w2", "model.layers.31.block_sparse_moe.experts.96.w2", "model.layers.31.block_sparse_moe.experts.97.w2", "model.layers.31.block_sparse_moe.experts.98.w2", "model.layers.31.block_sparse_moe.experts.99.w2", "model.layers.31.block_sparse_moe.experts.100.w2", "model.layers.31.block_sparse_moe.experts.101.w2", "model.layers.31.block_sparse_moe.experts.102.w2", "model.layers.31.block_sparse_moe.experts.103.w2", "model.layers.31.block_sparse_moe.experts.104.w2", "model.layers.31.block_sparse_moe.experts.105.w2", "model.layers.31.block_sparse_moe.experts.106.w2", "model.layers.31.block_sparse_moe.experts.107.w2", "model.layers.31.block_sparse_moe.experts.108.w2", "model.layers.31.block_sparse_moe.experts.109.w2", "model.layers.31.block_sparse_moe.experts.110.w2", "model.layers.31.block_sparse_moe.experts.111.w2", "model.layers.31.block_sparse_moe.experts.112.w2", "model.layers.31.block_sparse_moe.experts.113.w2", "model.layers.31.block_sparse_moe.experts.114.w2", "model.layers.31.block_sparse_moe.experts.115.w2", "model.layers.31.block_sparse_moe.experts.116.w2", "model.layers.31.block_sparse_moe.experts.117.w2", "model.layers.31.block_sparse_moe.experts.118.w2", "model.layers.31.block_sparse_moe.experts.119.w2", "model.layers.31.block_sparse_moe.experts.120.w2", "model.layers.31.block_sparse_moe.experts.121.w2", "model.layers.31.block_sparse_moe.experts.122.w2", "model.layers.31.block_sparse_moe.experts.123.w2", "model.layers.31.block_sparse_moe.experts.124.w2", "model.layers.31.block_sparse_moe.experts.125.w2", "model.layers.31.block_sparse_moe.experts.126.w2", "model.layers.31.block_sparse_moe.experts.127.w2", "model.layers.31.block_sparse_moe.experts.128.w2", "model.layers.31.block_sparse_moe.experts.129.w2", "model.layers.31.block_sparse_moe.experts.130.w2", "model.layers.31.block_sparse_moe.experts.131.w2", "model.layers.31.block_sparse_moe.experts.132.w2", "model.layers.31.block_sparse_moe.experts.133.w2", "model.layers.31.block_sparse_moe.experts.134.w2", "model.layers.31.block_sparse_moe.experts.135.w2", "model.layers.31.block_sparse_moe.experts.136.w2", "model.layers.31.block_sparse_moe.experts.137.w2", "model.layers.31.block_sparse_moe.experts.138.w2", "model.layers.31.block_sparse_moe.experts.139.w2", "model.layers.31.block_sparse_moe.experts.140.w2", "model.layers.31.block_sparse_moe.experts.141.w2", "model.layers.31.block_sparse_moe.experts.142.w2", "model.layers.31.block_sparse_moe.experts.143.w2", "model.layers.31.block_sparse_moe.experts.144.w2", "model.layers.31.block_sparse_moe.experts.145.w2", "model.layers.31.block_sparse_moe.experts.146.w2", "model.layers.31.block_sparse_moe.experts.147.w2", "model.layers.31.block_sparse_moe.experts.148.w2", "model.layers.31.block_sparse_moe.experts.149.w2", "model.layers.31.block_sparse_moe.experts.150.w2", "model.layers.31.block_sparse_moe.experts.151.w2", "model.layers.31.block_sparse_moe.experts.152.w2", "model.layers.31.block_sparse_moe.experts.153.w2", "model.layers.31.block_sparse_moe.experts.154.w2", "model.layers.31.block_sparse_moe.experts.155.w2", "model.layers.31.block_sparse_moe.experts.156.w2", "model.layers.31.block_sparse_moe.experts.157.w2", "model.layers.31.block_sparse_moe.experts.158.w2", "model.layers.31.block_sparse_moe.experts.159.w2", "model.layers.31.block_sparse_moe.experts.160.w2", "model.layers.31.block_sparse_moe.experts.161.w2", "model.layers.31.block_sparse_moe.experts.162.w2", "model.layers.31.block_sparse_moe.experts.163.w2", "model.layers.31.block_sparse_moe.experts.164.w2", "model.layers.31.block_sparse_moe.experts.165.w2", "model.layers.31.block_sparse_moe.experts.166.w2", "model.layers.31.block_sparse_moe.experts.167.w2", "model.layers.31.block_sparse_moe.experts.168.w2", "model.layers.31.block_sparse_moe.experts.169.w2", "model.layers.31.block_sparse_moe.experts.170.w2", "model.layers.31.block_sparse_moe.experts.171.w2", "model.layers.31.block_sparse_moe.experts.172.w2", "model.layers.31.block_sparse_moe.experts.173.w2", "model.layers.31.block_sparse_moe.experts.174.w2", "model.layers.31.block_sparse_moe.experts.175.w2", "model.layers.31.block_sparse_moe.experts.176.w2", "model.layers.31.block_sparse_moe.experts.177.w2", "model.layers.31.block_sparse_moe.experts.178.w2", "model.layers.31.block_sparse_moe.experts.179.w2", "model.layers.31.block_sparse_moe.experts.180.w2", "model.layers.31.block_sparse_moe.experts.181.w2", "model.layers.31.block_sparse_moe.experts.182.w2", "model.layers.31.block_sparse_moe.experts.183.w2", "model.layers.31.block_sparse_moe.experts.184.w2", "model.layers.31.block_sparse_moe.experts.185.w2", "model.layers.31.block_sparse_moe.experts.186.w2", "model.layers.31.block_sparse_moe.experts.187.w2", "model.layers.31.block_sparse_moe.experts.188.w2", "model.layers.31.block_sparse_moe.experts.189.w2", "model.layers.31.block_sparse_moe.experts.190.w2", "model.layers.31.block_sparse_moe.experts.191.w2", "model.layers.31.block_sparse_moe.experts.192.w2", "model.layers.31.block_sparse_moe.experts.193.w2", "model.layers.31.block_sparse_moe.experts.194.w2", "model.layers.31.block_sparse_moe.experts.195.w2", "model.layers.31.block_sparse_moe.experts.196.w2", "model.layers.31.block_sparse_moe.experts.197.w2", "model.layers.31.block_sparse_moe.experts.198.w2", "model.layers.31.block_sparse_moe.experts.199.w2", "model.layers.31.block_sparse_moe.experts.200.w2", "model.layers.31.block_sparse_moe.experts.201.w2", "model.layers.31.block_sparse_moe.experts.202.w2", "model.layers.31.block_sparse_moe.experts.203.w2", "model.layers.31.block_sparse_moe.experts.204.w2", "model.layers.31.block_sparse_moe.experts.205.w2", "model.layers.31.block_sparse_moe.experts.206.w2", "model.layers.31.block_sparse_moe.experts.207.w2", "model.layers.31.block_sparse_moe.experts.208.w2", "model.layers.31.block_sparse_moe.experts.209.w2", "model.layers.31.block_sparse_moe.experts.210.w2", "model.layers.31.block_sparse_moe.experts.211.w2", "model.layers.31.block_sparse_moe.experts.212.w2", "model.layers.31.block_sparse_moe.experts.213.w2", "model.layers.31.block_sparse_moe.experts.214.w2", "model.layers.31.block_sparse_moe.experts.215.w2", "model.layers.31.block_sparse_moe.experts.216.w2", "model.layers.31.block_sparse_moe.experts.217.w2", "model.layers.31.block_sparse_moe.experts.218.w2", "model.layers.31.block_sparse_moe.experts.219.w2", "model.layers.31.block_sparse_moe.experts.220.w2", "model.layers.31.block_sparse_moe.experts.221.w2", "model.layers.31.block_sparse_moe.experts.222.w2", "model.layers.31.block_sparse_moe.experts.223.w2", "model.layers.31.block_sparse_moe.experts.224.w2", "model.layers.31.block_sparse_moe.experts.225.w2", "model.layers.31.block_sparse_moe.experts.226.w2", "model.layers.31.block_sparse_moe.experts.227.w2", "model.layers.31.block_sparse_moe.experts.228.w2", "model.layers.31.block_sparse_moe.experts.229.w2", "model.layers.31.block_sparse_moe.experts.230.w2", "model.layers.31.block_sparse_moe.experts.231.w2", "model.layers.31.block_sparse_moe.experts.232.w2", "model.layers.31.block_sparse_moe.experts.233.w2", "model.layers.31.block_sparse_moe.experts.234.w2", "model.layers.31.block_sparse_moe.experts.235.w2", "model.layers.31.block_sparse_moe.experts.236.w2", "model.layers.31.block_sparse_moe.experts.237.w2", "model.layers.31.block_sparse_moe.experts.238.w2", "model.layers.31.block_sparse_moe.experts.239.w2", "model.layers.31.block_sparse_moe.experts.240.w2", "model.layers.31.block_sparse_moe.experts.241.w2", "model.layers.31.block_sparse_moe.experts.242.w2", "model.layers.31.block_sparse_moe.experts.243.w2", "model.layers.31.block_sparse_moe.experts.244.w2", "model.layers.31.block_sparse_moe.experts.245.w2", "model.layers.31.block_sparse_moe.experts.246.w2", "model.layers.31.block_sparse_moe.experts.247.w2", "model.layers.31.block_sparse_moe.experts.248.w2", "model.layers.31.block_sparse_moe.experts.249.w2", "model.layers.31.block_sparse_moe.experts.250.w2", "model.layers.31.block_sparse_moe.experts.251.w2", "model.layers.31.block_sparse_moe.experts.252.w2", "model.layers.31.block_sparse_moe.experts.253.w2", "model.layers.31.block_sparse_moe.experts.254.w2", "model.layers.31.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0007630079984664029, "dbits": 1207959552 } ] }, { "idx": 160, "layers": [ "model.layers.32.self_attn.q_proj" ], "candidates": [ { "dkld": 0.01239756047725682, "dbits": 18874368 } ] }, { "idx": 161, "layers": [ "model.layers.32.self_attn.k_proj", "model.layers.32.self_attn.v_proj" ], "candidates": [ { "dkld": -0.001315069198608354, "dbits": 6291456 } ] }, { "idx": 162, "layers": [ "model.layers.32.self_attn.o_proj" ], "candidates": [ { "dkld": -0.012906777858734064, "dbits": 18874368 } ] }, { "idx": 163, "layers": [ "model.layers.32.block_sparse_moe.experts.0.w1", "model.layers.32.block_sparse_moe.experts.1.w1", "model.layers.32.block_sparse_moe.experts.2.w1", "model.layers.32.block_sparse_moe.experts.3.w1", "model.layers.32.block_sparse_moe.experts.4.w1", "model.layers.32.block_sparse_moe.experts.5.w1", "model.layers.32.block_sparse_moe.experts.6.w1", "model.layers.32.block_sparse_moe.experts.7.w1", "model.layers.32.block_sparse_moe.experts.8.w1", "model.layers.32.block_sparse_moe.experts.9.w1", "model.layers.32.block_sparse_moe.experts.10.w1", "model.layers.32.block_sparse_moe.experts.11.w1", "model.layers.32.block_sparse_moe.experts.12.w1", "model.layers.32.block_sparse_moe.experts.13.w1", "model.layers.32.block_sparse_moe.experts.14.w1", "model.layers.32.block_sparse_moe.experts.15.w1", "model.layers.32.block_sparse_moe.experts.16.w1", "model.layers.32.block_sparse_moe.experts.17.w1", "model.layers.32.block_sparse_moe.experts.18.w1", "model.layers.32.block_sparse_moe.experts.19.w1", "model.layers.32.block_sparse_moe.experts.20.w1", "model.layers.32.block_sparse_moe.experts.21.w1", "model.layers.32.block_sparse_moe.experts.22.w1", "model.layers.32.block_sparse_moe.experts.23.w1", "model.layers.32.block_sparse_moe.experts.24.w1", "model.layers.32.block_sparse_moe.experts.25.w1", "model.layers.32.block_sparse_moe.experts.26.w1", "model.layers.32.block_sparse_moe.experts.27.w1", "model.layers.32.block_sparse_moe.experts.28.w1", "model.layers.32.block_sparse_moe.experts.29.w1", "model.layers.32.block_sparse_moe.experts.30.w1", "model.layers.32.block_sparse_moe.experts.31.w1", "model.layers.32.block_sparse_moe.experts.32.w1", "model.layers.32.block_sparse_moe.experts.33.w1", "model.layers.32.block_sparse_moe.experts.34.w1", "model.layers.32.block_sparse_moe.experts.35.w1", "model.layers.32.block_sparse_moe.experts.36.w1", "model.layers.32.block_sparse_moe.experts.37.w1", "model.layers.32.block_sparse_moe.experts.38.w1", "model.layers.32.block_sparse_moe.experts.39.w1", "model.layers.32.block_sparse_moe.experts.40.w1", "model.layers.32.block_sparse_moe.experts.41.w1", "model.layers.32.block_sparse_moe.experts.42.w1", "model.layers.32.block_sparse_moe.experts.43.w1", "model.layers.32.block_sparse_moe.experts.44.w1", "model.layers.32.block_sparse_moe.experts.45.w1", "model.layers.32.block_sparse_moe.experts.46.w1", "model.layers.32.block_sparse_moe.experts.47.w1", "model.layers.32.block_sparse_moe.experts.48.w1", "model.layers.32.block_sparse_moe.experts.49.w1", "model.layers.32.block_sparse_moe.experts.50.w1", "model.layers.32.block_sparse_moe.experts.51.w1", "model.layers.32.block_sparse_moe.experts.52.w1", "model.layers.32.block_sparse_moe.experts.53.w1", "model.layers.32.block_sparse_moe.experts.54.w1", "model.layers.32.block_sparse_moe.experts.55.w1", "model.layers.32.block_sparse_moe.experts.56.w1", "model.layers.32.block_sparse_moe.experts.57.w1", "model.layers.32.block_sparse_moe.experts.58.w1", "model.layers.32.block_sparse_moe.experts.59.w1", "model.layers.32.block_sparse_moe.experts.60.w1", "model.layers.32.block_sparse_moe.experts.61.w1", "model.layers.32.block_sparse_moe.experts.62.w1", "model.layers.32.block_sparse_moe.experts.63.w1", "model.layers.32.block_sparse_moe.experts.64.w1", "model.layers.32.block_sparse_moe.experts.65.w1", "model.layers.32.block_sparse_moe.experts.66.w1", "model.layers.32.block_sparse_moe.experts.67.w1", "model.layers.32.block_sparse_moe.experts.68.w1", "model.layers.32.block_sparse_moe.experts.69.w1", "model.layers.32.block_sparse_moe.experts.70.w1", "model.layers.32.block_sparse_moe.experts.71.w1", "model.layers.32.block_sparse_moe.experts.72.w1", "model.layers.32.block_sparse_moe.experts.73.w1", "model.layers.32.block_sparse_moe.experts.74.w1", "model.layers.32.block_sparse_moe.experts.75.w1", "model.layers.32.block_sparse_moe.experts.76.w1", "model.layers.32.block_sparse_moe.experts.77.w1", "model.layers.32.block_sparse_moe.experts.78.w1", "model.layers.32.block_sparse_moe.experts.79.w1", "model.layers.32.block_sparse_moe.experts.80.w1", "model.layers.32.block_sparse_moe.experts.81.w1", "model.layers.32.block_sparse_moe.experts.82.w1", "model.layers.32.block_sparse_moe.experts.83.w1", "model.layers.32.block_sparse_moe.experts.84.w1", "model.layers.32.block_sparse_moe.experts.85.w1", "model.layers.32.block_sparse_moe.experts.86.w1", "model.layers.32.block_sparse_moe.experts.87.w1", "model.layers.32.block_sparse_moe.experts.88.w1", "model.layers.32.block_sparse_moe.experts.89.w1", "model.layers.32.block_sparse_moe.experts.90.w1", "model.layers.32.block_sparse_moe.experts.91.w1", "model.layers.32.block_sparse_moe.experts.92.w1", "model.layers.32.block_sparse_moe.experts.93.w1", "model.layers.32.block_sparse_moe.experts.94.w1", "model.layers.32.block_sparse_moe.experts.95.w1", "model.layers.32.block_sparse_moe.experts.96.w1", "model.layers.32.block_sparse_moe.experts.97.w1", "model.layers.32.block_sparse_moe.experts.98.w1", "model.layers.32.block_sparse_moe.experts.99.w1", "model.layers.32.block_sparse_moe.experts.100.w1", "model.layers.32.block_sparse_moe.experts.101.w1", "model.layers.32.block_sparse_moe.experts.102.w1", "model.layers.32.block_sparse_moe.experts.103.w1", "model.layers.32.block_sparse_moe.experts.104.w1", "model.layers.32.block_sparse_moe.experts.105.w1", "model.layers.32.block_sparse_moe.experts.106.w1", "model.layers.32.block_sparse_moe.experts.107.w1", "model.layers.32.block_sparse_moe.experts.108.w1", "model.layers.32.block_sparse_moe.experts.109.w1", "model.layers.32.block_sparse_moe.experts.110.w1", "model.layers.32.block_sparse_moe.experts.111.w1", "model.layers.32.block_sparse_moe.experts.112.w1", "model.layers.32.block_sparse_moe.experts.113.w1", "model.layers.32.block_sparse_moe.experts.114.w1", "model.layers.32.block_sparse_moe.experts.115.w1", "model.layers.32.block_sparse_moe.experts.116.w1", "model.layers.32.block_sparse_moe.experts.117.w1", "model.layers.32.block_sparse_moe.experts.118.w1", "model.layers.32.block_sparse_moe.experts.119.w1", "model.layers.32.block_sparse_moe.experts.120.w1", "model.layers.32.block_sparse_moe.experts.121.w1", "model.layers.32.block_sparse_moe.experts.122.w1", "model.layers.32.block_sparse_moe.experts.123.w1", "model.layers.32.block_sparse_moe.experts.124.w1", "model.layers.32.block_sparse_moe.experts.125.w1", "model.layers.32.block_sparse_moe.experts.126.w1", "model.layers.32.block_sparse_moe.experts.127.w1", "model.layers.32.block_sparse_moe.experts.128.w1", "model.layers.32.block_sparse_moe.experts.129.w1", "model.layers.32.block_sparse_moe.experts.130.w1", "model.layers.32.block_sparse_moe.experts.131.w1", "model.layers.32.block_sparse_moe.experts.132.w1", "model.layers.32.block_sparse_moe.experts.133.w1", "model.layers.32.block_sparse_moe.experts.134.w1", "model.layers.32.block_sparse_moe.experts.135.w1", "model.layers.32.block_sparse_moe.experts.136.w1", "model.layers.32.block_sparse_moe.experts.137.w1", "model.layers.32.block_sparse_moe.experts.138.w1", "model.layers.32.block_sparse_moe.experts.139.w1", "model.layers.32.block_sparse_moe.experts.140.w1", "model.layers.32.block_sparse_moe.experts.141.w1", "model.layers.32.block_sparse_moe.experts.142.w1", "model.layers.32.block_sparse_moe.experts.143.w1", "model.layers.32.block_sparse_moe.experts.144.w1", "model.layers.32.block_sparse_moe.experts.145.w1", "model.layers.32.block_sparse_moe.experts.146.w1", "model.layers.32.block_sparse_moe.experts.147.w1", "model.layers.32.block_sparse_moe.experts.148.w1", "model.layers.32.block_sparse_moe.experts.149.w1", "model.layers.32.block_sparse_moe.experts.150.w1", "model.layers.32.block_sparse_moe.experts.151.w1", "model.layers.32.block_sparse_moe.experts.152.w1", "model.layers.32.block_sparse_moe.experts.153.w1", "model.layers.32.block_sparse_moe.experts.154.w1", "model.layers.32.block_sparse_moe.experts.155.w1", "model.layers.32.block_sparse_moe.experts.156.w1", "model.layers.32.block_sparse_moe.experts.157.w1", "model.layers.32.block_sparse_moe.experts.158.w1", "model.layers.32.block_sparse_moe.experts.159.w1", "model.layers.32.block_sparse_moe.experts.160.w1", "model.layers.32.block_sparse_moe.experts.161.w1", "model.layers.32.block_sparse_moe.experts.162.w1", "model.layers.32.block_sparse_moe.experts.163.w1", "model.layers.32.block_sparse_moe.experts.164.w1", "model.layers.32.block_sparse_moe.experts.165.w1", "model.layers.32.block_sparse_moe.experts.166.w1", "model.layers.32.block_sparse_moe.experts.167.w1", "model.layers.32.block_sparse_moe.experts.168.w1", "model.layers.32.block_sparse_moe.experts.169.w1", "model.layers.32.block_sparse_moe.experts.170.w1", "model.layers.32.block_sparse_moe.experts.171.w1", "model.layers.32.block_sparse_moe.experts.172.w1", "model.layers.32.block_sparse_moe.experts.173.w1", "model.layers.32.block_sparse_moe.experts.174.w1", "model.layers.32.block_sparse_moe.experts.175.w1", "model.layers.32.block_sparse_moe.experts.176.w1", "model.layers.32.block_sparse_moe.experts.177.w1", "model.layers.32.block_sparse_moe.experts.178.w1", "model.layers.32.block_sparse_moe.experts.179.w1", "model.layers.32.block_sparse_moe.experts.180.w1", "model.layers.32.block_sparse_moe.experts.181.w1", "model.layers.32.block_sparse_moe.experts.182.w1", "model.layers.32.block_sparse_moe.experts.183.w1", "model.layers.32.block_sparse_moe.experts.184.w1", "model.layers.32.block_sparse_moe.experts.185.w1", "model.layers.32.block_sparse_moe.experts.186.w1", "model.layers.32.block_sparse_moe.experts.187.w1", "model.layers.32.block_sparse_moe.experts.188.w1", "model.layers.32.block_sparse_moe.experts.189.w1", "model.layers.32.block_sparse_moe.experts.190.w1", "model.layers.32.block_sparse_moe.experts.191.w1", "model.layers.32.block_sparse_moe.experts.192.w1", "model.layers.32.block_sparse_moe.experts.193.w1", "model.layers.32.block_sparse_moe.experts.194.w1", "model.layers.32.block_sparse_moe.experts.195.w1", "model.layers.32.block_sparse_moe.experts.196.w1", "model.layers.32.block_sparse_moe.experts.197.w1", "model.layers.32.block_sparse_moe.experts.198.w1", "model.layers.32.block_sparse_moe.experts.199.w1", "model.layers.32.block_sparse_moe.experts.200.w1", "model.layers.32.block_sparse_moe.experts.201.w1", "model.layers.32.block_sparse_moe.experts.202.w1", "model.layers.32.block_sparse_moe.experts.203.w1", "model.layers.32.block_sparse_moe.experts.204.w1", "model.layers.32.block_sparse_moe.experts.205.w1", "model.layers.32.block_sparse_moe.experts.206.w1", "model.layers.32.block_sparse_moe.experts.207.w1", "model.layers.32.block_sparse_moe.experts.208.w1", "model.layers.32.block_sparse_moe.experts.209.w1", "model.layers.32.block_sparse_moe.experts.210.w1", "model.layers.32.block_sparse_moe.experts.211.w1", "model.layers.32.block_sparse_moe.experts.212.w1", "model.layers.32.block_sparse_moe.experts.213.w1", "model.layers.32.block_sparse_moe.experts.214.w1", "model.layers.32.block_sparse_moe.experts.215.w1", "model.layers.32.block_sparse_moe.experts.216.w1", "model.layers.32.block_sparse_moe.experts.217.w1", "model.layers.32.block_sparse_moe.experts.218.w1", "model.layers.32.block_sparse_moe.experts.219.w1", "model.layers.32.block_sparse_moe.experts.220.w1", "model.layers.32.block_sparse_moe.experts.221.w1", "model.layers.32.block_sparse_moe.experts.222.w1", "model.layers.32.block_sparse_moe.experts.223.w1", "model.layers.32.block_sparse_moe.experts.224.w1", "model.layers.32.block_sparse_moe.experts.225.w1", "model.layers.32.block_sparse_moe.experts.226.w1", "model.layers.32.block_sparse_moe.experts.227.w1", "model.layers.32.block_sparse_moe.experts.228.w1", "model.layers.32.block_sparse_moe.experts.229.w1", "model.layers.32.block_sparse_moe.experts.230.w1", "model.layers.32.block_sparse_moe.experts.231.w1", "model.layers.32.block_sparse_moe.experts.232.w1", "model.layers.32.block_sparse_moe.experts.233.w1", "model.layers.32.block_sparse_moe.experts.234.w1", "model.layers.32.block_sparse_moe.experts.235.w1", "model.layers.32.block_sparse_moe.experts.236.w1", "model.layers.32.block_sparse_moe.experts.237.w1", "model.layers.32.block_sparse_moe.experts.238.w1", "model.layers.32.block_sparse_moe.experts.239.w1", "model.layers.32.block_sparse_moe.experts.240.w1", "model.layers.32.block_sparse_moe.experts.241.w1", "model.layers.32.block_sparse_moe.experts.242.w1", "model.layers.32.block_sparse_moe.experts.243.w1", "model.layers.32.block_sparse_moe.experts.244.w1", "model.layers.32.block_sparse_moe.experts.245.w1", "model.layers.32.block_sparse_moe.experts.246.w1", "model.layers.32.block_sparse_moe.experts.247.w1", "model.layers.32.block_sparse_moe.experts.248.w1", "model.layers.32.block_sparse_moe.experts.249.w1", "model.layers.32.block_sparse_moe.experts.250.w1", "model.layers.32.block_sparse_moe.experts.251.w1", "model.layers.32.block_sparse_moe.experts.252.w1", "model.layers.32.block_sparse_moe.experts.253.w1", "model.layers.32.block_sparse_moe.experts.254.w1", "model.layers.32.block_sparse_moe.experts.255.w1", "model.layers.32.block_sparse_moe.experts.0.w3", "model.layers.32.block_sparse_moe.experts.1.w3", "model.layers.32.block_sparse_moe.experts.2.w3", "model.layers.32.block_sparse_moe.experts.3.w3", "model.layers.32.block_sparse_moe.experts.4.w3", "model.layers.32.block_sparse_moe.experts.5.w3", "model.layers.32.block_sparse_moe.experts.6.w3", "model.layers.32.block_sparse_moe.experts.7.w3", "model.layers.32.block_sparse_moe.experts.8.w3", "model.layers.32.block_sparse_moe.experts.9.w3", "model.layers.32.block_sparse_moe.experts.10.w3", "model.layers.32.block_sparse_moe.experts.11.w3", "model.layers.32.block_sparse_moe.experts.12.w3", "model.layers.32.block_sparse_moe.experts.13.w3", "model.layers.32.block_sparse_moe.experts.14.w3", "model.layers.32.block_sparse_moe.experts.15.w3", "model.layers.32.block_sparse_moe.experts.16.w3", "model.layers.32.block_sparse_moe.experts.17.w3", "model.layers.32.block_sparse_moe.experts.18.w3", "model.layers.32.block_sparse_moe.experts.19.w3", "model.layers.32.block_sparse_moe.experts.20.w3", "model.layers.32.block_sparse_moe.experts.21.w3", "model.layers.32.block_sparse_moe.experts.22.w3", "model.layers.32.block_sparse_moe.experts.23.w3", "model.layers.32.block_sparse_moe.experts.24.w3", "model.layers.32.block_sparse_moe.experts.25.w3", "model.layers.32.block_sparse_moe.experts.26.w3", "model.layers.32.block_sparse_moe.experts.27.w3", "model.layers.32.block_sparse_moe.experts.28.w3", "model.layers.32.block_sparse_moe.experts.29.w3", "model.layers.32.block_sparse_moe.experts.30.w3", "model.layers.32.block_sparse_moe.experts.31.w3", "model.layers.32.block_sparse_moe.experts.32.w3", "model.layers.32.block_sparse_moe.experts.33.w3", "model.layers.32.block_sparse_moe.experts.34.w3", "model.layers.32.block_sparse_moe.experts.35.w3", "model.layers.32.block_sparse_moe.experts.36.w3", "model.layers.32.block_sparse_moe.experts.37.w3", "model.layers.32.block_sparse_moe.experts.38.w3", "model.layers.32.block_sparse_moe.experts.39.w3", "model.layers.32.block_sparse_moe.experts.40.w3", "model.layers.32.block_sparse_moe.experts.41.w3", "model.layers.32.block_sparse_moe.experts.42.w3", "model.layers.32.block_sparse_moe.experts.43.w3", "model.layers.32.block_sparse_moe.experts.44.w3", "model.layers.32.block_sparse_moe.experts.45.w3", "model.layers.32.block_sparse_moe.experts.46.w3", "model.layers.32.block_sparse_moe.experts.47.w3", "model.layers.32.block_sparse_moe.experts.48.w3", "model.layers.32.block_sparse_moe.experts.49.w3", "model.layers.32.block_sparse_moe.experts.50.w3", "model.layers.32.block_sparse_moe.experts.51.w3", "model.layers.32.block_sparse_moe.experts.52.w3", "model.layers.32.block_sparse_moe.experts.53.w3", "model.layers.32.block_sparse_moe.experts.54.w3", "model.layers.32.block_sparse_moe.experts.55.w3", "model.layers.32.block_sparse_moe.experts.56.w3", "model.layers.32.block_sparse_moe.experts.57.w3", "model.layers.32.block_sparse_moe.experts.58.w3", "model.layers.32.block_sparse_moe.experts.59.w3", "model.layers.32.block_sparse_moe.experts.60.w3", "model.layers.32.block_sparse_moe.experts.61.w3", "model.layers.32.block_sparse_moe.experts.62.w3", "model.layers.32.block_sparse_moe.experts.63.w3", "model.layers.32.block_sparse_moe.experts.64.w3", "model.layers.32.block_sparse_moe.experts.65.w3", "model.layers.32.block_sparse_moe.experts.66.w3", "model.layers.32.block_sparse_moe.experts.67.w3", "model.layers.32.block_sparse_moe.experts.68.w3", "model.layers.32.block_sparse_moe.experts.69.w3", "model.layers.32.block_sparse_moe.experts.70.w3", "model.layers.32.block_sparse_moe.experts.71.w3", "model.layers.32.block_sparse_moe.experts.72.w3", "model.layers.32.block_sparse_moe.experts.73.w3", "model.layers.32.block_sparse_moe.experts.74.w3", "model.layers.32.block_sparse_moe.experts.75.w3", "model.layers.32.block_sparse_moe.experts.76.w3", "model.layers.32.block_sparse_moe.experts.77.w3", "model.layers.32.block_sparse_moe.experts.78.w3", "model.layers.32.block_sparse_moe.experts.79.w3", "model.layers.32.block_sparse_moe.experts.80.w3", "model.layers.32.block_sparse_moe.experts.81.w3", "model.layers.32.block_sparse_moe.experts.82.w3", "model.layers.32.block_sparse_moe.experts.83.w3", "model.layers.32.block_sparse_moe.experts.84.w3", "model.layers.32.block_sparse_moe.experts.85.w3", "model.layers.32.block_sparse_moe.experts.86.w3", "model.layers.32.block_sparse_moe.experts.87.w3", "model.layers.32.block_sparse_moe.experts.88.w3", "model.layers.32.block_sparse_moe.experts.89.w3", "model.layers.32.block_sparse_moe.experts.90.w3", "model.layers.32.block_sparse_moe.experts.91.w3", "model.layers.32.block_sparse_moe.experts.92.w3", "model.layers.32.block_sparse_moe.experts.93.w3", "model.layers.32.block_sparse_moe.experts.94.w3", "model.layers.32.block_sparse_moe.experts.95.w3", "model.layers.32.block_sparse_moe.experts.96.w3", "model.layers.32.block_sparse_moe.experts.97.w3", "model.layers.32.block_sparse_moe.experts.98.w3", "model.layers.32.block_sparse_moe.experts.99.w3", "model.layers.32.block_sparse_moe.experts.100.w3", "model.layers.32.block_sparse_moe.experts.101.w3", "model.layers.32.block_sparse_moe.experts.102.w3", "model.layers.32.block_sparse_moe.experts.103.w3", "model.layers.32.block_sparse_moe.experts.104.w3", "model.layers.32.block_sparse_moe.experts.105.w3", "model.layers.32.block_sparse_moe.experts.106.w3", "model.layers.32.block_sparse_moe.experts.107.w3", "model.layers.32.block_sparse_moe.experts.108.w3", "model.layers.32.block_sparse_moe.experts.109.w3", "model.layers.32.block_sparse_moe.experts.110.w3", "model.layers.32.block_sparse_moe.experts.111.w3", "model.layers.32.block_sparse_moe.experts.112.w3", "model.layers.32.block_sparse_moe.experts.113.w3", "model.layers.32.block_sparse_moe.experts.114.w3", "model.layers.32.block_sparse_moe.experts.115.w3", "model.layers.32.block_sparse_moe.experts.116.w3", "model.layers.32.block_sparse_moe.experts.117.w3", "model.layers.32.block_sparse_moe.experts.118.w3", "model.layers.32.block_sparse_moe.experts.119.w3", "model.layers.32.block_sparse_moe.experts.120.w3", "model.layers.32.block_sparse_moe.experts.121.w3", "model.layers.32.block_sparse_moe.experts.122.w3", "model.layers.32.block_sparse_moe.experts.123.w3", "model.layers.32.block_sparse_moe.experts.124.w3", "model.layers.32.block_sparse_moe.experts.125.w3", "model.layers.32.block_sparse_moe.experts.126.w3", "model.layers.32.block_sparse_moe.experts.127.w3", "model.layers.32.block_sparse_moe.experts.128.w3", "model.layers.32.block_sparse_moe.experts.129.w3", "model.layers.32.block_sparse_moe.experts.130.w3", "model.layers.32.block_sparse_moe.experts.131.w3", "model.layers.32.block_sparse_moe.experts.132.w3", "model.layers.32.block_sparse_moe.experts.133.w3", "model.layers.32.block_sparse_moe.experts.134.w3", "model.layers.32.block_sparse_moe.experts.135.w3", "model.layers.32.block_sparse_moe.experts.136.w3", "model.layers.32.block_sparse_moe.experts.137.w3", "model.layers.32.block_sparse_moe.experts.138.w3", "model.layers.32.block_sparse_moe.experts.139.w3", "model.layers.32.block_sparse_moe.experts.140.w3", "model.layers.32.block_sparse_moe.experts.141.w3", "model.layers.32.block_sparse_moe.experts.142.w3", "model.layers.32.block_sparse_moe.experts.143.w3", "model.layers.32.block_sparse_moe.experts.144.w3", "model.layers.32.block_sparse_moe.experts.145.w3", "model.layers.32.block_sparse_moe.experts.146.w3", "model.layers.32.block_sparse_moe.experts.147.w3", "model.layers.32.block_sparse_moe.experts.148.w3", "model.layers.32.block_sparse_moe.experts.149.w3", "model.layers.32.block_sparse_moe.experts.150.w3", "model.layers.32.block_sparse_moe.experts.151.w3", "model.layers.32.block_sparse_moe.experts.152.w3", "model.layers.32.block_sparse_moe.experts.153.w3", "model.layers.32.block_sparse_moe.experts.154.w3", "model.layers.32.block_sparse_moe.experts.155.w3", "model.layers.32.block_sparse_moe.experts.156.w3", "model.layers.32.block_sparse_moe.experts.157.w3", "model.layers.32.block_sparse_moe.experts.158.w3", "model.layers.32.block_sparse_moe.experts.159.w3", "model.layers.32.block_sparse_moe.experts.160.w3", "model.layers.32.block_sparse_moe.experts.161.w3", "model.layers.32.block_sparse_moe.experts.162.w3", "model.layers.32.block_sparse_moe.experts.163.w3", "model.layers.32.block_sparse_moe.experts.164.w3", "model.layers.32.block_sparse_moe.experts.165.w3", "model.layers.32.block_sparse_moe.experts.166.w3", "model.layers.32.block_sparse_moe.experts.167.w3", "model.layers.32.block_sparse_moe.experts.168.w3", "model.layers.32.block_sparse_moe.experts.169.w3", "model.layers.32.block_sparse_moe.experts.170.w3", "model.layers.32.block_sparse_moe.experts.171.w3", "model.layers.32.block_sparse_moe.experts.172.w3", "model.layers.32.block_sparse_moe.experts.173.w3", "model.layers.32.block_sparse_moe.experts.174.w3", "model.layers.32.block_sparse_moe.experts.175.w3", "model.layers.32.block_sparse_moe.experts.176.w3", "model.layers.32.block_sparse_moe.experts.177.w3", "model.layers.32.block_sparse_moe.experts.178.w3", "model.layers.32.block_sparse_moe.experts.179.w3", "model.layers.32.block_sparse_moe.experts.180.w3", "model.layers.32.block_sparse_moe.experts.181.w3", "model.layers.32.block_sparse_moe.experts.182.w3", "model.layers.32.block_sparse_moe.experts.183.w3", "model.layers.32.block_sparse_moe.experts.184.w3", "model.layers.32.block_sparse_moe.experts.185.w3", "model.layers.32.block_sparse_moe.experts.186.w3", "model.layers.32.block_sparse_moe.experts.187.w3", "model.layers.32.block_sparse_moe.experts.188.w3", "model.layers.32.block_sparse_moe.experts.189.w3", "model.layers.32.block_sparse_moe.experts.190.w3", "model.layers.32.block_sparse_moe.experts.191.w3", "model.layers.32.block_sparse_moe.experts.192.w3", "model.layers.32.block_sparse_moe.experts.193.w3", "model.layers.32.block_sparse_moe.experts.194.w3", "model.layers.32.block_sparse_moe.experts.195.w3", "model.layers.32.block_sparse_moe.experts.196.w3", "model.layers.32.block_sparse_moe.experts.197.w3", "model.layers.32.block_sparse_moe.experts.198.w3", "model.layers.32.block_sparse_moe.experts.199.w3", "model.layers.32.block_sparse_moe.experts.200.w3", "model.layers.32.block_sparse_moe.experts.201.w3", "model.layers.32.block_sparse_moe.experts.202.w3", "model.layers.32.block_sparse_moe.experts.203.w3", "model.layers.32.block_sparse_moe.experts.204.w3", "model.layers.32.block_sparse_moe.experts.205.w3", "model.layers.32.block_sparse_moe.experts.206.w3", "model.layers.32.block_sparse_moe.experts.207.w3", "model.layers.32.block_sparse_moe.experts.208.w3", "model.layers.32.block_sparse_moe.experts.209.w3", "model.layers.32.block_sparse_moe.experts.210.w3", "model.layers.32.block_sparse_moe.experts.211.w3", "model.layers.32.block_sparse_moe.experts.212.w3", "model.layers.32.block_sparse_moe.experts.213.w3", "model.layers.32.block_sparse_moe.experts.214.w3", "model.layers.32.block_sparse_moe.experts.215.w3", "model.layers.32.block_sparse_moe.experts.216.w3", "model.layers.32.block_sparse_moe.experts.217.w3", "model.layers.32.block_sparse_moe.experts.218.w3", "model.layers.32.block_sparse_moe.experts.219.w3", "model.layers.32.block_sparse_moe.experts.220.w3", "model.layers.32.block_sparse_moe.experts.221.w3", "model.layers.32.block_sparse_moe.experts.222.w3", "model.layers.32.block_sparse_moe.experts.223.w3", "model.layers.32.block_sparse_moe.experts.224.w3", "model.layers.32.block_sparse_moe.experts.225.w3", "model.layers.32.block_sparse_moe.experts.226.w3", "model.layers.32.block_sparse_moe.experts.227.w3", "model.layers.32.block_sparse_moe.experts.228.w3", "model.layers.32.block_sparse_moe.experts.229.w3", "model.layers.32.block_sparse_moe.experts.230.w3", "model.layers.32.block_sparse_moe.experts.231.w3", "model.layers.32.block_sparse_moe.experts.232.w3", "model.layers.32.block_sparse_moe.experts.233.w3", "model.layers.32.block_sparse_moe.experts.234.w3", "model.layers.32.block_sparse_moe.experts.235.w3", "model.layers.32.block_sparse_moe.experts.236.w3", "model.layers.32.block_sparse_moe.experts.237.w3", "model.layers.32.block_sparse_moe.experts.238.w3", "model.layers.32.block_sparse_moe.experts.239.w3", "model.layers.32.block_sparse_moe.experts.240.w3", "model.layers.32.block_sparse_moe.experts.241.w3", "model.layers.32.block_sparse_moe.experts.242.w3", "model.layers.32.block_sparse_moe.experts.243.w3", "model.layers.32.block_sparse_moe.experts.244.w3", "model.layers.32.block_sparse_moe.experts.245.w3", "model.layers.32.block_sparse_moe.experts.246.w3", "model.layers.32.block_sparse_moe.experts.247.w3", "model.layers.32.block_sparse_moe.experts.248.w3", "model.layers.32.block_sparse_moe.experts.249.w3", "model.layers.32.block_sparse_moe.experts.250.w3", "model.layers.32.block_sparse_moe.experts.251.w3", "model.layers.32.block_sparse_moe.experts.252.w3", "model.layers.32.block_sparse_moe.experts.253.w3", "model.layers.32.block_sparse_moe.experts.254.w3", "model.layers.32.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00015715956687922805, "dbits": 2415919104 } ] }, { "idx": 164, "layers": [ "model.layers.32.block_sparse_moe.experts.0.w2", "model.layers.32.block_sparse_moe.experts.1.w2", "model.layers.32.block_sparse_moe.experts.2.w2", "model.layers.32.block_sparse_moe.experts.3.w2", "model.layers.32.block_sparse_moe.experts.4.w2", "model.layers.32.block_sparse_moe.experts.5.w2", "model.layers.32.block_sparse_moe.experts.6.w2", "model.layers.32.block_sparse_moe.experts.7.w2", "model.layers.32.block_sparse_moe.experts.8.w2", "model.layers.32.block_sparse_moe.experts.9.w2", "model.layers.32.block_sparse_moe.experts.10.w2", "model.layers.32.block_sparse_moe.experts.11.w2", "model.layers.32.block_sparse_moe.experts.12.w2", "model.layers.32.block_sparse_moe.experts.13.w2", "model.layers.32.block_sparse_moe.experts.14.w2", "model.layers.32.block_sparse_moe.experts.15.w2", "model.layers.32.block_sparse_moe.experts.16.w2", "model.layers.32.block_sparse_moe.experts.17.w2", "model.layers.32.block_sparse_moe.experts.18.w2", "model.layers.32.block_sparse_moe.experts.19.w2", "model.layers.32.block_sparse_moe.experts.20.w2", "model.layers.32.block_sparse_moe.experts.21.w2", "model.layers.32.block_sparse_moe.experts.22.w2", "model.layers.32.block_sparse_moe.experts.23.w2", "model.layers.32.block_sparse_moe.experts.24.w2", "model.layers.32.block_sparse_moe.experts.25.w2", "model.layers.32.block_sparse_moe.experts.26.w2", "model.layers.32.block_sparse_moe.experts.27.w2", "model.layers.32.block_sparse_moe.experts.28.w2", "model.layers.32.block_sparse_moe.experts.29.w2", "model.layers.32.block_sparse_moe.experts.30.w2", "model.layers.32.block_sparse_moe.experts.31.w2", "model.layers.32.block_sparse_moe.experts.32.w2", "model.layers.32.block_sparse_moe.experts.33.w2", "model.layers.32.block_sparse_moe.experts.34.w2", "model.layers.32.block_sparse_moe.experts.35.w2", "model.layers.32.block_sparse_moe.experts.36.w2", "model.layers.32.block_sparse_moe.experts.37.w2", "model.layers.32.block_sparse_moe.experts.38.w2", "model.layers.32.block_sparse_moe.experts.39.w2", "model.layers.32.block_sparse_moe.experts.40.w2", "model.layers.32.block_sparse_moe.experts.41.w2", "model.layers.32.block_sparse_moe.experts.42.w2", "model.layers.32.block_sparse_moe.experts.43.w2", "model.layers.32.block_sparse_moe.experts.44.w2", "model.layers.32.block_sparse_moe.experts.45.w2", "model.layers.32.block_sparse_moe.experts.46.w2", "model.layers.32.block_sparse_moe.experts.47.w2", "model.layers.32.block_sparse_moe.experts.48.w2", "model.layers.32.block_sparse_moe.experts.49.w2", "model.layers.32.block_sparse_moe.experts.50.w2", "model.layers.32.block_sparse_moe.experts.51.w2", "model.layers.32.block_sparse_moe.experts.52.w2", "model.layers.32.block_sparse_moe.experts.53.w2", "model.layers.32.block_sparse_moe.experts.54.w2", "model.layers.32.block_sparse_moe.experts.55.w2", "model.layers.32.block_sparse_moe.experts.56.w2", "model.layers.32.block_sparse_moe.experts.57.w2", "model.layers.32.block_sparse_moe.experts.58.w2", "model.layers.32.block_sparse_moe.experts.59.w2", "model.layers.32.block_sparse_moe.experts.60.w2", "model.layers.32.block_sparse_moe.experts.61.w2", "model.layers.32.block_sparse_moe.experts.62.w2", "model.layers.32.block_sparse_moe.experts.63.w2", "model.layers.32.block_sparse_moe.experts.64.w2", "model.layers.32.block_sparse_moe.experts.65.w2", "model.layers.32.block_sparse_moe.experts.66.w2", "model.layers.32.block_sparse_moe.experts.67.w2", "model.layers.32.block_sparse_moe.experts.68.w2", "model.layers.32.block_sparse_moe.experts.69.w2", "model.layers.32.block_sparse_moe.experts.70.w2", "model.layers.32.block_sparse_moe.experts.71.w2", "model.layers.32.block_sparse_moe.experts.72.w2", "model.layers.32.block_sparse_moe.experts.73.w2", "model.layers.32.block_sparse_moe.experts.74.w2", "model.layers.32.block_sparse_moe.experts.75.w2", "model.layers.32.block_sparse_moe.experts.76.w2", "model.layers.32.block_sparse_moe.experts.77.w2", "model.layers.32.block_sparse_moe.experts.78.w2", "model.layers.32.block_sparse_moe.experts.79.w2", "model.layers.32.block_sparse_moe.experts.80.w2", "model.layers.32.block_sparse_moe.experts.81.w2", "model.layers.32.block_sparse_moe.experts.82.w2", "model.layers.32.block_sparse_moe.experts.83.w2", "model.layers.32.block_sparse_moe.experts.84.w2", "model.layers.32.block_sparse_moe.experts.85.w2", "model.layers.32.block_sparse_moe.experts.86.w2", "model.layers.32.block_sparse_moe.experts.87.w2", "model.layers.32.block_sparse_moe.experts.88.w2", "model.layers.32.block_sparse_moe.experts.89.w2", "model.layers.32.block_sparse_moe.experts.90.w2", "model.layers.32.block_sparse_moe.experts.91.w2", "model.layers.32.block_sparse_moe.experts.92.w2", "model.layers.32.block_sparse_moe.experts.93.w2", "model.layers.32.block_sparse_moe.experts.94.w2", "model.layers.32.block_sparse_moe.experts.95.w2", "model.layers.32.block_sparse_moe.experts.96.w2", "model.layers.32.block_sparse_moe.experts.97.w2", "model.layers.32.block_sparse_moe.experts.98.w2", "model.layers.32.block_sparse_moe.experts.99.w2", "model.layers.32.block_sparse_moe.experts.100.w2", "model.layers.32.block_sparse_moe.experts.101.w2", "model.layers.32.block_sparse_moe.experts.102.w2", "model.layers.32.block_sparse_moe.experts.103.w2", "model.layers.32.block_sparse_moe.experts.104.w2", "model.layers.32.block_sparse_moe.experts.105.w2", "model.layers.32.block_sparse_moe.experts.106.w2", "model.layers.32.block_sparse_moe.experts.107.w2", "model.layers.32.block_sparse_moe.experts.108.w2", "model.layers.32.block_sparse_moe.experts.109.w2", "model.layers.32.block_sparse_moe.experts.110.w2", "model.layers.32.block_sparse_moe.experts.111.w2", "model.layers.32.block_sparse_moe.experts.112.w2", "model.layers.32.block_sparse_moe.experts.113.w2", "model.layers.32.block_sparse_moe.experts.114.w2", "model.layers.32.block_sparse_moe.experts.115.w2", "model.layers.32.block_sparse_moe.experts.116.w2", "model.layers.32.block_sparse_moe.experts.117.w2", "model.layers.32.block_sparse_moe.experts.118.w2", "model.layers.32.block_sparse_moe.experts.119.w2", "model.layers.32.block_sparse_moe.experts.120.w2", "model.layers.32.block_sparse_moe.experts.121.w2", "model.layers.32.block_sparse_moe.experts.122.w2", "model.layers.32.block_sparse_moe.experts.123.w2", "model.layers.32.block_sparse_moe.experts.124.w2", "model.layers.32.block_sparse_moe.experts.125.w2", "model.layers.32.block_sparse_moe.experts.126.w2", "model.layers.32.block_sparse_moe.experts.127.w2", "model.layers.32.block_sparse_moe.experts.128.w2", "model.layers.32.block_sparse_moe.experts.129.w2", "model.layers.32.block_sparse_moe.experts.130.w2", "model.layers.32.block_sparse_moe.experts.131.w2", "model.layers.32.block_sparse_moe.experts.132.w2", "model.layers.32.block_sparse_moe.experts.133.w2", "model.layers.32.block_sparse_moe.experts.134.w2", "model.layers.32.block_sparse_moe.experts.135.w2", "model.layers.32.block_sparse_moe.experts.136.w2", "model.layers.32.block_sparse_moe.experts.137.w2", "model.layers.32.block_sparse_moe.experts.138.w2", "model.layers.32.block_sparse_moe.experts.139.w2", "model.layers.32.block_sparse_moe.experts.140.w2", "model.layers.32.block_sparse_moe.experts.141.w2", "model.layers.32.block_sparse_moe.experts.142.w2", "model.layers.32.block_sparse_moe.experts.143.w2", "model.layers.32.block_sparse_moe.experts.144.w2", "model.layers.32.block_sparse_moe.experts.145.w2", "model.layers.32.block_sparse_moe.experts.146.w2", "model.layers.32.block_sparse_moe.experts.147.w2", "model.layers.32.block_sparse_moe.experts.148.w2", "model.layers.32.block_sparse_moe.experts.149.w2", "model.layers.32.block_sparse_moe.experts.150.w2", "model.layers.32.block_sparse_moe.experts.151.w2", "model.layers.32.block_sparse_moe.experts.152.w2", "model.layers.32.block_sparse_moe.experts.153.w2", "model.layers.32.block_sparse_moe.experts.154.w2", "model.layers.32.block_sparse_moe.experts.155.w2", "model.layers.32.block_sparse_moe.experts.156.w2", "model.layers.32.block_sparse_moe.experts.157.w2", "model.layers.32.block_sparse_moe.experts.158.w2", "model.layers.32.block_sparse_moe.experts.159.w2", "model.layers.32.block_sparse_moe.experts.160.w2", "model.layers.32.block_sparse_moe.experts.161.w2", "model.layers.32.block_sparse_moe.experts.162.w2", "model.layers.32.block_sparse_moe.experts.163.w2", "model.layers.32.block_sparse_moe.experts.164.w2", "model.layers.32.block_sparse_moe.experts.165.w2", "model.layers.32.block_sparse_moe.experts.166.w2", "model.layers.32.block_sparse_moe.experts.167.w2", "model.layers.32.block_sparse_moe.experts.168.w2", "model.layers.32.block_sparse_moe.experts.169.w2", "model.layers.32.block_sparse_moe.experts.170.w2", "model.layers.32.block_sparse_moe.experts.171.w2", "model.layers.32.block_sparse_moe.experts.172.w2", "model.layers.32.block_sparse_moe.experts.173.w2", "model.layers.32.block_sparse_moe.experts.174.w2", "model.layers.32.block_sparse_moe.experts.175.w2", "model.layers.32.block_sparse_moe.experts.176.w2", "model.layers.32.block_sparse_moe.experts.177.w2", "model.layers.32.block_sparse_moe.experts.178.w2", "model.layers.32.block_sparse_moe.experts.179.w2", "model.layers.32.block_sparse_moe.experts.180.w2", "model.layers.32.block_sparse_moe.experts.181.w2", "model.layers.32.block_sparse_moe.experts.182.w2", "model.layers.32.block_sparse_moe.experts.183.w2", "model.layers.32.block_sparse_moe.experts.184.w2", "model.layers.32.block_sparse_moe.experts.185.w2", "model.layers.32.block_sparse_moe.experts.186.w2", "model.layers.32.block_sparse_moe.experts.187.w2", "model.layers.32.block_sparse_moe.experts.188.w2", "model.layers.32.block_sparse_moe.experts.189.w2", "model.layers.32.block_sparse_moe.experts.190.w2", "model.layers.32.block_sparse_moe.experts.191.w2", "model.layers.32.block_sparse_moe.experts.192.w2", "model.layers.32.block_sparse_moe.experts.193.w2", "model.layers.32.block_sparse_moe.experts.194.w2", "model.layers.32.block_sparse_moe.experts.195.w2", "model.layers.32.block_sparse_moe.experts.196.w2", "model.layers.32.block_sparse_moe.experts.197.w2", "model.layers.32.block_sparse_moe.experts.198.w2", "model.layers.32.block_sparse_moe.experts.199.w2", "model.layers.32.block_sparse_moe.experts.200.w2", "model.layers.32.block_sparse_moe.experts.201.w2", "model.layers.32.block_sparse_moe.experts.202.w2", "model.layers.32.block_sparse_moe.experts.203.w2", "model.layers.32.block_sparse_moe.experts.204.w2", "model.layers.32.block_sparse_moe.experts.205.w2", "model.layers.32.block_sparse_moe.experts.206.w2", "model.layers.32.block_sparse_moe.experts.207.w2", "model.layers.32.block_sparse_moe.experts.208.w2", "model.layers.32.block_sparse_moe.experts.209.w2", "model.layers.32.block_sparse_moe.experts.210.w2", "model.layers.32.block_sparse_moe.experts.211.w2", "model.layers.32.block_sparse_moe.experts.212.w2", "model.layers.32.block_sparse_moe.experts.213.w2", "model.layers.32.block_sparse_moe.experts.214.w2", "model.layers.32.block_sparse_moe.experts.215.w2", "model.layers.32.block_sparse_moe.experts.216.w2", "model.layers.32.block_sparse_moe.experts.217.w2", "model.layers.32.block_sparse_moe.experts.218.w2", "model.layers.32.block_sparse_moe.experts.219.w2", "model.layers.32.block_sparse_moe.experts.220.w2", "model.layers.32.block_sparse_moe.experts.221.w2", "model.layers.32.block_sparse_moe.experts.222.w2", "model.layers.32.block_sparse_moe.experts.223.w2", "model.layers.32.block_sparse_moe.experts.224.w2", "model.layers.32.block_sparse_moe.experts.225.w2", "model.layers.32.block_sparse_moe.experts.226.w2", "model.layers.32.block_sparse_moe.experts.227.w2", "model.layers.32.block_sparse_moe.experts.228.w2", "model.layers.32.block_sparse_moe.experts.229.w2", "model.layers.32.block_sparse_moe.experts.230.w2", "model.layers.32.block_sparse_moe.experts.231.w2", "model.layers.32.block_sparse_moe.experts.232.w2", "model.layers.32.block_sparse_moe.experts.233.w2", "model.layers.32.block_sparse_moe.experts.234.w2", "model.layers.32.block_sparse_moe.experts.235.w2", "model.layers.32.block_sparse_moe.experts.236.w2", "model.layers.32.block_sparse_moe.experts.237.w2", "model.layers.32.block_sparse_moe.experts.238.w2", "model.layers.32.block_sparse_moe.experts.239.w2", "model.layers.32.block_sparse_moe.experts.240.w2", "model.layers.32.block_sparse_moe.experts.241.w2", "model.layers.32.block_sparse_moe.experts.242.w2", "model.layers.32.block_sparse_moe.experts.243.w2", "model.layers.32.block_sparse_moe.experts.244.w2", "model.layers.32.block_sparse_moe.experts.245.w2", "model.layers.32.block_sparse_moe.experts.246.w2", "model.layers.32.block_sparse_moe.experts.247.w2", "model.layers.32.block_sparse_moe.experts.248.w2", "model.layers.32.block_sparse_moe.experts.249.w2", "model.layers.32.block_sparse_moe.experts.250.w2", "model.layers.32.block_sparse_moe.experts.251.w2", "model.layers.32.block_sparse_moe.experts.252.w2", "model.layers.32.block_sparse_moe.experts.253.w2", "model.layers.32.block_sparse_moe.experts.254.w2", "model.layers.32.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -7.077455520621001e-05, "dbits": 1207959552 } ] }, { "idx": 165, "layers": [ "model.layers.33.self_attn.q_proj" ], "candidates": [ { "dkld": 0.01086524724960336, "dbits": 18874368 } ] }, { "idx": 166, "layers": [ "model.layers.33.self_attn.k_proj", "model.layers.33.self_attn.v_proj" ], "candidates": [ { "dkld": -0.021779054403304965, "dbits": 6291456 } ] }, { "idx": 167, "layers": [ "model.layers.33.self_attn.o_proj" ], "candidates": [ { "dkld": -0.01852632164955137, "dbits": 18874368 } ] }, { "idx": 168, "layers": [ "model.layers.33.block_sparse_moe.experts.0.w1", "model.layers.33.block_sparse_moe.experts.1.w1", "model.layers.33.block_sparse_moe.experts.2.w1", "model.layers.33.block_sparse_moe.experts.3.w1", "model.layers.33.block_sparse_moe.experts.4.w1", "model.layers.33.block_sparse_moe.experts.5.w1", "model.layers.33.block_sparse_moe.experts.6.w1", "model.layers.33.block_sparse_moe.experts.7.w1", "model.layers.33.block_sparse_moe.experts.8.w1", "model.layers.33.block_sparse_moe.experts.9.w1", "model.layers.33.block_sparse_moe.experts.10.w1", "model.layers.33.block_sparse_moe.experts.11.w1", "model.layers.33.block_sparse_moe.experts.12.w1", "model.layers.33.block_sparse_moe.experts.13.w1", "model.layers.33.block_sparse_moe.experts.14.w1", "model.layers.33.block_sparse_moe.experts.15.w1", "model.layers.33.block_sparse_moe.experts.16.w1", "model.layers.33.block_sparse_moe.experts.17.w1", "model.layers.33.block_sparse_moe.experts.18.w1", "model.layers.33.block_sparse_moe.experts.19.w1", "model.layers.33.block_sparse_moe.experts.20.w1", "model.layers.33.block_sparse_moe.experts.21.w1", "model.layers.33.block_sparse_moe.experts.22.w1", "model.layers.33.block_sparse_moe.experts.23.w1", "model.layers.33.block_sparse_moe.experts.24.w1", "model.layers.33.block_sparse_moe.experts.25.w1", "model.layers.33.block_sparse_moe.experts.26.w1", "model.layers.33.block_sparse_moe.experts.27.w1", "model.layers.33.block_sparse_moe.experts.28.w1", "model.layers.33.block_sparse_moe.experts.29.w1", "model.layers.33.block_sparse_moe.experts.30.w1", "model.layers.33.block_sparse_moe.experts.31.w1", "model.layers.33.block_sparse_moe.experts.32.w1", "model.layers.33.block_sparse_moe.experts.33.w1", "model.layers.33.block_sparse_moe.experts.34.w1", "model.layers.33.block_sparse_moe.experts.35.w1", "model.layers.33.block_sparse_moe.experts.36.w1", "model.layers.33.block_sparse_moe.experts.37.w1", "model.layers.33.block_sparse_moe.experts.38.w1", "model.layers.33.block_sparse_moe.experts.39.w1", "model.layers.33.block_sparse_moe.experts.40.w1", "model.layers.33.block_sparse_moe.experts.41.w1", "model.layers.33.block_sparse_moe.experts.42.w1", "model.layers.33.block_sparse_moe.experts.43.w1", "model.layers.33.block_sparse_moe.experts.44.w1", "model.layers.33.block_sparse_moe.experts.45.w1", "model.layers.33.block_sparse_moe.experts.46.w1", "model.layers.33.block_sparse_moe.experts.47.w1", "model.layers.33.block_sparse_moe.experts.48.w1", "model.layers.33.block_sparse_moe.experts.49.w1", "model.layers.33.block_sparse_moe.experts.50.w1", "model.layers.33.block_sparse_moe.experts.51.w1", "model.layers.33.block_sparse_moe.experts.52.w1", "model.layers.33.block_sparse_moe.experts.53.w1", "model.layers.33.block_sparse_moe.experts.54.w1", "model.layers.33.block_sparse_moe.experts.55.w1", "model.layers.33.block_sparse_moe.experts.56.w1", "model.layers.33.block_sparse_moe.experts.57.w1", "model.layers.33.block_sparse_moe.experts.58.w1", "model.layers.33.block_sparse_moe.experts.59.w1", "model.layers.33.block_sparse_moe.experts.60.w1", "model.layers.33.block_sparse_moe.experts.61.w1", "model.layers.33.block_sparse_moe.experts.62.w1", "model.layers.33.block_sparse_moe.experts.63.w1", "model.layers.33.block_sparse_moe.experts.64.w1", "model.layers.33.block_sparse_moe.experts.65.w1", "model.layers.33.block_sparse_moe.experts.66.w1", "model.layers.33.block_sparse_moe.experts.67.w1", "model.layers.33.block_sparse_moe.experts.68.w1", "model.layers.33.block_sparse_moe.experts.69.w1", "model.layers.33.block_sparse_moe.experts.70.w1", "model.layers.33.block_sparse_moe.experts.71.w1", "model.layers.33.block_sparse_moe.experts.72.w1", "model.layers.33.block_sparse_moe.experts.73.w1", "model.layers.33.block_sparse_moe.experts.74.w1", "model.layers.33.block_sparse_moe.experts.75.w1", "model.layers.33.block_sparse_moe.experts.76.w1", "model.layers.33.block_sparse_moe.experts.77.w1", "model.layers.33.block_sparse_moe.experts.78.w1", "model.layers.33.block_sparse_moe.experts.79.w1", "model.layers.33.block_sparse_moe.experts.80.w1", "model.layers.33.block_sparse_moe.experts.81.w1", "model.layers.33.block_sparse_moe.experts.82.w1", "model.layers.33.block_sparse_moe.experts.83.w1", "model.layers.33.block_sparse_moe.experts.84.w1", "model.layers.33.block_sparse_moe.experts.85.w1", "model.layers.33.block_sparse_moe.experts.86.w1", "model.layers.33.block_sparse_moe.experts.87.w1", "model.layers.33.block_sparse_moe.experts.88.w1", "model.layers.33.block_sparse_moe.experts.89.w1", "model.layers.33.block_sparse_moe.experts.90.w1", "model.layers.33.block_sparse_moe.experts.91.w1", "model.layers.33.block_sparse_moe.experts.92.w1", "model.layers.33.block_sparse_moe.experts.93.w1", "model.layers.33.block_sparse_moe.experts.94.w1", "model.layers.33.block_sparse_moe.experts.95.w1", "model.layers.33.block_sparse_moe.experts.96.w1", "model.layers.33.block_sparse_moe.experts.97.w1", "model.layers.33.block_sparse_moe.experts.98.w1", "model.layers.33.block_sparse_moe.experts.99.w1", "model.layers.33.block_sparse_moe.experts.100.w1", "model.layers.33.block_sparse_moe.experts.101.w1", "model.layers.33.block_sparse_moe.experts.102.w1", "model.layers.33.block_sparse_moe.experts.103.w1", "model.layers.33.block_sparse_moe.experts.104.w1", "model.layers.33.block_sparse_moe.experts.105.w1", "model.layers.33.block_sparse_moe.experts.106.w1", "model.layers.33.block_sparse_moe.experts.107.w1", "model.layers.33.block_sparse_moe.experts.108.w1", "model.layers.33.block_sparse_moe.experts.109.w1", "model.layers.33.block_sparse_moe.experts.110.w1", "model.layers.33.block_sparse_moe.experts.111.w1", "model.layers.33.block_sparse_moe.experts.112.w1", "model.layers.33.block_sparse_moe.experts.113.w1", "model.layers.33.block_sparse_moe.experts.114.w1", "model.layers.33.block_sparse_moe.experts.115.w1", "model.layers.33.block_sparse_moe.experts.116.w1", "model.layers.33.block_sparse_moe.experts.117.w1", "model.layers.33.block_sparse_moe.experts.118.w1", "model.layers.33.block_sparse_moe.experts.119.w1", "model.layers.33.block_sparse_moe.experts.120.w1", "model.layers.33.block_sparse_moe.experts.121.w1", "model.layers.33.block_sparse_moe.experts.122.w1", "model.layers.33.block_sparse_moe.experts.123.w1", "model.layers.33.block_sparse_moe.experts.124.w1", "model.layers.33.block_sparse_moe.experts.125.w1", "model.layers.33.block_sparse_moe.experts.126.w1", "model.layers.33.block_sparse_moe.experts.127.w1", "model.layers.33.block_sparse_moe.experts.128.w1", "model.layers.33.block_sparse_moe.experts.129.w1", "model.layers.33.block_sparse_moe.experts.130.w1", "model.layers.33.block_sparse_moe.experts.131.w1", "model.layers.33.block_sparse_moe.experts.132.w1", "model.layers.33.block_sparse_moe.experts.133.w1", "model.layers.33.block_sparse_moe.experts.134.w1", "model.layers.33.block_sparse_moe.experts.135.w1", "model.layers.33.block_sparse_moe.experts.136.w1", "model.layers.33.block_sparse_moe.experts.137.w1", "model.layers.33.block_sparse_moe.experts.138.w1", "model.layers.33.block_sparse_moe.experts.139.w1", "model.layers.33.block_sparse_moe.experts.140.w1", "model.layers.33.block_sparse_moe.experts.141.w1", "model.layers.33.block_sparse_moe.experts.142.w1", "model.layers.33.block_sparse_moe.experts.143.w1", "model.layers.33.block_sparse_moe.experts.144.w1", "model.layers.33.block_sparse_moe.experts.145.w1", "model.layers.33.block_sparse_moe.experts.146.w1", "model.layers.33.block_sparse_moe.experts.147.w1", "model.layers.33.block_sparse_moe.experts.148.w1", "model.layers.33.block_sparse_moe.experts.149.w1", "model.layers.33.block_sparse_moe.experts.150.w1", "model.layers.33.block_sparse_moe.experts.151.w1", "model.layers.33.block_sparse_moe.experts.152.w1", "model.layers.33.block_sparse_moe.experts.153.w1", "model.layers.33.block_sparse_moe.experts.154.w1", "model.layers.33.block_sparse_moe.experts.155.w1", "model.layers.33.block_sparse_moe.experts.156.w1", "model.layers.33.block_sparse_moe.experts.157.w1", "model.layers.33.block_sparse_moe.experts.158.w1", "model.layers.33.block_sparse_moe.experts.159.w1", "model.layers.33.block_sparse_moe.experts.160.w1", "model.layers.33.block_sparse_moe.experts.161.w1", "model.layers.33.block_sparse_moe.experts.162.w1", "model.layers.33.block_sparse_moe.experts.163.w1", "model.layers.33.block_sparse_moe.experts.164.w1", "model.layers.33.block_sparse_moe.experts.165.w1", "model.layers.33.block_sparse_moe.experts.166.w1", "model.layers.33.block_sparse_moe.experts.167.w1", "model.layers.33.block_sparse_moe.experts.168.w1", "model.layers.33.block_sparse_moe.experts.169.w1", "model.layers.33.block_sparse_moe.experts.170.w1", "model.layers.33.block_sparse_moe.experts.171.w1", "model.layers.33.block_sparse_moe.experts.172.w1", "model.layers.33.block_sparse_moe.experts.173.w1", "model.layers.33.block_sparse_moe.experts.174.w1", "model.layers.33.block_sparse_moe.experts.175.w1", "model.layers.33.block_sparse_moe.experts.176.w1", "model.layers.33.block_sparse_moe.experts.177.w1", "model.layers.33.block_sparse_moe.experts.178.w1", "model.layers.33.block_sparse_moe.experts.179.w1", "model.layers.33.block_sparse_moe.experts.180.w1", "model.layers.33.block_sparse_moe.experts.181.w1", "model.layers.33.block_sparse_moe.experts.182.w1", "model.layers.33.block_sparse_moe.experts.183.w1", "model.layers.33.block_sparse_moe.experts.184.w1", "model.layers.33.block_sparse_moe.experts.185.w1", "model.layers.33.block_sparse_moe.experts.186.w1", "model.layers.33.block_sparse_moe.experts.187.w1", "model.layers.33.block_sparse_moe.experts.188.w1", "model.layers.33.block_sparse_moe.experts.189.w1", "model.layers.33.block_sparse_moe.experts.190.w1", "model.layers.33.block_sparse_moe.experts.191.w1", "model.layers.33.block_sparse_moe.experts.192.w1", "model.layers.33.block_sparse_moe.experts.193.w1", "model.layers.33.block_sparse_moe.experts.194.w1", "model.layers.33.block_sparse_moe.experts.195.w1", "model.layers.33.block_sparse_moe.experts.196.w1", "model.layers.33.block_sparse_moe.experts.197.w1", "model.layers.33.block_sparse_moe.experts.198.w1", "model.layers.33.block_sparse_moe.experts.199.w1", "model.layers.33.block_sparse_moe.experts.200.w1", "model.layers.33.block_sparse_moe.experts.201.w1", "model.layers.33.block_sparse_moe.experts.202.w1", "model.layers.33.block_sparse_moe.experts.203.w1", "model.layers.33.block_sparse_moe.experts.204.w1", "model.layers.33.block_sparse_moe.experts.205.w1", "model.layers.33.block_sparse_moe.experts.206.w1", "model.layers.33.block_sparse_moe.experts.207.w1", "model.layers.33.block_sparse_moe.experts.208.w1", "model.layers.33.block_sparse_moe.experts.209.w1", "model.layers.33.block_sparse_moe.experts.210.w1", "model.layers.33.block_sparse_moe.experts.211.w1", "model.layers.33.block_sparse_moe.experts.212.w1", "model.layers.33.block_sparse_moe.experts.213.w1", "model.layers.33.block_sparse_moe.experts.214.w1", "model.layers.33.block_sparse_moe.experts.215.w1", "model.layers.33.block_sparse_moe.experts.216.w1", "model.layers.33.block_sparse_moe.experts.217.w1", "model.layers.33.block_sparse_moe.experts.218.w1", "model.layers.33.block_sparse_moe.experts.219.w1", "model.layers.33.block_sparse_moe.experts.220.w1", "model.layers.33.block_sparse_moe.experts.221.w1", "model.layers.33.block_sparse_moe.experts.222.w1", "model.layers.33.block_sparse_moe.experts.223.w1", "model.layers.33.block_sparse_moe.experts.224.w1", "model.layers.33.block_sparse_moe.experts.225.w1", "model.layers.33.block_sparse_moe.experts.226.w1", "model.layers.33.block_sparse_moe.experts.227.w1", "model.layers.33.block_sparse_moe.experts.228.w1", "model.layers.33.block_sparse_moe.experts.229.w1", "model.layers.33.block_sparse_moe.experts.230.w1", "model.layers.33.block_sparse_moe.experts.231.w1", "model.layers.33.block_sparse_moe.experts.232.w1", "model.layers.33.block_sparse_moe.experts.233.w1", "model.layers.33.block_sparse_moe.experts.234.w1", "model.layers.33.block_sparse_moe.experts.235.w1", "model.layers.33.block_sparse_moe.experts.236.w1", "model.layers.33.block_sparse_moe.experts.237.w1", "model.layers.33.block_sparse_moe.experts.238.w1", "model.layers.33.block_sparse_moe.experts.239.w1", "model.layers.33.block_sparse_moe.experts.240.w1", "model.layers.33.block_sparse_moe.experts.241.w1", "model.layers.33.block_sparse_moe.experts.242.w1", "model.layers.33.block_sparse_moe.experts.243.w1", "model.layers.33.block_sparse_moe.experts.244.w1", "model.layers.33.block_sparse_moe.experts.245.w1", "model.layers.33.block_sparse_moe.experts.246.w1", "model.layers.33.block_sparse_moe.experts.247.w1", "model.layers.33.block_sparse_moe.experts.248.w1", "model.layers.33.block_sparse_moe.experts.249.w1", "model.layers.33.block_sparse_moe.experts.250.w1", "model.layers.33.block_sparse_moe.experts.251.w1", "model.layers.33.block_sparse_moe.experts.252.w1", "model.layers.33.block_sparse_moe.experts.253.w1", "model.layers.33.block_sparse_moe.experts.254.w1", "model.layers.33.block_sparse_moe.experts.255.w1", "model.layers.33.block_sparse_moe.experts.0.w3", "model.layers.33.block_sparse_moe.experts.1.w3", "model.layers.33.block_sparse_moe.experts.2.w3", "model.layers.33.block_sparse_moe.experts.3.w3", "model.layers.33.block_sparse_moe.experts.4.w3", "model.layers.33.block_sparse_moe.experts.5.w3", "model.layers.33.block_sparse_moe.experts.6.w3", "model.layers.33.block_sparse_moe.experts.7.w3", "model.layers.33.block_sparse_moe.experts.8.w3", "model.layers.33.block_sparse_moe.experts.9.w3", "model.layers.33.block_sparse_moe.experts.10.w3", "model.layers.33.block_sparse_moe.experts.11.w3", "model.layers.33.block_sparse_moe.experts.12.w3", "model.layers.33.block_sparse_moe.experts.13.w3", "model.layers.33.block_sparse_moe.experts.14.w3", "model.layers.33.block_sparse_moe.experts.15.w3", "model.layers.33.block_sparse_moe.experts.16.w3", "model.layers.33.block_sparse_moe.experts.17.w3", "model.layers.33.block_sparse_moe.experts.18.w3", "model.layers.33.block_sparse_moe.experts.19.w3", "model.layers.33.block_sparse_moe.experts.20.w3", "model.layers.33.block_sparse_moe.experts.21.w3", "model.layers.33.block_sparse_moe.experts.22.w3", "model.layers.33.block_sparse_moe.experts.23.w3", "model.layers.33.block_sparse_moe.experts.24.w3", "model.layers.33.block_sparse_moe.experts.25.w3", "model.layers.33.block_sparse_moe.experts.26.w3", "model.layers.33.block_sparse_moe.experts.27.w3", "model.layers.33.block_sparse_moe.experts.28.w3", "model.layers.33.block_sparse_moe.experts.29.w3", "model.layers.33.block_sparse_moe.experts.30.w3", "model.layers.33.block_sparse_moe.experts.31.w3", "model.layers.33.block_sparse_moe.experts.32.w3", "model.layers.33.block_sparse_moe.experts.33.w3", "model.layers.33.block_sparse_moe.experts.34.w3", "model.layers.33.block_sparse_moe.experts.35.w3", "model.layers.33.block_sparse_moe.experts.36.w3", "model.layers.33.block_sparse_moe.experts.37.w3", "model.layers.33.block_sparse_moe.experts.38.w3", "model.layers.33.block_sparse_moe.experts.39.w3", "model.layers.33.block_sparse_moe.experts.40.w3", "model.layers.33.block_sparse_moe.experts.41.w3", "model.layers.33.block_sparse_moe.experts.42.w3", "model.layers.33.block_sparse_moe.experts.43.w3", "model.layers.33.block_sparse_moe.experts.44.w3", "model.layers.33.block_sparse_moe.experts.45.w3", "model.layers.33.block_sparse_moe.experts.46.w3", "model.layers.33.block_sparse_moe.experts.47.w3", "model.layers.33.block_sparse_moe.experts.48.w3", "model.layers.33.block_sparse_moe.experts.49.w3", "model.layers.33.block_sparse_moe.experts.50.w3", "model.layers.33.block_sparse_moe.experts.51.w3", "model.layers.33.block_sparse_moe.experts.52.w3", "model.layers.33.block_sparse_moe.experts.53.w3", "model.layers.33.block_sparse_moe.experts.54.w3", "model.layers.33.block_sparse_moe.experts.55.w3", "model.layers.33.block_sparse_moe.experts.56.w3", "model.layers.33.block_sparse_moe.experts.57.w3", "model.layers.33.block_sparse_moe.experts.58.w3", "model.layers.33.block_sparse_moe.experts.59.w3", "model.layers.33.block_sparse_moe.experts.60.w3", "model.layers.33.block_sparse_moe.experts.61.w3", "model.layers.33.block_sparse_moe.experts.62.w3", "model.layers.33.block_sparse_moe.experts.63.w3", "model.layers.33.block_sparse_moe.experts.64.w3", "model.layers.33.block_sparse_moe.experts.65.w3", "model.layers.33.block_sparse_moe.experts.66.w3", "model.layers.33.block_sparse_moe.experts.67.w3", "model.layers.33.block_sparse_moe.experts.68.w3", "model.layers.33.block_sparse_moe.experts.69.w3", "model.layers.33.block_sparse_moe.experts.70.w3", "model.layers.33.block_sparse_moe.experts.71.w3", "model.layers.33.block_sparse_moe.experts.72.w3", "model.layers.33.block_sparse_moe.experts.73.w3", "model.layers.33.block_sparse_moe.experts.74.w3", "model.layers.33.block_sparse_moe.experts.75.w3", "model.layers.33.block_sparse_moe.experts.76.w3", "model.layers.33.block_sparse_moe.experts.77.w3", "model.layers.33.block_sparse_moe.experts.78.w3", "model.layers.33.block_sparse_moe.experts.79.w3", "model.layers.33.block_sparse_moe.experts.80.w3", "model.layers.33.block_sparse_moe.experts.81.w3", "model.layers.33.block_sparse_moe.experts.82.w3", "model.layers.33.block_sparse_moe.experts.83.w3", "model.layers.33.block_sparse_moe.experts.84.w3", "model.layers.33.block_sparse_moe.experts.85.w3", "model.layers.33.block_sparse_moe.experts.86.w3", "model.layers.33.block_sparse_moe.experts.87.w3", "model.layers.33.block_sparse_moe.experts.88.w3", "model.layers.33.block_sparse_moe.experts.89.w3", "model.layers.33.block_sparse_moe.experts.90.w3", "model.layers.33.block_sparse_moe.experts.91.w3", "model.layers.33.block_sparse_moe.experts.92.w3", "model.layers.33.block_sparse_moe.experts.93.w3", "model.layers.33.block_sparse_moe.experts.94.w3", "model.layers.33.block_sparse_moe.experts.95.w3", "model.layers.33.block_sparse_moe.experts.96.w3", "model.layers.33.block_sparse_moe.experts.97.w3", "model.layers.33.block_sparse_moe.experts.98.w3", "model.layers.33.block_sparse_moe.experts.99.w3", "model.layers.33.block_sparse_moe.experts.100.w3", "model.layers.33.block_sparse_moe.experts.101.w3", "model.layers.33.block_sparse_moe.experts.102.w3", "model.layers.33.block_sparse_moe.experts.103.w3", "model.layers.33.block_sparse_moe.experts.104.w3", "model.layers.33.block_sparse_moe.experts.105.w3", "model.layers.33.block_sparse_moe.experts.106.w3", "model.layers.33.block_sparse_moe.experts.107.w3", "model.layers.33.block_sparse_moe.experts.108.w3", "model.layers.33.block_sparse_moe.experts.109.w3", "model.layers.33.block_sparse_moe.experts.110.w3", "model.layers.33.block_sparse_moe.experts.111.w3", "model.layers.33.block_sparse_moe.experts.112.w3", "model.layers.33.block_sparse_moe.experts.113.w3", "model.layers.33.block_sparse_moe.experts.114.w3", "model.layers.33.block_sparse_moe.experts.115.w3", "model.layers.33.block_sparse_moe.experts.116.w3", "model.layers.33.block_sparse_moe.experts.117.w3", "model.layers.33.block_sparse_moe.experts.118.w3", "model.layers.33.block_sparse_moe.experts.119.w3", "model.layers.33.block_sparse_moe.experts.120.w3", "model.layers.33.block_sparse_moe.experts.121.w3", "model.layers.33.block_sparse_moe.experts.122.w3", "model.layers.33.block_sparse_moe.experts.123.w3", "model.layers.33.block_sparse_moe.experts.124.w3", "model.layers.33.block_sparse_moe.experts.125.w3", "model.layers.33.block_sparse_moe.experts.126.w3", "model.layers.33.block_sparse_moe.experts.127.w3", "model.layers.33.block_sparse_moe.experts.128.w3", "model.layers.33.block_sparse_moe.experts.129.w3", "model.layers.33.block_sparse_moe.experts.130.w3", "model.layers.33.block_sparse_moe.experts.131.w3", "model.layers.33.block_sparse_moe.experts.132.w3", "model.layers.33.block_sparse_moe.experts.133.w3", "model.layers.33.block_sparse_moe.experts.134.w3", "model.layers.33.block_sparse_moe.experts.135.w3", "model.layers.33.block_sparse_moe.experts.136.w3", "model.layers.33.block_sparse_moe.experts.137.w3", "model.layers.33.block_sparse_moe.experts.138.w3", "model.layers.33.block_sparse_moe.experts.139.w3", "model.layers.33.block_sparse_moe.experts.140.w3", "model.layers.33.block_sparse_moe.experts.141.w3", "model.layers.33.block_sparse_moe.experts.142.w3", "model.layers.33.block_sparse_moe.experts.143.w3", "model.layers.33.block_sparse_moe.experts.144.w3", "model.layers.33.block_sparse_moe.experts.145.w3", "model.layers.33.block_sparse_moe.experts.146.w3", "model.layers.33.block_sparse_moe.experts.147.w3", "model.layers.33.block_sparse_moe.experts.148.w3", "model.layers.33.block_sparse_moe.experts.149.w3", "model.layers.33.block_sparse_moe.experts.150.w3", "model.layers.33.block_sparse_moe.experts.151.w3", "model.layers.33.block_sparse_moe.experts.152.w3", "model.layers.33.block_sparse_moe.experts.153.w3", "model.layers.33.block_sparse_moe.experts.154.w3", "model.layers.33.block_sparse_moe.experts.155.w3", "model.layers.33.block_sparse_moe.experts.156.w3", "model.layers.33.block_sparse_moe.experts.157.w3", "model.layers.33.block_sparse_moe.experts.158.w3", "model.layers.33.block_sparse_moe.experts.159.w3", "model.layers.33.block_sparse_moe.experts.160.w3", "model.layers.33.block_sparse_moe.experts.161.w3", "model.layers.33.block_sparse_moe.experts.162.w3", "model.layers.33.block_sparse_moe.experts.163.w3", "model.layers.33.block_sparse_moe.experts.164.w3", "model.layers.33.block_sparse_moe.experts.165.w3", "model.layers.33.block_sparse_moe.experts.166.w3", "model.layers.33.block_sparse_moe.experts.167.w3", "model.layers.33.block_sparse_moe.experts.168.w3", "model.layers.33.block_sparse_moe.experts.169.w3", "model.layers.33.block_sparse_moe.experts.170.w3", "model.layers.33.block_sparse_moe.experts.171.w3", "model.layers.33.block_sparse_moe.experts.172.w3", "model.layers.33.block_sparse_moe.experts.173.w3", "model.layers.33.block_sparse_moe.experts.174.w3", "model.layers.33.block_sparse_moe.experts.175.w3", "model.layers.33.block_sparse_moe.experts.176.w3", "model.layers.33.block_sparse_moe.experts.177.w3", "model.layers.33.block_sparse_moe.experts.178.w3", "model.layers.33.block_sparse_moe.experts.179.w3", "model.layers.33.block_sparse_moe.experts.180.w3", "model.layers.33.block_sparse_moe.experts.181.w3", "model.layers.33.block_sparse_moe.experts.182.w3", "model.layers.33.block_sparse_moe.experts.183.w3", "model.layers.33.block_sparse_moe.experts.184.w3", "model.layers.33.block_sparse_moe.experts.185.w3", "model.layers.33.block_sparse_moe.experts.186.w3", "model.layers.33.block_sparse_moe.experts.187.w3", "model.layers.33.block_sparse_moe.experts.188.w3", "model.layers.33.block_sparse_moe.experts.189.w3", "model.layers.33.block_sparse_moe.experts.190.w3", "model.layers.33.block_sparse_moe.experts.191.w3", "model.layers.33.block_sparse_moe.experts.192.w3", "model.layers.33.block_sparse_moe.experts.193.w3", "model.layers.33.block_sparse_moe.experts.194.w3", "model.layers.33.block_sparse_moe.experts.195.w3", "model.layers.33.block_sparse_moe.experts.196.w3", "model.layers.33.block_sparse_moe.experts.197.w3", "model.layers.33.block_sparse_moe.experts.198.w3", "model.layers.33.block_sparse_moe.experts.199.w3", "model.layers.33.block_sparse_moe.experts.200.w3", "model.layers.33.block_sparse_moe.experts.201.w3", "model.layers.33.block_sparse_moe.experts.202.w3", "model.layers.33.block_sparse_moe.experts.203.w3", "model.layers.33.block_sparse_moe.experts.204.w3", "model.layers.33.block_sparse_moe.experts.205.w3", "model.layers.33.block_sparse_moe.experts.206.w3", "model.layers.33.block_sparse_moe.experts.207.w3", "model.layers.33.block_sparse_moe.experts.208.w3", "model.layers.33.block_sparse_moe.experts.209.w3", "model.layers.33.block_sparse_moe.experts.210.w3", "model.layers.33.block_sparse_moe.experts.211.w3", "model.layers.33.block_sparse_moe.experts.212.w3", "model.layers.33.block_sparse_moe.experts.213.w3", "model.layers.33.block_sparse_moe.experts.214.w3", "model.layers.33.block_sparse_moe.experts.215.w3", "model.layers.33.block_sparse_moe.experts.216.w3", "model.layers.33.block_sparse_moe.experts.217.w3", "model.layers.33.block_sparse_moe.experts.218.w3", "model.layers.33.block_sparse_moe.experts.219.w3", "model.layers.33.block_sparse_moe.experts.220.w3", "model.layers.33.block_sparse_moe.experts.221.w3", "model.layers.33.block_sparse_moe.experts.222.w3", "model.layers.33.block_sparse_moe.experts.223.w3", "model.layers.33.block_sparse_moe.experts.224.w3", "model.layers.33.block_sparse_moe.experts.225.w3", "model.layers.33.block_sparse_moe.experts.226.w3", "model.layers.33.block_sparse_moe.experts.227.w3", "model.layers.33.block_sparse_moe.experts.228.w3", "model.layers.33.block_sparse_moe.experts.229.w3", "model.layers.33.block_sparse_moe.experts.230.w3", "model.layers.33.block_sparse_moe.experts.231.w3", "model.layers.33.block_sparse_moe.experts.232.w3", "model.layers.33.block_sparse_moe.experts.233.w3", "model.layers.33.block_sparse_moe.experts.234.w3", "model.layers.33.block_sparse_moe.experts.235.w3", "model.layers.33.block_sparse_moe.experts.236.w3", "model.layers.33.block_sparse_moe.experts.237.w3", "model.layers.33.block_sparse_moe.experts.238.w3", "model.layers.33.block_sparse_moe.experts.239.w3", "model.layers.33.block_sparse_moe.experts.240.w3", "model.layers.33.block_sparse_moe.experts.241.w3", "model.layers.33.block_sparse_moe.experts.242.w3", "model.layers.33.block_sparse_moe.experts.243.w3", "model.layers.33.block_sparse_moe.experts.244.w3", "model.layers.33.block_sparse_moe.experts.245.w3", "model.layers.33.block_sparse_moe.experts.246.w3", "model.layers.33.block_sparse_moe.experts.247.w3", "model.layers.33.block_sparse_moe.experts.248.w3", "model.layers.33.block_sparse_moe.experts.249.w3", "model.layers.33.block_sparse_moe.experts.250.w3", "model.layers.33.block_sparse_moe.experts.251.w3", "model.layers.33.block_sparse_moe.experts.252.w3", "model.layers.33.block_sparse_moe.experts.253.w3", "model.layers.33.block_sparse_moe.experts.254.w3", "model.layers.33.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.003376990556716919, "dbits": 2415919104 } ] }, { "idx": 169, "layers": [ "model.layers.33.block_sparse_moe.experts.0.w2", "model.layers.33.block_sparse_moe.experts.1.w2", "model.layers.33.block_sparse_moe.experts.2.w2", "model.layers.33.block_sparse_moe.experts.3.w2", "model.layers.33.block_sparse_moe.experts.4.w2", "model.layers.33.block_sparse_moe.experts.5.w2", "model.layers.33.block_sparse_moe.experts.6.w2", "model.layers.33.block_sparse_moe.experts.7.w2", "model.layers.33.block_sparse_moe.experts.8.w2", "model.layers.33.block_sparse_moe.experts.9.w2", "model.layers.33.block_sparse_moe.experts.10.w2", "model.layers.33.block_sparse_moe.experts.11.w2", "model.layers.33.block_sparse_moe.experts.12.w2", "model.layers.33.block_sparse_moe.experts.13.w2", "model.layers.33.block_sparse_moe.experts.14.w2", "model.layers.33.block_sparse_moe.experts.15.w2", "model.layers.33.block_sparse_moe.experts.16.w2", "model.layers.33.block_sparse_moe.experts.17.w2", "model.layers.33.block_sparse_moe.experts.18.w2", "model.layers.33.block_sparse_moe.experts.19.w2", "model.layers.33.block_sparse_moe.experts.20.w2", "model.layers.33.block_sparse_moe.experts.21.w2", "model.layers.33.block_sparse_moe.experts.22.w2", "model.layers.33.block_sparse_moe.experts.23.w2", "model.layers.33.block_sparse_moe.experts.24.w2", "model.layers.33.block_sparse_moe.experts.25.w2", "model.layers.33.block_sparse_moe.experts.26.w2", "model.layers.33.block_sparse_moe.experts.27.w2", "model.layers.33.block_sparse_moe.experts.28.w2", "model.layers.33.block_sparse_moe.experts.29.w2", "model.layers.33.block_sparse_moe.experts.30.w2", "model.layers.33.block_sparse_moe.experts.31.w2", "model.layers.33.block_sparse_moe.experts.32.w2", "model.layers.33.block_sparse_moe.experts.33.w2", "model.layers.33.block_sparse_moe.experts.34.w2", "model.layers.33.block_sparse_moe.experts.35.w2", "model.layers.33.block_sparse_moe.experts.36.w2", "model.layers.33.block_sparse_moe.experts.37.w2", "model.layers.33.block_sparse_moe.experts.38.w2", "model.layers.33.block_sparse_moe.experts.39.w2", "model.layers.33.block_sparse_moe.experts.40.w2", "model.layers.33.block_sparse_moe.experts.41.w2", "model.layers.33.block_sparse_moe.experts.42.w2", "model.layers.33.block_sparse_moe.experts.43.w2", "model.layers.33.block_sparse_moe.experts.44.w2", "model.layers.33.block_sparse_moe.experts.45.w2", "model.layers.33.block_sparse_moe.experts.46.w2", "model.layers.33.block_sparse_moe.experts.47.w2", "model.layers.33.block_sparse_moe.experts.48.w2", "model.layers.33.block_sparse_moe.experts.49.w2", "model.layers.33.block_sparse_moe.experts.50.w2", "model.layers.33.block_sparse_moe.experts.51.w2", "model.layers.33.block_sparse_moe.experts.52.w2", "model.layers.33.block_sparse_moe.experts.53.w2", "model.layers.33.block_sparse_moe.experts.54.w2", "model.layers.33.block_sparse_moe.experts.55.w2", "model.layers.33.block_sparse_moe.experts.56.w2", "model.layers.33.block_sparse_moe.experts.57.w2", "model.layers.33.block_sparse_moe.experts.58.w2", "model.layers.33.block_sparse_moe.experts.59.w2", "model.layers.33.block_sparse_moe.experts.60.w2", "model.layers.33.block_sparse_moe.experts.61.w2", "model.layers.33.block_sparse_moe.experts.62.w2", "model.layers.33.block_sparse_moe.experts.63.w2", "model.layers.33.block_sparse_moe.experts.64.w2", "model.layers.33.block_sparse_moe.experts.65.w2", "model.layers.33.block_sparse_moe.experts.66.w2", "model.layers.33.block_sparse_moe.experts.67.w2", "model.layers.33.block_sparse_moe.experts.68.w2", "model.layers.33.block_sparse_moe.experts.69.w2", "model.layers.33.block_sparse_moe.experts.70.w2", "model.layers.33.block_sparse_moe.experts.71.w2", "model.layers.33.block_sparse_moe.experts.72.w2", "model.layers.33.block_sparse_moe.experts.73.w2", "model.layers.33.block_sparse_moe.experts.74.w2", "model.layers.33.block_sparse_moe.experts.75.w2", "model.layers.33.block_sparse_moe.experts.76.w2", "model.layers.33.block_sparse_moe.experts.77.w2", "model.layers.33.block_sparse_moe.experts.78.w2", "model.layers.33.block_sparse_moe.experts.79.w2", "model.layers.33.block_sparse_moe.experts.80.w2", "model.layers.33.block_sparse_moe.experts.81.w2", "model.layers.33.block_sparse_moe.experts.82.w2", "model.layers.33.block_sparse_moe.experts.83.w2", "model.layers.33.block_sparse_moe.experts.84.w2", "model.layers.33.block_sparse_moe.experts.85.w2", "model.layers.33.block_sparse_moe.experts.86.w2", "model.layers.33.block_sparse_moe.experts.87.w2", "model.layers.33.block_sparse_moe.experts.88.w2", "model.layers.33.block_sparse_moe.experts.89.w2", "model.layers.33.block_sparse_moe.experts.90.w2", "model.layers.33.block_sparse_moe.experts.91.w2", "model.layers.33.block_sparse_moe.experts.92.w2", "model.layers.33.block_sparse_moe.experts.93.w2", "model.layers.33.block_sparse_moe.experts.94.w2", "model.layers.33.block_sparse_moe.experts.95.w2", "model.layers.33.block_sparse_moe.experts.96.w2", "model.layers.33.block_sparse_moe.experts.97.w2", "model.layers.33.block_sparse_moe.experts.98.w2", "model.layers.33.block_sparse_moe.experts.99.w2", "model.layers.33.block_sparse_moe.experts.100.w2", "model.layers.33.block_sparse_moe.experts.101.w2", "model.layers.33.block_sparse_moe.experts.102.w2", "model.layers.33.block_sparse_moe.experts.103.w2", "model.layers.33.block_sparse_moe.experts.104.w2", "model.layers.33.block_sparse_moe.experts.105.w2", "model.layers.33.block_sparse_moe.experts.106.w2", "model.layers.33.block_sparse_moe.experts.107.w2", "model.layers.33.block_sparse_moe.experts.108.w2", "model.layers.33.block_sparse_moe.experts.109.w2", "model.layers.33.block_sparse_moe.experts.110.w2", "model.layers.33.block_sparse_moe.experts.111.w2", "model.layers.33.block_sparse_moe.experts.112.w2", "model.layers.33.block_sparse_moe.experts.113.w2", "model.layers.33.block_sparse_moe.experts.114.w2", "model.layers.33.block_sparse_moe.experts.115.w2", "model.layers.33.block_sparse_moe.experts.116.w2", "model.layers.33.block_sparse_moe.experts.117.w2", "model.layers.33.block_sparse_moe.experts.118.w2", "model.layers.33.block_sparse_moe.experts.119.w2", "model.layers.33.block_sparse_moe.experts.120.w2", "model.layers.33.block_sparse_moe.experts.121.w2", "model.layers.33.block_sparse_moe.experts.122.w2", "model.layers.33.block_sparse_moe.experts.123.w2", "model.layers.33.block_sparse_moe.experts.124.w2", "model.layers.33.block_sparse_moe.experts.125.w2", "model.layers.33.block_sparse_moe.experts.126.w2", "model.layers.33.block_sparse_moe.experts.127.w2", "model.layers.33.block_sparse_moe.experts.128.w2", "model.layers.33.block_sparse_moe.experts.129.w2", "model.layers.33.block_sparse_moe.experts.130.w2", "model.layers.33.block_sparse_moe.experts.131.w2", "model.layers.33.block_sparse_moe.experts.132.w2", "model.layers.33.block_sparse_moe.experts.133.w2", "model.layers.33.block_sparse_moe.experts.134.w2", "model.layers.33.block_sparse_moe.experts.135.w2", "model.layers.33.block_sparse_moe.experts.136.w2", "model.layers.33.block_sparse_moe.experts.137.w2", "model.layers.33.block_sparse_moe.experts.138.w2", "model.layers.33.block_sparse_moe.experts.139.w2", "model.layers.33.block_sparse_moe.experts.140.w2", "model.layers.33.block_sparse_moe.experts.141.w2", "model.layers.33.block_sparse_moe.experts.142.w2", "model.layers.33.block_sparse_moe.experts.143.w2", "model.layers.33.block_sparse_moe.experts.144.w2", "model.layers.33.block_sparse_moe.experts.145.w2", "model.layers.33.block_sparse_moe.experts.146.w2", "model.layers.33.block_sparse_moe.experts.147.w2", "model.layers.33.block_sparse_moe.experts.148.w2", "model.layers.33.block_sparse_moe.experts.149.w2", "model.layers.33.block_sparse_moe.experts.150.w2", "model.layers.33.block_sparse_moe.experts.151.w2", "model.layers.33.block_sparse_moe.experts.152.w2", "model.layers.33.block_sparse_moe.experts.153.w2", "model.layers.33.block_sparse_moe.experts.154.w2", "model.layers.33.block_sparse_moe.experts.155.w2", "model.layers.33.block_sparse_moe.experts.156.w2", "model.layers.33.block_sparse_moe.experts.157.w2", "model.layers.33.block_sparse_moe.experts.158.w2", "model.layers.33.block_sparse_moe.experts.159.w2", "model.layers.33.block_sparse_moe.experts.160.w2", "model.layers.33.block_sparse_moe.experts.161.w2", "model.layers.33.block_sparse_moe.experts.162.w2", "model.layers.33.block_sparse_moe.experts.163.w2", "model.layers.33.block_sparse_moe.experts.164.w2", "model.layers.33.block_sparse_moe.experts.165.w2", "model.layers.33.block_sparse_moe.experts.166.w2", "model.layers.33.block_sparse_moe.experts.167.w2", "model.layers.33.block_sparse_moe.experts.168.w2", "model.layers.33.block_sparse_moe.experts.169.w2", "model.layers.33.block_sparse_moe.experts.170.w2", "model.layers.33.block_sparse_moe.experts.171.w2", "model.layers.33.block_sparse_moe.experts.172.w2", "model.layers.33.block_sparse_moe.experts.173.w2", "model.layers.33.block_sparse_moe.experts.174.w2", "model.layers.33.block_sparse_moe.experts.175.w2", "model.layers.33.block_sparse_moe.experts.176.w2", "model.layers.33.block_sparse_moe.experts.177.w2", "model.layers.33.block_sparse_moe.experts.178.w2", "model.layers.33.block_sparse_moe.experts.179.w2", "model.layers.33.block_sparse_moe.experts.180.w2", "model.layers.33.block_sparse_moe.experts.181.w2", "model.layers.33.block_sparse_moe.experts.182.w2", "model.layers.33.block_sparse_moe.experts.183.w2", "model.layers.33.block_sparse_moe.experts.184.w2", "model.layers.33.block_sparse_moe.experts.185.w2", "model.layers.33.block_sparse_moe.experts.186.w2", "model.layers.33.block_sparse_moe.experts.187.w2", "model.layers.33.block_sparse_moe.experts.188.w2", "model.layers.33.block_sparse_moe.experts.189.w2", "model.layers.33.block_sparse_moe.experts.190.w2", "model.layers.33.block_sparse_moe.experts.191.w2", "model.layers.33.block_sparse_moe.experts.192.w2", "model.layers.33.block_sparse_moe.experts.193.w2", "model.layers.33.block_sparse_moe.experts.194.w2", "model.layers.33.block_sparse_moe.experts.195.w2", "model.layers.33.block_sparse_moe.experts.196.w2", "model.layers.33.block_sparse_moe.experts.197.w2", "model.layers.33.block_sparse_moe.experts.198.w2", "model.layers.33.block_sparse_moe.experts.199.w2", "model.layers.33.block_sparse_moe.experts.200.w2", "model.layers.33.block_sparse_moe.experts.201.w2", "model.layers.33.block_sparse_moe.experts.202.w2", "model.layers.33.block_sparse_moe.experts.203.w2", "model.layers.33.block_sparse_moe.experts.204.w2", "model.layers.33.block_sparse_moe.experts.205.w2", "model.layers.33.block_sparse_moe.experts.206.w2", "model.layers.33.block_sparse_moe.experts.207.w2", "model.layers.33.block_sparse_moe.experts.208.w2", "model.layers.33.block_sparse_moe.experts.209.w2", "model.layers.33.block_sparse_moe.experts.210.w2", "model.layers.33.block_sparse_moe.experts.211.w2", "model.layers.33.block_sparse_moe.experts.212.w2", "model.layers.33.block_sparse_moe.experts.213.w2", "model.layers.33.block_sparse_moe.experts.214.w2", "model.layers.33.block_sparse_moe.experts.215.w2", "model.layers.33.block_sparse_moe.experts.216.w2", "model.layers.33.block_sparse_moe.experts.217.w2", "model.layers.33.block_sparse_moe.experts.218.w2", "model.layers.33.block_sparse_moe.experts.219.w2", "model.layers.33.block_sparse_moe.experts.220.w2", "model.layers.33.block_sparse_moe.experts.221.w2", "model.layers.33.block_sparse_moe.experts.222.w2", "model.layers.33.block_sparse_moe.experts.223.w2", "model.layers.33.block_sparse_moe.experts.224.w2", "model.layers.33.block_sparse_moe.experts.225.w2", "model.layers.33.block_sparse_moe.experts.226.w2", "model.layers.33.block_sparse_moe.experts.227.w2", "model.layers.33.block_sparse_moe.experts.228.w2", "model.layers.33.block_sparse_moe.experts.229.w2", "model.layers.33.block_sparse_moe.experts.230.w2", "model.layers.33.block_sparse_moe.experts.231.w2", "model.layers.33.block_sparse_moe.experts.232.w2", "model.layers.33.block_sparse_moe.experts.233.w2", "model.layers.33.block_sparse_moe.experts.234.w2", "model.layers.33.block_sparse_moe.experts.235.w2", "model.layers.33.block_sparse_moe.experts.236.w2", "model.layers.33.block_sparse_moe.experts.237.w2", "model.layers.33.block_sparse_moe.experts.238.w2", "model.layers.33.block_sparse_moe.experts.239.w2", "model.layers.33.block_sparse_moe.experts.240.w2", "model.layers.33.block_sparse_moe.experts.241.w2", "model.layers.33.block_sparse_moe.experts.242.w2", "model.layers.33.block_sparse_moe.experts.243.w2", "model.layers.33.block_sparse_moe.experts.244.w2", "model.layers.33.block_sparse_moe.experts.245.w2", "model.layers.33.block_sparse_moe.experts.246.w2", "model.layers.33.block_sparse_moe.experts.247.w2", "model.layers.33.block_sparse_moe.experts.248.w2", "model.layers.33.block_sparse_moe.experts.249.w2", "model.layers.33.block_sparse_moe.experts.250.w2", "model.layers.33.block_sparse_moe.experts.251.w2", "model.layers.33.block_sparse_moe.experts.252.w2", "model.layers.33.block_sparse_moe.experts.253.w2", "model.layers.33.block_sparse_moe.experts.254.w2", "model.layers.33.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00022166371345511138, "dbits": 1207959552 } ] }, { "idx": 170, "layers": [ "model.layers.34.self_attn.q_proj" ], "candidates": [ { "dkld": 0.007163864374160833, "dbits": 18874368 } ] }, { "idx": 171, "layers": [ "model.layers.34.self_attn.k_proj", "model.layers.34.self_attn.v_proj" ], "candidates": [ { "dkld": -0.009067618846893222, "dbits": 6291456 } ] }, { "idx": 172, "layers": [ "model.layers.34.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0032477796077728938, "dbits": 18874368 } ] }, { "idx": 173, "layers": [ "model.layers.34.block_sparse_moe.experts.0.w1", "model.layers.34.block_sparse_moe.experts.1.w1", "model.layers.34.block_sparse_moe.experts.2.w1", "model.layers.34.block_sparse_moe.experts.3.w1", "model.layers.34.block_sparse_moe.experts.4.w1", "model.layers.34.block_sparse_moe.experts.5.w1", "model.layers.34.block_sparse_moe.experts.6.w1", "model.layers.34.block_sparse_moe.experts.7.w1", "model.layers.34.block_sparse_moe.experts.8.w1", "model.layers.34.block_sparse_moe.experts.9.w1", "model.layers.34.block_sparse_moe.experts.10.w1", "model.layers.34.block_sparse_moe.experts.11.w1", "model.layers.34.block_sparse_moe.experts.12.w1", "model.layers.34.block_sparse_moe.experts.13.w1", "model.layers.34.block_sparse_moe.experts.14.w1", "model.layers.34.block_sparse_moe.experts.15.w1", "model.layers.34.block_sparse_moe.experts.16.w1", "model.layers.34.block_sparse_moe.experts.17.w1", "model.layers.34.block_sparse_moe.experts.18.w1", "model.layers.34.block_sparse_moe.experts.19.w1", "model.layers.34.block_sparse_moe.experts.20.w1", "model.layers.34.block_sparse_moe.experts.21.w1", "model.layers.34.block_sparse_moe.experts.22.w1", "model.layers.34.block_sparse_moe.experts.23.w1", "model.layers.34.block_sparse_moe.experts.24.w1", "model.layers.34.block_sparse_moe.experts.25.w1", "model.layers.34.block_sparse_moe.experts.26.w1", "model.layers.34.block_sparse_moe.experts.27.w1", "model.layers.34.block_sparse_moe.experts.28.w1", "model.layers.34.block_sparse_moe.experts.29.w1", "model.layers.34.block_sparse_moe.experts.30.w1", "model.layers.34.block_sparse_moe.experts.31.w1", "model.layers.34.block_sparse_moe.experts.32.w1", "model.layers.34.block_sparse_moe.experts.33.w1", "model.layers.34.block_sparse_moe.experts.34.w1", "model.layers.34.block_sparse_moe.experts.35.w1", "model.layers.34.block_sparse_moe.experts.36.w1", "model.layers.34.block_sparse_moe.experts.37.w1", "model.layers.34.block_sparse_moe.experts.38.w1", "model.layers.34.block_sparse_moe.experts.39.w1", "model.layers.34.block_sparse_moe.experts.40.w1", "model.layers.34.block_sparse_moe.experts.41.w1", "model.layers.34.block_sparse_moe.experts.42.w1", "model.layers.34.block_sparse_moe.experts.43.w1", "model.layers.34.block_sparse_moe.experts.44.w1", "model.layers.34.block_sparse_moe.experts.45.w1", "model.layers.34.block_sparse_moe.experts.46.w1", "model.layers.34.block_sparse_moe.experts.47.w1", "model.layers.34.block_sparse_moe.experts.48.w1", "model.layers.34.block_sparse_moe.experts.49.w1", "model.layers.34.block_sparse_moe.experts.50.w1", "model.layers.34.block_sparse_moe.experts.51.w1", "model.layers.34.block_sparse_moe.experts.52.w1", "model.layers.34.block_sparse_moe.experts.53.w1", "model.layers.34.block_sparse_moe.experts.54.w1", "model.layers.34.block_sparse_moe.experts.55.w1", "model.layers.34.block_sparse_moe.experts.56.w1", "model.layers.34.block_sparse_moe.experts.57.w1", "model.layers.34.block_sparse_moe.experts.58.w1", "model.layers.34.block_sparse_moe.experts.59.w1", "model.layers.34.block_sparse_moe.experts.60.w1", "model.layers.34.block_sparse_moe.experts.61.w1", "model.layers.34.block_sparse_moe.experts.62.w1", "model.layers.34.block_sparse_moe.experts.63.w1", "model.layers.34.block_sparse_moe.experts.64.w1", "model.layers.34.block_sparse_moe.experts.65.w1", "model.layers.34.block_sparse_moe.experts.66.w1", "model.layers.34.block_sparse_moe.experts.67.w1", "model.layers.34.block_sparse_moe.experts.68.w1", "model.layers.34.block_sparse_moe.experts.69.w1", "model.layers.34.block_sparse_moe.experts.70.w1", "model.layers.34.block_sparse_moe.experts.71.w1", "model.layers.34.block_sparse_moe.experts.72.w1", "model.layers.34.block_sparse_moe.experts.73.w1", "model.layers.34.block_sparse_moe.experts.74.w1", "model.layers.34.block_sparse_moe.experts.75.w1", "model.layers.34.block_sparse_moe.experts.76.w1", "model.layers.34.block_sparse_moe.experts.77.w1", "model.layers.34.block_sparse_moe.experts.78.w1", "model.layers.34.block_sparse_moe.experts.79.w1", "model.layers.34.block_sparse_moe.experts.80.w1", "model.layers.34.block_sparse_moe.experts.81.w1", "model.layers.34.block_sparse_moe.experts.82.w1", "model.layers.34.block_sparse_moe.experts.83.w1", "model.layers.34.block_sparse_moe.experts.84.w1", "model.layers.34.block_sparse_moe.experts.85.w1", "model.layers.34.block_sparse_moe.experts.86.w1", "model.layers.34.block_sparse_moe.experts.87.w1", "model.layers.34.block_sparse_moe.experts.88.w1", "model.layers.34.block_sparse_moe.experts.89.w1", "model.layers.34.block_sparse_moe.experts.90.w1", "model.layers.34.block_sparse_moe.experts.91.w1", "model.layers.34.block_sparse_moe.experts.92.w1", "model.layers.34.block_sparse_moe.experts.93.w1", "model.layers.34.block_sparse_moe.experts.94.w1", "model.layers.34.block_sparse_moe.experts.95.w1", "model.layers.34.block_sparse_moe.experts.96.w1", "model.layers.34.block_sparse_moe.experts.97.w1", "model.layers.34.block_sparse_moe.experts.98.w1", "model.layers.34.block_sparse_moe.experts.99.w1", "model.layers.34.block_sparse_moe.experts.100.w1", "model.layers.34.block_sparse_moe.experts.101.w1", "model.layers.34.block_sparse_moe.experts.102.w1", "model.layers.34.block_sparse_moe.experts.103.w1", "model.layers.34.block_sparse_moe.experts.104.w1", "model.layers.34.block_sparse_moe.experts.105.w1", "model.layers.34.block_sparse_moe.experts.106.w1", "model.layers.34.block_sparse_moe.experts.107.w1", "model.layers.34.block_sparse_moe.experts.108.w1", "model.layers.34.block_sparse_moe.experts.109.w1", "model.layers.34.block_sparse_moe.experts.110.w1", "model.layers.34.block_sparse_moe.experts.111.w1", "model.layers.34.block_sparse_moe.experts.112.w1", "model.layers.34.block_sparse_moe.experts.113.w1", "model.layers.34.block_sparse_moe.experts.114.w1", "model.layers.34.block_sparse_moe.experts.115.w1", "model.layers.34.block_sparse_moe.experts.116.w1", "model.layers.34.block_sparse_moe.experts.117.w1", "model.layers.34.block_sparse_moe.experts.118.w1", "model.layers.34.block_sparse_moe.experts.119.w1", "model.layers.34.block_sparse_moe.experts.120.w1", "model.layers.34.block_sparse_moe.experts.121.w1", "model.layers.34.block_sparse_moe.experts.122.w1", "model.layers.34.block_sparse_moe.experts.123.w1", "model.layers.34.block_sparse_moe.experts.124.w1", "model.layers.34.block_sparse_moe.experts.125.w1", "model.layers.34.block_sparse_moe.experts.126.w1", "model.layers.34.block_sparse_moe.experts.127.w1", "model.layers.34.block_sparse_moe.experts.128.w1", "model.layers.34.block_sparse_moe.experts.129.w1", "model.layers.34.block_sparse_moe.experts.130.w1", "model.layers.34.block_sparse_moe.experts.131.w1", "model.layers.34.block_sparse_moe.experts.132.w1", "model.layers.34.block_sparse_moe.experts.133.w1", "model.layers.34.block_sparse_moe.experts.134.w1", "model.layers.34.block_sparse_moe.experts.135.w1", "model.layers.34.block_sparse_moe.experts.136.w1", "model.layers.34.block_sparse_moe.experts.137.w1", "model.layers.34.block_sparse_moe.experts.138.w1", "model.layers.34.block_sparse_moe.experts.139.w1", "model.layers.34.block_sparse_moe.experts.140.w1", "model.layers.34.block_sparse_moe.experts.141.w1", "model.layers.34.block_sparse_moe.experts.142.w1", "model.layers.34.block_sparse_moe.experts.143.w1", "model.layers.34.block_sparse_moe.experts.144.w1", "model.layers.34.block_sparse_moe.experts.145.w1", "model.layers.34.block_sparse_moe.experts.146.w1", "model.layers.34.block_sparse_moe.experts.147.w1", "model.layers.34.block_sparse_moe.experts.148.w1", "model.layers.34.block_sparse_moe.experts.149.w1", "model.layers.34.block_sparse_moe.experts.150.w1", "model.layers.34.block_sparse_moe.experts.151.w1", "model.layers.34.block_sparse_moe.experts.152.w1", "model.layers.34.block_sparse_moe.experts.153.w1", "model.layers.34.block_sparse_moe.experts.154.w1", "model.layers.34.block_sparse_moe.experts.155.w1", "model.layers.34.block_sparse_moe.experts.156.w1", "model.layers.34.block_sparse_moe.experts.157.w1", "model.layers.34.block_sparse_moe.experts.158.w1", "model.layers.34.block_sparse_moe.experts.159.w1", "model.layers.34.block_sparse_moe.experts.160.w1", "model.layers.34.block_sparse_moe.experts.161.w1", "model.layers.34.block_sparse_moe.experts.162.w1", "model.layers.34.block_sparse_moe.experts.163.w1", "model.layers.34.block_sparse_moe.experts.164.w1", "model.layers.34.block_sparse_moe.experts.165.w1", "model.layers.34.block_sparse_moe.experts.166.w1", "model.layers.34.block_sparse_moe.experts.167.w1", "model.layers.34.block_sparse_moe.experts.168.w1", "model.layers.34.block_sparse_moe.experts.169.w1", "model.layers.34.block_sparse_moe.experts.170.w1", "model.layers.34.block_sparse_moe.experts.171.w1", "model.layers.34.block_sparse_moe.experts.172.w1", "model.layers.34.block_sparse_moe.experts.173.w1", "model.layers.34.block_sparse_moe.experts.174.w1", "model.layers.34.block_sparse_moe.experts.175.w1", "model.layers.34.block_sparse_moe.experts.176.w1", "model.layers.34.block_sparse_moe.experts.177.w1", "model.layers.34.block_sparse_moe.experts.178.w1", "model.layers.34.block_sparse_moe.experts.179.w1", "model.layers.34.block_sparse_moe.experts.180.w1", "model.layers.34.block_sparse_moe.experts.181.w1", "model.layers.34.block_sparse_moe.experts.182.w1", "model.layers.34.block_sparse_moe.experts.183.w1", "model.layers.34.block_sparse_moe.experts.184.w1", "model.layers.34.block_sparse_moe.experts.185.w1", "model.layers.34.block_sparse_moe.experts.186.w1", "model.layers.34.block_sparse_moe.experts.187.w1", "model.layers.34.block_sparse_moe.experts.188.w1", "model.layers.34.block_sparse_moe.experts.189.w1", "model.layers.34.block_sparse_moe.experts.190.w1", "model.layers.34.block_sparse_moe.experts.191.w1", "model.layers.34.block_sparse_moe.experts.192.w1", "model.layers.34.block_sparse_moe.experts.193.w1", "model.layers.34.block_sparse_moe.experts.194.w1", "model.layers.34.block_sparse_moe.experts.195.w1", "model.layers.34.block_sparse_moe.experts.196.w1", "model.layers.34.block_sparse_moe.experts.197.w1", "model.layers.34.block_sparse_moe.experts.198.w1", "model.layers.34.block_sparse_moe.experts.199.w1", "model.layers.34.block_sparse_moe.experts.200.w1", "model.layers.34.block_sparse_moe.experts.201.w1", "model.layers.34.block_sparse_moe.experts.202.w1", "model.layers.34.block_sparse_moe.experts.203.w1", "model.layers.34.block_sparse_moe.experts.204.w1", "model.layers.34.block_sparse_moe.experts.205.w1", "model.layers.34.block_sparse_moe.experts.206.w1", "model.layers.34.block_sparse_moe.experts.207.w1", "model.layers.34.block_sparse_moe.experts.208.w1", "model.layers.34.block_sparse_moe.experts.209.w1", "model.layers.34.block_sparse_moe.experts.210.w1", "model.layers.34.block_sparse_moe.experts.211.w1", "model.layers.34.block_sparse_moe.experts.212.w1", "model.layers.34.block_sparse_moe.experts.213.w1", "model.layers.34.block_sparse_moe.experts.214.w1", "model.layers.34.block_sparse_moe.experts.215.w1", "model.layers.34.block_sparse_moe.experts.216.w1", "model.layers.34.block_sparse_moe.experts.217.w1", "model.layers.34.block_sparse_moe.experts.218.w1", "model.layers.34.block_sparse_moe.experts.219.w1", "model.layers.34.block_sparse_moe.experts.220.w1", "model.layers.34.block_sparse_moe.experts.221.w1", "model.layers.34.block_sparse_moe.experts.222.w1", "model.layers.34.block_sparse_moe.experts.223.w1", "model.layers.34.block_sparse_moe.experts.224.w1", "model.layers.34.block_sparse_moe.experts.225.w1", "model.layers.34.block_sparse_moe.experts.226.w1", "model.layers.34.block_sparse_moe.experts.227.w1", "model.layers.34.block_sparse_moe.experts.228.w1", "model.layers.34.block_sparse_moe.experts.229.w1", "model.layers.34.block_sparse_moe.experts.230.w1", "model.layers.34.block_sparse_moe.experts.231.w1", "model.layers.34.block_sparse_moe.experts.232.w1", "model.layers.34.block_sparse_moe.experts.233.w1", "model.layers.34.block_sparse_moe.experts.234.w1", "model.layers.34.block_sparse_moe.experts.235.w1", "model.layers.34.block_sparse_moe.experts.236.w1", "model.layers.34.block_sparse_moe.experts.237.w1", "model.layers.34.block_sparse_moe.experts.238.w1", "model.layers.34.block_sparse_moe.experts.239.w1", "model.layers.34.block_sparse_moe.experts.240.w1", "model.layers.34.block_sparse_moe.experts.241.w1", "model.layers.34.block_sparse_moe.experts.242.w1", "model.layers.34.block_sparse_moe.experts.243.w1", "model.layers.34.block_sparse_moe.experts.244.w1", "model.layers.34.block_sparse_moe.experts.245.w1", "model.layers.34.block_sparse_moe.experts.246.w1", "model.layers.34.block_sparse_moe.experts.247.w1", "model.layers.34.block_sparse_moe.experts.248.w1", "model.layers.34.block_sparse_moe.experts.249.w1", "model.layers.34.block_sparse_moe.experts.250.w1", "model.layers.34.block_sparse_moe.experts.251.w1", "model.layers.34.block_sparse_moe.experts.252.w1", "model.layers.34.block_sparse_moe.experts.253.w1", "model.layers.34.block_sparse_moe.experts.254.w1", "model.layers.34.block_sparse_moe.experts.255.w1", "model.layers.34.block_sparse_moe.experts.0.w3", "model.layers.34.block_sparse_moe.experts.1.w3", "model.layers.34.block_sparse_moe.experts.2.w3", "model.layers.34.block_sparse_moe.experts.3.w3", "model.layers.34.block_sparse_moe.experts.4.w3", "model.layers.34.block_sparse_moe.experts.5.w3", "model.layers.34.block_sparse_moe.experts.6.w3", "model.layers.34.block_sparse_moe.experts.7.w3", "model.layers.34.block_sparse_moe.experts.8.w3", "model.layers.34.block_sparse_moe.experts.9.w3", "model.layers.34.block_sparse_moe.experts.10.w3", "model.layers.34.block_sparse_moe.experts.11.w3", "model.layers.34.block_sparse_moe.experts.12.w3", "model.layers.34.block_sparse_moe.experts.13.w3", "model.layers.34.block_sparse_moe.experts.14.w3", "model.layers.34.block_sparse_moe.experts.15.w3", "model.layers.34.block_sparse_moe.experts.16.w3", "model.layers.34.block_sparse_moe.experts.17.w3", "model.layers.34.block_sparse_moe.experts.18.w3", "model.layers.34.block_sparse_moe.experts.19.w3", "model.layers.34.block_sparse_moe.experts.20.w3", "model.layers.34.block_sparse_moe.experts.21.w3", "model.layers.34.block_sparse_moe.experts.22.w3", "model.layers.34.block_sparse_moe.experts.23.w3", "model.layers.34.block_sparse_moe.experts.24.w3", "model.layers.34.block_sparse_moe.experts.25.w3", "model.layers.34.block_sparse_moe.experts.26.w3", "model.layers.34.block_sparse_moe.experts.27.w3", "model.layers.34.block_sparse_moe.experts.28.w3", "model.layers.34.block_sparse_moe.experts.29.w3", "model.layers.34.block_sparse_moe.experts.30.w3", "model.layers.34.block_sparse_moe.experts.31.w3", "model.layers.34.block_sparse_moe.experts.32.w3", "model.layers.34.block_sparse_moe.experts.33.w3", "model.layers.34.block_sparse_moe.experts.34.w3", "model.layers.34.block_sparse_moe.experts.35.w3", "model.layers.34.block_sparse_moe.experts.36.w3", "model.layers.34.block_sparse_moe.experts.37.w3", "model.layers.34.block_sparse_moe.experts.38.w3", "model.layers.34.block_sparse_moe.experts.39.w3", "model.layers.34.block_sparse_moe.experts.40.w3", "model.layers.34.block_sparse_moe.experts.41.w3", "model.layers.34.block_sparse_moe.experts.42.w3", "model.layers.34.block_sparse_moe.experts.43.w3", "model.layers.34.block_sparse_moe.experts.44.w3", "model.layers.34.block_sparse_moe.experts.45.w3", "model.layers.34.block_sparse_moe.experts.46.w3", "model.layers.34.block_sparse_moe.experts.47.w3", "model.layers.34.block_sparse_moe.experts.48.w3", "model.layers.34.block_sparse_moe.experts.49.w3", "model.layers.34.block_sparse_moe.experts.50.w3", "model.layers.34.block_sparse_moe.experts.51.w3", "model.layers.34.block_sparse_moe.experts.52.w3", "model.layers.34.block_sparse_moe.experts.53.w3", "model.layers.34.block_sparse_moe.experts.54.w3", "model.layers.34.block_sparse_moe.experts.55.w3", "model.layers.34.block_sparse_moe.experts.56.w3", "model.layers.34.block_sparse_moe.experts.57.w3", "model.layers.34.block_sparse_moe.experts.58.w3", "model.layers.34.block_sparse_moe.experts.59.w3", "model.layers.34.block_sparse_moe.experts.60.w3", "model.layers.34.block_sparse_moe.experts.61.w3", "model.layers.34.block_sparse_moe.experts.62.w3", "model.layers.34.block_sparse_moe.experts.63.w3", "model.layers.34.block_sparse_moe.experts.64.w3", "model.layers.34.block_sparse_moe.experts.65.w3", "model.layers.34.block_sparse_moe.experts.66.w3", "model.layers.34.block_sparse_moe.experts.67.w3", "model.layers.34.block_sparse_moe.experts.68.w3", "model.layers.34.block_sparse_moe.experts.69.w3", "model.layers.34.block_sparse_moe.experts.70.w3", "model.layers.34.block_sparse_moe.experts.71.w3", "model.layers.34.block_sparse_moe.experts.72.w3", "model.layers.34.block_sparse_moe.experts.73.w3", "model.layers.34.block_sparse_moe.experts.74.w3", "model.layers.34.block_sparse_moe.experts.75.w3", "model.layers.34.block_sparse_moe.experts.76.w3", "model.layers.34.block_sparse_moe.experts.77.w3", "model.layers.34.block_sparse_moe.experts.78.w3", "model.layers.34.block_sparse_moe.experts.79.w3", "model.layers.34.block_sparse_moe.experts.80.w3", "model.layers.34.block_sparse_moe.experts.81.w3", "model.layers.34.block_sparse_moe.experts.82.w3", "model.layers.34.block_sparse_moe.experts.83.w3", "model.layers.34.block_sparse_moe.experts.84.w3", "model.layers.34.block_sparse_moe.experts.85.w3", "model.layers.34.block_sparse_moe.experts.86.w3", "model.layers.34.block_sparse_moe.experts.87.w3", "model.layers.34.block_sparse_moe.experts.88.w3", "model.layers.34.block_sparse_moe.experts.89.w3", "model.layers.34.block_sparse_moe.experts.90.w3", "model.layers.34.block_sparse_moe.experts.91.w3", "model.layers.34.block_sparse_moe.experts.92.w3", "model.layers.34.block_sparse_moe.experts.93.w3", "model.layers.34.block_sparse_moe.experts.94.w3", "model.layers.34.block_sparse_moe.experts.95.w3", "model.layers.34.block_sparse_moe.experts.96.w3", "model.layers.34.block_sparse_moe.experts.97.w3", "model.layers.34.block_sparse_moe.experts.98.w3", "model.layers.34.block_sparse_moe.experts.99.w3", "model.layers.34.block_sparse_moe.experts.100.w3", "model.layers.34.block_sparse_moe.experts.101.w3", "model.layers.34.block_sparse_moe.experts.102.w3", "model.layers.34.block_sparse_moe.experts.103.w3", "model.layers.34.block_sparse_moe.experts.104.w3", "model.layers.34.block_sparse_moe.experts.105.w3", "model.layers.34.block_sparse_moe.experts.106.w3", "model.layers.34.block_sparse_moe.experts.107.w3", "model.layers.34.block_sparse_moe.experts.108.w3", "model.layers.34.block_sparse_moe.experts.109.w3", "model.layers.34.block_sparse_moe.experts.110.w3", "model.layers.34.block_sparse_moe.experts.111.w3", "model.layers.34.block_sparse_moe.experts.112.w3", "model.layers.34.block_sparse_moe.experts.113.w3", "model.layers.34.block_sparse_moe.experts.114.w3", "model.layers.34.block_sparse_moe.experts.115.w3", "model.layers.34.block_sparse_moe.experts.116.w3", "model.layers.34.block_sparse_moe.experts.117.w3", "model.layers.34.block_sparse_moe.experts.118.w3", "model.layers.34.block_sparse_moe.experts.119.w3", "model.layers.34.block_sparse_moe.experts.120.w3", "model.layers.34.block_sparse_moe.experts.121.w3", "model.layers.34.block_sparse_moe.experts.122.w3", "model.layers.34.block_sparse_moe.experts.123.w3", "model.layers.34.block_sparse_moe.experts.124.w3", "model.layers.34.block_sparse_moe.experts.125.w3", "model.layers.34.block_sparse_moe.experts.126.w3", "model.layers.34.block_sparse_moe.experts.127.w3", "model.layers.34.block_sparse_moe.experts.128.w3", "model.layers.34.block_sparse_moe.experts.129.w3", "model.layers.34.block_sparse_moe.experts.130.w3", "model.layers.34.block_sparse_moe.experts.131.w3", "model.layers.34.block_sparse_moe.experts.132.w3", "model.layers.34.block_sparse_moe.experts.133.w3", "model.layers.34.block_sparse_moe.experts.134.w3", "model.layers.34.block_sparse_moe.experts.135.w3", "model.layers.34.block_sparse_moe.experts.136.w3", "model.layers.34.block_sparse_moe.experts.137.w3", "model.layers.34.block_sparse_moe.experts.138.w3", "model.layers.34.block_sparse_moe.experts.139.w3", "model.layers.34.block_sparse_moe.experts.140.w3", "model.layers.34.block_sparse_moe.experts.141.w3", "model.layers.34.block_sparse_moe.experts.142.w3", "model.layers.34.block_sparse_moe.experts.143.w3", "model.layers.34.block_sparse_moe.experts.144.w3", "model.layers.34.block_sparse_moe.experts.145.w3", "model.layers.34.block_sparse_moe.experts.146.w3", "model.layers.34.block_sparse_moe.experts.147.w3", "model.layers.34.block_sparse_moe.experts.148.w3", "model.layers.34.block_sparse_moe.experts.149.w3", "model.layers.34.block_sparse_moe.experts.150.w3", "model.layers.34.block_sparse_moe.experts.151.w3", "model.layers.34.block_sparse_moe.experts.152.w3", "model.layers.34.block_sparse_moe.experts.153.w3", "model.layers.34.block_sparse_moe.experts.154.w3", "model.layers.34.block_sparse_moe.experts.155.w3", "model.layers.34.block_sparse_moe.experts.156.w3", "model.layers.34.block_sparse_moe.experts.157.w3", "model.layers.34.block_sparse_moe.experts.158.w3", "model.layers.34.block_sparse_moe.experts.159.w3", "model.layers.34.block_sparse_moe.experts.160.w3", "model.layers.34.block_sparse_moe.experts.161.w3", "model.layers.34.block_sparse_moe.experts.162.w3", "model.layers.34.block_sparse_moe.experts.163.w3", "model.layers.34.block_sparse_moe.experts.164.w3", "model.layers.34.block_sparse_moe.experts.165.w3", "model.layers.34.block_sparse_moe.experts.166.w3", "model.layers.34.block_sparse_moe.experts.167.w3", "model.layers.34.block_sparse_moe.experts.168.w3", "model.layers.34.block_sparse_moe.experts.169.w3", "model.layers.34.block_sparse_moe.experts.170.w3", "model.layers.34.block_sparse_moe.experts.171.w3", "model.layers.34.block_sparse_moe.experts.172.w3", "model.layers.34.block_sparse_moe.experts.173.w3", "model.layers.34.block_sparse_moe.experts.174.w3", "model.layers.34.block_sparse_moe.experts.175.w3", "model.layers.34.block_sparse_moe.experts.176.w3", "model.layers.34.block_sparse_moe.experts.177.w3", "model.layers.34.block_sparse_moe.experts.178.w3", "model.layers.34.block_sparse_moe.experts.179.w3", "model.layers.34.block_sparse_moe.experts.180.w3", "model.layers.34.block_sparse_moe.experts.181.w3", "model.layers.34.block_sparse_moe.experts.182.w3", "model.layers.34.block_sparse_moe.experts.183.w3", "model.layers.34.block_sparse_moe.experts.184.w3", "model.layers.34.block_sparse_moe.experts.185.w3", "model.layers.34.block_sparse_moe.experts.186.w3", "model.layers.34.block_sparse_moe.experts.187.w3", "model.layers.34.block_sparse_moe.experts.188.w3", "model.layers.34.block_sparse_moe.experts.189.w3", "model.layers.34.block_sparse_moe.experts.190.w3", "model.layers.34.block_sparse_moe.experts.191.w3", "model.layers.34.block_sparse_moe.experts.192.w3", "model.layers.34.block_sparse_moe.experts.193.w3", "model.layers.34.block_sparse_moe.experts.194.w3", "model.layers.34.block_sparse_moe.experts.195.w3", "model.layers.34.block_sparse_moe.experts.196.w3", "model.layers.34.block_sparse_moe.experts.197.w3", "model.layers.34.block_sparse_moe.experts.198.w3", "model.layers.34.block_sparse_moe.experts.199.w3", "model.layers.34.block_sparse_moe.experts.200.w3", "model.layers.34.block_sparse_moe.experts.201.w3", "model.layers.34.block_sparse_moe.experts.202.w3", "model.layers.34.block_sparse_moe.experts.203.w3", "model.layers.34.block_sparse_moe.experts.204.w3", "model.layers.34.block_sparse_moe.experts.205.w3", "model.layers.34.block_sparse_moe.experts.206.w3", "model.layers.34.block_sparse_moe.experts.207.w3", "model.layers.34.block_sparse_moe.experts.208.w3", "model.layers.34.block_sparse_moe.experts.209.w3", "model.layers.34.block_sparse_moe.experts.210.w3", "model.layers.34.block_sparse_moe.experts.211.w3", "model.layers.34.block_sparse_moe.experts.212.w3", "model.layers.34.block_sparse_moe.experts.213.w3", "model.layers.34.block_sparse_moe.experts.214.w3", "model.layers.34.block_sparse_moe.experts.215.w3", "model.layers.34.block_sparse_moe.experts.216.w3", "model.layers.34.block_sparse_moe.experts.217.w3", "model.layers.34.block_sparse_moe.experts.218.w3", "model.layers.34.block_sparse_moe.experts.219.w3", "model.layers.34.block_sparse_moe.experts.220.w3", "model.layers.34.block_sparse_moe.experts.221.w3", "model.layers.34.block_sparse_moe.experts.222.w3", "model.layers.34.block_sparse_moe.experts.223.w3", "model.layers.34.block_sparse_moe.experts.224.w3", "model.layers.34.block_sparse_moe.experts.225.w3", "model.layers.34.block_sparse_moe.experts.226.w3", "model.layers.34.block_sparse_moe.experts.227.w3", "model.layers.34.block_sparse_moe.experts.228.w3", "model.layers.34.block_sparse_moe.experts.229.w3", "model.layers.34.block_sparse_moe.experts.230.w3", "model.layers.34.block_sparse_moe.experts.231.w3", "model.layers.34.block_sparse_moe.experts.232.w3", "model.layers.34.block_sparse_moe.experts.233.w3", "model.layers.34.block_sparse_moe.experts.234.w3", "model.layers.34.block_sparse_moe.experts.235.w3", "model.layers.34.block_sparse_moe.experts.236.w3", "model.layers.34.block_sparse_moe.experts.237.w3", "model.layers.34.block_sparse_moe.experts.238.w3", "model.layers.34.block_sparse_moe.experts.239.w3", "model.layers.34.block_sparse_moe.experts.240.w3", "model.layers.34.block_sparse_moe.experts.241.w3", "model.layers.34.block_sparse_moe.experts.242.w3", "model.layers.34.block_sparse_moe.experts.243.w3", "model.layers.34.block_sparse_moe.experts.244.w3", "model.layers.34.block_sparse_moe.experts.245.w3", "model.layers.34.block_sparse_moe.experts.246.w3", "model.layers.34.block_sparse_moe.experts.247.w3", "model.layers.34.block_sparse_moe.experts.248.w3", "model.layers.34.block_sparse_moe.experts.249.w3", "model.layers.34.block_sparse_moe.experts.250.w3", "model.layers.34.block_sparse_moe.experts.251.w3", "model.layers.34.block_sparse_moe.experts.252.w3", "model.layers.34.block_sparse_moe.experts.253.w3", "model.layers.34.block_sparse_moe.experts.254.w3", "model.layers.34.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0012936085462569302, "dbits": 2415919104 } ] }, { "idx": 174, "layers": [ "model.layers.34.block_sparse_moe.experts.0.w2", "model.layers.34.block_sparse_moe.experts.1.w2", "model.layers.34.block_sparse_moe.experts.2.w2", "model.layers.34.block_sparse_moe.experts.3.w2", "model.layers.34.block_sparse_moe.experts.4.w2", "model.layers.34.block_sparse_moe.experts.5.w2", "model.layers.34.block_sparse_moe.experts.6.w2", "model.layers.34.block_sparse_moe.experts.7.w2", "model.layers.34.block_sparse_moe.experts.8.w2", "model.layers.34.block_sparse_moe.experts.9.w2", "model.layers.34.block_sparse_moe.experts.10.w2", "model.layers.34.block_sparse_moe.experts.11.w2", "model.layers.34.block_sparse_moe.experts.12.w2", "model.layers.34.block_sparse_moe.experts.13.w2", "model.layers.34.block_sparse_moe.experts.14.w2", "model.layers.34.block_sparse_moe.experts.15.w2", "model.layers.34.block_sparse_moe.experts.16.w2", "model.layers.34.block_sparse_moe.experts.17.w2", "model.layers.34.block_sparse_moe.experts.18.w2", "model.layers.34.block_sparse_moe.experts.19.w2", "model.layers.34.block_sparse_moe.experts.20.w2", "model.layers.34.block_sparse_moe.experts.21.w2", "model.layers.34.block_sparse_moe.experts.22.w2", "model.layers.34.block_sparse_moe.experts.23.w2", "model.layers.34.block_sparse_moe.experts.24.w2", "model.layers.34.block_sparse_moe.experts.25.w2", "model.layers.34.block_sparse_moe.experts.26.w2", "model.layers.34.block_sparse_moe.experts.27.w2", "model.layers.34.block_sparse_moe.experts.28.w2", "model.layers.34.block_sparse_moe.experts.29.w2", "model.layers.34.block_sparse_moe.experts.30.w2", "model.layers.34.block_sparse_moe.experts.31.w2", "model.layers.34.block_sparse_moe.experts.32.w2", "model.layers.34.block_sparse_moe.experts.33.w2", "model.layers.34.block_sparse_moe.experts.34.w2", "model.layers.34.block_sparse_moe.experts.35.w2", "model.layers.34.block_sparse_moe.experts.36.w2", "model.layers.34.block_sparse_moe.experts.37.w2", "model.layers.34.block_sparse_moe.experts.38.w2", "model.layers.34.block_sparse_moe.experts.39.w2", "model.layers.34.block_sparse_moe.experts.40.w2", "model.layers.34.block_sparse_moe.experts.41.w2", "model.layers.34.block_sparse_moe.experts.42.w2", "model.layers.34.block_sparse_moe.experts.43.w2", "model.layers.34.block_sparse_moe.experts.44.w2", "model.layers.34.block_sparse_moe.experts.45.w2", "model.layers.34.block_sparse_moe.experts.46.w2", "model.layers.34.block_sparse_moe.experts.47.w2", "model.layers.34.block_sparse_moe.experts.48.w2", "model.layers.34.block_sparse_moe.experts.49.w2", "model.layers.34.block_sparse_moe.experts.50.w2", "model.layers.34.block_sparse_moe.experts.51.w2", "model.layers.34.block_sparse_moe.experts.52.w2", "model.layers.34.block_sparse_moe.experts.53.w2", "model.layers.34.block_sparse_moe.experts.54.w2", "model.layers.34.block_sparse_moe.experts.55.w2", "model.layers.34.block_sparse_moe.experts.56.w2", "model.layers.34.block_sparse_moe.experts.57.w2", "model.layers.34.block_sparse_moe.experts.58.w2", "model.layers.34.block_sparse_moe.experts.59.w2", "model.layers.34.block_sparse_moe.experts.60.w2", "model.layers.34.block_sparse_moe.experts.61.w2", "model.layers.34.block_sparse_moe.experts.62.w2", "model.layers.34.block_sparse_moe.experts.63.w2", "model.layers.34.block_sparse_moe.experts.64.w2", "model.layers.34.block_sparse_moe.experts.65.w2", "model.layers.34.block_sparse_moe.experts.66.w2", "model.layers.34.block_sparse_moe.experts.67.w2", "model.layers.34.block_sparse_moe.experts.68.w2", "model.layers.34.block_sparse_moe.experts.69.w2", "model.layers.34.block_sparse_moe.experts.70.w2", "model.layers.34.block_sparse_moe.experts.71.w2", "model.layers.34.block_sparse_moe.experts.72.w2", "model.layers.34.block_sparse_moe.experts.73.w2", "model.layers.34.block_sparse_moe.experts.74.w2", "model.layers.34.block_sparse_moe.experts.75.w2", "model.layers.34.block_sparse_moe.experts.76.w2", "model.layers.34.block_sparse_moe.experts.77.w2", "model.layers.34.block_sparse_moe.experts.78.w2", "model.layers.34.block_sparse_moe.experts.79.w2", "model.layers.34.block_sparse_moe.experts.80.w2", "model.layers.34.block_sparse_moe.experts.81.w2", "model.layers.34.block_sparse_moe.experts.82.w2", "model.layers.34.block_sparse_moe.experts.83.w2", "model.layers.34.block_sparse_moe.experts.84.w2", "model.layers.34.block_sparse_moe.experts.85.w2", "model.layers.34.block_sparse_moe.experts.86.w2", "model.layers.34.block_sparse_moe.experts.87.w2", "model.layers.34.block_sparse_moe.experts.88.w2", "model.layers.34.block_sparse_moe.experts.89.w2", "model.layers.34.block_sparse_moe.experts.90.w2", "model.layers.34.block_sparse_moe.experts.91.w2", "model.layers.34.block_sparse_moe.experts.92.w2", "model.layers.34.block_sparse_moe.experts.93.w2", "model.layers.34.block_sparse_moe.experts.94.w2", "model.layers.34.block_sparse_moe.experts.95.w2", "model.layers.34.block_sparse_moe.experts.96.w2", "model.layers.34.block_sparse_moe.experts.97.w2", "model.layers.34.block_sparse_moe.experts.98.w2", "model.layers.34.block_sparse_moe.experts.99.w2", "model.layers.34.block_sparse_moe.experts.100.w2", "model.layers.34.block_sparse_moe.experts.101.w2", "model.layers.34.block_sparse_moe.experts.102.w2", "model.layers.34.block_sparse_moe.experts.103.w2", "model.layers.34.block_sparse_moe.experts.104.w2", "model.layers.34.block_sparse_moe.experts.105.w2", "model.layers.34.block_sparse_moe.experts.106.w2", "model.layers.34.block_sparse_moe.experts.107.w2", "model.layers.34.block_sparse_moe.experts.108.w2", "model.layers.34.block_sparse_moe.experts.109.w2", "model.layers.34.block_sparse_moe.experts.110.w2", "model.layers.34.block_sparse_moe.experts.111.w2", "model.layers.34.block_sparse_moe.experts.112.w2", "model.layers.34.block_sparse_moe.experts.113.w2", "model.layers.34.block_sparse_moe.experts.114.w2", "model.layers.34.block_sparse_moe.experts.115.w2", "model.layers.34.block_sparse_moe.experts.116.w2", "model.layers.34.block_sparse_moe.experts.117.w2", "model.layers.34.block_sparse_moe.experts.118.w2", "model.layers.34.block_sparse_moe.experts.119.w2", "model.layers.34.block_sparse_moe.experts.120.w2", "model.layers.34.block_sparse_moe.experts.121.w2", "model.layers.34.block_sparse_moe.experts.122.w2", "model.layers.34.block_sparse_moe.experts.123.w2", "model.layers.34.block_sparse_moe.experts.124.w2", "model.layers.34.block_sparse_moe.experts.125.w2", "model.layers.34.block_sparse_moe.experts.126.w2", "model.layers.34.block_sparse_moe.experts.127.w2", "model.layers.34.block_sparse_moe.experts.128.w2", "model.layers.34.block_sparse_moe.experts.129.w2", "model.layers.34.block_sparse_moe.experts.130.w2", "model.layers.34.block_sparse_moe.experts.131.w2", "model.layers.34.block_sparse_moe.experts.132.w2", "model.layers.34.block_sparse_moe.experts.133.w2", "model.layers.34.block_sparse_moe.experts.134.w2", "model.layers.34.block_sparse_moe.experts.135.w2", "model.layers.34.block_sparse_moe.experts.136.w2", "model.layers.34.block_sparse_moe.experts.137.w2", "model.layers.34.block_sparse_moe.experts.138.w2", "model.layers.34.block_sparse_moe.experts.139.w2", "model.layers.34.block_sparse_moe.experts.140.w2", "model.layers.34.block_sparse_moe.experts.141.w2", "model.layers.34.block_sparse_moe.experts.142.w2", "model.layers.34.block_sparse_moe.experts.143.w2", "model.layers.34.block_sparse_moe.experts.144.w2", "model.layers.34.block_sparse_moe.experts.145.w2", "model.layers.34.block_sparse_moe.experts.146.w2", "model.layers.34.block_sparse_moe.experts.147.w2", "model.layers.34.block_sparse_moe.experts.148.w2", "model.layers.34.block_sparse_moe.experts.149.w2", "model.layers.34.block_sparse_moe.experts.150.w2", "model.layers.34.block_sparse_moe.experts.151.w2", "model.layers.34.block_sparse_moe.experts.152.w2", "model.layers.34.block_sparse_moe.experts.153.w2", "model.layers.34.block_sparse_moe.experts.154.w2", "model.layers.34.block_sparse_moe.experts.155.w2", "model.layers.34.block_sparse_moe.experts.156.w2", "model.layers.34.block_sparse_moe.experts.157.w2", "model.layers.34.block_sparse_moe.experts.158.w2", "model.layers.34.block_sparse_moe.experts.159.w2", "model.layers.34.block_sparse_moe.experts.160.w2", "model.layers.34.block_sparse_moe.experts.161.w2", "model.layers.34.block_sparse_moe.experts.162.w2", "model.layers.34.block_sparse_moe.experts.163.w2", "model.layers.34.block_sparse_moe.experts.164.w2", "model.layers.34.block_sparse_moe.experts.165.w2", "model.layers.34.block_sparse_moe.experts.166.w2", "model.layers.34.block_sparse_moe.experts.167.w2", "model.layers.34.block_sparse_moe.experts.168.w2", "model.layers.34.block_sparse_moe.experts.169.w2", "model.layers.34.block_sparse_moe.experts.170.w2", "model.layers.34.block_sparse_moe.experts.171.w2", "model.layers.34.block_sparse_moe.experts.172.w2", "model.layers.34.block_sparse_moe.experts.173.w2", "model.layers.34.block_sparse_moe.experts.174.w2", "model.layers.34.block_sparse_moe.experts.175.w2", "model.layers.34.block_sparse_moe.experts.176.w2", "model.layers.34.block_sparse_moe.experts.177.w2", "model.layers.34.block_sparse_moe.experts.178.w2", "model.layers.34.block_sparse_moe.experts.179.w2", "model.layers.34.block_sparse_moe.experts.180.w2", "model.layers.34.block_sparse_moe.experts.181.w2", "model.layers.34.block_sparse_moe.experts.182.w2", "model.layers.34.block_sparse_moe.experts.183.w2", "model.layers.34.block_sparse_moe.experts.184.w2", "model.layers.34.block_sparse_moe.experts.185.w2", "model.layers.34.block_sparse_moe.experts.186.w2", "model.layers.34.block_sparse_moe.experts.187.w2", "model.layers.34.block_sparse_moe.experts.188.w2", "model.layers.34.block_sparse_moe.experts.189.w2", "model.layers.34.block_sparse_moe.experts.190.w2", "model.layers.34.block_sparse_moe.experts.191.w2", "model.layers.34.block_sparse_moe.experts.192.w2", "model.layers.34.block_sparse_moe.experts.193.w2", "model.layers.34.block_sparse_moe.experts.194.w2", "model.layers.34.block_sparse_moe.experts.195.w2", "model.layers.34.block_sparse_moe.experts.196.w2", "model.layers.34.block_sparse_moe.experts.197.w2", "model.layers.34.block_sparse_moe.experts.198.w2", "model.layers.34.block_sparse_moe.experts.199.w2", "model.layers.34.block_sparse_moe.experts.200.w2", "model.layers.34.block_sparse_moe.experts.201.w2", "model.layers.34.block_sparse_moe.experts.202.w2", "model.layers.34.block_sparse_moe.experts.203.w2", "model.layers.34.block_sparse_moe.experts.204.w2", "model.layers.34.block_sparse_moe.experts.205.w2", "model.layers.34.block_sparse_moe.experts.206.w2", "model.layers.34.block_sparse_moe.experts.207.w2", "model.layers.34.block_sparse_moe.experts.208.w2", "model.layers.34.block_sparse_moe.experts.209.w2", "model.layers.34.block_sparse_moe.experts.210.w2", "model.layers.34.block_sparse_moe.experts.211.w2", "model.layers.34.block_sparse_moe.experts.212.w2", "model.layers.34.block_sparse_moe.experts.213.w2", "model.layers.34.block_sparse_moe.experts.214.w2", "model.layers.34.block_sparse_moe.experts.215.w2", "model.layers.34.block_sparse_moe.experts.216.w2", "model.layers.34.block_sparse_moe.experts.217.w2", "model.layers.34.block_sparse_moe.experts.218.w2", "model.layers.34.block_sparse_moe.experts.219.w2", "model.layers.34.block_sparse_moe.experts.220.w2", "model.layers.34.block_sparse_moe.experts.221.w2", "model.layers.34.block_sparse_moe.experts.222.w2", "model.layers.34.block_sparse_moe.experts.223.w2", "model.layers.34.block_sparse_moe.experts.224.w2", "model.layers.34.block_sparse_moe.experts.225.w2", "model.layers.34.block_sparse_moe.experts.226.w2", "model.layers.34.block_sparse_moe.experts.227.w2", "model.layers.34.block_sparse_moe.experts.228.w2", "model.layers.34.block_sparse_moe.experts.229.w2", "model.layers.34.block_sparse_moe.experts.230.w2", "model.layers.34.block_sparse_moe.experts.231.w2", "model.layers.34.block_sparse_moe.experts.232.w2", "model.layers.34.block_sparse_moe.experts.233.w2", "model.layers.34.block_sparse_moe.experts.234.w2", "model.layers.34.block_sparse_moe.experts.235.w2", "model.layers.34.block_sparse_moe.experts.236.w2", "model.layers.34.block_sparse_moe.experts.237.w2", "model.layers.34.block_sparse_moe.experts.238.w2", "model.layers.34.block_sparse_moe.experts.239.w2", "model.layers.34.block_sparse_moe.experts.240.w2", "model.layers.34.block_sparse_moe.experts.241.w2", "model.layers.34.block_sparse_moe.experts.242.w2", "model.layers.34.block_sparse_moe.experts.243.w2", "model.layers.34.block_sparse_moe.experts.244.w2", "model.layers.34.block_sparse_moe.experts.245.w2", "model.layers.34.block_sparse_moe.experts.246.w2", "model.layers.34.block_sparse_moe.experts.247.w2", "model.layers.34.block_sparse_moe.experts.248.w2", "model.layers.34.block_sparse_moe.experts.249.w2", "model.layers.34.block_sparse_moe.experts.250.w2", "model.layers.34.block_sparse_moe.experts.251.w2", "model.layers.34.block_sparse_moe.experts.252.w2", "model.layers.34.block_sparse_moe.experts.253.w2", "model.layers.34.block_sparse_moe.experts.254.w2", "model.layers.34.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0009909719228744507, "dbits": 1207959552 } ] }, { "idx": 175, "layers": [ "model.layers.35.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0013143867254257202, "dbits": 18874368 } ] }, { "idx": 176, "layers": [ "model.layers.35.self_attn.k_proj", "model.layers.35.self_attn.v_proj" ], "candidates": [ { "dkld": -0.013112545013427734, "dbits": 6291456 } ] }, { "idx": 177, "layers": [ "model.layers.35.self_attn.o_proj" ], "candidates": [ { "dkld": 0.004877826571464627, "dbits": 18874368 } ] }, { "idx": 178, "layers": [ "model.layers.35.block_sparse_moe.experts.0.w1", "model.layers.35.block_sparse_moe.experts.1.w1", "model.layers.35.block_sparse_moe.experts.2.w1", "model.layers.35.block_sparse_moe.experts.3.w1", "model.layers.35.block_sparse_moe.experts.4.w1", "model.layers.35.block_sparse_moe.experts.5.w1", "model.layers.35.block_sparse_moe.experts.6.w1", "model.layers.35.block_sparse_moe.experts.7.w1", "model.layers.35.block_sparse_moe.experts.8.w1", "model.layers.35.block_sparse_moe.experts.9.w1", "model.layers.35.block_sparse_moe.experts.10.w1", "model.layers.35.block_sparse_moe.experts.11.w1", "model.layers.35.block_sparse_moe.experts.12.w1", "model.layers.35.block_sparse_moe.experts.13.w1", "model.layers.35.block_sparse_moe.experts.14.w1", "model.layers.35.block_sparse_moe.experts.15.w1", "model.layers.35.block_sparse_moe.experts.16.w1", "model.layers.35.block_sparse_moe.experts.17.w1", "model.layers.35.block_sparse_moe.experts.18.w1", "model.layers.35.block_sparse_moe.experts.19.w1", "model.layers.35.block_sparse_moe.experts.20.w1", "model.layers.35.block_sparse_moe.experts.21.w1", "model.layers.35.block_sparse_moe.experts.22.w1", "model.layers.35.block_sparse_moe.experts.23.w1", "model.layers.35.block_sparse_moe.experts.24.w1", "model.layers.35.block_sparse_moe.experts.25.w1", "model.layers.35.block_sparse_moe.experts.26.w1", "model.layers.35.block_sparse_moe.experts.27.w1", "model.layers.35.block_sparse_moe.experts.28.w1", "model.layers.35.block_sparse_moe.experts.29.w1", "model.layers.35.block_sparse_moe.experts.30.w1", "model.layers.35.block_sparse_moe.experts.31.w1", "model.layers.35.block_sparse_moe.experts.32.w1", "model.layers.35.block_sparse_moe.experts.33.w1", "model.layers.35.block_sparse_moe.experts.34.w1", "model.layers.35.block_sparse_moe.experts.35.w1", "model.layers.35.block_sparse_moe.experts.36.w1", "model.layers.35.block_sparse_moe.experts.37.w1", "model.layers.35.block_sparse_moe.experts.38.w1", "model.layers.35.block_sparse_moe.experts.39.w1", "model.layers.35.block_sparse_moe.experts.40.w1", "model.layers.35.block_sparse_moe.experts.41.w1", "model.layers.35.block_sparse_moe.experts.42.w1", "model.layers.35.block_sparse_moe.experts.43.w1", "model.layers.35.block_sparse_moe.experts.44.w1", "model.layers.35.block_sparse_moe.experts.45.w1", "model.layers.35.block_sparse_moe.experts.46.w1", "model.layers.35.block_sparse_moe.experts.47.w1", "model.layers.35.block_sparse_moe.experts.48.w1", "model.layers.35.block_sparse_moe.experts.49.w1", "model.layers.35.block_sparse_moe.experts.50.w1", "model.layers.35.block_sparse_moe.experts.51.w1", "model.layers.35.block_sparse_moe.experts.52.w1", "model.layers.35.block_sparse_moe.experts.53.w1", "model.layers.35.block_sparse_moe.experts.54.w1", "model.layers.35.block_sparse_moe.experts.55.w1", "model.layers.35.block_sparse_moe.experts.56.w1", "model.layers.35.block_sparse_moe.experts.57.w1", "model.layers.35.block_sparse_moe.experts.58.w1", "model.layers.35.block_sparse_moe.experts.59.w1", "model.layers.35.block_sparse_moe.experts.60.w1", "model.layers.35.block_sparse_moe.experts.61.w1", "model.layers.35.block_sparse_moe.experts.62.w1", "model.layers.35.block_sparse_moe.experts.63.w1", "model.layers.35.block_sparse_moe.experts.64.w1", "model.layers.35.block_sparse_moe.experts.65.w1", "model.layers.35.block_sparse_moe.experts.66.w1", "model.layers.35.block_sparse_moe.experts.67.w1", "model.layers.35.block_sparse_moe.experts.68.w1", "model.layers.35.block_sparse_moe.experts.69.w1", "model.layers.35.block_sparse_moe.experts.70.w1", "model.layers.35.block_sparse_moe.experts.71.w1", "model.layers.35.block_sparse_moe.experts.72.w1", "model.layers.35.block_sparse_moe.experts.73.w1", "model.layers.35.block_sparse_moe.experts.74.w1", "model.layers.35.block_sparse_moe.experts.75.w1", "model.layers.35.block_sparse_moe.experts.76.w1", "model.layers.35.block_sparse_moe.experts.77.w1", "model.layers.35.block_sparse_moe.experts.78.w1", "model.layers.35.block_sparse_moe.experts.79.w1", "model.layers.35.block_sparse_moe.experts.80.w1", "model.layers.35.block_sparse_moe.experts.81.w1", "model.layers.35.block_sparse_moe.experts.82.w1", "model.layers.35.block_sparse_moe.experts.83.w1", "model.layers.35.block_sparse_moe.experts.84.w1", "model.layers.35.block_sparse_moe.experts.85.w1", "model.layers.35.block_sparse_moe.experts.86.w1", "model.layers.35.block_sparse_moe.experts.87.w1", "model.layers.35.block_sparse_moe.experts.88.w1", "model.layers.35.block_sparse_moe.experts.89.w1", "model.layers.35.block_sparse_moe.experts.90.w1", "model.layers.35.block_sparse_moe.experts.91.w1", "model.layers.35.block_sparse_moe.experts.92.w1", "model.layers.35.block_sparse_moe.experts.93.w1", "model.layers.35.block_sparse_moe.experts.94.w1", "model.layers.35.block_sparse_moe.experts.95.w1", "model.layers.35.block_sparse_moe.experts.96.w1", "model.layers.35.block_sparse_moe.experts.97.w1", "model.layers.35.block_sparse_moe.experts.98.w1", "model.layers.35.block_sparse_moe.experts.99.w1", "model.layers.35.block_sparse_moe.experts.100.w1", "model.layers.35.block_sparse_moe.experts.101.w1", "model.layers.35.block_sparse_moe.experts.102.w1", "model.layers.35.block_sparse_moe.experts.103.w1", "model.layers.35.block_sparse_moe.experts.104.w1", "model.layers.35.block_sparse_moe.experts.105.w1", "model.layers.35.block_sparse_moe.experts.106.w1", "model.layers.35.block_sparse_moe.experts.107.w1", "model.layers.35.block_sparse_moe.experts.108.w1", "model.layers.35.block_sparse_moe.experts.109.w1", "model.layers.35.block_sparse_moe.experts.110.w1", "model.layers.35.block_sparse_moe.experts.111.w1", "model.layers.35.block_sparse_moe.experts.112.w1", "model.layers.35.block_sparse_moe.experts.113.w1", "model.layers.35.block_sparse_moe.experts.114.w1", "model.layers.35.block_sparse_moe.experts.115.w1", "model.layers.35.block_sparse_moe.experts.116.w1", "model.layers.35.block_sparse_moe.experts.117.w1", "model.layers.35.block_sparse_moe.experts.118.w1", "model.layers.35.block_sparse_moe.experts.119.w1", "model.layers.35.block_sparse_moe.experts.120.w1", "model.layers.35.block_sparse_moe.experts.121.w1", "model.layers.35.block_sparse_moe.experts.122.w1", "model.layers.35.block_sparse_moe.experts.123.w1", "model.layers.35.block_sparse_moe.experts.124.w1", "model.layers.35.block_sparse_moe.experts.125.w1", "model.layers.35.block_sparse_moe.experts.126.w1", "model.layers.35.block_sparse_moe.experts.127.w1", "model.layers.35.block_sparse_moe.experts.128.w1", "model.layers.35.block_sparse_moe.experts.129.w1", "model.layers.35.block_sparse_moe.experts.130.w1", "model.layers.35.block_sparse_moe.experts.131.w1", "model.layers.35.block_sparse_moe.experts.132.w1", "model.layers.35.block_sparse_moe.experts.133.w1", "model.layers.35.block_sparse_moe.experts.134.w1", "model.layers.35.block_sparse_moe.experts.135.w1", "model.layers.35.block_sparse_moe.experts.136.w1", "model.layers.35.block_sparse_moe.experts.137.w1", "model.layers.35.block_sparse_moe.experts.138.w1", "model.layers.35.block_sparse_moe.experts.139.w1", "model.layers.35.block_sparse_moe.experts.140.w1", "model.layers.35.block_sparse_moe.experts.141.w1", "model.layers.35.block_sparse_moe.experts.142.w1", "model.layers.35.block_sparse_moe.experts.143.w1", "model.layers.35.block_sparse_moe.experts.144.w1", "model.layers.35.block_sparse_moe.experts.145.w1", "model.layers.35.block_sparse_moe.experts.146.w1", "model.layers.35.block_sparse_moe.experts.147.w1", "model.layers.35.block_sparse_moe.experts.148.w1", "model.layers.35.block_sparse_moe.experts.149.w1", "model.layers.35.block_sparse_moe.experts.150.w1", "model.layers.35.block_sparse_moe.experts.151.w1", "model.layers.35.block_sparse_moe.experts.152.w1", "model.layers.35.block_sparse_moe.experts.153.w1", "model.layers.35.block_sparse_moe.experts.154.w1", "model.layers.35.block_sparse_moe.experts.155.w1", "model.layers.35.block_sparse_moe.experts.156.w1", "model.layers.35.block_sparse_moe.experts.157.w1", "model.layers.35.block_sparse_moe.experts.158.w1", "model.layers.35.block_sparse_moe.experts.159.w1", "model.layers.35.block_sparse_moe.experts.160.w1", "model.layers.35.block_sparse_moe.experts.161.w1", "model.layers.35.block_sparse_moe.experts.162.w1", "model.layers.35.block_sparse_moe.experts.163.w1", "model.layers.35.block_sparse_moe.experts.164.w1", "model.layers.35.block_sparse_moe.experts.165.w1", "model.layers.35.block_sparse_moe.experts.166.w1", "model.layers.35.block_sparse_moe.experts.167.w1", "model.layers.35.block_sparse_moe.experts.168.w1", "model.layers.35.block_sparse_moe.experts.169.w1", "model.layers.35.block_sparse_moe.experts.170.w1", "model.layers.35.block_sparse_moe.experts.171.w1", "model.layers.35.block_sparse_moe.experts.172.w1", "model.layers.35.block_sparse_moe.experts.173.w1", "model.layers.35.block_sparse_moe.experts.174.w1", "model.layers.35.block_sparse_moe.experts.175.w1", "model.layers.35.block_sparse_moe.experts.176.w1", "model.layers.35.block_sparse_moe.experts.177.w1", "model.layers.35.block_sparse_moe.experts.178.w1", "model.layers.35.block_sparse_moe.experts.179.w1", "model.layers.35.block_sparse_moe.experts.180.w1", "model.layers.35.block_sparse_moe.experts.181.w1", "model.layers.35.block_sparse_moe.experts.182.w1", "model.layers.35.block_sparse_moe.experts.183.w1", "model.layers.35.block_sparse_moe.experts.184.w1", "model.layers.35.block_sparse_moe.experts.185.w1", "model.layers.35.block_sparse_moe.experts.186.w1", "model.layers.35.block_sparse_moe.experts.187.w1", "model.layers.35.block_sparse_moe.experts.188.w1", "model.layers.35.block_sparse_moe.experts.189.w1", "model.layers.35.block_sparse_moe.experts.190.w1", "model.layers.35.block_sparse_moe.experts.191.w1", "model.layers.35.block_sparse_moe.experts.192.w1", "model.layers.35.block_sparse_moe.experts.193.w1", "model.layers.35.block_sparse_moe.experts.194.w1", "model.layers.35.block_sparse_moe.experts.195.w1", "model.layers.35.block_sparse_moe.experts.196.w1", "model.layers.35.block_sparse_moe.experts.197.w1", "model.layers.35.block_sparse_moe.experts.198.w1", "model.layers.35.block_sparse_moe.experts.199.w1", "model.layers.35.block_sparse_moe.experts.200.w1", "model.layers.35.block_sparse_moe.experts.201.w1", "model.layers.35.block_sparse_moe.experts.202.w1", "model.layers.35.block_sparse_moe.experts.203.w1", "model.layers.35.block_sparse_moe.experts.204.w1", "model.layers.35.block_sparse_moe.experts.205.w1", "model.layers.35.block_sparse_moe.experts.206.w1", "model.layers.35.block_sparse_moe.experts.207.w1", "model.layers.35.block_sparse_moe.experts.208.w1", "model.layers.35.block_sparse_moe.experts.209.w1", "model.layers.35.block_sparse_moe.experts.210.w1", "model.layers.35.block_sparse_moe.experts.211.w1", "model.layers.35.block_sparse_moe.experts.212.w1", "model.layers.35.block_sparse_moe.experts.213.w1", "model.layers.35.block_sparse_moe.experts.214.w1", "model.layers.35.block_sparse_moe.experts.215.w1", "model.layers.35.block_sparse_moe.experts.216.w1", "model.layers.35.block_sparse_moe.experts.217.w1", "model.layers.35.block_sparse_moe.experts.218.w1", "model.layers.35.block_sparse_moe.experts.219.w1", "model.layers.35.block_sparse_moe.experts.220.w1", "model.layers.35.block_sparse_moe.experts.221.w1", "model.layers.35.block_sparse_moe.experts.222.w1", "model.layers.35.block_sparse_moe.experts.223.w1", "model.layers.35.block_sparse_moe.experts.224.w1", "model.layers.35.block_sparse_moe.experts.225.w1", "model.layers.35.block_sparse_moe.experts.226.w1", "model.layers.35.block_sparse_moe.experts.227.w1", "model.layers.35.block_sparse_moe.experts.228.w1", "model.layers.35.block_sparse_moe.experts.229.w1", "model.layers.35.block_sparse_moe.experts.230.w1", "model.layers.35.block_sparse_moe.experts.231.w1", "model.layers.35.block_sparse_moe.experts.232.w1", "model.layers.35.block_sparse_moe.experts.233.w1", "model.layers.35.block_sparse_moe.experts.234.w1", "model.layers.35.block_sparse_moe.experts.235.w1", "model.layers.35.block_sparse_moe.experts.236.w1", "model.layers.35.block_sparse_moe.experts.237.w1", "model.layers.35.block_sparse_moe.experts.238.w1", "model.layers.35.block_sparse_moe.experts.239.w1", "model.layers.35.block_sparse_moe.experts.240.w1", "model.layers.35.block_sparse_moe.experts.241.w1", "model.layers.35.block_sparse_moe.experts.242.w1", "model.layers.35.block_sparse_moe.experts.243.w1", "model.layers.35.block_sparse_moe.experts.244.w1", "model.layers.35.block_sparse_moe.experts.245.w1", "model.layers.35.block_sparse_moe.experts.246.w1", "model.layers.35.block_sparse_moe.experts.247.w1", "model.layers.35.block_sparse_moe.experts.248.w1", "model.layers.35.block_sparse_moe.experts.249.w1", "model.layers.35.block_sparse_moe.experts.250.w1", "model.layers.35.block_sparse_moe.experts.251.w1", "model.layers.35.block_sparse_moe.experts.252.w1", "model.layers.35.block_sparse_moe.experts.253.w1", "model.layers.35.block_sparse_moe.experts.254.w1", "model.layers.35.block_sparse_moe.experts.255.w1", "model.layers.35.block_sparse_moe.experts.0.w3", "model.layers.35.block_sparse_moe.experts.1.w3", "model.layers.35.block_sparse_moe.experts.2.w3", "model.layers.35.block_sparse_moe.experts.3.w3", "model.layers.35.block_sparse_moe.experts.4.w3", "model.layers.35.block_sparse_moe.experts.5.w3", "model.layers.35.block_sparse_moe.experts.6.w3", "model.layers.35.block_sparse_moe.experts.7.w3", "model.layers.35.block_sparse_moe.experts.8.w3", "model.layers.35.block_sparse_moe.experts.9.w3", "model.layers.35.block_sparse_moe.experts.10.w3", "model.layers.35.block_sparse_moe.experts.11.w3", "model.layers.35.block_sparse_moe.experts.12.w3", "model.layers.35.block_sparse_moe.experts.13.w3", "model.layers.35.block_sparse_moe.experts.14.w3", "model.layers.35.block_sparse_moe.experts.15.w3", "model.layers.35.block_sparse_moe.experts.16.w3", "model.layers.35.block_sparse_moe.experts.17.w3", "model.layers.35.block_sparse_moe.experts.18.w3", "model.layers.35.block_sparse_moe.experts.19.w3", "model.layers.35.block_sparse_moe.experts.20.w3", "model.layers.35.block_sparse_moe.experts.21.w3", "model.layers.35.block_sparse_moe.experts.22.w3", "model.layers.35.block_sparse_moe.experts.23.w3", "model.layers.35.block_sparse_moe.experts.24.w3", "model.layers.35.block_sparse_moe.experts.25.w3", "model.layers.35.block_sparse_moe.experts.26.w3", "model.layers.35.block_sparse_moe.experts.27.w3", "model.layers.35.block_sparse_moe.experts.28.w3", "model.layers.35.block_sparse_moe.experts.29.w3", "model.layers.35.block_sparse_moe.experts.30.w3", "model.layers.35.block_sparse_moe.experts.31.w3", "model.layers.35.block_sparse_moe.experts.32.w3", "model.layers.35.block_sparse_moe.experts.33.w3", "model.layers.35.block_sparse_moe.experts.34.w3", "model.layers.35.block_sparse_moe.experts.35.w3", "model.layers.35.block_sparse_moe.experts.36.w3", "model.layers.35.block_sparse_moe.experts.37.w3", "model.layers.35.block_sparse_moe.experts.38.w3", "model.layers.35.block_sparse_moe.experts.39.w3", "model.layers.35.block_sparse_moe.experts.40.w3", "model.layers.35.block_sparse_moe.experts.41.w3", "model.layers.35.block_sparse_moe.experts.42.w3", "model.layers.35.block_sparse_moe.experts.43.w3", "model.layers.35.block_sparse_moe.experts.44.w3", "model.layers.35.block_sparse_moe.experts.45.w3", "model.layers.35.block_sparse_moe.experts.46.w3", "model.layers.35.block_sparse_moe.experts.47.w3", "model.layers.35.block_sparse_moe.experts.48.w3", "model.layers.35.block_sparse_moe.experts.49.w3", "model.layers.35.block_sparse_moe.experts.50.w3", "model.layers.35.block_sparse_moe.experts.51.w3", "model.layers.35.block_sparse_moe.experts.52.w3", "model.layers.35.block_sparse_moe.experts.53.w3", "model.layers.35.block_sparse_moe.experts.54.w3", "model.layers.35.block_sparse_moe.experts.55.w3", "model.layers.35.block_sparse_moe.experts.56.w3", "model.layers.35.block_sparse_moe.experts.57.w3", "model.layers.35.block_sparse_moe.experts.58.w3", "model.layers.35.block_sparse_moe.experts.59.w3", "model.layers.35.block_sparse_moe.experts.60.w3", "model.layers.35.block_sparse_moe.experts.61.w3", "model.layers.35.block_sparse_moe.experts.62.w3", "model.layers.35.block_sparse_moe.experts.63.w3", "model.layers.35.block_sparse_moe.experts.64.w3", "model.layers.35.block_sparse_moe.experts.65.w3", "model.layers.35.block_sparse_moe.experts.66.w3", "model.layers.35.block_sparse_moe.experts.67.w3", "model.layers.35.block_sparse_moe.experts.68.w3", "model.layers.35.block_sparse_moe.experts.69.w3", "model.layers.35.block_sparse_moe.experts.70.w3", "model.layers.35.block_sparse_moe.experts.71.w3", "model.layers.35.block_sparse_moe.experts.72.w3", "model.layers.35.block_sparse_moe.experts.73.w3", "model.layers.35.block_sparse_moe.experts.74.w3", "model.layers.35.block_sparse_moe.experts.75.w3", "model.layers.35.block_sparse_moe.experts.76.w3", "model.layers.35.block_sparse_moe.experts.77.w3", "model.layers.35.block_sparse_moe.experts.78.w3", "model.layers.35.block_sparse_moe.experts.79.w3", "model.layers.35.block_sparse_moe.experts.80.w3", "model.layers.35.block_sparse_moe.experts.81.w3", "model.layers.35.block_sparse_moe.experts.82.w3", "model.layers.35.block_sparse_moe.experts.83.w3", "model.layers.35.block_sparse_moe.experts.84.w3", "model.layers.35.block_sparse_moe.experts.85.w3", "model.layers.35.block_sparse_moe.experts.86.w3", "model.layers.35.block_sparse_moe.experts.87.w3", "model.layers.35.block_sparse_moe.experts.88.w3", "model.layers.35.block_sparse_moe.experts.89.w3", "model.layers.35.block_sparse_moe.experts.90.w3", "model.layers.35.block_sparse_moe.experts.91.w3", "model.layers.35.block_sparse_moe.experts.92.w3", "model.layers.35.block_sparse_moe.experts.93.w3", "model.layers.35.block_sparse_moe.experts.94.w3", "model.layers.35.block_sparse_moe.experts.95.w3", "model.layers.35.block_sparse_moe.experts.96.w3", "model.layers.35.block_sparse_moe.experts.97.w3", "model.layers.35.block_sparse_moe.experts.98.w3", "model.layers.35.block_sparse_moe.experts.99.w3", "model.layers.35.block_sparse_moe.experts.100.w3", "model.layers.35.block_sparse_moe.experts.101.w3", "model.layers.35.block_sparse_moe.experts.102.w3", "model.layers.35.block_sparse_moe.experts.103.w3", "model.layers.35.block_sparse_moe.experts.104.w3", "model.layers.35.block_sparse_moe.experts.105.w3", "model.layers.35.block_sparse_moe.experts.106.w3", "model.layers.35.block_sparse_moe.experts.107.w3", "model.layers.35.block_sparse_moe.experts.108.w3", "model.layers.35.block_sparse_moe.experts.109.w3", "model.layers.35.block_sparse_moe.experts.110.w3", "model.layers.35.block_sparse_moe.experts.111.w3", "model.layers.35.block_sparse_moe.experts.112.w3", "model.layers.35.block_sparse_moe.experts.113.w3", "model.layers.35.block_sparse_moe.experts.114.w3", "model.layers.35.block_sparse_moe.experts.115.w3", "model.layers.35.block_sparse_moe.experts.116.w3", "model.layers.35.block_sparse_moe.experts.117.w3", "model.layers.35.block_sparse_moe.experts.118.w3", "model.layers.35.block_sparse_moe.experts.119.w3", "model.layers.35.block_sparse_moe.experts.120.w3", "model.layers.35.block_sparse_moe.experts.121.w3", "model.layers.35.block_sparse_moe.experts.122.w3", "model.layers.35.block_sparse_moe.experts.123.w3", "model.layers.35.block_sparse_moe.experts.124.w3", "model.layers.35.block_sparse_moe.experts.125.w3", "model.layers.35.block_sparse_moe.experts.126.w3", "model.layers.35.block_sparse_moe.experts.127.w3", "model.layers.35.block_sparse_moe.experts.128.w3", "model.layers.35.block_sparse_moe.experts.129.w3", "model.layers.35.block_sparse_moe.experts.130.w3", "model.layers.35.block_sparse_moe.experts.131.w3", "model.layers.35.block_sparse_moe.experts.132.w3", "model.layers.35.block_sparse_moe.experts.133.w3", "model.layers.35.block_sparse_moe.experts.134.w3", "model.layers.35.block_sparse_moe.experts.135.w3", "model.layers.35.block_sparse_moe.experts.136.w3", "model.layers.35.block_sparse_moe.experts.137.w3", "model.layers.35.block_sparse_moe.experts.138.w3", "model.layers.35.block_sparse_moe.experts.139.w3", "model.layers.35.block_sparse_moe.experts.140.w3", "model.layers.35.block_sparse_moe.experts.141.w3", "model.layers.35.block_sparse_moe.experts.142.w3", "model.layers.35.block_sparse_moe.experts.143.w3", "model.layers.35.block_sparse_moe.experts.144.w3", "model.layers.35.block_sparse_moe.experts.145.w3", "model.layers.35.block_sparse_moe.experts.146.w3", "model.layers.35.block_sparse_moe.experts.147.w3", "model.layers.35.block_sparse_moe.experts.148.w3", "model.layers.35.block_sparse_moe.experts.149.w3", "model.layers.35.block_sparse_moe.experts.150.w3", "model.layers.35.block_sparse_moe.experts.151.w3", "model.layers.35.block_sparse_moe.experts.152.w3", "model.layers.35.block_sparse_moe.experts.153.w3", "model.layers.35.block_sparse_moe.experts.154.w3", "model.layers.35.block_sparse_moe.experts.155.w3", "model.layers.35.block_sparse_moe.experts.156.w3", "model.layers.35.block_sparse_moe.experts.157.w3", "model.layers.35.block_sparse_moe.experts.158.w3", "model.layers.35.block_sparse_moe.experts.159.w3", "model.layers.35.block_sparse_moe.experts.160.w3", "model.layers.35.block_sparse_moe.experts.161.w3", "model.layers.35.block_sparse_moe.experts.162.w3", "model.layers.35.block_sparse_moe.experts.163.w3", "model.layers.35.block_sparse_moe.experts.164.w3", "model.layers.35.block_sparse_moe.experts.165.w3", "model.layers.35.block_sparse_moe.experts.166.w3", "model.layers.35.block_sparse_moe.experts.167.w3", "model.layers.35.block_sparse_moe.experts.168.w3", "model.layers.35.block_sparse_moe.experts.169.w3", "model.layers.35.block_sparse_moe.experts.170.w3", "model.layers.35.block_sparse_moe.experts.171.w3", "model.layers.35.block_sparse_moe.experts.172.w3", "model.layers.35.block_sparse_moe.experts.173.w3", "model.layers.35.block_sparse_moe.experts.174.w3", "model.layers.35.block_sparse_moe.experts.175.w3", "model.layers.35.block_sparse_moe.experts.176.w3", "model.layers.35.block_sparse_moe.experts.177.w3", "model.layers.35.block_sparse_moe.experts.178.w3", "model.layers.35.block_sparse_moe.experts.179.w3", "model.layers.35.block_sparse_moe.experts.180.w3", "model.layers.35.block_sparse_moe.experts.181.w3", "model.layers.35.block_sparse_moe.experts.182.w3", "model.layers.35.block_sparse_moe.experts.183.w3", "model.layers.35.block_sparse_moe.experts.184.w3", "model.layers.35.block_sparse_moe.experts.185.w3", "model.layers.35.block_sparse_moe.experts.186.w3", "model.layers.35.block_sparse_moe.experts.187.w3", "model.layers.35.block_sparse_moe.experts.188.w3", "model.layers.35.block_sparse_moe.experts.189.w3", "model.layers.35.block_sparse_moe.experts.190.w3", "model.layers.35.block_sparse_moe.experts.191.w3", "model.layers.35.block_sparse_moe.experts.192.w3", "model.layers.35.block_sparse_moe.experts.193.w3", "model.layers.35.block_sparse_moe.experts.194.w3", "model.layers.35.block_sparse_moe.experts.195.w3", "model.layers.35.block_sparse_moe.experts.196.w3", "model.layers.35.block_sparse_moe.experts.197.w3", "model.layers.35.block_sparse_moe.experts.198.w3", "model.layers.35.block_sparse_moe.experts.199.w3", "model.layers.35.block_sparse_moe.experts.200.w3", "model.layers.35.block_sparse_moe.experts.201.w3", "model.layers.35.block_sparse_moe.experts.202.w3", "model.layers.35.block_sparse_moe.experts.203.w3", "model.layers.35.block_sparse_moe.experts.204.w3", "model.layers.35.block_sparse_moe.experts.205.w3", "model.layers.35.block_sparse_moe.experts.206.w3", "model.layers.35.block_sparse_moe.experts.207.w3", "model.layers.35.block_sparse_moe.experts.208.w3", "model.layers.35.block_sparse_moe.experts.209.w3", "model.layers.35.block_sparse_moe.experts.210.w3", "model.layers.35.block_sparse_moe.experts.211.w3", "model.layers.35.block_sparse_moe.experts.212.w3", "model.layers.35.block_sparse_moe.experts.213.w3", "model.layers.35.block_sparse_moe.experts.214.w3", "model.layers.35.block_sparse_moe.experts.215.w3", "model.layers.35.block_sparse_moe.experts.216.w3", "model.layers.35.block_sparse_moe.experts.217.w3", "model.layers.35.block_sparse_moe.experts.218.w3", "model.layers.35.block_sparse_moe.experts.219.w3", "model.layers.35.block_sparse_moe.experts.220.w3", "model.layers.35.block_sparse_moe.experts.221.w3", "model.layers.35.block_sparse_moe.experts.222.w3", "model.layers.35.block_sparse_moe.experts.223.w3", "model.layers.35.block_sparse_moe.experts.224.w3", "model.layers.35.block_sparse_moe.experts.225.w3", "model.layers.35.block_sparse_moe.experts.226.w3", "model.layers.35.block_sparse_moe.experts.227.w3", "model.layers.35.block_sparse_moe.experts.228.w3", "model.layers.35.block_sparse_moe.experts.229.w3", "model.layers.35.block_sparse_moe.experts.230.w3", "model.layers.35.block_sparse_moe.experts.231.w3", "model.layers.35.block_sparse_moe.experts.232.w3", "model.layers.35.block_sparse_moe.experts.233.w3", "model.layers.35.block_sparse_moe.experts.234.w3", "model.layers.35.block_sparse_moe.experts.235.w3", "model.layers.35.block_sparse_moe.experts.236.w3", "model.layers.35.block_sparse_moe.experts.237.w3", "model.layers.35.block_sparse_moe.experts.238.w3", "model.layers.35.block_sparse_moe.experts.239.w3", "model.layers.35.block_sparse_moe.experts.240.w3", "model.layers.35.block_sparse_moe.experts.241.w3", "model.layers.35.block_sparse_moe.experts.242.w3", "model.layers.35.block_sparse_moe.experts.243.w3", "model.layers.35.block_sparse_moe.experts.244.w3", "model.layers.35.block_sparse_moe.experts.245.w3", "model.layers.35.block_sparse_moe.experts.246.w3", "model.layers.35.block_sparse_moe.experts.247.w3", "model.layers.35.block_sparse_moe.experts.248.w3", "model.layers.35.block_sparse_moe.experts.249.w3", "model.layers.35.block_sparse_moe.experts.250.w3", "model.layers.35.block_sparse_moe.experts.251.w3", "model.layers.35.block_sparse_moe.experts.252.w3", "model.layers.35.block_sparse_moe.experts.253.w3", "model.layers.35.block_sparse_moe.experts.254.w3", "model.layers.35.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0025495320558547307, "dbits": 2415919104 } ] }, { "idx": 179, "layers": [ "model.layers.35.block_sparse_moe.experts.0.w2", "model.layers.35.block_sparse_moe.experts.1.w2", "model.layers.35.block_sparse_moe.experts.2.w2", "model.layers.35.block_sparse_moe.experts.3.w2", "model.layers.35.block_sparse_moe.experts.4.w2", "model.layers.35.block_sparse_moe.experts.5.w2", "model.layers.35.block_sparse_moe.experts.6.w2", "model.layers.35.block_sparse_moe.experts.7.w2", "model.layers.35.block_sparse_moe.experts.8.w2", "model.layers.35.block_sparse_moe.experts.9.w2", "model.layers.35.block_sparse_moe.experts.10.w2", "model.layers.35.block_sparse_moe.experts.11.w2", "model.layers.35.block_sparse_moe.experts.12.w2", "model.layers.35.block_sparse_moe.experts.13.w2", "model.layers.35.block_sparse_moe.experts.14.w2", "model.layers.35.block_sparse_moe.experts.15.w2", "model.layers.35.block_sparse_moe.experts.16.w2", "model.layers.35.block_sparse_moe.experts.17.w2", "model.layers.35.block_sparse_moe.experts.18.w2", "model.layers.35.block_sparse_moe.experts.19.w2", "model.layers.35.block_sparse_moe.experts.20.w2", "model.layers.35.block_sparse_moe.experts.21.w2", "model.layers.35.block_sparse_moe.experts.22.w2", "model.layers.35.block_sparse_moe.experts.23.w2", "model.layers.35.block_sparse_moe.experts.24.w2", "model.layers.35.block_sparse_moe.experts.25.w2", "model.layers.35.block_sparse_moe.experts.26.w2", "model.layers.35.block_sparse_moe.experts.27.w2", "model.layers.35.block_sparse_moe.experts.28.w2", "model.layers.35.block_sparse_moe.experts.29.w2", "model.layers.35.block_sparse_moe.experts.30.w2", "model.layers.35.block_sparse_moe.experts.31.w2", "model.layers.35.block_sparse_moe.experts.32.w2", "model.layers.35.block_sparse_moe.experts.33.w2", "model.layers.35.block_sparse_moe.experts.34.w2", "model.layers.35.block_sparse_moe.experts.35.w2", "model.layers.35.block_sparse_moe.experts.36.w2", "model.layers.35.block_sparse_moe.experts.37.w2", "model.layers.35.block_sparse_moe.experts.38.w2", "model.layers.35.block_sparse_moe.experts.39.w2", "model.layers.35.block_sparse_moe.experts.40.w2", "model.layers.35.block_sparse_moe.experts.41.w2", "model.layers.35.block_sparse_moe.experts.42.w2", "model.layers.35.block_sparse_moe.experts.43.w2", "model.layers.35.block_sparse_moe.experts.44.w2", "model.layers.35.block_sparse_moe.experts.45.w2", "model.layers.35.block_sparse_moe.experts.46.w2", "model.layers.35.block_sparse_moe.experts.47.w2", "model.layers.35.block_sparse_moe.experts.48.w2", "model.layers.35.block_sparse_moe.experts.49.w2", "model.layers.35.block_sparse_moe.experts.50.w2", "model.layers.35.block_sparse_moe.experts.51.w2", "model.layers.35.block_sparse_moe.experts.52.w2", "model.layers.35.block_sparse_moe.experts.53.w2", "model.layers.35.block_sparse_moe.experts.54.w2", "model.layers.35.block_sparse_moe.experts.55.w2", "model.layers.35.block_sparse_moe.experts.56.w2", "model.layers.35.block_sparse_moe.experts.57.w2", "model.layers.35.block_sparse_moe.experts.58.w2", "model.layers.35.block_sparse_moe.experts.59.w2", "model.layers.35.block_sparse_moe.experts.60.w2", "model.layers.35.block_sparse_moe.experts.61.w2", "model.layers.35.block_sparse_moe.experts.62.w2", "model.layers.35.block_sparse_moe.experts.63.w2", "model.layers.35.block_sparse_moe.experts.64.w2", "model.layers.35.block_sparse_moe.experts.65.w2", "model.layers.35.block_sparse_moe.experts.66.w2", "model.layers.35.block_sparse_moe.experts.67.w2", "model.layers.35.block_sparse_moe.experts.68.w2", "model.layers.35.block_sparse_moe.experts.69.w2", "model.layers.35.block_sparse_moe.experts.70.w2", "model.layers.35.block_sparse_moe.experts.71.w2", "model.layers.35.block_sparse_moe.experts.72.w2", "model.layers.35.block_sparse_moe.experts.73.w2", "model.layers.35.block_sparse_moe.experts.74.w2", "model.layers.35.block_sparse_moe.experts.75.w2", "model.layers.35.block_sparse_moe.experts.76.w2", "model.layers.35.block_sparse_moe.experts.77.w2", "model.layers.35.block_sparse_moe.experts.78.w2", "model.layers.35.block_sparse_moe.experts.79.w2", "model.layers.35.block_sparse_moe.experts.80.w2", "model.layers.35.block_sparse_moe.experts.81.w2", "model.layers.35.block_sparse_moe.experts.82.w2", "model.layers.35.block_sparse_moe.experts.83.w2", "model.layers.35.block_sparse_moe.experts.84.w2", "model.layers.35.block_sparse_moe.experts.85.w2", "model.layers.35.block_sparse_moe.experts.86.w2", "model.layers.35.block_sparse_moe.experts.87.w2", "model.layers.35.block_sparse_moe.experts.88.w2", "model.layers.35.block_sparse_moe.experts.89.w2", "model.layers.35.block_sparse_moe.experts.90.w2", "model.layers.35.block_sparse_moe.experts.91.w2", "model.layers.35.block_sparse_moe.experts.92.w2", "model.layers.35.block_sparse_moe.experts.93.w2", "model.layers.35.block_sparse_moe.experts.94.w2", "model.layers.35.block_sparse_moe.experts.95.w2", "model.layers.35.block_sparse_moe.experts.96.w2", "model.layers.35.block_sparse_moe.experts.97.w2", "model.layers.35.block_sparse_moe.experts.98.w2", "model.layers.35.block_sparse_moe.experts.99.w2", "model.layers.35.block_sparse_moe.experts.100.w2", "model.layers.35.block_sparse_moe.experts.101.w2", "model.layers.35.block_sparse_moe.experts.102.w2", "model.layers.35.block_sparse_moe.experts.103.w2", "model.layers.35.block_sparse_moe.experts.104.w2", "model.layers.35.block_sparse_moe.experts.105.w2", "model.layers.35.block_sparse_moe.experts.106.w2", "model.layers.35.block_sparse_moe.experts.107.w2", "model.layers.35.block_sparse_moe.experts.108.w2", "model.layers.35.block_sparse_moe.experts.109.w2", "model.layers.35.block_sparse_moe.experts.110.w2", "model.layers.35.block_sparse_moe.experts.111.w2", "model.layers.35.block_sparse_moe.experts.112.w2", "model.layers.35.block_sparse_moe.experts.113.w2", "model.layers.35.block_sparse_moe.experts.114.w2", "model.layers.35.block_sparse_moe.experts.115.w2", "model.layers.35.block_sparse_moe.experts.116.w2", "model.layers.35.block_sparse_moe.experts.117.w2", "model.layers.35.block_sparse_moe.experts.118.w2", "model.layers.35.block_sparse_moe.experts.119.w2", "model.layers.35.block_sparse_moe.experts.120.w2", "model.layers.35.block_sparse_moe.experts.121.w2", "model.layers.35.block_sparse_moe.experts.122.w2", "model.layers.35.block_sparse_moe.experts.123.w2", "model.layers.35.block_sparse_moe.experts.124.w2", "model.layers.35.block_sparse_moe.experts.125.w2", "model.layers.35.block_sparse_moe.experts.126.w2", "model.layers.35.block_sparse_moe.experts.127.w2", "model.layers.35.block_sparse_moe.experts.128.w2", "model.layers.35.block_sparse_moe.experts.129.w2", "model.layers.35.block_sparse_moe.experts.130.w2", "model.layers.35.block_sparse_moe.experts.131.w2", "model.layers.35.block_sparse_moe.experts.132.w2", "model.layers.35.block_sparse_moe.experts.133.w2", "model.layers.35.block_sparse_moe.experts.134.w2", "model.layers.35.block_sparse_moe.experts.135.w2", "model.layers.35.block_sparse_moe.experts.136.w2", "model.layers.35.block_sparse_moe.experts.137.w2", "model.layers.35.block_sparse_moe.experts.138.w2", "model.layers.35.block_sparse_moe.experts.139.w2", "model.layers.35.block_sparse_moe.experts.140.w2", "model.layers.35.block_sparse_moe.experts.141.w2", "model.layers.35.block_sparse_moe.experts.142.w2", "model.layers.35.block_sparse_moe.experts.143.w2", "model.layers.35.block_sparse_moe.experts.144.w2", "model.layers.35.block_sparse_moe.experts.145.w2", "model.layers.35.block_sparse_moe.experts.146.w2", "model.layers.35.block_sparse_moe.experts.147.w2", "model.layers.35.block_sparse_moe.experts.148.w2", "model.layers.35.block_sparse_moe.experts.149.w2", "model.layers.35.block_sparse_moe.experts.150.w2", "model.layers.35.block_sparse_moe.experts.151.w2", "model.layers.35.block_sparse_moe.experts.152.w2", "model.layers.35.block_sparse_moe.experts.153.w2", "model.layers.35.block_sparse_moe.experts.154.w2", "model.layers.35.block_sparse_moe.experts.155.w2", "model.layers.35.block_sparse_moe.experts.156.w2", "model.layers.35.block_sparse_moe.experts.157.w2", "model.layers.35.block_sparse_moe.experts.158.w2", "model.layers.35.block_sparse_moe.experts.159.w2", "model.layers.35.block_sparse_moe.experts.160.w2", "model.layers.35.block_sparse_moe.experts.161.w2", "model.layers.35.block_sparse_moe.experts.162.w2", "model.layers.35.block_sparse_moe.experts.163.w2", "model.layers.35.block_sparse_moe.experts.164.w2", "model.layers.35.block_sparse_moe.experts.165.w2", "model.layers.35.block_sparse_moe.experts.166.w2", "model.layers.35.block_sparse_moe.experts.167.w2", "model.layers.35.block_sparse_moe.experts.168.w2", "model.layers.35.block_sparse_moe.experts.169.w2", "model.layers.35.block_sparse_moe.experts.170.w2", "model.layers.35.block_sparse_moe.experts.171.w2", "model.layers.35.block_sparse_moe.experts.172.w2", "model.layers.35.block_sparse_moe.experts.173.w2", "model.layers.35.block_sparse_moe.experts.174.w2", "model.layers.35.block_sparse_moe.experts.175.w2", "model.layers.35.block_sparse_moe.experts.176.w2", "model.layers.35.block_sparse_moe.experts.177.w2", "model.layers.35.block_sparse_moe.experts.178.w2", "model.layers.35.block_sparse_moe.experts.179.w2", "model.layers.35.block_sparse_moe.experts.180.w2", "model.layers.35.block_sparse_moe.experts.181.w2", "model.layers.35.block_sparse_moe.experts.182.w2", "model.layers.35.block_sparse_moe.experts.183.w2", "model.layers.35.block_sparse_moe.experts.184.w2", "model.layers.35.block_sparse_moe.experts.185.w2", "model.layers.35.block_sparse_moe.experts.186.w2", "model.layers.35.block_sparse_moe.experts.187.w2", "model.layers.35.block_sparse_moe.experts.188.w2", "model.layers.35.block_sparse_moe.experts.189.w2", "model.layers.35.block_sparse_moe.experts.190.w2", "model.layers.35.block_sparse_moe.experts.191.w2", "model.layers.35.block_sparse_moe.experts.192.w2", "model.layers.35.block_sparse_moe.experts.193.w2", "model.layers.35.block_sparse_moe.experts.194.w2", "model.layers.35.block_sparse_moe.experts.195.w2", "model.layers.35.block_sparse_moe.experts.196.w2", "model.layers.35.block_sparse_moe.experts.197.w2", "model.layers.35.block_sparse_moe.experts.198.w2", "model.layers.35.block_sparse_moe.experts.199.w2", "model.layers.35.block_sparse_moe.experts.200.w2", "model.layers.35.block_sparse_moe.experts.201.w2", "model.layers.35.block_sparse_moe.experts.202.w2", "model.layers.35.block_sparse_moe.experts.203.w2", "model.layers.35.block_sparse_moe.experts.204.w2", "model.layers.35.block_sparse_moe.experts.205.w2", "model.layers.35.block_sparse_moe.experts.206.w2", "model.layers.35.block_sparse_moe.experts.207.w2", "model.layers.35.block_sparse_moe.experts.208.w2", "model.layers.35.block_sparse_moe.experts.209.w2", "model.layers.35.block_sparse_moe.experts.210.w2", "model.layers.35.block_sparse_moe.experts.211.w2", "model.layers.35.block_sparse_moe.experts.212.w2", "model.layers.35.block_sparse_moe.experts.213.w2", "model.layers.35.block_sparse_moe.experts.214.w2", "model.layers.35.block_sparse_moe.experts.215.w2", "model.layers.35.block_sparse_moe.experts.216.w2", "model.layers.35.block_sparse_moe.experts.217.w2", "model.layers.35.block_sparse_moe.experts.218.w2", "model.layers.35.block_sparse_moe.experts.219.w2", "model.layers.35.block_sparse_moe.experts.220.w2", "model.layers.35.block_sparse_moe.experts.221.w2", "model.layers.35.block_sparse_moe.experts.222.w2", "model.layers.35.block_sparse_moe.experts.223.w2", "model.layers.35.block_sparse_moe.experts.224.w2", "model.layers.35.block_sparse_moe.experts.225.w2", "model.layers.35.block_sparse_moe.experts.226.w2", "model.layers.35.block_sparse_moe.experts.227.w2", "model.layers.35.block_sparse_moe.experts.228.w2", "model.layers.35.block_sparse_moe.experts.229.w2", "model.layers.35.block_sparse_moe.experts.230.w2", "model.layers.35.block_sparse_moe.experts.231.w2", "model.layers.35.block_sparse_moe.experts.232.w2", "model.layers.35.block_sparse_moe.experts.233.w2", "model.layers.35.block_sparse_moe.experts.234.w2", "model.layers.35.block_sparse_moe.experts.235.w2", "model.layers.35.block_sparse_moe.experts.236.w2", "model.layers.35.block_sparse_moe.experts.237.w2", "model.layers.35.block_sparse_moe.experts.238.w2", "model.layers.35.block_sparse_moe.experts.239.w2", "model.layers.35.block_sparse_moe.experts.240.w2", "model.layers.35.block_sparse_moe.experts.241.w2", "model.layers.35.block_sparse_moe.experts.242.w2", "model.layers.35.block_sparse_moe.experts.243.w2", "model.layers.35.block_sparse_moe.experts.244.w2", "model.layers.35.block_sparse_moe.experts.245.w2", "model.layers.35.block_sparse_moe.experts.246.w2", "model.layers.35.block_sparse_moe.experts.247.w2", "model.layers.35.block_sparse_moe.experts.248.w2", "model.layers.35.block_sparse_moe.experts.249.w2", "model.layers.35.block_sparse_moe.experts.250.w2", "model.layers.35.block_sparse_moe.experts.251.w2", "model.layers.35.block_sparse_moe.experts.252.w2", "model.layers.35.block_sparse_moe.experts.253.w2", "model.layers.35.block_sparse_moe.experts.254.w2", "model.layers.35.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0016195207834243108, "dbits": 1207959552 } ] }, { "idx": 180, "layers": [ "model.layers.36.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0016018241643905862, "dbits": 18874368 } ] }, { "idx": 181, "layers": [ "model.layers.36.self_attn.k_proj", "model.layers.36.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00020807385444643334, "dbits": 6291456 } ] }, { "idx": 182, "layers": [ "model.layers.36.self_attn.o_proj" ], "candidates": [ { "dkld": -1.379847526550293e-05, "dbits": 18874368 } ] }, { "idx": 183, "layers": [ "model.layers.36.block_sparse_moe.experts.0.w1", "model.layers.36.block_sparse_moe.experts.1.w1", "model.layers.36.block_sparse_moe.experts.2.w1", "model.layers.36.block_sparse_moe.experts.3.w1", "model.layers.36.block_sparse_moe.experts.4.w1", "model.layers.36.block_sparse_moe.experts.5.w1", "model.layers.36.block_sparse_moe.experts.6.w1", "model.layers.36.block_sparse_moe.experts.7.w1", "model.layers.36.block_sparse_moe.experts.8.w1", "model.layers.36.block_sparse_moe.experts.9.w1", "model.layers.36.block_sparse_moe.experts.10.w1", "model.layers.36.block_sparse_moe.experts.11.w1", "model.layers.36.block_sparse_moe.experts.12.w1", "model.layers.36.block_sparse_moe.experts.13.w1", "model.layers.36.block_sparse_moe.experts.14.w1", "model.layers.36.block_sparse_moe.experts.15.w1", "model.layers.36.block_sparse_moe.experts.16.w1", "model.layers.36.block_sparse_moe.experts.17.w1", "model.layers.36.block_sparse_moe.experts.18.w1", "model.layers.36.block_sparse_moe.experts.19.w1", "model.layers.36.block_sparse_moe.experts.20.w1", "model.layers.36.block_sparse_moe.experts.21.w1", "model.layers.36.block_sparse_moe.experts.22.w1", "model.layers.36.block_sparse_moe.experts.23.w1", "model.layers.36.block_sparse_moe.experts.24.w1", "model.layers.36.block_sparse_moe.experts.25.w1", "model.layers.36.block_sparse_moe.experts.26.w1", "model.layers.36.block_sparse_moe.experts.27.w1", "model.layers.36.block_sparse_moe.experts.28.w1", "model.layers.36.block_sparse_moe.experts.29.w1", "model.layers.36.block_sparse_moe.experts.30.w1", "model.layers.36.block_sparse_moe.experts.31.w1", "model.layers.36.block_sparse_moe.experts.32.w1", "model.layers.36.block_sparse_moe.experts.33.w1", "model.layers.36.block_sparse_moe.experts.34.w1", "model.layers.36.block_sparse_moe.experts.35.w1", "model.layers.36.block_sparse_moe.experts.36.w1", "model.layers.36.block_sparse_moe.experts.37.w1", "model.layers.36.block_sparse_moe.experts.38.w1", "model.layers.36.block_sparse_moe.experts.39.w1", "model.layers.36.block_sparse_moe.experts.40.w1", "model.layers.36.block_sparse_moe.experts.41.w1", "model.layers.36.block_sparse_moe.experts.42.w1", "model.layers.36.block_sparse_moe.experts.43.w1", "model.layers.36.block_sparse_moe.experts.44.w1", "model.layers.36.block_sparse_moe.experts.45.w1", "model.layers.36.block_sparse_moe.experts.46.w1", "model.layers.36.block_sparse_moe.experts.47.w1", "model.layers.36.block_sparse_moe.experts.48.w1", "model.layers.36.block_sparse_moe.experts.49.w1", "model.layers.36.block_sparse_moe.experts.50.w1", "model.layers.36.block_sparse_moe.experts.51.w1", "model.layers.36.block_sparse_moe.experts.52.w1", "model.layers.36.block_sparse_moe.experts.53.w1", "model.layers.36.block_sparse_moe.experts.54.w1", "model.layers.36.block_sparse_moe.experts.55.w1", "model.layers.36.block_sparse_moe.experts.56.w1", "model.layers.36.block_sparse_moe.experts.57.w1", "model.layers.36.block_sparse_moe.experts.58.w1", "model.layers.36.block_sparse_moe.experts.59.w1", "model.layers.36.block_sparse_moe.experts.60.w1", "model.layers.36.block_sparse_moe.experts.61.w1", "model.layers.36.block_sparse_moe.experts.62.w1", "model.layers.36.block_sparse_moe.experts.63.w1", "model.layers.36.block_sparse_moe.experts.64.w1", "model.layers.36.block_sparse_moe.experts.65.w1", "model.layers.36.block_sparse_moe.experts.66.w1", "model.layers.36.block_sparse_moe.experts.67.w1", "model.layers.36.block_sparse_moe.experts.68.w1", "model.layers.36.block_sparse_moe.experts.69.w1", "model.layers.36.block_sparse_moe.experts.70.w1", "model.layers.36.block_sparse_moe.experts.71.w1", "model.layers.36.block_sparse_moe.experts.72.w1", "model.layers.36.block_sparse_moe.experts.73.w1", "model.layers.36.block_sparse_moe.experts.74.w1", "model.layers.36.block_sparse_moe.experts.75.w1", "model.layers.36.block_sparse_moe.experts.76.w1", "model.layers.36.block_sparse_moe.experts.77.w1", "model.layers.36.block_sparse_moe.experts.78.w1", "model.layers.36.block_sparse_moe.experts.79.w1", "model.layers.36.block_sparse_moe.experts.80.w1", "model.layers.36.block_sparse_moe.experts.81.w1", "model.layers.36.block_sparse_moe.experts.82.w1", "model.layers.36.block_sparse_moe.experts.83.w1", "model.layers.36.block_sparse_moe.experts.84.w1", "model.layers.36.block_sparse_moe.experts.85.w1", "model.layers.36.block_sparse_moe.experts.86.w1", "model.layers.36.block_sparse_moe.experts.87.w1", "model.layers.36.block_sparse_moe.experts.88.w1", "model.layers.36.block_sparse_moe.experts.89.w1", "model.layers.36.block_sparse_moe.experts.90.w1", "model.layers.36.block_sparse_moe.experts.91.w1", "model.layers.36.block_sparse_moe.experts.92.w1", "model.layers.36.block_sparse_moe.experts.93.w1", "model.layers.36.block_sparse_moe.experts.94.w1", "model.layers.36.block_sparse_moe.experts.95.w1", "model.layers.36.block_sparse_moe.experts.96.w1", "model.layers.36.block_sparse_moe.experts.97.w1", "model.layers.36.block_sparse_moe.experts.98.w1", "model.layers.36.block_sparse_moe.experts.99.w1", "model.layers.36.block_sparse_moe.experts.100.w1", "model.layers.36.block_sparse_moe.experts.101.w1", "model.layers.36.block_sparse_moe.experts.102.w1", "model.layers.36.block_sparse_moe.experts.103.w1", "model.layers.36.block_sparse_moe.experts.104.w1", "model.layers.36.block_sparse_moe.experts.105.w1", "model.layers.36.block_sparse_moe.experts.106.w1", "model.layers.36.block_sparse_moe.experts.107.w1", "model.layers.36.block_sparse_moe.experts.108.w1", "model.layers.36.block_sparse_moe.experts.109.w1", "model.layers.36.block_sparse_moe.experts.110.w1", "model.layers.36.block_sparse_moe.experts.111.w1", "model.layers.36.block_sparse_moe.experts.112.w1", "model.layers.36.block_sparse_moe.experts.113.w1", "model.layers.36.block_sparse_moe.experts.114.w1", "model.layers.36.block_sparse_moe.experts.115.w1", "model.layers.36.block_sparse_moe.experts.116.w1", "model.layers.36.block_sparse_moe.experts.117.w1", "model.layers.36.block_sparse_moe.experts.118.w1", "model.layers.36.block_sparse_moe.experts.119.w1", "model.layers.36.block_sparse_moe.experts.120.w1", "model.layers.36.block_sparse_moe.experts.121.w1", "model.layers.36.block_sparse_moe.experts.122.w1", "model.layers.36.block_sparse_moe.experts.123.w1", "model.layers.36.block_sparse_moe.experts.124.w1", "model.layers.36.block_sparse_moe.experts.125.w1", "model.layers.36.block_sparse_moe.experts.126.w1", "model.layers.36.block_sparse_moe.experts.127.w1", "model.layers.36.block_sparse_moe.experts.128.w1", "model.layers.36.block_sparse_moe.experts.129.w1", "model.layers.36.block_sparse_moe.experts.130.w1", "model.layers.36.block_sparse_moe.experts.131.w1", "model.layers.36.block_sparse_moe.experts.132.w1", "model.layers.36.block_sparse_moe.experts.133.w1", "model.layers.36.block_sparse_moe.experts.134.w1", "model.layers.36.block_sparse_moe.experts.135.w1", "model.layers.36.block_sparse_moe.experts.136.w1", "model.layers.36.block_sparse_moe.experts.137.w1", "model.layers.36.block_sparse_moe.experts.138.w1", "model.layers.36.block_sparse_moe.experts.139.w1", "model.layers.36.block_sparse_moe.experts.140.w1", "model.layers.36.block_sparse_moe.experts.141.w1", "model.layers.36.block_sparse_moe.experts.142.w1", "model.layers.36.block_sparse_moe.experts.143.w1", "model.layers.36.block_sparse_moe.experts.144.w1", "model.layers.36.block_sparse_moe.experts.145.w1", "model.layers.36.block_sparse_moe.experts.146.w1", "model.layers.36.block_sparse_moe.experts.147.w1", "model.layers.36.block_sparse_moe.experts.148.w1", "model.layers.36.block_sparse_moe.experts.149.w1", "model.layers.36.block_sparse_moe.experts.150.w1", "model.layers.36.block_sparse_moe.experts.151.w1", "model.layers.36.block_sparse_moe.experts.152.w1", "model.layers.36.block_sparse_moe.experts.153.w1", "model.layers.36.block_sparse_moe.experts.154.w1", "model.layers.36.block_sparse_moe.experts.155.w1", "model.layers.36.block_sparse_moe.experts.156.w1", "model.layers.36.block_sparse_moe.experts.157.w1", "model.layers.36.block_sparse_moe.experts.158.w1", "model.layers.36.block_sparse_moe.experts.159.w1", "model.layers.36.block_sparse_moe.experts.160.w1", "model.layers.36.block_sparse_moe.experts.161.w1", "model.layers.36.block_sparse_moe.experts.162.w1", "model.layers.36.block_sparse_moe.experts.163.w1", "model.layers.36.block_sparse_moe.experts.164.w1", "model.layers.36.block_sparse_moe.experts.165.w1", "model.layers.36.block_sparse_moe.experts.166.w1", "model.layers.36.block_sparse_moe.experts.167.w1", "model.layers.36.block_sparse_moe.experts.168.w1", "model.layers.36.block_sparse_moe.experts.169.w1", "model.layers.36.block_sparse_moe.experts.170.w1", "model.layers.36.block_sparse_moe.experts.171.w1", "model.layers.36.block_sparse_moe.experts.172.w1", "model.layers.36.block_sparse_moe.experts.173.w1", "model.layers.36.block_sparse_moe.experts.174.w1", "model.layers.36.block_sparse_moe.experts.175.w1", "model.layers.36.block_sparse_moe.experts.176.w1", "model.layers.36.block_sparse_moe.experts.177.w1", "model.layers.36.block_sparse_moe.experts.178.w1", "model.layers.36.block_sparse_moe.experts.179.w1", "model.layers.36.block_sparse_moe.experts.180.w1", "model.layers.36.block_sparse_moe.experts.181.w1", "model.layers.36.block_sparse_moe.experts.182.w1", "model.layers.36.block_sparse_moe.experts.183.w1", "model.layers.36.block_sparse_moe.experts.184.w1", "model.layers.36.block_sparse_moe.experts.185.w1", "model.layers.36.block_sparse_moe.experts.186.w1", "model.layers.36.block_sparse_moe.experts.187.w1", "model.layers.36.block_sparse_moe.experts.188.w1", "model.layers.36.block_sparse_moe.experts.189.w1", "model.layers.36.block_sparse_moe.experts.190.w1", "model.layers.36.block_sparse_moe.experts.191.w1", "model.layers.36.block_sparse_moe.experts.192.w1", "model.layers.36.block_sparse_moe.experts.193.w1", "model.layers.36.block_sparse_moe.experts.194.w1", "model.layers.36.block_sparse_moe.experts.195.w1", "model.layers.36.block_sparse_moe.experts.196.w1", "model.layers.36.block_sparse_moe.experts.197.w1", "model.layers.36.block_sparse_moe.experts.198.w1", "model.layers.36.block_sparse_moe.experts.199.w1", "model.layers.36.block_sparse_moe.experts.200.w1", "model.layers.36.block_sparse_moe.experts.201.w1", "model.layers.36.block_sparse_moe.experts.202.w1", "model.layers.36.block_sparse_moe.experts.203.w1", "model.layers.36.block_sparse_moe.experts.204.w1", "model.layers.36.block_sparse_moe.experts.205.w1", "model.layers.36.block_sparse_moe.experts.206.w1", "model.layers.36.block_sparse_moe.experts.207.w1", "model.layers.36.block_sparse_moe.experts.208.w1", "model.layers.36.block_sparse_moe.experts.209.w1", "model.layers.36.block_sparse_moe.experts.210.w1", "model.layers.36.block_sparse_moe.experts.211.w1", "model.layers.36.block_sparse_moe.experts.212.w1", "model.layers.36.block_sparse_moe.experts.213.w1", "model.layers.36.block_sparse_moe.experts.214.w1", "model.layers.36.block_sparse_moe.experts.215.w1", "model.layers.36.block_sparse_moe.experts.216.w1", "model.layers.36.block_sparse_moe.experts.217.w1", "model.layers.36.block_sparse_moe.experts.218.w1", "model.layers.36.block_sparse_moe.experts.219.w1", "model.layers.36.block_sparse_moe.experts.220.w1", "model.layers.36.block_sparse_moe.experts.221.w1", "model.layers.36.block_sparse_moe.experts.222.w1", "model.layers.36.block_sparse_moe.experts.223.w1", "model.layers.36.block_sparse_moe.experts.224.w1", "model.layers.36.block_sparse_moe.experts.225.w1", "model.layers.36.block_sparse_moe.experts.226.w1", "model.layers.36.block_sparse_moe.experts.227.w1", "model.layers.36.block_sparse_moe.experts.228.w1", "model.layers.36.block_sparse_moe.experts.229.w1", "model.layers.36.block_sparse_moe.experts.230.w1", "model.layers.36.block_sparse_moe.experts.231.w1", "model.layers.36.block_sparse_moe.experts.232.w1", "model.layers.36.block_sparse_moe.experts.233.w1", "model.layers.36.block_sparse_moe.experts.234.w1", "model.layers.36.block_sparse_moe.experts.235.w1", "model.layers.36.block_sparse_moe.experts.236.w1", "model.layers.36.block_sparse_moe.experts.237.w1", "model.layers.36.block_sparse_moe.experts.238.w1", "model.layers.36.block_sparse_moe.experts.239.w1", "model.layers.36.block_sparse_moe.experts.240.w1", "model.layers.36.block_sparse_moe.experts.241.w1", "model.layers.36.block_sparse_moe.experts.242.w1", "model.layers.36.block_sparse_moe.experts.243.w1", "model.layers.36.block_sparse_moe.experts.244.w1", "model.layers.36.block_sparse_moe.experts.245.w1", "model.layers.36.block_sparse_moe.experts.246.w1", "model.layers.36.block_sparse_moe.experts.247.w1", "model.layers.36.block_sparse_moe.experts.248.w1", "model.layers.36.block_sparse_moe.experts.249.w1", "model.layers.36.block_sparse_moe.experts.250.w1", "model.layers.36.block_sparse_moe.experts.251.w1", "model.layers.36.block_sparse_moe.experts.252.w1", "model.layers.36.block_sparse_moe.experts.253.w1", "model.layers.36.block_sparse_moe.experts.254.w1", "model.layers.36.block_sparse_moe.experts.255.w1", "model.layers.36.block_sparse_moe.experts.0.w3", "model.layers.36.block_sparse_moe.experts.1.w3", "model.layers.36.block_sparse_moe.experts.2.w3", "model.layers.36.block_sparse_moe.experts.3.w3", "model.layers.36.block_sparse_moe.experts.4.w3", "model.layers.36.block_sparse_moe.experts.5.w3", "model.layers.36.block_sparse_moe.experts.6.w3", "model.layers.36.block_sparse_moe.experts.7.w3", "model.layers.36.block_sparse_moe.experts.8.w3", "model.layers.36.block_sparse_moe.experts.9.w3", "model.layers.36.block_sparse_moe.experts.10.w3", "model.layers.36.block_sparse_moe.experts.11.w3", "model.layers.36.block_sparse_moe.experts.12.w3", "model.layers.36.block_sparse_moe.experts.13.w3", "model.layers.36.block_sparse_moe.experts.14.w3", "model.layers.36.block_sparse_moe.experts.15.w3", "model.layers.36.block_sparse_moe.experts.16.w3", "model.layers.36.block_sparse_moe.experts.17.w3", "model.layers.36.block_sparse_moe.experts.18.w3", "model.layers.36.block_sparse_moe.experts.19.w3", "model.layers.36.block_sparse_moe.experts.20.w3", "model.layers.36.block_sparse_moe.experts.21.w3", "model.layers.36.block_sparse_moe.experts.22.w3", "model.layers.36.block_sparse_moe.experts.23.w3", "model.layers.36.block_sparse_moe.experts.24.w3", "model.layers.36.block_sparse_moe.experts.25.w3", "model.layers.36.block_sparse_moe.experts.26.w3", "model.layers.36.block_sparse_moe.experts.27.w3", "model.layers.36.block_sparse_moe.experts.28.w3", "model.layers.36.block_sparse_moe.experts.29.w3", "model.layers.36.block_sparse_moe.experts.30.w3", "model.layers.36.block_sparse_moe.experts.31.w3", "model.layers.36.block_sparse_moe.experts.32.w3", "model.layers.36.block_sparse_moe.experts.33.w3", "model.layers.36.block_sparse_moe.experts.34.w3", "model.layers.36.block_sparse_moe.experts.35.w3", "model.layers.36.block_sparse_moe.experts.36.w3", "model.layers.36.block_sparse_moe.experts.37.w3", "model.layers.36.block_sparse_moe.experts.38.w3", "model.layers.36.block_sparse_moe.experts.39.w3", "model.layers.36.block_sparse_moe.experts.40.w3", "model.layers.36.block_sparse_moe.experts.41.w3", "model.layers.36.block_sparse_moe.experts.42.w3", "model.layers.36.block_sparse_moe.experts.43.w3", "model.layers.36.block_sparse_moe.experts.44.w3", "model.layers.36.block_sparse_moe.experts.45.w3", "model.layers.36.block_sparse_moe.experts.46.w3", "model.layers.36.block_sparse_moe.experts.47.w3", "model.layers.36.block_sparse_moe.experts.48.w3", "model.layers.36.block_sparse_moe.experts.49.w3", "model.layers.36.block_sparse_moe.experts.50.w3", "model.layers.36.block_sparse_moe.experts.51.w3", "model.layers.36.block_sparse_moe.experts.52.w3", "model.layers.36.block_sparse_moe.experts.53.w3", "model.layers.36.block_sparse_moe.experts.54.w3", "model.layers.36.block_sparse_moe.experts.55.w3", "model.layers.36.block_sparse_moe.experts.56.w3", "model.layers.36.block_sparse_moe.experts.57.w3", "model.layers.36.block_sparse_moe.experts.58.w3", "model.layers.36.block_sparse_moe.experts.59.w3", "model.layers.36.block_sparse_moe.experts.60.w3", "model.layers.36.block_sparse_moe.experts.61.w3", "model.layers.36.block_sparse_moe.experts.62.w3", "model.layers.36.block_sparse_moe.experts.63.w3", "model.layers.36.block_sparse_moe.experts.64.w3", "model.layers.36.block_sparse_moe.experts.65.w3", "model.layers.36.block_sparse_moe.experts.66.w3", "model.layers.36.block_sparse_moe.experts.67.w3", "model.layers.36.block_sparse_moe.experts.68.w3", "model.layers.36.block_sparse_moe.experts.69.w3", "model.layers.36.block_sparse_moe.experts.70.w3", "model.layers.36.block_sparse_moe.experts.71.w3", "model.layers.36.block_sparse_moe.experts.72.w3", "model.layers.36.block_sparse_moe.experts.73.w3", "model.layers.36.block_sparse_moe.experts.74.w3", "model.layers.36.block_sparse_moe.experts.75.w3", "model.layers.36.block_sparse_moe.experts.76.w3", "model.layers.36.block_sparse_moe.experts.77.w3", "model.layers.36.block_sparse_moe.experts.78.w3", "model.layers.36.block_sparse_moe.experts.79.w3", "model.layers.36.block_sparse_moe.experts.80.w3", "model.layers.36.block_sparse_moe.experts.81.w3", "model.layers.36.block_sparse_moe.experts.82.w3", "model.layers.36.block_sparse_moe.experts.83.w3", "model.layers.36.block_sparse_moe.experts.84.w3", "model.layers.36.block_sparse_moe.experts.85.w3", "model.layers.36.block_sparse_moe.experts.86.w3", "model.layers.36.block_sparse_moe.experts.87.w3", "model.layers.36.block_sparse_moe.experts.88.w3", "model.layers.36.block_sparse_moe.experts.89.w3", "model.layers.36.block_sparse_moe.experts.90.w3", "model.layers.36.block_sparse_moe.experts.91.w3", "model.layers.36.block_sparse_moe.experts.92.w3", "model.layers.36.block_sparse_moe.experts.93.w3", "model.layers.36.block_sparse_moe.experts.94.w3", "model.layers.36.block_sparse_moe.experts.95.w3", "model.layers.36.block_sparse_moe.experts.96.w3", "model.layers.36.block_sparse_moe.experts.97.w3", "model.layers.36.block_sparse_moe.experts.98.w3", "model.layers.36.block_sparse_moe.experts.99.w3", "model.layers.36.block_sparse_moe.experts.100.w3", "model.layers.36.block_sparse_moe.experts.101.w3", "model.layers.36.block_sparse_moe.experts.102.w3", "model.layers.36.block_sparse_moe.experts.103.w3", "model.layers.36.block_sparse_moe.experts.104.w3", "model.layers.36.block_sparse_moe.experts.105.w3", "model.layers.36.block_sparse_moe.experts.106.w3", "model.layers.36.block_sparse_moe.experts.107.w3", "model.layers.36.block_sparse_moe.experts.108.w3", "model.layers.36.block_sparse_moe.experts.109.w3", "model.layers.36.block_sparse_moe.experts.110.w3", "model.layers.36.block_sparse_moe.experts.111.w3", "model.layers.36.block_sparse_moe.experts.112.w3", "model.layers.36.block_sparse_moe.experts.113.w3", "model.layers.36.block_sparse_moe.experts.114.w3", "model.layers.36.block_sparse_moe.experts.115.w3", "model.layers.36.block_sparse_moe.experts.116.w3", "model.layers.36.block_sparse_moe.experts.117.w3", "model.layers.36.block_sparse_moe.experts.118.w3", "model.layers.36.block_sparse_moe.experts.119.w3", "model.layers.36.block_sparse_moe.experts.120.w3", "model.layers.36.block_sparse_moe.experts.121.w3", "model.layers.36.block_sparse_moe.experts.122.w3", "model.layers.36.block_sparse_moe.experts.123.w3", "model.layers.36.block_sparse_moe.experts.124.w3", "model.layers.36.block_sparse_moe.experts.125.w3", "model.layers.36.block_sparse_moe.experts.126.w3", "model.layers.36.block_sparse_moe.experts.127.w3", "model.layers.36.block_sparse_moe.experts.128.w3", "model.layers.36.block_sparse_moe.experts.129.w3", "model.layers.36.block_sparse_moe.experts.130.w3", "model.layers.36.block_sparse_moe.experts.131.w3", "model.layers.36.block_sparse_moe.experts.132.w3", "model.layers.36.block_sparse_moe.experts.133.w3", "model.layers.36.block_sparse_moe.experts.134.w3", "model.layers.36.block_sparse_moe.experts.135.w3", "model.layers.36.block_sparse_moe.experts.136.w3", "model.layers.36.block_sparse_moe.experts.137.w3", "model.layers.36.block_sparse_moe.experts.138.w3", "model.layers.36.block_sparse_moe.experts.139.w3", "model.layers.36.block_sparse_moe.experts.140.w3", "model.layers.36.block_sparse_moe.experts.141.w3", "model.layers.36.block_sparse_moe.experts.142.w3", "model.layers.36.block_sparse_moe.experts.143.w3", "model.layers.36.block_sparse_moe.experts.144.w3", "model.layers.36.block_sparse_moe.experts.145.w3", "model.layers.36.block_sparse_moe.experts.146.w3", "model.layers.36.block_sparse_moe.experts.147.w3", "model.layers.36.block_sparse_moe.experts.148.w3", "model.layers.36.block_sparse_moe.experts.149.w3", "model.layers.36.block_sparse_moe.experts.150.w3", "model.layers.36.block_sparse_moe.experts.151.w3", "model.layers.36.block_sparse_moe.experts.152.w3", "model.layers.36.block_sparse_moe.experts.153.w3", "model.layers.36.block_sparse_moe.experts.154.w3", "model.layers.36.block_sparse_moe.experts.155.w3", "model.layers.36.block_sparse_moe.experts.156.w3", "model.layers.36.block_sparse_moe.experts.157.w3", "model.layers.36.block_sparse_moe.experts.158.w3", "model.layers.36.block_sparse_moe.experts.159.w3", "model.layers.36.block_sparse_moe.experts.160.w3", "model.layers.36.block_sparse_moe.experts.161.w3", "model.layers.36.block_sparse_moe.experts.162.w3", "model.layers.36.block_sparse_moe.experts.163.w3", "model.layers.36.block_sparse_moe.experts.164.w3", "model.layers.36.block_sparse_moe.experts.165.w3", "model.layers.36.block_sparse_moe.experts.166.w3", "model.layers.36.block_sparse_moe.experts.167.w3", "model.layers.36.block_sparse_moe.experts.168.w3", "model.layers.36.block_sparse_moe.experts.169.w3", "model.layers.36.block_sparse_moe.experts.170.w3", "model.layers.36.block_sparse_moe.experts.171.w3", "model.layers.36.block_sparse_moe.experts.172.w3", "model.layers.36.block_sparse_moe.experts.173.w3", "model.layers.36.block_sparse_moe.experts.174.w3", "model.layers.36.block_sparse_moe.experts.175.w3", "model.layers.36.block_sparse_moe.experts.176.w3", "model.layers.36.block_sparse_moe.experts.177.w3", "model.layers.36.block_sparse_moe.experts.178.w3", "model.layers.36.block_sparse_moe.experts.179.w3", "model.layers.36.block_sparse_moe.experts.180.w3", "model.layers.36.block_sparse_moe.experts.181.w3", "model.layers.36.block_sparse_moe.experts.182.w3", "model.layers.36.block_sparse_moe.experts.183.w3", "model.layers.36.block_sparse_moe.experts.184.w3", "model.layers.36.block_sparse_moe.experts.185.w3", "model.layers.36.block_sparse_moe.experts.186.w3", "model.layers.36.block_sparse_moe.experts.187.w3", "model.layers.36.block_sparse_moe.experts.188.w3", "model.layers.36.block_sparse_moe.experts.189.w3", "model.layers.36.block_sparse_moe.experts.190.w3", "model.layers.36.block_sparse_moe.experts.191.w3", "model.layers.36.block_sparse_moe.experts.192.w3", "model.layers.36.block_sparse_moe.experts.193.w3", "model.layers.36.block_sparse_moe.experts.194.w3", "model.layers.36.block_sparse_moe.experts.195.w3", "model.layers.36.block_sparse_moe.experts.196.w3", "model.layers.36.block_sparse_moe.experts.197.w3", "model.layers.36.block_sparse_moe.experts.198.w3", "model.layers.36.block_sparse_moe.experts.199.w3", "model.layers.36.block_sparse_moe.experts.200.w3", "model.layers.36.block_sparse_moe.experts.201.w3", "model.layers.36.block_sparse_moe.experts.202.w3", "model.layers.36.block_sparse_moe.experts.203.w3", "model.layers.36.block_sparse_moe.experts.204.w3", "model.layers.36.block_sparse_moe.experts.205.w3", "model.layers.36.block_sparse_moe.experts.206.w3", "model.layers.36.block_sparse_moe.experts.207.w3", "model.layers.36.block_sparse_moe.experts.208.w3", "model.layers.36.block_sparse_moe.experts.209.w3", "model.layers.36.block_sparse_moe.experts.210.w3", "model.layers.36.block_sparse_moe.experts.211.w3", "model.layers.36.block_sparse_moe.experts.212.w3", "model.layers.36.block_sparse_moe.experts.213.w3", "model.layers.36.block_sparse_moe.experts.214.w3", "model.layers.36.block_sparse_moe.experts.215.w3", "model.layers.36.block_sparse_moe.experts.216.w3", "model.layers.36.block_sparse_moe.experts.217.w3", "model.layers.36.block_sparse_moe.experts.218.w3", "model.layers.36.block_sparse_moe.experts.219.w3", "model.layers.36.block_sparse_moe.experts.220.w3", "model.layers.36.block_sparse_moe.experts.221.w3", "model.layers.36.block_sparse_moe.experts.222.w3", "model.layers.36.block_sparse_moe.experts.223.w3", "model.layers.36.block_sparse_moe.experts.224.w3", "model.layers.36.block_sparse_moe.experts.225.w3", "model.layers.36.block_sparse_moe.experts.226.w3", "model.layers.36.block_sparse_moe.experts.227.w3", "model.layers.36.block_sparse_moe.experts.228.w3", "model.layers.36.block_sparse_moe.experts.229.w3", "model.layers.36.block_sparse_moe.experts.230.w3", "model.layers.36.block_sparse_moe.experts.231.w3", "model.layers.36.block_sparse_moe.experts.232.w3", "model.layers.36.block_sparse_moe.experts.233.w3", "model.layers.36.block_sparse_moe.experts.234.w3", "model.layers.36.block_sparse_moe.experts.235.w3", "model.layers.36.block_sparse_moe.experts.236.w3", "model.layers.36.block_sparse_moe.experts.237.w3", "model.layers.36.block_sparse_moe.experts.238.w3", "model.layers.36.block_sparse_moe.experts.239.w3", "model.layers.36.block_sparse_moe.experts.240.w3", "model.layers.36.block_sparse_moe.experts.241.w3", "model.layers.36.block_sparse_moe.experts.242.w3", "model.layers.36.block_sparse_moe.experts.243.w3", "model.layers.36.block_sparse_moe.experts.244.w3", "model.layers.36.block_sparse_moe.experts.245.w3", "model.layers.36.block_sparse_moe.experts.246.w3", "model.layers.36.block_sparse_moe.experts.247.w3", "model.layers.36.block_sparse_moe.experts.248.w3", "model.layers.36.block_sparse_moe.experts.249.w3", "model.layers.36.block_sparse_moe.experts.250.w3", "model.layers.36.block_sparse_moe.experts.251.w3", "model.layers.36.block_sparse_moe.experts.252.w3", "model.layers.36.block_sparse_moe.experts.253.w3", "model.layers.36.block_sparse_moe.experts.254.w3", "model.layers.36.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0016158431768417136, "dbits": 2415919104 } ] }, { "idx": 184, "layers": [ "model.layers.36.block_sparse_moe.experts.0.w2", "model.layers.36.block_sparse_moe.experts.1.w2", "model.layers.36.block_sparse_moe.experts.2.w2", "model.layers.36.block_sparse_moe.experts.3.w2", "model.layers.36.block_sparse_moe.experts.4.w2", "model.layers.36.block_sparse_moe.experts.5.w2", "model.layers.36.block_sparse_moe.experts.6.w2", "model.layers.36.block_sparse_moe.experts.7.w2", "model.layers.36.block_sparse_moe.experts.8.w2", "model.layers.36.block_sparse_moe.experts.9.w2", "model.layers.36.block_sparse_moe.experts.10.w2", "model.layers.36.block_sparse_moe.experts.11.w2", "model.layers.36.block_sparse_moe.experts.12.w2", "model.layers.36.block_sparse_moe.experts.13.w2", "model.layers.36.block_sparse_moe.experts.14.w2", "model.layers.36.block_sparse_moe.experts.15.w2", "model.layers.36.block_sparse_moe.experts.16.w2", "model.layers.36.block_sparse_moe.experts.17.w2", "model.layers.36.block_sparse_moe.experts.18.w2", "model.layers.36.block_sparse_moe.experts.19.w2", "model.layers.36.block_sparse_moe.experts.20.w2", "model.layers.36.block_sparse_moe.experts.21.w2", "model.layers.36.block_sparse_moe.experts.22.w2", "model.layers.36.block_sparse_moe.experts.23.w2", "model.layers.36.block_sparse_moe.experts.24.w2", "model.layers.36.block_sparse_moe.experts.25.w2", "model.layers.36.block_sparse_moe.experts.26.w2", "model.layers.36.block_sparse_moe.experts.27.w2", "model.layers.36.block_sparse_moe.experts.28.w2", "model.layers.36.block_sparse_moe.experts.29.w2", "model.layers.36.block_sparse_moe.experts.30.w2", "model.layers.36.block_sparse_moe.experts.31.w2", "model.layers.36.block_sparse_moe.experts.32.w2", "model.layers.36.block_sparse_moe.experts.33.w2", "model.layers.36.block_sparse_moe.experts.34.w2", "model.layers.36.block_sparse_moe.experts.35.w2", "model.layers.36.block_sparse_moe.experts.36.w2", "model.layers.36.block_sparse_moe.experts.37.w2", "model.layers.36.block_sparse_moe.experts.38.w2", "model.layers.36.block_sparse_moe.experts.39.w2", "model.layers.36.block_sparse_moe.experts.40.w2", "model.layers.36.block_sparse_moe.experts.41.w2", "model.layers.36.block_sparse_moe.experts.42.w2", "model.layers.36.block_sparse_moe.experts.43.w2", "model.layers.36.block_sparse_moe.experts.44.w2", "model.layers.36.block_sparse_moe.experts.45.w2", "model.layers.36.block_sparse_moe.experts.46.w2", "model.layers.36.block_sparse_moe.experts.47.w2", "model.layers.36.block_sparse_moe.experts.48.w2", "model.layers.36.block_sparse_moe.experts.49.w2", "model.layers.36.block_sparse_moe.experts.50.w2", "model.layers.36.block_sparse_moe.experts.51.w2", "model.layers.36.block_sparse_moe.experts.52.w2", "model.layers.36.block_sparse_moe.experts.53.w2", "model.layers.36.block_sparse_moe.experts.54.w2", "model.layers.36.block_sparse_moe.experts.55.w2", "model.layers.36.block_sparse_moe.experts.56.w2", "model.layers.36.block_sparse_moe.experts.57.w2", "model.layers.36.block_sparse_moe.experts.58.w2", "model.layers.36.block_sparse_moe.experts.59.w2", "model.layers.36.block_sparse_moe.experts.60.w2", "model.layers.36.block_sparse_moe.experts.61.w2", "model.layers.36.block_sparse_moe.experts.62.w2", "model.layers.36.block_sparse_moe.experts.63.w2", "model.layers.36.block_sparse_moe.experts.64.w2", "model.layers.36.block_sparse_moe.experts.65.w2", "model.layers.36.block_sparse_moe.experts.66.w2", "model.layers.36.block_sparse_moe.experts.67.w2", "model.layers.36.block_sparse_moe.experts.68.w2", "model.layers.36.block_sparse_moe.experts.69.w2", "model.layers.36.block_sparse_moe.experts.70.w2", "model.layers.36.block_sparse_moe.experts.71.w2", "model.layers.36.block_sparse_moe.experts.72.w2", "model.layers.36.block_sparse_moe.experts.73.w2", "model.layers.36.block_sparse_moe.experts.74.w2", "model.layers.36.block_sparse_moe.experts.75.w2", "model.layers.36.block_sparse_moe.experts.76.w2", "model.layers.36.block_sparse_moe.experts.77.w2", "model.layers.36.block_sparse_moe.experts.78.w2", "model.layers.36.block_sparse_moe.experts.79.w2", "model.layers.36.block_sparse_moe.experts.80.w2", "model.layers.36.block_sparse_moe.experts.81.w2", "model.layers.36.block_sparse_moe.experts.82.w2", "model.layers.36.block_sparse_moe.experts.83.w2", "model.layers.36.block_sparse_moe.experts.84.w2", "model.layers.36.block_sparse_moe.experts.85.w2", "model.layers.36.block_sparse_moe.experts.86.w2", "model.layers.36.block_sparse_moe.experts.87.w2", "model.layers.36.block_sparse_moe.experts.88.w2", "model.layers.36.block_sparse_moe.experts.89.w2", "model.layers.36.block_sparse_moe.experts.90.w2", "model.layers.36.block_sparse_moe.experts.91.w2", "model.layers.36.block_sparse_moe.experts.92.w2", "model.layers.36.block_sparse_moe.experts.93.w2", "model.layers.36.block_sparse_moe.experts.94.w2", "model.layers.36.block_sparse_moe.experts.95.w2", "model.layers.36.block_sparse_moe.experts.96.w2", "model.layers.36.block_sparse_moe.experts.97.w2", "model.layers.36.block_sparse_moe.experts.98.w2", "model.layers.36.block_sparse_moe.experts.99.w2", "model.layers.36.block_sparse_moe.experts.100.w2", "model.layers.36.block_sparse_moe.experts.101.w2", "model.layers.36.block_sparse_moe.experts.102.w2", "model.layers.36.block_sparse_moe.experts.103.w2", "model.layers.36.block_sparse_moe.experts.104.w2", "model.layers.36.block_sparse_moe.experts.105.w2", "model.layers.36.block_sparse_moe.experts.106.w2", "model.layers.36.block_sparse_moe.experts.107.w2", "model.layers.36.block_sparse_moe.experts.108.w2", "model.layers.36.block_sparse_moe.experts.109.w2", "model.layers.36.block_sparse_moe.experts.110.w2", "model.layers.36.block_sparse_moe.experts.111.w2", "model.layers.36.block_sparse_moe.experts.112.w2", "model.layers.36.block_sparse_moe.experts.113.w2", "model.layers.36.block_sparse_moe.experts.114.w2", "model.layers.36.block_sparse_moe.experts.115.w2", "model.layers.36.block_sparse_moe.experts.116.w2", "model.layers.36.block_sparse_moe.experts.117.w2", "model.layers.36.block_sparse_moe.experts.118.w2", "model.layers.36.block_sparse_moe.experts.119.w2", "model.layers.36.block_sparse_moe.experts.120.w2", "model.layers.36.block_sparse_moe.experts.121.w2", "model.layers.36.block_sparse_moe.experts.122.w2", "model.layers.36.block_sparse_moe.experts.123.w2", "model.layers.36.block_sparse_moe.experts.124.w2", "model.layers.36.block_sparse_moe.experts.125.w2", "model.layers.36.block_sparse_moe.experts.126.w2", "model.layers.36.block_sparse_moe.experts.127.w2", "model.layers.36.block_sparse_moe.experts.128.w2", "model.layers.36.block_sparse_moe.experts.129.w2", "model.layers.36.block_sparse_moe.experts.130.w2", "model.layers.36.block_sparse_moe.experts.131.w2", "model.layers.36.block_sparse_moe.experts.132.w2", "model.layers.36.block_sparse_moe.experts.133.w2", "model.layers.36.block_sparse_moe.experts.134.w2", "model.layers.36.block_sparse_moe.experts.135.w2", "model.layers.36.block_sparse_moe.experts.136.w2", "model.layers.36.block_sparse_moe.experts.137.w2", "model.layers.36.block_sparse_moe.experts.138.w2", "model.layers.36.block_sparse_moe.experts.139.w2", "model.layers.36.block_sparse_moe.experts.140.w2", "model.layers.36.block_sparse_moe.experts.141.w2", "model.layers.36.block_sparse_moe.experts.142.w2", "model.layers.36.block_sparse_moe.experts.143.w2", "model.layers.36.block_sparse_moe.experts.144.w2", "model.layers.36.block_sparse_moe.experts.145.w2", "model.layers.36.block_sparse_moe.experts.146.w2", "model.layers.36.block_sparse_moe.experts.147.w2", "model.layers.36.block_sparse_moe.experts.148.w2", "model.layers.36.block_sparse_moe.experts.149.w2", "model.layers.36.block_sparse_moe.experts.150.w2", "model.layers.36.block_sparse_moe.experts.151.w2", "model.layers.36.block_sparse_moe.experts.152.w2", "model.layers.36.block_sparse_moe.experts.153.w2", "model.layers.36.block_sparse_moe.experts.154.w2", "model.layers.36.block_sparse_moe.experts.155.w2", "model.layers.36.block_sparse_moe.experts.156.w2", "model.layers.36.block_sparse_moe.experts.157.w2", "model.layers.36.block_sparse_moe.experts.158.w2", "model.layers.36.block_sparse_moe.experts.159.w2", "model.layers.36.block_sparse_moe.experts.160.w2", "model.layers.36.block_sparse_moe.experts.161.w2", "model.layers.36.block_sparse_moe.experts.162.w2", "model.layers.36.block_sparse_moe.experts.163.w2", "model.layers.36.block_sparse_moe.experts.164.w2", "model.layers.36.block_sparse_moe.experts.165.w2", "model.layers.36.block_sparse_moe.experts.166.w2", "model.layers.36.block_sparse_moe.experts.167.w2", "model.layers.36.block_sparse_moe.experts.168.w2", "model.layers.36.block_sparse_moe.experts.169.w2", "model.layers.36.block_sparse_moe.experts.170.w2", "model.layers.36.block_sparse_moe.experts.171.w2", "model.layers.36.block_sparse_moe.experts.172.w2", "model.layers.36.block_sparse_moe.experts.173.w2", "model.layers.36.block_sparse_moe.experts.174.w2", "model.layers.36.block_sparse_moe.experts.175.w2", "model.layers.36.block_sparse_moe.experts.176.w2", "model.layers.36.block_sparse_moe.experts.177.w2", "model.layers.36.block_sparse_moe.experts.178.w2", "model.layers.36.block_sparse_moe.experts.179.w2", "model.layers.36.block_sparse_moe.experts.180.w2", "model.layers.36.block_sparse_moe.experts.181.w2", "model.layers.36.block_sparse_moe.experts.182.w2", "model.layers.36.block_sparse_moe.experts.183.w2", "model.layers.36.block_sparse_moe.experts.184.w2", "model.layers.36.block_sparse_moe.experts.185.w2", "model.layers.36.block_sparse_moe.experts.186.w2", "model.layers.36.block_sparse_moe.experts.187.w2", "model.layers.36.block_sparse_moe.experts.188.w2", "model.layers.36.block_sparse_moe.experts.189.w2", "model.layers.36.block_sparse_moe.experts.190.w2", "model.layers.36.block_sparse_moe.experts.191.w2", "model.layers.36.block_sparse_moe.experts.192.w2", "model.layers.36.block_sparse_moe.experts.193.w2", "model.layers.36.block_sparse_moe.experts.194.w2", "model.layers.36.block_sparse_moe.experts.195.w2", "model.layers.36.block_sparse_moe.experts.196.w2", "model.layers.36.block_sparse_moe.experts.197.w2", "model.layers.36.block_sparse_moe.experts.198.w2", "model.layers.36.block_sparse_moe.experts.199.w2", "model.layers.36.block_sparse_moe.experts.200.w2", "model.layers.36.block_sparse_moe.experts.201.w2", "model.layers.36.block_sparse_moe.experts.202.w2", "model.layers.36.block_sparse_moe.experts.203.w2", "model.layers.36.block_sparse_moe.experts.204.w2", "model.layers.36.block_sparse_moe.experts.205.w2", "model.layers.36.block_sparse_moe.experts.206.w2", "model.layers.36.block_sparse_moe.experts.207.w2", "model.layers.36.block_sparse_moe.experts.208.w2", "model.layers.36.block_sparse_moe.experts.209.w2", "model.layers.36.block_sparse_moe.experts.210.w2", "model.layers.36.block_sparse_moe.experts.211.w2", "model.layers.36.block_sparse_moe.experts.212.w2", "model.layers.36.block_sparse_moe.experts.213.w2", "model.layers.36.block_sparse_moe.experts.214.w2", "model.layers.36.block_sparse_moe.experts.215.w2", "model.layers.36.block_sparse_moe.experts.216.w2", "model.layers.36.block_sparse_moe.experts.217.w2", "model.layers.36.block_sparse_moe.experts.218.w2", "model.layers.36.block_sparse_moe.experts.219.w2", "model.layers.36.block_sparse_moe.experts.220.w2", "model.layers.36.block_sparse_moe.experts.221.w2", "model.layers.36.block_sparse_moe.experts.222.w2", "model.layers.36.block_sparse_moe.experts.223.w2", "model.layers.36.block_sparse_moe.experts.224.w2", "model.layers.36.block_sparse_moe.experts.225.w2", "model.layers.36.block_sparse_moe.experts.226.w2", "model.layers.36.block_sparse_moe.experts.227.w2", "model.layers.36.block_sparse_moe.experts.228.w2", "model.layers.36.block_sparse_moe.experts.229.w2", "model.layers.36.block_sparse_moe.experts.230.w2", "model.layers.36.block_sparse_moe.experts.231.w2", "model.layers.36.block_sparse_moe.experts.232.w2", "model.layers.36.block_sparse_moe.experts.233.w2", "model.layers.36.block_sparse_moe.experts.234.w2", "model.layers.36.block_sparse_moe.experts.235.w2", "model.layers.36.block_sparse_moe.experts.236.w2", "model.layers.36.block_sparse_moe.experts.237.w2", "model.layers.36.block_sparse_moe.experts.238.w2", "model.layers.36.block_sparse_moe.experts.239.w2", "model.layers.36.block_sparse_moe.experts.240.w2", "model.layers.36.block_sparse_moe.experts.241.w2", "model.layers.36.block_sparse_moe.experts.242.w2", "model.layers.36.block_sparse_moe.experts.243.w2", "model.layers.36.block_sparse_moe.experts.244.w2", "model.layers.36.block_sparse_moe.experts.245.w2", "model.layers.36.block_sparse_moe.experts.246.w2", "model.layers.36.block_sparse_moe.experts.247.w2", "model.layers.36.block_sparse_moe.experts.248.w2", "model.layers.36.block_sparse_moe.experts.249.w2", "model.layers.36.block_sparse_moe.experts.250.w2", "model.layers.36.block_sparse_moe.experts.251.w2", "model.layers.36.block_sparse_moe.experts.252.w2", "model.layers.36.block_sparse_moe.experts.253.w2", "model.layers.36.block_sparse_moe.experts.254.w2", "model.layers.36.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0016372680664061834, "dbits": 1207959552 } ] }, { "idx": 185, "layers": [ "model.layers.37.self_attn.q_proj" ], "candidates": [ { "dkld": -0.007250311970710688, "dbits": 18874368 } ] }, { "idx": 186, "layers": [ "model.layers.37.self_attn.k_proj", "model.layers.37.self_attn.v_proj" ], "candidates": [ { "dkld": 0.002997487783432007, "dbits": 6291456 } ] }, { "idx": 187, "layers": [ "model.layers.37.self_attn.o_proj" ], "candidates": [ { "dkld": -0.019002394378185183, "dbits": 18874368 } ] }, { "idx": 188, "layers": [ "model.layers.37.block_sparse_moe.experts.0.w1", "model.layers.37.block_sparse_moe.experts.1.w1", "model.layers.37.block_sparse_moe.experts.2.w1", "model.layers.37.block_sparse_moe.experts.3.w1", "model.layers.37.block_sparse_moe.experts.4.w1", "model.layers.37.block_sparse_moe.experts.5.w1", "model.layers.37.block_sparse_moe.experts.6.w1", "model.layers.37.block_sparse_moe.experts.7.w1", "model.layers.37.block_sparse_moe.experts.8.w1", "model.layers.37.block_sparse_moe.experts.9.w1", "model.layers.37.block_sparse_moe.experts.10.w1", "model.layers.37.block_sparse_moe.experts.11.w1", "model.layers.37.block_sparse_moe.experts.12.w1", "model.layers.37.block_sparse_moe.experts.13.w1", "model.layers.37.block_sparse_moe.experts.14.w1", "model.layers.37.block_sparse_moe.experts.15.w1", "model.layers.37.block_sparse_moe.experts.16.w1", "model.layers.37.block_sparse_moe.experts.17.w1", "model.layers.37.block_sparse_moe.experts.18.w1", "model.layers.37.block_sparse_moe.experts.19.w1", "model.layers.37.block_sparse_moe.experts.20.w1", "model.layers.37.block_sparse_moe.experts.21.w1", "model.layers.37.block_sparse_moe.experts.22.w1", "model.layers.37.block_sparse_moe.experts.23.w1", "model.layers.37.block_sparse_moe.experts.24.w1", "model.layers.37.block_sparse_moe.experts.25.w1", "model.layers.37.block_sparse_moe.experts.26.w1", "model.layers.37.block_sparse_moe.experts.27.w1", "model.layers.37.block_sparse_moe.experts.28.w1", "model.layers.37.block_sparse_moe.experts.29.w1", "model.layers.37.block_sparse_moe.experts.30.w1", "model.layers.37.block_sparse_moe.experts.31.w1", "model.layers.37.block_sparse_moe.experts.32.w1", "model.layers.37.block_sparse_moe.experts.33.w1", "model.layers.37.block_sparse_moe.experts.34.w1", "model.layers.37.block_sparse_moe.experts.35.w1", "model.layers.37.block_sparse_moe.experts.36.w1", "model.layers.37.block_sparse_moe.experts.37.w1", "model.layers.37.block_sparse_moe.experts.38.w1", "model.layers.37.block_sparse_moe.experts.39.w1", "model.layers.37.block_sparse_moe.experts.40.w1", "model.layers.37.block_sparse_moe.experts.41.w1", "model.layers.37.block_sparse_moe.experts.42.w1", "model.layers.37.block_sparse_moe.experts.43.w1", "model.layers.37.block_sparse_moe.experts.44.w1", "model.layers.37.block_sparse_moe.experts.45.w1", "model.layers.37.block_sparse_moe.experts.46.w1", "model.layers.37.block_sparse_moe.experts.47.w1", "model.layers.37.block_sparse_moe.experts.48.w1", "model.layers.37.block_sparse_moe.experts.49.w1", "model.layers.37.block_sparse_moe.experts.50.w1", "model.layers.37.block_sparse_moe.experts.51.w1", "model.layers.37.block_sparse_moe.experts.52.w1", "model.layers.37.block_sparse_moe.experts.53.w1", "model.layers.37.block_sparse_moe.experts.54.w1", "model.layers.37.block_sparse_moe.experts.55.w1", "model.layers.37.block_sparse_moe.experts.56.w1", "model.layers.37.block_sparse_moe.experts.57.w1", "model.layers.37.block_sparse_moe.experts.58.w1", "model.layers.37.block_sparse_moe.experts.59.w1", "model.layers.37.block_sparse_moe.experts.60.w1", "model.layers.37.block_sparse_moe.experts.61.w1", "model.layers.37.block_sparse_moe.experts.62.w1", "model.layers.37.block_sparse_moe.experts.63.w1", "model.layers.37.block_sparse_moe.experts.64.w1", "model.layers.37.block_sparse_moe.experts.65.w1", "model.layers.37.block_sparse_moe.experts.66.w1", "model.layers.37.block_sparse_moe.experts.67.w1", "model.layers.37.block_sparse_moe.experts.68.w1", "model.layers.37.block_sparse_moe.experts.69.w1", "model.layers.37.block_sparse_moe.experts.70.w1", "model.layers.37.block_sparse_moe.experts.71.w1", "model.layers.37.block_sparse_moe.experts.72.w1", "model.layers.37.block_sparse_moe.experts.73.w1", "model.layers.37.block_sparse_moe.experts.74.w1", "model.layers.37.block_sparse_moe.experts.75.w1", "model.layers.37.block_sparse_moe.experts.76.w1", "model.layers.37.block_sparse_moe.experts.77.w1", "model.layers.37.block_sparse_moe.experts.78.w1", "model.layers.37.block_sparse_moe.experts.79.w1", "model.layers.37.block_sparse_moe.experts.80.w1", "model.layers.37.block_sparse_moe.experts.81.w1", "model.layers.37.block_sparse_moe.experts.82.w1", "model.layers.37.block_sparse_moe.experts.83.w1", "model.layers.37.block_sparse_moe.experts.84.w1", "model.layers.37.block_sparse_moe.experts.85.w1", "model.layers.37.block_sparse_moe.experts.86.w1", "model.layers.37.block_sparse_moe.experts.87.w1", "model.layers.37.block_sparse_moe.experts.88.w1", "model.layers.37.block_sparse_moe.experts.89.w1", "model.layers.37.block_sparse_moe.experts.90.w1", "model.layers.37.block_sparse_moe.experts.91.w1", "model.layers.37.block_sparse_moe.experts.92.w1", "model.layers.37.block_sparse_moe.experts.93.w1", "model.layers.37.block_sparse_moe.experts.94.w1", "model.layers.37.block_sparse_moe.experts.95.w1", "model.layers.37.block_sparse_moe.experts.96.w1", "model.layers.37.block_sparse_moe.experts.97.w1", "model.layers.37.block_sparse_moe.experts.98.w1", "model.layers.37.block_sparse_moe.experts.99.w1", "model.layers.37.block_sparse_moe.experts.100.w1", "model.layers.37.block_sparse_moe.experts.101.w1", "model.layers.37.block_sparse_moe.experts.102.w1", "model.layers.37.block_sparse_moe.experts.103.w1", "model.layers.37.block_sparse_moe.experts.104.w1", "model.layers.37.block_sparse_moe.experts.105.w1", "model.layers.37.block_sparse_moe.experts.106.w1", "model.layers.37.block_sparse_moe.experts.107.w1", "model.layers.37.block_sparse_moe.experts.108.w1", "model.layers.37.block_sparse_moe.experts.109.w1", "model.layers.37.block_sparse_moe.experts.110.w1", "model.layers.37.block_sparse_moe.experts.111.w1", "model.layers.37.block_sparse_moe.experts.112.w1", "model.layers.37.block_sparse_moe.experts.113.w1", "model.layers.37.block_sparse_moe.experts.114.w1", "model.layers.37.block_sparse_moe.experts.115.w1", "model.layers.37.block_sparse_moe.experts.116.w1", "model.layers.37.block_sparse_moe.experts.117.w1", "model.layers.37.block_sparse_moe.experts.118.w1", "model.layers.37.block_sparse_moe.experts.119.w1", "model.layers.37.block_sparse_moe.experts.120.w1", "model.layers.37.block_sparse_moe.experts.121.w1", "model.layers.37.block_sparse_moe.experts.122.w1", "model.layers.37.block_sparse_moe.experts.123.w1", "model.layers.37.block_sparse_moe.experts.124.w1", "model.layers.37.block_sparse_moe.experts.125.w1", "model.layers.37.block_sparse_moe.experts.126.w1", "model.layers.37.block_sparse_moe.experts.127.w1", "model.layers.37.block_sparse_moe.experts.128.w1", "model.layers.37.block_sparse_moe.experts.129.w1", "model.layers.37.block_sparse_moe.experts.130.w1", "model.layers.37.block_sparse_moe.experts.131.w1", "model.layers.37.block_sparse_moe.experts.132.w1", "model.layers.37.block_sparse_moe.experts.133.w1", "model.layers.37.block_sparse_moe.experts.134.w1", "model.layers.37.block_sparse_moe.experts.135.w1", "model.layers.37.block_sparse_moe.experts.136.w1", "model.layers.37.block_sparse_moe.experts.137.w1", "model.layers.37.block_sparse_moe.experts.138.w1", "model.layers.37.block_sparse_moe.experts.139.w1", "model.layers.37.block_sparse_moe.experts.140.w1", "model.layers.37.block_sparse_moe.experts.141.w1", "model.layers.37.block_sparse_moe.experts.142.w1", "model.layers.37.block_sparse_moe.experts.143.w1", "model.layers.37.block_sparse_moe.experts.144.w1", "model.layers.37.block_sparse_moe.experts.145.w1", "model.layers.37.block_sparse_moe.experts.146.w1", "model.layers.37.block_sparse_moe.experts.147.w1", "model.layers.37.block_sparse_moe.experts.148.w1", "model.layers.37.block_sparse_moe.experts.149.w1", "model.layers.37.block_sparse_moe.experts.150.w1", "model.layers.37.block_sparse_moe.experts.151.w1", "model.layers.37.block_sparse_moe.experts.152.w1", "model.layers.37.block_sparse_moe.experts.153.w1", "model.layers.37.block_sparse_moe.experts.154.w1", "model.layers.37.block_sparse_moe.experts.155.w1", "model.layers.37.block_sparse_moe.experts.156.w1", "model.layers.37.block_sparse_moe.experts.157.w1", "model.layers.37.block_sparse_moe.experts.158.w1", "model.layers.37.block_sparse_moe.experts.159.w1", "model.layers.37.block_sparse_moe.experts.160.w1", "model.layers.37.block_sparse_moe.experts.161.w1", "model.layers.37.block_sparse_moe.experts.162.w1", "model.layers.37.block_sparse_moe.experts.163.w1", "model.layers.37.block_sparse_moe.experts.164.w1", "model.layers.37.block_sparse_moe.experts.165.w1", "model.layers.37.block_sparse_moe.experts.166.w1", "model.layers.37.block_sparse_moe.experts.167.w1", "model.layers.37.block_sparse_moe.experts.168.w1", "model.layers.37.block_sparse_moe.experts.169.w1", "model.layers.37.block_sparse_moe.experts.170.w1", "model.layers.37.block_sparse_moe.experts.171.w1", "model.layers.37.block_sparse_moe.experts.172.w1", "model.layers.37.block_sparse_moe.experts.173.w1", "model.layers.37.block_sparse_moe.experts.174.w1", "model.layers.37.block_sparse_moe.experts.175.w1", "model.layers.37.block_sparse_moe.experts.176.w1", "model.layers.37.block_sparse_moe.experts.177.w1", "model.layers.37.block_sparse_moe.experts.178.w1", "model.layers.37.block_sparse_moe.experts.179.w1", "model.layers.37.block_sparse_moe.experts.180.w1", "model.layers.37.block_sparse_moe.experts.181.w1", "model.layers.37.block_sparse_moe.experts.182.w1", "model.layers.37.block_sparse_moe.experts.183.w1", "model.layers.37.block_sparse_moe.experts.184.w1", "model.layers.37.block_sparse_moe.experts.185.w1", "model.layers.37.block_sparse_moe.experts.186.w1", "model.layers.37.block_sparse_moe.experts.187.w1", "model.layers.37.block_sparse_moe.experts.188.w1", "model.layers.37.block_sparse_moe.experts.189.w1", "model.layers.37.block_sparse_moe.experts.190.w1", "model.layers.37.block_sparse_moe.experts.191.w1", "model.layers.37.block_sparse_moe.experts.192.w1", "model.layers.37.block_sparse_moe.experts.193.w1", "model.layers.37.block_sparse_moe.experts.194.w1", "model.layers.37.block_sparse_moe.experts.195.w1", "model.layers.37.block_sparse_moe.experts.196.w1", "model.layers.37.block_sparse_moe.experts.197.w1", "model.layers.37.block_sparse_moe.experts.198.w1", "model.layers.37.block_sparse_moe.experts.199.w1", "model.layers.37.block_sparse_moe.experts.200.w1", "model.layers.37.block_sparse_moe.experts.201.w1", "model.layers.37.block_sparse_moe.experts.202.w1", "model.layers.37.block_sparse_moe.experts.203.w1", "model.layers.37.block_sparse_moe.experts.204.w1", "model.layers.37.block_sparse_moe.experts.205.w1", "model.layers.37.block_sparse_moe.experts.206.w1", "model.layers.37.block_sparse_moe.experts.207.w1", "model.layers.37.block_sparse_moe.experts.208.w1", "model.layers.37.block_sparse_moe.experts.209.w1", "model.layers.37.block_sparse_moe.experts.210.w1", "model.layers.37.block_sparse_moe.experts.211.w1", "model.layers.37.block_sparse_moe.experts.212.w1", "model.layers.37.block_sparse_moe.experts.213.w1", "model.layers.37.block_sparse_moe.experts.214.w1", "model.layers.37.block_sparse_moe.experts.215.w1", "model.layers.37.block_sparse_moe.experts.216.w1", "model.layers.37.block_sparse_moe.experts.217.w1", "model.layers.37.block_sparse_moe.experts.218.w1", "model.layers.37.block_sparse_moe.experts.219.w1", "model.layers.37.block_sparse_moe.experts.220.w1", "model.layers.37.block_sparse_moe.experts.221.w1", "model.layers.37.block_sparse_moe.experts.222.w1", "model.layers.37.block_sparse_moe.experts.223.w1", "model.layers.37.block_sparse_moe.experts.224.w1", "model.layers.37.block_sparse_moe.experts.225.w1", "model.layers.37.block_sparse_moe.experts.226.w1", "model.layers.37.block_sparse_moe.experts.227.w1", "model.layers.37.block_sparse_moe.experts.228.w1", "model.layers.37.block_sparse_moe.experts.229.w1", "model.layers.37.block_sparse_moe.experts.230.w1", "model.layers.37.block_sparse_moe.experts.231.w1", "model.layers.37.block_sparse_moe.experts.232.w1", "model.layers.37.block_sparse_moe.experts.233.w1", "model.layers.37.block_sparse_moe.experts.234.w1", "model.layers.37.block_sparse_moe.experts.235.w1", "model.layers.37.block_sparse_moe.experts.236.w1", "model.layers.37.block_sparse_moe.experts.237.w1", "model.layers.37.block_sparse_moe.experts.238.w1", "model.layers.37.block_sparse_moe.experts.239.w1", "model.layers.37.block_sparse_moe.experts.240.w1", "model.layers.37.block_sparse_moe.experts.241.w1", "model.layers.37.block_sparse_moe.experts.242.w1", "model.layers.37.block_sparse_moe.experts.243.w1", "model.layers.37.block_sparse_moe.experts.244.w1", "model.layers.37.block_sparse_moe.experts.245.w1", "model.layers.37.block_sparse_moe.experts.246.w1", "model.layers.37.block_sparse_moe.experts.247.w1", "model.layers.37.block_sparse_moe.experts.248.w1", "model.layers.37.block_sparse_moe.experts.249.w1", "model.layers.37.block_sparse_moe.experts.250.w1", "model.layers.37.block_sparse_moe.experts.251.w1", "model.layers.37.block_sparse_moe.experts.252.w1", "model.layers.37.block_sparse_moe.experts.253.w1", "model.layers.37.block_sparse_moe.experts.254.w1", "model.layers.37.block_sparse_moe.experts.255.w1", "model.layers.37.block_sparse_moe.experts.0.w3", "model.layers.37.block_sparse_moe.experts.1.w3", "model.layers.37.block_sparse_moe.experts.2.w3", "model.layers.37.block_sparse_moe.experts.3.w3", "model.layers.37.block_sparse_moe.experts.4.w3", "model.layers.37.block_sparse_moe.experts.5.w3", "model.layers.37.block_sparse_moe.experts.6.w3", "model.layers.37.block_sparse_moe.experts.7.w3", "model.layers.37.block_sparse_moe.experts.8.w3", "model.layers.37.block_sparse_moe.experts.9.w3", "model.layers.37.block_sparse_moe.experts.10.w3", "model.layers.37.block_sparse_moe.experts.11.w3", "model.layers.37.block_sparse_moe.experts.12.w3", "model.layers.37.block_sparse_moe.experts.13.w3", "model.layers.37.block_sparse_moe.experts.14.w3", "model.layers.37.block_sparse_moe.experts.15.w3", "model.layers.37.block_sparse_moe.experts.16.w3", "model.layers.37.block_sparse_moe.experts.17.w3", "model.layers.37.block_sparse_moe.experts.18.w3", "model.layers.37.block_sparse_moe.experts.19.w3", "model.layers.37.block_sparse_moe.experts.20.w3", "model.layers.37.block_sparse_moe.experts.21.w3", "model.layers.37.block_sparse_moe.experts.22.w3", "model.layers.37.block_sparse_moe.experts.23.w3", "model.layers.37.block_sparse_moe.experts.24.w3", "model.layers.37.block_sparse_moe.experts.25.w3", "model.layers.37.block_sparse_moe.experts.26.w3", "model.layers.37.block_sparse_moe.experts.27.w3", "model.layers.37.block_sparse_moe.experts.28.w3", "model.layers.37.block_sparse_moe.experts.29.w3", "model.layers.37.block_sparse_moe.experts.30.w3", "model.layers.37.block_sparse_moe.experts.31.w3", "model.layers.37.block_sparse_moe.experts.32.w3", "model.layers.37.block_sparse_moe.experts.33.w3", "model.layers.37.block_sparse_moe.experts.34.w3", "model.layers.37.block_sparse_moe.experts.35.w3", "model.layers.37.block_sparse_moe.experts.36.w3", "model.layers.37.block_sparse_moe.experts.37.w3", "model.layers.37.block_sparse_moe.experts.38.w3", "model.layers.37.block_sparse_moe.experts.39.w3", "model.layers.37.block_sparse_moe.experts.40.w3", "model.layers.37.block_sparse_moe.experts.41.w3", "model.layers.37.block_sparse_moe.experts.42.w3", "model.layers.37.block_sparse_moe.experts.43.w3", "model.layers.37.block_sparse_moe.experts.44.w3", "model.layers.37.block_sparse_moe.experts.45.w3", "model.layers.37.block_sparse_moe.experts.46.w3", "model.layers.37.block_sparse_moe.experts.47.w3", "model.layers.37.block_sparse_moe.experts.48.w3", "model.layers.37.block_sparse_moe.experts.49.w3", "model.layers.37.block_sparse_moe.experts.50.w3", "model.layers.37.block_sparse_moe.experts.51.w3", "model.layers.37.block_sparse_moe.experts.52.w3", "model.layers.37.block_sparse_moe.experts.53.w3", "model.layers.37.block_sparse_moe.experts.54.w3", "model.layers.37.block_sparse_moe.experts.55.w3", "model.layers.37.block_sparse_moe.experts.56.w3", "model.layers.37.block_sparse_moe.experts.57.w3", "model.layers.37.block_sparse_moe.experts.58.w3", "model.layers.37.block_sparse_moe.experts.59.w3", "model.layers.37.block_sparse_moe.experts.60.w3", "model.layers.37.block_sparse_moe.experts.61.w3", "model.layers.37.block_sparse_moe.experts.62.w3", "model.layers.37.block_sparse_moe.experts.63.w3", "model.layers.37.block_sparse_moe.experts.64.w3", "model.layers.37.block_sparse_moe.experts.65.w3", "model.layers.37.block_sparse_moe.experts.66.w3", "model.layers.37.block_sparse_moe.experts.67.w3", "model.layers.37.block_sparse_moe.experts.68.w3", "model.layers.37.block_sparse_moe.experts.69.w3", "model.layers.37.block_sparse_moe.experts.70.w3", "model.layers.37.block_sparse_moe.experts.71.w3", "model.layers.37.block_sparse_moe.experts.72.w3", "model.layers.37.block_sparse_moe.experts.73.w3", "model.layers.37.block_sparse_moe.experts.74.w3", "model.layers.37.block_sparse_moe.experts.75.w3", "model.layers.37.block_sparse_moe.experts.76.w3", "model.layers.37.block_sparse_moe.experts.77.w3", "model.layers.37.block_sparse_moe.experts.78.w3", "model.layers.37.block_sparse_moe.experts.79.w3", "model.layers.37.block_sparse_moe.experts.80.w3", "model.layers.37.block_sparse_moe.experts.81.w3", "model.layers.37.block_sparse_moe.experts.82.w3", "model.layers.37.block_sparse_moe.experts.83.w3", "model.layers.37.block_sparse_moe.experts.84.w3", "model.layers.37.block_sparse_moe.experts.85.w3", "model.layers.37.block_sparse_moe.experts.86.w3", "model.layers.37.block_sparse_moe.experts.87.w3", "model.layers.37.block_sparse_moe.experts.88.w3", "model.layers.37.block_sparse_moe.experts.89.w3", "model.layers.37.block_sparse_moe.experts.90.w3", "model.layers.37.block_sparse_moe.experts.91.w3", "model.layers.37.block_sparse_moe.experts.92.w3", "model.layers.37.block_sparse_moe.experts.93.w3", "model.layers.37.block_sparse_moe.experts.94.w3", "model.layers.37.block_sparse_moe.experts.95.w3", "model.layers.37.block_sparse_moe.experts.96.w3", "model.layers.37.block_sparse_moe.experts.97.w3", "model.layers.37.block_sparse_moe.experts.98.w3", "model.layers.37.block_sparse_moe.experts.99.w3", "model.layers.37.block_sparse_moe.experts.100.w3", "model.layers.37.block_sparse_moe.experts.101.w3", "model.layers.37.block_sparse_moe.experts.102.w3", "model.layers.37.block_sparse_moe.experts.103.w3", "model.layers.37.block_sparse_moe.experts.104.w3", "model.layers.37.block_sparse_moe.experts.105.w3", "model.layers.37.block_sparse_moe.experts.106.w3", "model.layers.37.block_sparse_moe.experts.107.w3", "model.layers.37.block_sparse_moe.experts.108.w3", "model.layers.37.block_sparse_moe.experts.109.w3", "model.layers.37.block_sparse_moe.experts.110.w3", "model.layers.37.block_sparse_moe.experts.111.w3", "model.layers.37.block_sparse_moe.experts.112.w3", "model.layers.37.block_sparse_moe.experts.113.w3", "model.layers.37.block_sparse_moe.experts.114.w3", "model.layers.37.block_sparse_moe.experts.115.w3", "model.layers.37.block_sparse_moe.experts.116.w3", "model.layers.37.block_sparse_moe.experts.117.w3", "model.layers.37.block_sparse_moe.experts.118.w3", "model.layers.37.block_sparse_moe.experts.119.w3", "model.layers.37.block_sparse_moe.experts.120.w3", "model.layers.37.block_sparse_moe.experts.121.w3", "model.layers.37.block_sparse_moe.experts.122.w3", "model.layers.37.block_sparse_moe.experts.123.w3", "model.layers.37.block_sparse_moe.experts.124.w3", "model.layers.37.block_sparse_moe.experts.125.w3", "model.layers.37.block_sparse_moe.experts.126.w3", "model.layers.37.block_sparse_moe.experts.127.w3", "model.layers.37.block_sparse_moe.experts.128.w3", "model.layers.37.block_sparse_moe.experts.129.w3", "model.layers.37.block_sparse_moe.experts.130.w3", "model.layers.37.block_sparse_moe.experts.131.w3", "model.layers.37.block_sparse_moe.experts.132.w3", "model.layers.37.block_sparse_moe.experts.133.w3", "model.layers.37.block_sparse_moe.experts.134.w3", "model.layers.37.block_sparse_moe.experts.135.w3", "model.layers.37.block_sparse_moe.experts.136.w3", "model.layers.37.block_sparse_moe.experts.137.w3", "model.layers.37.block_sparse_moe.experts.138.w3", "model.layers.37.block_sparse_moe.experts.139.w3", "model.layers.37.block_sparse_moe.experts.140.w3", "model.layers.37.block_sparse_moe.experts.141.w3", "model.layers.37.block_sparse_moe.experts.142.w3", "model.layers.37.block_sparse_moe.experts.143.w3", "model.layers.37.block_sparse_moe.experts.144.w3", "model.layers.37.block_sparse_moe.experts.145.w3", "model.layers.37.block_sparse_moe.experts.146.w3", "model.layers.37.block_sparse_moe.experts.147.w3", "model.layers.37.block_sparse_moe.experts.148.w3", "model.layers.37.block_sparse_moe.experts.149.w3", "model.layers.37.block_sparse_moe.experts.150.w3", "model.layers.37.block_sparse_moe.experts.151.w3", "model.layers.37.block_sparse_moe.experts.152.w3", "model.layers.37.block_sparse_moe.experts.153.w3", "model.layers.37.block_sparse_moe.experts.154.w3", "model.layers.37.block_sparse_moe.experts.155.w3", "model.layers.37.block_sparse_moe.experts.156.w3", "model.layers.37.block_sparse_moe.experts.157.w3", "model.layers.37.block_sparse_moe.experts.158.w3", "model.layers.37.block_sparse_moe.experts.159.w3", "model.layers.37.block_sparse_moe.experts.160.w3", "model.layers.37.block_sparse_moe.experts.161.w3", "model.layers.37.block_sparse_moe.experts.162.w3", "model.layers.37.block_sparse_moe.experts.163.w3", "model.layers.37.block_sparse_moe.experts.164.w3", "model.layers.37.block_sparse_moe.experts.165.w3", "model.layers.37.block_sparse_moe.experts.166.w3", "model.layers.37.block_sparse_moe.experts.167.w3", "model.layers.37.block_sparse_moe.experts.168.w3", "model.layers.37.block_sparse_moe.experts.169.w3", "model.layers.37.block_sparse_moe.experts.170.w3", "model.layers.37.block_sparse_moe.experts.171.w3", "model.layers.37.block_sparse_moe.experts.172.w3", "model.layers.37.block_sparse_moe.experts.173.w3", "model.layers.37.block_sparse_moe.experts.174.w3", "model.layers.37.block_sparse_moe.experts.175.w3", "model.layers.37.block_sparse_moe.experts.176.w3", "model.layers.37.block_sparse_moe.experts.177.w3", "model.layers.37.block_sparse_moe.experts.178.w3", "model.layers.37.block_sparse_moe.experts.179.w3", "model.layers.37.block_sparse_moe.experts.180.w3", "model.layers.37.block_sparse_moe.experts.181.w3", "model.layers.37.block_sparse_moe.experts.182.w3", "model.layers.37.block_sparse_moe.experts.183.w3", "model.layers.37.block_sparse_moe.experts.184.w3", "model.layers.37.block_sparse_moe.experts.185.w3", "model.layers.37.block_sparse_moe.experts.186.w3", "model.layers.37.block_sparse_moe.experts.187.w3", "model.layers.37.block_sparse_moe.experts.188.w3", "model.layers.37.block_sparse_moe.experts.189.w3", "model.layers.37.block_sparse_moe.experts.190.w3", "model.layers.37.block_sparse_moe.experts.191.w3", "model.layers.37.block_sparse_moe.experts.192.w3", "model.layers.37.block_sparse_moe.experts.193.w3", "model.layers.37.block_sparse_moe.experts.194.w3", "model.layers.37.block_sparse_moe.experts.195.w3", "model.layers.37.block_sparse_moe.experts.196.w3", "model.layers.37.block_sparse_moe.experts.197.w3", "model.layers.37.block_sparse_moe.experts.198.w3", "model.layers.37.block_sparse_moe.experts.199.w3", "model.layers.37.block_sparse_moe.experts.200.w3", "model.layers.37.block_sparse_moe.experts.201.w3", "model.layers.37.block_sparse_moe.experts.202.w3", "model.layers.37.block_sparse_moe.experts.203.w3", "model.layers.37.block_sparse_moe.experts.204.w3", "model.layers.37.block_sparse_moe.experts.205.w3", "model.layers.37.block_sparse_moe.experts.206.w3", "model.layers.37.block_sparse_moe.experts.207.w3", "model.layers.37.block_sparse_moe.experts.208.w3", "model.layers.37.block_sparse_moe.experts.209.w3", "model.layers.37.block_sparse_moe.experts.210.w3", "model.layers.37.block_sparse_moe.experts.211.w3", "model.layers.37.block_sparse_moe.experts.212.w3", "model.layers.37.block_sparse_moe.experts.213.w3", "model.layers.37.block_sparse_moe.experts.214.w3", "model.layers.37.block_sparse_moe.experts.215.w3", "model.layers.37.block_sparse_moe.experts.216.w3", "model.layers.37.block_sparse_moe.experts.217.w3", "model.layers.37.block_sparse_moe.experts.218.w3", "model.layers.37.block_sparse_moe.experts.219.w3", "model.layers.37.block_sparse_moe.experts.220.w3", "model.layers.37.block_sparse_moe.experts.221.w3", "model.layers.37.block_sparse_moe.experts.222.w3", "model.layers.37.block_sparse_moe.experts.223.w3", "model.layers.37.block_sparse_moe.experts.224.w3", "model.layers.37.block_sparse_moe.experts.225.w3", "model.layers.37.block_sparse_moe.experts.226.w3", "model.layers.37.block_sparse_moe.experts.227.w3", "model.layers.37.block_sparse_moe.experts.228.w3", "model.layers.37.block_sparse_moe.experts.229.w3", "model.layers.37.block_sparse_moe.experts.230.w3", "model.layers.37.block_sparse_moe.experts.231.w3", "model.layers.37.block_sparse_moe.experts.232.w3", "model.layers.37.block_sparse_moe.experts.233.w3", "model.layers.37.block_sparse_moe.experts.234.w3", "model.layers.37.block_sparse_moe.experts.235.w3", "model.layers.37.block_sparse_moe.experts.236.w3", "model.layers.37.block_sparse_moe.experts.237.w3", "model.layers.37.block_sparse_moe.experts.238.w3", "model.layers.37.block_sparse_moe.experts.239.w3", "model.layers.37.block_sparse_moe.experts.240.w3", "model.layers.37.block_sparse_moe.experts.241.w3", "model.layers.37.block_sparse_moe.experts.242.w3", "model.layers.37.block_sparse_moe.experts.243.w3", "model.layers.37.block_sparse_moe.experts.244.w3", "model.layers.37.block_sparse_moe.experts.245.w3", "model.layers.37.block_sparse_moe.experts.246.w3", "model.layers.37.block_sparse_moe.experts.247.w3", "model.layers.37.block_sparse_moe.experts.248.w3", "model.layers.37.block_sparse_moe.experts.249.w3", "model.layers.37.block_sparse_moe.experts.250.w3", "model.layers.37.block_sparse_moe.experts.251.w3", "model.layers.37.block_sparse_moe.experts.252.w3", "model.layers.37.block_sparse_moe.experts.253.w3", "model.layers.37.block_sparse_moe.experts.254.w3", "model.layers.37.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00046443939208984375, "dbits": 2415919104 } ] }, { "idx": 189, "layers": [ "model.layers.37.block_sparse_moe.experts.0.w2", "model.layers.37.block_sparse_moe.experts.1.w2", "model.layers.37.block_sparse_moe.experts.2.w2", "model.layers.37.block_sparse_moe.experts.3.w2", "model.layers.37.block_sparse_moe.experts.4.w2", "model.layers.37.block_sparse_moe.experts.5.w2", "model.layers.37.block_sparse_moe.experts.6.w2", "model.layers.37.block_sparse_moe.experts.7.w2", "model.layers.37.block_sparse_moe.experts.8.w2", "model.layers.37.block_sparse_moe.experts.9.w2", "model.layers.37.block_sparse_moe.experts.10.w2", "model.layers.37.block_sparse_moe.experts.11.w2", "model.layers.37.block_sparse_moe.experts.12.w2", "model.layers.37.block_sparse_moe.experts.13.w2", "model.layers.37.block_sparse_moe.experts.14.w2", "model.layers.37.block_sparse_moe.experts.15.w2", "model.layers.37.block_sparse_moe.experts.16.w2", "model.layers.37.block_sparse_moe.experts.17.w2", "model.layers.37.block_sparse_moe.experts.18.w2", "model.layers.37.block_sparse_moe.experts.19.w2", "model.layers.37.block_sparse_moe.experts.20.w2", "model.layers.37.block_sparse_moe.experts.21.w2", "model.layers.37.block_sparse_moe.experts.22.w2", "model.layers.37.block_sparse_moe.experts.23.w2", "model.layers.37.block_sparse_moe.experts.24.w2", "model.layers.37.block_sparse_moe.experts.25.w2", "model.layers.37.block_sparse_moe.experts.26.w2", "model.layers.37.block_sparse_moe.experts.27.w2", "model.layers.37.block_sparse_moe.experts.28.w2", "model.layers.37.block_sparse_moe.experts.29.w2", "model.layers.37.block_sparse_moe.experts.30.w2", "model.layers.37.block_sparse_moe.experts.31.w2", "model.layers.37.block_sparse_moe.experts.32.w2", "model.layers.37.block_sparse_moe.experts.33.w2", "model.layers.37.block_sparse_moe.experts.34.w2", "model.layers.37.block_sparse_moe.experts.35.w2", "model.layers.37.block_sparse_moe.experts.36.w2", "model.layers.37.block_sparse_moe.experts.37.w2", "model.layers.37.block_sparse_moe.experts.38.w2", "model.layers.37.block_sparse_moe.experts.39.w2", "model.layers.37.block_sparse_moe.experts.40.w2", "model.layers.37.block_sparse_moe.experts.41.w2", "model.layers.37.block_sparse_moe.experts.42.w2", "model.layers.37.block_sparse_moe.experts.43.w2", "model.layers.37.block_sparse_moe.experts.44.w2", "model.layers.37.block_sparse_moe.experts.45.w2", "model.layers.37.block_sparse_moe.experts.46.w2", "model.layers.37.block_sparse_moe.experts.47.w2", "model.layers.37.block_sparse_moe.experts.48.w2", "model.layers.37.block_sparse_moe.experts.49.w2", "model.layers.37.block_sparse_moe.experts.50.w2", "model.layers.37.block_sparse_moe.experts.51.w2", "model.layers.37.block_sparse_moe.experts.52.w2", "model.layers.37.block_sparse_moe.experts.53.w2", "model.layers.37.block_sparse_moe.experts.54.w2", "model.layers.37.block_sparse_moe.experts.55.w2", "model.layers.37.block_sparse_moe.experts.56.w2", "model.layers.37.block_sparse_moe.experts.57.w2", "model.layers.37.block_sparse_moe.experts.58.w2", "model.layers.37.block_sparse_moe.experts.59.w2", "model.layers.37.block_sparse_moe.experts.60.w2", "model.layers.37.block_sparse_moe.experts.61.w2", "model.layers.37.block_sparse_moe.experts.62.w2", "model.layers.37.block_sparse_moe.experts.63.w2", "model.layers.37.block_sparse_moe.experts.64.w2", "model.layers.37.block_sparse_moe.experts.65.w2", "model.layers.37.block_sparse_moe.experts.66.w2", "model.layers.37.block_sparse_moe.experts.67.w2", "model.layers.37.block_sparse_moe.experts.68.w2", "model.layers.37.block_sparse_moe.experts.69.w2", "model.layers.37.block_sparse_moe.experts.70.w2", "model.layers.37.block_sparse_moe.experts.71.w2", "model.layers.37.block_sparse_moe.experts.72.w2", "model.layers.37.block_sparse_moe.experts.73.w2", "model.layers.37.block_sparse_moe.experts.74.w2", "model.layers.37.block_sparse_moe.experts.75.w2", "model.layers.37.block_sparse_moe.experts.76.w2", "model.layers.37.block_sparse_moe.experts.77.w2", "model.layers.37.block_sparse_moe.experts.78.w2", "model.layers.37.block_sparse_moe.experts.79.w2", "model.layers.37.block_sparse_moe.experts.80.w2", "model.layers.37.block_sparse_moe.experts.81.w2", "model.layers.37.block_sparse_moe.experts.82.w2", "model.layers.37.block_sparse_moe.experts.83.w2", "model.layers.37.block_sparse_moe.experts.84.w2", "model.layers.37.block_sparse_moe.experts.85.w2", "model.layers.37.block_sparse_moe.experts.86.w2", "model.layers.37.block_sparse_moe.experts.87.w2", "model.layers.37.block_sparse_moe.experts.88.w2", "model.layers.37.block_sparse_moe.experts.89.w2", "model.layers.37.block_sparse_moe.experts.90.w2", "model.layers.37.block_sparse_moe.experts.91.w2", "model.layers.37.block_sparse_moe.experts.92.w2", "model.layers.37.block_sparse_moe.experts.93.w2", "model.layers.37.block_sparse_moe.experts.94.w2", "model.layers.37.block_sparse_moe.experts.95.w2", "model.layers.37.block_sparse_moe.experts.96.w2", "model.layers.37.block_sparse_moe.experts.97.w2", "model.layers.37.block_sparse_moe.experts.98.w2", "model.layers.37.block_sparse_moe.experts.99.w2", "model.layers.37.block_sparse_moe.experts.100.w2", "model.layers.37.block_sparse_moe.experts.101.w2", "model.layers.37.block_sparse_moe.experts.102.w2", "model.layers.37.block_sparse_moe.experts.103.w2", "model.layers.37.block_sparse_moe.experts.104.w2", "model.layers.37.block_sparse_moe.experts.105.w2", "model.layers.37.block_sparse_moe.experts.106.w2", "model.layers.37.block_sparse_moe.experts.107.w2", "model.layers.37.block_sparse_moe.experts.108.w2", "model.layers.37.block_sparse_moe.experts.109.w2", "model.layers.37.block_sparse_moe.experts.110.w2", "model.layers.37.block_sparse_moe.experts.111.w2", "model.layers.37.block_sparse_moe.experts.112.w2", "model.layers.37.block_sparse_moe.experts.113.w2", "model.layers.37.block_sparse_moe.experts.114.w2", "model.layers.37.block_sparse_moe.experts.115.w2", "model.layers.37.block_sparse_moe.experts.116.w2", "model.layers.37.block_sparse_moe.experts.117.w2", "model.layers.37.block_sparse_moe.experts.118.w2", "model.layers.37.block_sparse_moe.experts.119.w2", "model.layers.37.block_sparse_moe.experts.120.w2", "model.layers.37.block_sparse_moe.experts.121.w2", "model.layers.37.block_sparse_moe.experts.122.w2", "model.layers.37.block_sparse_moe.experts.123.w2", "model.layers.37.block_sparse_moe.experts.124.w2", "model.layers.37.block_sparse_moe.experts.125.w2", "model.layers.37.block_sparse_moe.experts.126.w2", "model.layers.37.block_sparse_moe.experts.127.w2", "model.layers.37.block_sparse_moe.experts.128.w2", "model.layers.37.block_sparse_moe.experts.129.w2", "model.layers.37.block_sparse_moe.experts.130.w2", "model.layers.37.block_sparse_moe.experts.131.w2", "model.layers.37.block_sparse_moe.experts.132.w2", "model.layers.37.block_sparse_moe.experts.133.w2", "model.layers.37.block_sparse_moe.experts.134.w2", "model.layers.37.block_sparse_moe.experts.135.w2", "model.layers.37.block_sparse_moe.experts.136.w2", "model.layers.37.block_sparse_moe.experts.137.w2", "model.layers.37.block_sparse_moe.experts.138.w2", "model.layers.37.block_sparse_moe.experts.139.w2", "model.layers.37.block_sparse_moe.experts.140.w2", "model.layers.37.block_sparse_moe.experts.141.w2", "model.layers.37.block_sparse_moe.experts.142.w2", "model.layers.37.block_sparse_moe.experts.143.w2", "model.layers.37.block_sparse_moe.experts.144.w2", "model.layers.37.block_sparse_moe.experts.145.w2", "model.layers.37.block_sparse_moe.experts.146.w2", "model.layers.37.block_sparse_moe.experts.147.w2", "model.layers.37.block_sparse_moe.experts.148.w2", "model.layers.37.block_sparse_moe.experts.149.w2", "model.layers.37.block_sparse_moe.experts.150.w2", "model.layers.37.block_sparse_moe.experts.151.w2", "model.layers.37.block_sparse_moe.experts.152.w2", "model.layers.37.block_sparse_moe.experts.153.w2", "model.layers.37.block_sparse_moe.experts.154.w2", "model.layers.37.block_sparse_moe.experts.155.w2", "model.layers.37.block_sparse_moe.experts.156.w2", "model.layers.37.block_sparse_moe.experts.157.w2", "model.layers.37.block_sparse_moe.experts.158.w2", "model.layers.37.block_sparse_moe.experts.159.w2", "model.layers.37.block_sparse_moe.experts.160.w2", "model.layers.37.block_sparse_moe.experts.161.w2", "model.layers.37.block_sparse_moe.experts.162.w2", "model.layers.37.block_sparse_moe.experts.163.w2", "model.layers.37.block_sparse_moe.experts.164.w2", "model.layers.37.block_sparse_moe.experts.165.w2", "model.layers.37.block_sparse_moe.experts.166.w2", "model.layers.37.block_sparse_moe.experts.167.w2", "model.layers.37.block_sparse_moe.experts.168.w2", "model.layers.37.block_sparse_moe.experts.169.w2", "model.layers.37.block_sparse_moe.experts.170.w2", "model.layers.37.block_sparse_moe.experts.171.w2", "model.layers.37.block_sparse_moe.experts.172.w2", "model.layers.37.block_sparse_moe.experts.173.w2", "model.layers.37.block_sparse_moe.experts.174.w2", "model.layers.37.block_sparse_moe.experts.175.w2", "model.layers.37.block_sparse_moe.experts.176.w2", "model.layers.37.block_sparse_moe.experts.177.w2", "model.layers.37.block_sparse_moe.experts.178.w2", "model.layers.37.block_sparse_moe.experts.179.w2", "model.layers.37.block_sparse_moe.experts.180.w2", "model.layers.37.block_sparse_moe.experts.181.w2", "model.layers.37.block_sparse_moe.experts.182.w2", "model.layers.37.block_sparse_moe.experts.183.w2", "model.layers.37.block_sparse_moe.experts.184.w2", "model.layers.37.block_sparse_moe.experts.185.w2", "model.layers.37.block_sparse_moe.experts.186.w2", "model.layers.37.block_sparse_moe.experts.187.w2", "model.layers.37.block_sparse_moe.experts.188.w2", "model.layers.37.block_sparse_moe.experts.189.w2", "model.layers.37.block_sparse_moe.experts.190.w2", "model.layers.37.block_sparse_moe.experts.191.w2", "model.layers.37.block_sparse_moe.experts.192.w2", "model.layers.37.block_sparse_moe.experts.193.w2", "model.layers.37.block_sparse_moe.experts.194.w2", "model.layers.37.block_sparse_moe.experts.195.w2", "model.layers.37.block_sparse_moe.experts.196.w2", "model.layers.37.block_sparse_moe.experts.197.w2", "model.layers.37.block_sparse_moe.experts.198.w2", "model.layers.37.block_sparse_moe.experts.199.w2", "model.layers.37.block_sparse_moe.experts.200.w2", "model.layers.37.block_sparse_moe.experts.201.w2", "model.layers.37.block_sparse_moe.experts.202.w2", "model.layers.37.block_sparse_moe.experts.203.w2", "model.layers.37.block_sparse_moe.experts.204.w2", "model.layers.37.block_sparse_moe.experts.205.w2", "model.layers.37.block_sparse_moe.experts.206.w2", "model.layers.37.block_sparse_moe.experts.207.w2", "model.layers.37.block_sparse_moe.experts.208.w2", "model.layers.37.block_sparse_moe.experts.209.w2", "model.layers.37.block_sparse_moe.experts.210.w2", "model.layers.37.block_sparse_moe.experts.211.w2", "model.layers.37.block_sparse_moe.experts.212.w2", "model.layers.37.block_sparse_moe.experts.213.w2", "model.layers.37.block_sparse_moe.experts.214.w2", "model.layers.37.block_sparse_moe.experts.215.w2", "model.layers.37.block_sparse_moe.experts.216.w2", "model.layers.37.block_sparse_moe.experts.217.w2", "model.layers.37.block_sparse_moe.experts.218.w2", "model.layers.37.block_sparse_moe.experts.219.w2", "model.layers.37.block_sparse_moe.experts.220.w2", "model.layers.37.block_sparse_moe.experts.221.w2", "model.layers.37.block_sparse_moe.experts.222.w2", "model.layers.37.block_sparse_moe.experts.223.w2", "model.layers.37.block_sparse_moe.experts.224.w2", "model.layers.37.block_sparse_moe.experts.225.w2", "model.layers.37.block_sparse_moe.experts.226.w2", "model.layers.37.block_sparse_moe.experts.227.w2", "model.layers.37.block_sparse_moe.experts.228.w2", "model.layers.37.block_sparse_moe.experts.229.w2", "model.layers.37.block_sparse_moe.experts.230.w2", "model.layers.37.block_sparse_moe.experts.231.w2", "model.layers.37.block_sparse_moe.experts.232.w2", "model.layers.37.block_sparse_moe.experts.233.w2", "model.layers.37.block_sparse_moe.experts.234.w2", "model.layers.37.block_sparse_moe.experts.235.w2", "model.layers.37.block_sparse_moe.experts.236.w2", "model.layers.37.block_sparse_moe.experts.237.w2", "model.layers.37.block_sparse_moe.experts.238.w2", "model.layers.37.block_sparse_moe.experts.239.w2", "model.layers.37.block_sparse_moe.experts.240.w2", "model.layers.37.block_sparse_moe.experts.241.w2", "model.layers.37.block_sparse_moe.experts.242.w2", "model.layers.37.block_sparse_moe.experts.243.w2", "model.layers.37.block_sparse_moe.experts.244.w2", "model.layers.37.block_sparse_moe.experts.245.w2", "model.layers.37.block_sparse_moe.experts.246.w2", "model.layers.37.block_sparse_moe.experts.247.w2", "model.layers.37.block_sparse_moe.experts.248.w2", "model.layers.37.block_sparse_moe.experts.249.w2", "model.layers.37.block_sparse_moe.experts.250.w2", "model.layers.37.block_sparse_moe.experts.251.w2", "model.layers.37.block_sparse_moe.experts.252.w2", "model.layers.37.block_sparse_moe.experts.253.w2", "model.layers.37.block_sparse_moe.experts.254.w2", "model.layers.37.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0008202672004700151, "dbits": 1207959552 } ] }, { "idx": 190, "layers": [ "model.layers.38.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0004370927810668501, "dbits": 18874368 } ] }, { "idx": 191, "layers": [ "model.layers.38.self_attn.k_proj", "model.layers.38.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0011609584093092984, "dbits": 6291456 } ] }, { "idx": 192, "layers": [ "model.layers.38.self_attn.o_proj" ], "candidates": [ { "dkld": 0.006118619441986106, "dbits": 18874368 } ] }, { "idx": 193, "layers": [ "model.layers.38.block_sparse_moe.experts.0.w1", "model.layers.38.block_sparse_moe.experts.1.w1", "model.layers.38.block_sparse_moe.experts.2.w1", "model.layers.38.block_sparse_moe.experts.3.w1", "model.layers.38.block_sparse_moe.experts.4.w1", "model.layers.38.block_sparse_moe.experts.5.w1", "model.layers.38.block_sparse_moe.experts.6.w1", "model.layers.38.block_sparse_moe.experts.7.w1", "model.layers.38.block_sparse_moe.experts.8.w1", "model.layers.38.block_sparse_moe.experts.9.w1", "model.layers.38.block_sparse_moe.experts.10.w1", "model.layers.38.block_sparse_moe.experts.11.w1", "model.layers.38.block_sparse_moe.experts.12.w1", "model.layers.38.block_sparse_moe.experts.13.w1", "model.layers.38.block_sparse_moe.experts.14.w1", "model.layers.38.block_sparse_moe.experts.15.w1", "model.layers.38.block_sparse_moe.experts.16.w1", "model.layers.38.block_sparse_moe.experts.17.w1", "model.layers.38.block_sparse_moe.experts.18.w1", "model.layers.38.block_sparse_moe.experts.19.w1", "model.layers.38.block_sparse_moe.experts.20.w1", "model.layers.38.block_sparse_moe.experts.21.w1", "model.layers.38.block_sparse_moe.experts.22.w1", "model.layers.38.block_sparse_moe.experts.23.w1", "model.layers.38.block_sparse_moe.experts.24.w1", "model.layers.38.block_sparse_moe.experts.25.w1", "model.layers.38.block_sparse_moe.experts.26.w1", "model.layers.38.block_sparse_moe.experts.27.w1", "model.layers.38.block_sparse_moe.experts.28.w1", "model.layers.38.block_sparse_moe.experts.29.w1", "model.layers.38.block_sparse_moe.experts.30.w1", "model.layers.38.block_sparse_moe.experts.31.w1", "model.layers.38.block_sparse_moe.experts.32.w1", "model.layers.38.block_sparse_moe.experts.33.w1", "model.layers.38.block_sparse_moe.experts.34.w1", "model.layers.38.block_sparse_moe.experts.35.w1", "model.layers.38.block_sparse_moe.experts.36.w1", "model.layers.38.block_sparse_moe.experts.37.w1", "model.layers.38.block_sparse_moe.experts.38.w1", "model.layers.38.block_sparse_moe.experts.39.w1", "model.layers.38.block_sparse_moe.experts.40.w1", "model.layers.38.block_sparse_moe.experts.41.w1", "model.layers.38.block_sparse_moe.experts.42.w1", "model.layers.38.block_sparse_moe.experts.43.w1", "model.layers.38.block_sparse_moe.experts.44.w1", "model.layers.38.block_sparse_moe.experts.45.w1", "model.layers.38.block_sparse_moe.experts.46.w1", "model.layers.38.block_sparse_moe.experts.47.w1", "model.layers.38.block_sparse_moe.experts.48.w1", "model.layers.38.block_sparse_moe.experts.49.w1", "model.layers.38.block_sparse_moe.experts.50.w1", "model.layers.38.block_sparse_moe.experts.51.w1", "model.layers.38.block_sparse_moe.experts.52.w1", "model.layers.38.block_sparse_moe.experts.53.w1", "model.layers.38.block_sparse_moe.experts.54.w1", "model.layers.38.block_sparse_moe.experts.55.w1", "model.layers.38.block_sparse_moe.experts.56.w1", "model.layers.38.block_sparse_moe.experts.57.w1", "model.layers.38.block_sparse_moe.experts.58.w1", "model.layers.38.block_sparse_moe.experts.59.w1", "model.layers.38.block_sparse_moe.experts.60.w1", "model.layers.38.block_sparse_moe.experts.61.w1", "model.layers.38.block_sparse_moe.experts.62.w1", "model.layers.38.block_sparse_moe.experts.63.w1", "model.layers.38.block_sparse_moe.experts.64.w1", "model.layers.38.block_sparse_moe.experts.65.w1", "model.layers.38.block_sparse_moe.experts.66.w1", "model.layers.38.block_sparse_moe.experts.67.w1", "model.layers.38.block_sparse_moe.experts.68.w1", "model.layers.38.block_sparse_moe.experts.69.w1", "model.layers.38.block_sparse_moe.experts.70.w1", "model.layers.38.block_sparse_moe.experts.71.w1", "model.layers.38.block_sparse_moe.experts.72.w1", "model.layers.38.block_sparse_moe.experts.73.w1", "model.layers.38.block_sparse_moe.experts.74.w1", "model.layers.38.block_sparse_moe.experts.75.w1", "model.layers.38.block_sparse_moe.experts.76.w1", "model.layers.38.block_sparse_moe.experts.77.w1", "model.layers.38.block_sparse_moe.experts.78.w1", "model.layers.38.block_sparse_moe.experts.79.w1", "model.layers.38.block_sparse_moe.experts.80.w1", "model.layers.38.block_sparse_moe.experts.81.w1", "model.layers.38.block_sparse_moe.experts.82.w1", "model.layers.38.block_sparse_moe.experts.83.w1", "model.layers.38.block_sparse_moe.experts.84.w1", "model.layers.38.block_sparse_moe.experts.85.w1", "model.layers.38.block_sparse_moe.experts.86.w1", "model.layers.38.block_sparse_moe.experts.87.w1", "model.layers.38.block_sparse_moe.experts.88.w1", "model.layers.38.block_sparse_moe.experts.89.w1", "model.layers.38.block_sparse_moe.experts.90.w1", "model.layers.38.block_sparse_moe.experts.91.w1", "model.layers.38.block_sparse_moe.experts.92.w1", "model.layers.38.block_sparse_moe.experts.93.w1", "model.layers.38.block_sparse_moe.experts.94.w1", "model.layers.38.block_sparse_moe.experts.95.w1", "model.layers.38.block_sparse_moe.experts.96.w1", "model.layers.38.block_sparse_moe.experts.97.w1", "model.layers.38.block_sparse_moe.experts.98.w1", "model.layers.38.block_sparse_moe.experts.99.w1", "model.layers.38.block_sparse_moe.experts.100.w1", "model.layers.38.block_sparse_moe.experts.101.w1", "model.layers.38.block_sparse_moe.experts.102.w1", "model.layers.38.block_sparse_moe.experts.103.w1", "model.layers.38.block_sparse_moe.experts.104.w1", "model.layers.38.block_sparse_moe.experts.105.w1", "model.layers.38.block_sparse_moe.experts.106.w1", "model.layers.38.block_sparse_moe.experts.107.w1", "model.layers.38.block_sparse_moe.experts.108.w1", "model.layers.38.block_sparse_moe.experts.109.w1", "model.layers.38.block_sparse_moe.experts.110.w1", "model.layers.38.block_sparse_moe.experts.111.w1", "model.layers.38.block_sparse_moe.experts.112.w1", "model.layers.38.block_sparse_moe.experts.113.w1", "model.layers.38.block_sparse_moe.experts.114.w1", "model.layers.38.block_sparse_moe.experts.115.w1", "model.layers.38.block_sparse_moe.experts.116.w1", "model.layers.38.block_sparse_moe.experts.117.w1", "model.layers.38.block_sparse_moe.experts.118.w1", "model.layers.38.block_sparse_moe.experts.119.w1", "model.layers.38.block_sparse_moe.experts.120.w1", "model.layers.38.block_sparse_moe.experts.121.w1", "model.layers.38.block_sparse_moe.experts.122.w1", "model.layers.38.block_sparse_moe.experts.123.w1", "model.layers.38.block_sparse_moe.experts.124.w1", "model.layers.38.block_sparse_moe.experts.125.w1", "model.layers.38.block_sparse_moe.experts.126.w1", "model.layers.38.block_sparse_moe.experts.127.w1", "model.layers.38.block_sparse_moe.experts.128.w1", "model.layers.38.block_sparse_moe.experts.129.w1", "model.layers.38.block_sparse_moe.experts.130.w1", "model.layers.38.block_sparse_moe.experts.131.w1", "model.layers.38.block_sparse_moe.experts.132.w1", "model.layers.38.block_sparse_moe.experts.133.w1", "model.layers.38.block_sparse_moe.experts.134.w1", "model.layers.38.block_sparse_moe.experts.135.w1", "model.layers.38.block_sparse_moe.experts.136.w1", "model.layers.38.block_sparse_moe.experts.137.w1", "model.layers.38.block_sparse_moe.experts.138.w1", "model.layers.38.block_sparse_moe.experts.139.w1", "model.layers.38.block_sparse_moe.experts.140.w1", "model.layers.38.block_sparse_moe.experts.141.w1", "model.layers.38.block_sparse_moe.experts.142.w1", "model.layers.38.block_sparse_moe.experts.143.w1", "model.layers.38.block_sparse_moe.experts.144.w1", "model.layers.38.block_sparse_moe.experts.145.w1", "model.layers.38.block_sparse_moe.experts.146.w1", "model.layers.38.block_sparse_moe.experts.147.w1", "model.layers.38.block_sparse_moe.experts.148.w1", "model.layers.38.block_sparse_moe.experts.149.w1", "model.layers.38.block_sparse_moe.experts.150.w1", "model.layers.38.block_sparse_moe.experts.151.w1", "model.layers.38.block_sparse_moe.experts.152.w1", "model.layers.38.block_sparse_moe.experts.153.w1", "model.layers.38.block_sparse_moe.experts.154.w1", "model.layers.38.block_sparse_moe.experts.155.w1", "model.layers.38.block_sparse_moe.experts.156.w1", "model.layers.38.block_sparse_moe.experts.157.w1", "model.layers.38.block_sparse_moe.experts.158.w1", "model.layers.38.block_sparse_moe.experts.159.w1", "model.layers.38.block_sparse_moe.experts.160.w1", "model.layers.38.block_sparse_moe.experts.161.w1", "model.layers.38.block_sparse_moe.experts.162.w1", "model.layers.38.block_sparse_moe.experts.163.w1", "model.layers.38.block_sparse_moe.experts.164.w1", "model.layers.38.block_sparse_moe.experts.165.w1", "model.layers.38.block_sparse_moe.experts.166.w1", "model.layers.38.block_sparse_moe.experts.167.w1", "model.layers.38.block_sparse_moe.experts.168.w1", "model.layers.38.block_sparse_moe.experts.169.w1", "model.layers.38.block_sparse_moe.experts.170.w1", "model.layers.38.block_sparse_moe.experts.171.w1", "model.layers.38.block_sparse_moe.experts.172.w1", "model.layers.38.block_sparse_moe.experts.173.w1", "model.layers.38.block_sparse_moe.experts.174.w1", "model.layers.38.block_sparse_moe.experts.175.w1", "model.layers.38.block_sparse_moe.experts.176.w1", "model.layers.38.block_sparse_moe.experts.177.w1", "model.layers.38.block_sparse_moe.experts.178.w1", "model.layers.38.block_sparse_moe.experts.179.w1", "model.layers.38.block_sparse_moe.experts.180.w1", "model.layers.38.block_sparse_moe.experts.181.w1", "model.layers.38.block_sparse_moe.experts.182.w1", "model.layers.38.block_sparse_moe.experts.183.w1", "model.layers.38.block_sparse_moe.experts.184.w1", "model.layers.38.block_sparse_moe.experts.185.w1", "model.layers.38.block_sparse_moe.experts.186.w1", "model.layers.38.block_sparse_moe.experts.187.w1", "model.layers.38.block_sparse_moe.experts.188.w1", "model.layers.38.block_sparse_moe.experts.189.w1", "model.layers.38.block_sparse_moe.experts.190.w1", "model.layers.38.block_sparse_moe.experts.191.w1", "model.layers.38.block_sparse_moe.experts.192.w1", "model.layers.38.block_sparse_moe.experts.193.w1", "model.layers.38.block_sparse_moe.experts.194.w1", "model.layers.38.block_sparse_moe.experts.195.w1", "model.layers.38.block_sparse_moe.experts.196.w1", "model.layers.38.block_sparse_moe.experts.197.w1", "model.layers.38.block_sparse_moe.experts.198.w1", "model.layers.38.block_sparse_moe.experts.199.w1", "model.layers.38.block_sparse_moe.experts.200.w1", "model.layers.38.block_sparse_moe.experts.201.w1", "model.layers.38.block_sparse_moe.experts.202.w1", "model.layers.38.block_sparse_moe.experts.203.w1", "model.layers.38.block_sparse_moe.experts.204.w1", "model.layers.38.block_sparse_moe.experts.205.w1", "model.layers.38.block_sparse_moe.experts.206.w1", "model.layers.38.block_sparse_moe.experts.207.w1", "model.layers.38.block_sparse_moe.experts.208.w1", "model.layers.38.block_sparse_moe.experts.209.w1", "model.layers.38.block_sparse_moe.experts.210.w1", "model.layers.38.block_sparse_moe.experts.211.w1", "model.layers.38.block_sparse_moe.experts.212.w1", "model.layers.38.block_sparse_moe.experts.213.w1", "model.layers.38.block_sparse_moe.experts.214.w1", "model.layers.38.block_sparse_moe.experts.215.w1", "model.layers.38.block_sparse_moe.experts.216.w1", "model.layers.38.block_sparse_moe.experts.217.w1", "model.layers.38.block_sparse_moe.experts.218.w1", "model.layers.38.block_sparse_moe.experts.219.w1", "model.layers.38.block_sparse_moe.experts.220.w1", "model.layers.38.block_sparse_moe.experts.221.w1", "model.layers.38.block_sparse_moe.experts.222.w1", "model.layers.38.block_sparse_moe.experts.223.w1", "model.layers.38.block_sparse_moe.experts.224.w1", "model.layers.38.block_sparse_moe.experts.225.w1", "model.layers.38.block_sparse_moe.experts.226.w1", "model.layers.38.block_sparse_moe.experts.227.w1", "model.layers.38.block_sparse_moe.experts.228.w1", "model.layers.38.block_sparse_moe.experts.229.w1", "model.layers.38.block_sparse_moe.experts.230.w1", "model.layers.38.block_sparse_moe.experts.231.w1", "model.layers.38.block_sparse_moe.experts.232.w1", "model.layers.38.block_sparse_moe.experts.233.w1", "model.layers.38.block_sparse_moe.experts.234.w1", "model.layers.38.block_sparse_moe.experts.235.w1", "model.layers.38.block_sparse_moe.experts.236.w1", "model.layers.38.block_sparse_moe.experts.237.w1", "model.layers.38.block_sparse_moe.experts.238.w1", "model.layers.38.block_sparse_moe.experts.239.w1", "model.layers.38.block_sparse_moe.experts.240.w1", "model.layers.38.block_sparse_moe.experts.241.w1", "model.layers.38.block_sparse_moe.experts.242.w1", "model.layers.38.block_sparse_moe.experts.243.w1", "model.layers.38.block_sparse_moe.experts.244.w1", "model.layers.38.block_sparse_moe.experts.245.w1", "model.layers.38.block_sparse_moe.experts.246.w1", "model.layers.38.block_sparse_moe.experts.247.w1", "model.layers.38.block_sparse_moe.experts.248.w1", "model.layers.38.block_sparse_moe.experts.249.w1", "model.layers.38.block_sparse_moe.experts.250.w1", "model.layers.38.block_sparse_moe.experts.251.w1", "model.layers.38.block_sparse_moe.experts.252.w1", "model.layers.38.block_sparse_moe.experts.253.w1", "model.layers.38.block_sparse_moe.experts.254.w1", "model.layers.38.block_sparse_moe.experts.255.w1", "model.layers.38.block_sparse_moe.experts.0.w3", "model.layers.38.block_sparse_moe.experts.1.w3", "model.layers.38.block_sparse_moe.experts.2.w3", "model.layers.38.block_sparse_moe.experts.3.w3", "model.layers.38.block_sparse_moe.experts.4.w3", "model.layers.38.block_sparse_moe.experts.5.w3", "model.layers.38.block_sparse_moe.experts.6.w3", "model.layers.38.block_sparse_moe.experts.7.w3", "model.layers.38.block_sparse_moe.experts.8.w3", "model.layers.38.block_sparse_moe.experts.9.w3", "model.layers.38.block_sparse_moe.experts.10.w3", "model.layers.38.block_sparse_moe.experts.11.w3", "model.layers.38.block_sparse_moe.experts.12.w3", "model.layers.38.block_sparse_moe.experts.13.w3", "model.layers.38.block_sparse_moe.experts.14.w3", "model.layers.38.block_sparse_moe.experts.15.w3", "model.layers.38.block_sparse_moe.experts.16.w3", "model.layers.38.block_sparse_moe.experts.17.w3", "model.layers.38.block_sparse_moe.experts.18.w3", "model.layers.38.block_sparse_moe.experts.19.w3", "model.layers.38.block_sparse_moe.experts.20.w3", "model.layers.38.block_sparse_moe.experts.21.w3", "model.layers.38.block_sparse_moe.experts.22.w3", "model.layers.38.block_sparse_moe.experts.23.w3", "model.layers.38.block_sparse_moe.experts.24.w3", "model.layers.38.block_sparse_moe.experts.25.w3", "model.layers.38.block_sparse_moe.experts.26.w3", "model.layers.38.block_sparse_moe.experts.27.w3", "model.layers.38.block_sparse_moe.experts.28.w3", "model.layers.38.block_sparse_moe.experts.29.w3", "model.layers.38.block_sparse_moe.experts.30.w3", "model.layers.38.block_sparse_moe.experts.31.w3", "model.layers.38.block_sparse_moe.experts.32.w3", "model.layers.38.block_sparse_moe.experts.33.w3", "model.layers.38.block_sparse_moe.experts.34.w3", "model.layers.38.block_sparse_moe.experts.35.w3", "model.layers.38.block_sparse_moe.experts.36.w3", "model.layers.38.block_sparse_moe.experts.37.w3", "model.layers.38.block_sparse_moe.experts.38.w3", "model.layers.38.block_sparse_moe.experts.39.w3", "model.layers.38.block_sparse_moe.experts.40.w3", "model.layers.38.block_sparse_moe.experts.41.w3", "model.layers.38.block_sparse_moe.experts.42.w3", "model.layers.38.block_sparse_moe.experts.43.w3", "model.layers.38.block_sparse_moe.experts.44.w3", "model.layers.38.block_sparse_moe.experts.45.w3", "model.layers.38.block_sparse_moe.experts.46.w3", "model.layers.38.block_sparse_moe.experts.47.w3", "model.layers.38.block_sparse_moe.experts.48.w3", "model.layers.38.block_sparse_moe.experts.49.w3", "model.layers.38.block_sparse_moe.experts.50.w3", "model.layers.38.block_sparse_moe.experts.51.w3", "model.layers.38.block_sparse_moe.experts.52.w3", "model.layers.38.block_sparse_moe.experts.53.w3", "model.layers.38.block_sparse_moe.experts.54.w3", "model.layers.38.block_sparse_moe.experts.55.w3", "model.layers.38.block_sparse_moe.experts.56.w3", "model.layers.38.block_sparse_moe.experts.57.w3", "model.layers.38.block_sparse_moe.experts.58.w3", "model.layers.38.block_sparse_moe.experts.59.w3", "model.layers.38.block_sparse_moe.experts.60.w3", "model.layers.38.block_sparse_moe.experts.61.w3", "model.layers.38.block_sparse_moe.experts.62.w3", "model.layers.38.block_sparse_moe.experts.63.w3", "model.layers.38.block_sparse_moe.experts.64.w3", "model.layers.38.block_sparse_moe.experts.65.w3", "model.layers.38.block_sparse_moe.experts.66.w3", "model.layers.38.block_sparse_moe.experts.67.w3", "model.layers.38.block_sparse_moe.experts.68.w3", "model.layers.38.block_sparse_moe.experts.69.w3", "model.layers.38.block_sparse_moe.experts.70.w3", "model.layers.38.block_sparse_moe.experts.71.w3", "model.layers.38.block_sparse_moe.experts.72.w3", "model.layers.38.block_sparse_moe.experts.73.w3", "model.layers.38.block_sparse_moe.experts.74.w3", "model.layers.38.block_sparse_moe.experts.75.w3", "model.layers.38.block_sparse_moe.experts.76.w3", "model.layers.38.block_sparse_moe.experts.77.w3", "model.layers.38.block_sparse_moe.experts.78.w3", "model.layers.38.block_sparse_moe.experts.79.w3", "model.layers.38.block_sparse_moe.experts.80.w3", "model.layers.38.block_sparse_moe.experts.81.w3", "model.layers.38.block_sparse_moe.experts.82.w3", "model.layers.38.block_sparse_moe.experts.83.w3", "model.layers.38.block_sparse_moe.experts.84.w3", "model.layers.38.block_sparse_moe.experts.85.w3", "model.layers.38.block_sparse_moe.experts.86.w3", "model.layers.38.block_sparse_moe.experts.87.w3", "model.layers.38.block_sparse_moe.experts.88.w3", "model.layers.38.block_sparse_moe.experts.89.w3", "model.layers.38.block_sparse_moe.experts.90.w3", "model.layers.38.block_sparse_moe.experts.91.w3", "model.layers.38.block_sparse_moe.experts.92.w3", "model.layers.38.block_sparse_moe.experts.93.w3", "model.layers.38.block_sparse_moe.experts.94.w3", "model.layers.38.block_sparse_moe.experts.95.w3", "model.layers.38.block_sparse_moe.experts.96.w3", "model.layers.38.block_sparse_moe.experts.97.w3", "model.layers.38.block_sparse_moe.experts.98.w3", "model.layers.38.block_sparse_moe.experts.99.w3", "model.layers.38.block_sparse_moe.experts.100.w3", "model.layers.38.block_sparse_moe.experts.101.w3", "model.layers.38.block_sparse_moe.experts.102.w3", "model.layers.38.block_sparse_moe.experts.103.w3", "model.layers.38.block_sparse_moe.experts.104.w3", "model.layers.38.block_sparse_moe.experts.105.w3", "model.layers.38.block_sparse_moe.experts.106.w3", "model.layers.38.block_sparse_moe.experts.107.w3", "model.layers.38.block_sparse_moe.experts.108.w3", "model.layers.38.block_sparse_moe.experts.109.w3", "model.layers.38.block_sparse_moe.experts.110.w3", "model.layers.38.block_sparse_moe.experts.111.w3", "model.layers.38.block_sparse_moe.experts.112.w3", "model.layers.38.block_sparse_moe.experts.113.w3", "model.layers.38.block_sparse_moe.experts.114.w3", "model.layers.38.block_sparse_moe.experts.115.w3", "model.layers.38.block_sparse_moe.experts.116.w3", "model.layers.38.block_sparse_moe.experts.117.w3", "model.layers.38.block_sparse_moe.experts.118.w3", "model.layers.38.block_sparse_moe.experts.119.w3", "model.layers.38.block_sparse_moe.experts.120.w3", "model.layers.38.block_sparse_moe.experts.121.w3", "model.layers.38.block_sparse_moe.experts.122.w3", "model.layers.38.block_sparse_moe.experts.123.w3", "model.layers.38.block_sparse_moe.experts.124.w3", "model.layers.38.block_sparse_moe.experts.125.w3", "model.layers.38.block_sparse_moe.experts.126.w3", "model.layers.38.block_sparse_moe.experts.127.w3", "model.layers.38.block_sparse_moe.experts.128.w3", "model.layers.38.block_sparse_moe.experts.129.w3", "model.layers.38.block_sparse_moe.experts.130.w3", "model.layers.38.block_sparse_moe.experts.131.w3", "model.layers.38.block_sparse_moe.experts.132.w3", "model.layers.38.block_sparse_moe.experts.133.w3", "model.layers.38.block_sparse_moe.experts.134.w3", "model.layers.38.block_sparse_moe.experts.135.w3", "model.layers.38.block_sparse_moe.experts.136.w3", "model.layers.38.block_sparse_moe.experts.137.w3", "model.layers.38.block_sparse_moe.experts.138.w3", "model.layers.38.block_sparse_moe.experts.139.w3", "model.layers.38.block_sparse_moe.experts.140.w3", "model.layers.38.block_sparse_moe.experts.141.w3", "model.layers.38.block_sparse_moe.experts.142.w3", "model.layers.38.block_sparse_moe.experts.143.w3", "model.layers.38.block_sparse_moe.experts.144.w3", "model.layers.38.block_sparse_moe.experts.145.w3", "model.layers.38.block_sparse_moe.experts.146.w3", "model.layers.38.block_sparse_moe.experts.147.w3", "model.layers.38.block_sparse_moe.experts.148.w3", "model.layers.38.block_sparse_moe.experts.149.w3", "model.layers.38.block_sparse_moe.experts.150.w3", "model.layers.38.block_sparse_moe.experts.151.w3", "model.layers.38.block_sparse_moe.experts.152.w3", "model.layers.38.block_sparse_moe.experts.153.w3", "model.layers.38.block_sparse_moe.experts.154.w3", "model.layers.38.block_sparse_moe.experts.155.w3", "model.layers.38.block_sparse_moe.experts.156.w3", "model.layers.38.block_sparse_moe.experts.157.w3", "model.layers.38.block_sparse_moe.experts.158.w3", "model.layers.38.block_sparse_moe.experts.159.w3", "model.layers.38.block_sparse_moe.experts.160.w3", "model.layers.38.block_sparse_moe.experts.161.w3", "model.layers.38.block_sparse_moe.experts.162.w3", "model.layers.38.block_sparse_moe.experts.163.w3", "model.layers.38.block_sparse_moe.experts.164.w3", "model.layers.38.block_sparse_moe.experts.165.w3", "model.layers.38.block_sparse_moe.experts.166.w3", "model.layers.38.block_sparse_moe.experts.167.w3", "model.layers.38.block_sparse_moe.experts.168.w3", "model.layers.38.block_sparse_moe.experts.169.w3", "model.layers.38.block_sparse_moe.experts.170.w3", "model.layers.38.block_sparse_moe.experts.171.w3", "model.layers.38.block_sparse_moe.experts.172.w3", "model.layers.38.block_sparse_moe.experts.173.w3", "model.layers.38.block_sparse_moe.experts.174.w3", "model.layers.38.block_sparse_moe.experts.175.w3", "model.layers.38.block_sparse_moe.experts.176.w3", "model.layers.38.block_sparse_moe.experts.177.w3", "model.layers.38.block_sparse_moe.experts.178.w3", "model.layers.38.block_sparse_moe.experts.179.w3", "model.layers.38.block_sparse_moe.experts.180.w3", "model.layers.38.block_sparse_moe.experts.181.w3", "model.layers.38.block_sparse_moe.experts.182.w3", "model.layers.38.block_sparse_moe.experts.183.w3", "model.layers.38.block_sparse_moe.experts.184.w3", "model.layers.38.block_sparse_moe.experts.185.w3", "model.layers.38.block_sparse_moe.experts.186.w3", "model.layers.38.block_sparse_moe.experts.187.w3", "model.layers.38.block_sparse_moe.experts.188.w3", "model.layers.38.block_sparse_moe.experts.189.w3", "model.layers.38.block_sparse_moe.experts.190.w3", "model.layers.38.block_sparse_moe.experts.191.w3", "model.layers.38.block_sparse_moe.experts.192.w3", "model.layers.38.block_sparse_moe.experts.193.w3", "model.layers.38.block_sparse_moe.experts.194.w3", "model.layers.38.block_sparse_moe.experts.195.w3", "model.layers.38.block_sparse_moe.experts.196.w3", "model.layers.38.block_sparse_moe.experts.197.w3", "model.layers.38.block_sparse_moe.experts.198.w3", "model.layers.38.block_sparse_moe.experts.199.w3", "model.layers.38.block_sparse_moe.experts.200.w3", "model.layers.38.block_sparse_moe.experts.201.w3", "model.layers.38.block_sparse_moe.experts.202.w3", "model.layers.38.block_sparse_moe.experts.203.w3", "model.layers.38.block_sparse_moe.experts.204.w3", "model.layers.38.block_sparse_moe.experts.205.w3", "model.layers.38.block_sparse_moe.experts.206.w3", "model.layers.38.block_sparse_moe.experts.207.w3", "model.layers.38.block_sparse_moe.experts.208.w3", "model.layers.38.block_sparse_moe.experts.209.w3", "model.layers.38.block_sparse_moe.experts.210.w3", "model.layers.38.block_sparse_moe.experts.211.w3", "model.layers.38.block_sparse_moe.experts.212.w3", "model.layers.38.block_sparse_moe.experts.213.w3", "model.layers.38.block_sparse_moe.experts.214.w3", "model.layers.38.block_sparse_moe.experts.215.w3", "model.layers.38.block_sparse_moe.experts.216.w3", "model.layers.38.block_sparse_moe.experts.217.w3", "model.layers.38.block_sparse_moe.experts.218.w3", "model.layers.38.block_sparse_moe.experts.219.w3", "model.layers.38.block_sparse_moe.experts.220.w3", "model.layers.38.block_sparse_moe.experts.221.w3", "model.layers.38.block_sparse_moe.experts.222.w3", "model.layers.38.block_sparse_moe.experts.223.w3", "model.layers.38.block_sparse_moe.experts.224.w3", "model.layers.38.block_sparse_moe.experts.225.w3", "model.layers.38.block_sparse_moe.experts.226.w3", "model.layers.38.block_sparse_moe.experts.227.w3", "model.layers.38.block_sparse_moe.experts.228.w3", "model.layers.38.block_sparse_moe.experts.229.w3", "model.layers.38.block_sparse_moe.experts.230.w3", "model.layers.38.block_sparse_moe.experts.231.w3", "model.layers.38.block_sparse_moe.experts.232.w3", "model.layers.38.block_sparse_moe.experts.233.w3", "model.layers.38.block_sparse_moe.experts.234.w3", "model.layers.38.block_sparse_moe.experts.235.w3", "model.layers.38.block_sparse_moe.experts.236.w3", "model.layers.38.block_sparse_moe.experts.237.w3", "model.layers.38.block_sparse_moe.experts.238.w3", "model.layers.38.block_sparse_moe.experts.239.w3", "model.layers.38.block_sparse_moe.experts.240.w3", "model.layers.38.block_sparse_moe.experts.241.w3", "model.layers.38.block_sparse_moe.experts.242.w3", "model.layers.38.block_sparse_moe.experts.243.w3", "model.layers.38.block_sparse_moe.experts.244.w3", "model.layers.38.block_sparse_moe.experts.245.w3", "model.layers.38.block_sparse_moe.experts.246.w3", "model.layers.38.block_sparse_moe.experts.247.w3", "model.layers.38.block_sparse_moe.experts.248.w3", "model.layers.38.block_sparse_moe.experts.249.w3", "model.layers.38.block_sparse_moe.experts.250.w3", "model.layers.38.block_sparse_moe.experts.251.w3", "model.layers.38.block_sparse_moe.experts.252.w3", "model.layers.38.block_sparse_moe.experts.253.w3", "model.layers.38.block_sparse_moe.experts.254.w3", "model.layers.38.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0010761290788651179, "dbits": 2415919104 } ] }, { "idx": 194, "layers": [ "model.layers.38.block_sparse_moe.experts.0.w2", "model.layers.38.block_sparse_moe.experts.1.w2", "model.layers.38.block_sparse_moe.experts.2.w2", "model.layers.38.block_sparse_moe.experts.3.w2", "model.layers.38.block_sparse_moe.experts.4.w2", "model.layers.38.block_sparse_moe.experts.5.w2", "model.layers.38.block_sparse_moe.experts.6.w2", "model.layers.38.block_sparse_moe.experts.7.w2", "model.layers.38.block_sparse_moe.experts.8.w2", "model.layers.38.block_sparse_moe.experts.9.w2", "model.layers.38.block_sparse_moe.experts.10.w2", "model.layers.38.block_sparse_moe.experts.11.w2", "model.layers.38.block_sparse_moe.experts.12.w2", "model.layers.38.block_sparse_moe.experts.13.w2", "model.layers.38.block_sparse_moe.experts.14.w2", "model.layers.38.block_sparse_moe.experts.15.w2", "model.layers.38.block_sparse_moe.experts.16.w2", "model.layers.38.block_sparse_moe.experts.17.w2", "model.layers.38.block_sparse_moe.experts.18.w2", "model.layers.38.block_sparse_moe.experts.19.w2", "model.layers.38.block_sparse_moe.experts.20.w2", "model.layers.38.block_sparse_moe.experts.21.w2", "model.layers.38.block_sparse_moe.experts.22.w2", "model.layers.38.block_sparse_moe.experts.23.w2", "model.layers.38.block_sparse_moe.experts.24.w2", "model.layers.38.block_sparse_moe.experts.25.w2", "model.layers.38.block_sparse_moe.experts.26.w2", "model.layers.38.block_sparse_moe.experts.27.w2", "model.layers.38.block_sparse_moe.experts.28.w2", "model.layers.38.block_sparse_moe.experts.29.w2", "model.layers.38.block_sparse_moe.experts.30.w2", "model.layers.38.block_sparse_moe.experts.31.w2", "model.layers.38.block_sparse_moe.experts.32.w2", "model.layers.38.block_sparse_moe.experts.33.w2", "model.layers.38.block_sparse_moe.experts.34.w2", "model.layers.38.block_sparse_moe.experts.35.w2", "model.layers.38.block_sparse_moe.experts.36.w2", "model.layers.38.block_sparse_moe.experts.37.w2", "model.layers.38.block_sparse_moe.experts.38.w2", "model.layers.38.block_sparse_moe.experts.39.w2", "model.layers.38.block_sparse_moe.experts.40.w2", "model.layers.38.block_sparse_moe.experts.41.w2", "model.layers.38.block_sparse_moe.experts.42.w2", "model.layers.38.block_sparse_moe.experts.43.w2", "model.layers.38.block_sparse_moe.experts.44.w2", "model.layers.38.block_sparse_moe.experts.45.w2", "model.layers.38.block_sparse_moe.experts.46.w2", "model.layers.38.block_sparse_moe.experts.47.w2", "model.layers.38.block_sparse_moe.experts.48.w2", "model.layers.38.block_sparse_moe.experts.49.w2", "model.layers.38.block_sparse_moe.experts.50.w2", "model.layers.38.block_sparse_moe.experts.51.w2", "model.layers.38.block_sparse_moe.experts.52.w2", "model.layers.38.block_sparse_moe.experts.53.w2", "model.layers.38.block_sparse_moe.experts.54.w2", "model.layers.38.block_sparse_moe.experts.55.w2", "model.layers.38.block_sparse_moe.experts.56.w2", "model.layers.38.block_sparse_moe.experts.57.w2", "model.layers.38.block_sparse_moe.experts.58.w2", "model.layers.38.block_sparse_moe.experts.59.w2", "model.layers.38.block_sparse_moe.experts.60.w2", "model.layers.38.block_sparse_moe.experts.61.w2", "model.layers.38.block_sparse_moe.experts.62.w2", "model.layers.38.block_sparse_moe.experts.63.w2", "model.layers.38.block_sparse_moe.experts.64.w2", "model.layers.38.block_sparse_moe.experts.65.w2", "model.layers.38.block_sparse_moe.experts.66.w2", "model.layers.38.block_sparse_moe.experts.67.w2", "model.layers.38.block_sparse_moe.experts.68.w2", "model.layers.38.block_sparse_moe.experts.69.w2", "model.layers.38.block_sparse_moe.experts.70.w2", "model.layers.38.block_sparse_moe.experts.71.w2", "model.layers.38.block_sparse_moe.experts.72.w2", "model.layers.38.block_sparse_moe.experts.73.w2", "model.layers.38.block_sparse_moe.experts.74.w2", "model.layers.38.block_sparse_moe.experts.75.w2", "model.layers.38.block_sparse_moe.experts.76.w2", "model.layers.38.block_sparse_moe.experts.77.w2", "model.layers.38.block_sparse_moe.experts.78.w2", "model.layers.38.block_sparse_moe.experts.79.w2", "model.layers.38.block_sparse_moe.experts.80.w2", "model.layers.38.block_sparse_moe.experts.81.w2", "model.layers.38.block_sparse_moe.experts.82.w2", "model.layers.38.block_sparse_moe.experts.83.w2", "model.layers.38.block_sparse_moe.experts.84.w2", "model.layers.38.block_sparse_moe.experts.85.w2", "model.layers.38.block_sparse_moe.experts.86.w2", "model.layers.38.block_sparse_moe.experts.87.w2", "model.layers.38.block_sparse_moe.experts.88.w2", "model.layers.38.block_sparse_moe.experts.89.w2", "model.layers.38.block_sparse_moe.experts.90.w2", "model.layers.38.block_sparse_moe.experts.91.w2", "model.layers.38.block_sparse_moe.experts.92.w2", "model.layers.38.block_sparse_moe.experts.93.w2", "model.layers.38.block_sparse_moe.experts.94.w2", "model.layers.38.block_sparse_moe.experts.95.w2", "model.layers.38.block_sparse_moe.experts.96.w2", "model.layers.38.block_sparse_moe.experts.97.w2", "model.layers.38.block_sparse_moe.experts.98.w2", "model.layers.38.block_sparse_moe.experts.99.w2", "model.layers.38.block_sparse_moe.experts.100.w2", "model.layers.38.block_sparse_moe.experts.101.w2", "model.layers.38.block_sparse_moe.experts.102.w2", "model.layers.38.block_sparse_moe.experts.103.w2", "model.layers.38.block_sparse_moe.experts.104.w2", "model.layers.38.block_sparse_moe.experts.105.w2", "model.layers.38.block_sparse_moe.experts.106.w2", "model.layers.38.block_sparse_moe.experts.107.w2", "model.layers.38.block_sparse_moe.experts.108.w2", "model.layers.38.block_sparse_moe.experts.109.w2", "model.layers.38.block_sparse_moe.experts.110.w2", "model.layers.38.block_sparse_moe.experts.111.w2", "model.layers.38.block_sparse_moe.experts.112.w2", "model.layers.38.block_sparse_moe.experts.113.w2", "model.layers.38.block_sparse_moe.experts.114.w2", "model.layers.38.block_sparse_moe.experts.115.w2", "model.layers.38.block_sparse_moe.experts.116.w2", "model.layers.38.block_sparse_moe.experts.117.w2", "model.layers.38.block_sparse_moe.experts.118.w2", "model.layers.38.block_sparse_moe.experts.119.w2", "model.layers.38.block_sparse_moe.experts.120.w2", "model.layers.38.block_sparse_moe.experts.121.w2", "model.layers.38.block_sparse_moe.experts.122.w2", "model.layers.38.block_sparse_moe.experts.123.w2", "model.layers.38.block_sparse_moe.experts.124.w2", "model.layers.38.block_sparse_moe.experts.125.w2", "model.layers.38.block_sparse_moe.experts.126.w2", "model.layers.38.block_sparse_moe.experts.127.w2", "model.layers.38.block_sparse_moe.experts.128.w2", "model.layers.38.block_sparse_moe.experts.129.w2", "model.layers.38.block_sparse_moe.experts.130.w2", "model.layers.38.block_sparse_moe.experts.131.w2", "model.layers.38.block_sparse_moe.experts.132.w2", "model.layers.38.block_sparse_moe.experts.133.w2", "model.layers.38.block_sparse_moe.experts.134.w2", "model.layers.38.block_sparse_moe.experts.135.w2", "model.layers.38.block_sparse_moe.experts.136.w2", "model.layers.38.block_sparse_moe.experts.137.w2", "model.layers.38.block_sparse_moe.experts.138.w2", "model.layers.38.block_sparse_moe.experts.139.w2", "model.layers.38.block_sparse_moe.experts.140.w2", "model.layers.38.block_sparse_moe.experts.141.w2", "model.layers.38.block_sparse_moe.experts.142.w2", "model.layers.38.block_sparse_moe.experts.143.w2", "model.layers.38.block_sparse_moe.experts.144.w2", "model.layers.38.block_sparse_moe.experts.145.w2", "model.layers.38.block_sparse_moe.experts.146.w2", "model.layers.38.block_sparse_moe.experts.147.w2", "model.layers.38.block_sparse_moe.experts.148.w2", "model.layers.38.block_sparse_moe.experts.149.w2", "model.layers.38.block_sparse_moe.experts.150.w2", "model.layers.38.block_sparse_moe.experts.151.w2", "model.layers.38.block_sparse_moe.experts.152.w2", "model.layers.38.block_sparse_moe.experts.153.w2", "model.layers.38.block_sparse_moe.experts.154.w2", "model.layers.38.block_sparse_moe.experts.155.w2", "model.layers.38.block_sparse_moe.experts.156.w2", "model.layers.38.block_sparse_moe.experts.157.w2", "model.layers.38.block_sparse_moe.experts.158.w2", "model.layers.38.block_sparse_moe.experts.159.w2", "model.layers.38.block_sparse_moe.experts.160.w2", "model.layers.38.block_sparse_moe.experts.161.w2", "model.layers.38.block_sparse_moe.experts.162.w2", "model.layers.38.block_sparse_moe.experts.163.w2", "model.layers.38.block_sparse_moe.experts.164.w2", "model.layers.38.block_sparse_moe.experts.165.w2", "model.layers.38.block_sparse_moe.experts.166.w2", "model.layers.38.block_sparse_moe.experts.167.w2", "model.layers.38.block_sparse_moe.experts.168.w2", "model.layers.38.block_sparse_moe.experts.169.w2", "model.layers.38.block_sparse_moe.experts.170.w2", "model.layers.38.block_sparse_moe.experts.171.w2", "model.layers.38.block_sparse_moe.experts.172.w2", "model.layers.38.block_sparse_moe.experts.173.w2", "model.layers.38.block_sparse_moe.experts.174.w2", "model.layers.38.block_sparse_moe.experts.175.w2", "model.layers.38.block_sparse_moe.experts.176.w2", "model.layers.38.block_sparse_moe.experts.177.w2", "model.layers.38.block_sparse_moe.experts.178.w2", "model.layers.38.block_sparse_moe.experts.179.w2", "model.layers.38.block_sparse_moe.experts.180.w2", "model.layers.38.block_sparse_moe.experts.181.w2", "model.layers.38.block_sparse_moe.experts.182.w2", "model.layers.38.block_sparse_moe.experts.183.w2", "model.layers.38.block_sparse_moe.experts.184.w2", "model.layers.38.block_sparse_moe.experts.185.w2", "model.layers.38.block_sparse_moe.experts.186.w2", "model.layers.38.block_sparse_moe.experts.187.w2", "model.layers.38.block_sparse_moe.experts.188.w2", "model.layers.38.block_sparse_moe.experts.189.w2", "model.layers.38.block_sparse_moe.experts.190.w2", "model.layers.38.block_sparse_moe.experts.191.w2", "model.layers.38.block_sparse_moe.experts.192.w2", "model.layers.38.block_sparse_moe.experts.193.w2", "model.layers.38.block_sparse_moe.experts.194.w2", "model.layers.38.block_sparse_moe.experts.195.w2", "model.layers.38.block_sparse_moe.experts.196.w2", "model.layers.38.block_sparse_moe.experts.197.w2", "model.layers.38.block_sparse_moe.experts.198.w2", "model.layers.38.block_sparse_moe.experts.199.w2", "model.layers.38.block_sparse_moe.experts.200.w2", "model.layers.38.block_sparse_moe.experts.201.w2", "model.layers.38.block_sparse_moe.experts.202.w2", "model.layers.38.block_sparse_moe.experts.203.w2", "model.layers.38.block_sparse_moe.experts.204.w2", "model.layers.38.block_sparse_moe.experts.205.w2", "model.layers.38.block_sparse_moe.experts.206.w2", "model.layers.38.block_sparse_moe.experts.207.w2", "model.layers.38.block_sparse_moe.experts.208.w2", "model.layers.38.block_sparse_moe.experts.209.w2", "model.layers.38.block_sparse_moe.experts.210.w2", "model.layers.38.block_sparse_moe.experts.211.w2", "model.layers.38.block_sparse_moe.experts.212.w2", "model.layers.38.block_sparse_moe.experts.213.w2", "model.layers.38.block_sparse_moe.experts.214.w2", "model.layers.38.block_sparse_moe.experts.215.w2", "model.layers.38.block_sparse_moe.experts.216.w2", "model.layers.38.block_sparse_moe.experts.217.w2", "model.layers.38.block_sparse_moe.experts.218.w2", "model.layers.38.block_sparse_moe.experts.219.w2", "model.layers.38.block_sparse_moe.experts.220.w2", "model.layers.38.block_sparse_moe.experts.221.w2", "model.layers.38.block_sparse_moe.experts.222.w2", "model.layers.38.block_sparse_moe.experts.223.w2", "model.layers.38.block_sparse_moe.experts.224.w2", "model.layers.38.block_sparse_moe.experts.225.w2", "model.layers.38.block_sparse_moe.experts.226.w2", "model.layers.38.block_sparse_moe.experts.227.w2", "model.layers.38.block_sparse_moe.experts.228.w2", "model.layers.38.block_sparse_moe.experts.229.w2", "model.layers.38.block_sparse_moe.experts.230.w2", "model.layers.38.block_sparse_moe.experts.231.w2", "model.layers.38.block_sparse_moe.experts.232.w2", "model.layers.38.block_sparse_moe.experts.233.w2", "model.layers.38.block_sparse_moe.experts.234.w2", "model.layers.38.block_sparse_moe.experts.235.w2", "model.layers.38.block_sparse_moe.experts.236.w2", "model.layers.38.block_sparse_moe.experts.237.w2", "model.layers.38.block_sparse_moe.experts.238.w2", "model.layers.38.block_sparse_moe.experts.239.w2", "model.layers.38.block_sparse_moe.experts.240.w2", "model.layers.38.block_sparse_moe.experts.241.w2", "model.layers.38.block_sparse_moe.experts.242.w2", "model.layers.38.block_sparse_moe.experts.243.w2", "model.layers.38.block_sparse_moe.experts.244.w2", "model.layers.38.block_sparse_moe.experts.245.w2", "model.layers.38.block_sparse_moe.experts.246.w2", "model.layers.38.block_sparse_moe.experts.247.w2", "model.layers.38.block_sparse_moe.experts.248.w2", "model.layers.38.block_sparse_moe.experts.249.w2", "model.layers.38.block_sparse_moe.experts.250.w2", "model.layers.38.block_sparse_moe.experts.251.w2", "model.layers.38.block_sparse_moe.experts.252.w2", "model.layers.38.block_sparse_moe.experts.253.w2", "model.layers.38.block_sparse_moe.experts.254.w2", "model.layers.38.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0005697488784789373, "dbits": 1207959552 } ] }, { "idx": 195, "layers": [ "model.layers.39.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00024620890617377267, "dbits": 18874368 } ] }, { "idx": 196, "layers": [ "model.layers.39.self_attn.k_proj", "model.layers.39.self_attn.v_proj" ], "candidates": [ { "dkld": -0.016174009442329385, "dbits": 6291456 } ] }, { "idx": 197, "layers": [ "model.layers.39.self_attn.o_proj" ], "candidates": [ { "dkld": -0.034230127930641174, "dbits": 18874368 } ] }, { "idx": 198, "layers": [ "model.layers.39.block_sparse_moe.experts.0.w1", "model.layers.39.block_sparse_moe.experts.1.w1", "model.layers.39.block_sparse_moe.experts.2.w1", "model.layers.39.block_sparse_moe.experts.3.w1", "model.layers.39.block_sparse_moe.experts.4.w1", "model.layers.39.block_sparse_moe.experts.5.w1", "model.layers.39.block_sparse_moe.experts.6.w1", "model.layers.39.block_sparse_moe.experts.7.w1", "model.layers.39.block_sparse_moe.experts.8.w1", "model.layers.39.block_sparse_moe.experts.9.w1", "model.layers.39.block_sparse_moe.experts.10.w1", "model.layers.39.block_sparse_moe.experts.11.w1", "model.layers.39.block_sparse_moe.experts.12.w1", "model.layers.39.block_sparse_moe.experts.13.w1", "model.layers.39.block_sparse_moe.experts.14.w1", "model.layers.39.block_sparse_moe.experts.15.w1", "model.layers.39.block_sparse_moe.experts.16.w1", "model.layers.39.block_sparse_moe.experts.17.w1", "model.layers.39.block_sparse_moe.experts.18.w1", "model.layers.39.block_sparse_moe.experts.19.w1", "model.layers.39.block_sparse_moe.experts.20.w1", "model.layers.39.block_sparse_moe.experts.21.w1", "model.layers.39.block_sparse_moe.experts.22.w1", "model.layers.39.block_sparse_moe.experts.23.w1", "model.layers.39.block_sparse_moe.experts.24.w1", "model.layers.39.block_sparse_moe.experts.25.w1", "model.layers.39.block_sparse_moe.experts.26.w1", "model.layers.39.block_sparse_moe.experts.27.w1", "model.layers.39.block_sparse_moe.experts.28.w1", "model.layers.39.block_sparse_moe.experts.29.w1", "model.layers.39.block_sparse_moe.experts.30.w1", "model.layers.39.block_sparse_moe.experts.31.w1", "model.layers.39.block_sparse_moe.experts.32.w1", "model.layers.39.block_sparse_moe.experts.33.w1", "model.layers.39.block_sparse_moe.experts.34.w1", "model.layers.39.block_sparse_moe.experts.35.w1", "model.layers.39.block_sparse_moe.experts.36.w1", "model.layers.39.block_sparse_moe.experts.37.w1", "model.layers.39.block_sparse_moe.experts.38.w1", "model.layers.39.block_sparse_moe.experts.39.w1", "model.layers.39.block_sparse_moe.experts.40.w1", "model.layers.39.block_sparse_moe.experts.41.w1", "model.layers.39.block_sparse_moe.experts.42.w1", "model.layers.39.block_sparse_moe.experts.43.w1", "model.layers.39.block_sparse_moe.experts.44.w1", "model.layers.39.block_sparse_moe.experts.45.w1", "model.layers.39.block_sparse_moe.experts.46.w1", "model.layers.39.block_sparse_moe.experts.47.w1", "model.layers.39.block_sparse_moe.experts.48.w1", "model.layers.39.block_sparse_moe.experts.49.w1", "model.layers.39.block_sparse_moe.experts.50.w1", "model.layers.39.block_sparse_moe.experts.51.w1", "model.layers.39.block_sparse_moe.experts.52.w1", "model.layers.39.block_sparse_moe.experts.53.w1", "model.layers.39.block_sparse_moe.experts.54.w1", "model.layers.39.block_sparse_moe.experts.55.w1", "model.layers.39.block_sparse_moe.experts.56.w1", "model.layers.39.block_sparse_moe.experts.57.w1", "model.layers.39.block_sparse_moe.experts.58.w1", "model.layers.39.block_sparse_moe.experts.59.w1", "model.layers.39.block_sparse_moe.experts.60.w1", "model.layers.39.block_sparse_moe.experts.61.w1", "model.layers.39.block_sparse_moe.experts.62.w1", "model.layers.39.block_sparse_moe.experts.63.w1", "model.layers.39.block_sparse_moe.experts.64.w1", "model.layers.39.block_sparse_moe.experts.65.w1", "model.layers.39.block_sparse_moe.experts.66.w1", "model.layers.39.block_sparse_moe.experts.67.w1", "model.layers.39.block_sparse_moe.experts.68.w1", "model.layers.39.block_sparse_moe.experts.69.w1", "model.layers.39.block_sparse_moe.experts.70.w1", "model.layers.39.block_sparse_moe.experts.71.w1", "model.layers.39.block_sparse_moe.experts.72.w1", "model.layers.39.block_sparse_moe.experts.73.w1", "model.layers.39.block_sparse_moe.experts.74.w1", "model.layers.39.block_sparse_moe.experts.75.w1", "model.layers.39.block_sparse_moe.experts.76.w1", "model.layers.39.block_sparse_moe.experts.77.w1", "model.layers.39.block_sparse_moe.experts.78.w1", "model.layers.39.block_sparse_moe.experts.79.w1", "model.layers.39.block_sparse_moe.experts.80.w1", "model.layers.39.block_sparse_moe.experts.81.w1", "model.layers.39.block_sparse_moe.experts.82.w1", "model.layers.39.block_sparse_moe.experts.83.w1", "model.layers.39.block_sparse_moe.experts.84.w1", "model.layers.39.block_sparse_moe.experts.85.w1", "model.layers.39.block_sparse_moe.experts.86.w1", "model.layers.39.block_sparse_moe.experts.87.w1", "model.layers.39.block_sparse_moe.experts.88.w1", "model.layers.39.block_sparse_moe.experts.89.w1", "model.layers.39.block_sparse_moe.experts.90.w1", "model.layers.39.block_sparse_moe.experts.91.w1", "model.layers.39.block_sparse_moe.experts.92.w1", "model.layers.39.block_sparse_moe.experts.93.w1", "model.layers.39.block_sparse_moe.experts.94.w1", "model.layers.39.block_sparse_moe.experts.95.w1", "model.layers.39.block_sparse_moe.experts.96.w1", "model.layers.39.block_sparse_moe.experts.97.w1", "model.layers.39.block_sparse_moe.experts.98.w1", "model.layers.39.block_sparse_moe.experts.99.w1", "model.layers.39.block_sparse_moe.experts.100.w1", "model.layers.39.block_sparse_moe.experts.101.w1", "model.layers.39.block_sparse_moe.experts.102.w1", "model.layers.39.block_sparse_moe.experts.103.w1", "model.layers.39.block_sparse_moe.experts.104.w1", "model.layers.39.block_sparse_moe.experts.105.w1", "model.layers.39.block_sparse_moe.experts.106.w1", "model.layers.39.block_sparse_moe.experts.107.w1", "model.layers.39.block_sparse_moe.experts.108.w1", "model.layers.39.block_sparse_moe.experts.109.w1", "model.layers.39.block_sparse_moe.experts.110.w1", "model.layers.39.block_sparse_moe.experts.111.w1", "model.layers.39.block_sparse_moe.experts.112.w1", "model.layers.39.block_sparse_moe.experts.113.w1", "model.layers.39.block_sparse_moe.experts.114.w1", "model.layers.39.block_sparse_moe.experts.115.w1", "model.layers.39.block_sparse_moe.experts.116.w1", "model.layers.39.block_sparse_moe.experts.117.w1", "model.layers.39.block_sparse_moe.experts.118.w1", "model.layers.39.block_sparse_moe.experts.119.w1", "model.layers.39.block_sparse_moe.experts.120.w1", "model.layers.39.block_sparse_moe.experts.121.w1", "model.layers.39.block_sparse_moe.experts.122.w1", "model.layers.39.block_sparse_moe.experts.123.w1", "model.layers.39.block_sparse_moe.experts.124.w1", "model.layers.39.block_sparse_moe.experts.125.w1", "model.layers.39.block_sparse_moe.experts.126.w1", "model.layers.39.block_sparse_moe.experts.127.w1", "model.layers.39.block_sparse_moe.experts.128.w1", "model.layers.39.block_sparse_moe.experts.129.w1", "model.layers.39.block_sparse_moe.experts.130.w1", "model.layers.39.block_sparse_moe.experts.131.w1", "model.layers.39.block_sparse_moe.experts.132.w1", "model.layers.39.block_sparse_moe.experts.133.w1", "model.layers.39.block_sparse_moe.experts.134.w1", "model.layers.39.block_sparse_moe.experts.135.w1", "model.layers.39.block_sparse_moe.experts.136.w1", "model.layers.39.block_sparse_moe.experts.137.w1", "model.layers.39.block_sparse_moe.experts.138.w1", "model.layers.39.block_sparse_moe.experts.139.w1", "model.layers.39.block_sparse_moe.experts.140.w1", "model.layers.39.block_sparse_moe.experts.141.w1", "model.layers.39.block_sparse_moe.experts.142.w1", "model.layers.39.block_sparse_moe.experts.143.w1", "model.layers.39.block_sparse_moe.experts.144.w1", "model.layers.39.block_sparse_moe.experts.145.w1", "model.layers.39.block_sparse_moe.experts.146.w1", "model.layers.39.block_sparse_moe.experts.147.w1", "model.layers.39.block_sparse_moe.experts.148.w1", "model.layers.39.block_sparse_moe.experts.149.w1", "model.layers.39.block_sparse_moe.experts.150.w1", "model.layers.39.block_sparse_moe.experts.151.w1", "model.layers.39.block_sparse_moe.experts.152.w1", "model.layers.39.block_sparse_moe.experts.153.w1", "model.layers.39.block_sparse_moe.experts.154.w1", "model.layers.39.block_sparse_moe.experts.155.w1", "model.layers.39.block_sparse_moe.experts.156.w1", "model.layers.39.block_sparse_moe.experts.157.w1", "model.layers.39.block_sparse_moe.experts.158.w1", "model.layers.39.block_sparse_moe.experts.159.w1", "model.layers.39.block_sparse_moe.experts.160.w1", "model.layers.39.block_sparse_moe.experts.161.w1", "model.layers.39.block_sparse_moe.experts.162.w1", "model.layers.39.block_sparse_moe.experts.163.w1", "model.layers.39.block_sparse_moe.experts.164.w1", "model.layers.39.block_sparse_moe.experts.165.w1", "model.layers.39.block_sparse_moe.experts.166.w1", "model.layers.39.block_sparse_moe.experts.167.w1", "model.layers.39.block_sparse_moe.experts.168.w1", "model.layers.39.block_sparse_moe.experts.169.w1", "model.layers.39.block_sparse_moe.experts.170.w1", "model.layers.39.block_sparse_moe.experts.171.w1", "model.layers.39.block_sparse_moe.experts.172.w1", "model.layers.39.block_sparse_moe.experts.173.w1", "model.layers.39.block_sparse_moe.experts.174.w1", "model.layers.39.block_sparse_moe.experts.175.w1", "model.layers.39.block_sparse_moe.experts.176.w1", "model.layers.39.block_sparse_moe.experts.177.w1", "model.layers.39.block_sparse_moe.experts.178.w1", "model.layers.39.block_sparse_moe.experts.179.w1", "model.layers.39.block_sparse_moe.experts.180.w1", "model.layers.39.block_sparse_moe.experts.181.w1", "model.layers.39.block_sparse_moe.experts.182.w1", "model.layers.39.block_sparse_moe.experts.183.w1", "model.layers.39.block_sparse_moe.experts.184.w1", "model.layers.39.block_sparse_moe.experts.185.w1", "model.layers.39.block_sparse_moe.experts.186.w1", "model.layers.39.block_sparse_moe.experts.187.w1", "model.layers.39.block_sparse_moe.experts.188.w1", "model.layers.39.block_sparse_moe.experts.189.w1", "model.layers.39.block_sparse_moe.experts.190.w1", "model.layers.39.block_sparse_moe.experts.191.w1", "model.layers.39.block_sparse_moe.experts.192.w1", "model.layers.39.block_sparse_moe.experts.193.w1", "model.layers.39.block_sparse_moe.experts.194.w1", "model.layers.39.block_sparse_moe.experts.195.w1", "model.layers.39.block_sparse_moe.experts.196.w1", "model.layers.39.block_sparse_moe.experts.197.w1", "model.layers.39.block_sparse_moe.experts.198.w1", "model.layers.39.block_sparse_moe.experts.199.w1", "model.layers.39.block_sparse_moe.experts.200.w1", "model.layers.39.block_sparse_moe.experts.201.w1", "model.layers.39.block_sparse_moe.experts.202.w1", "model.layers.39.block_sparse_moe.experts.203.w1", "model.layers.39.block_sparse_moe.experts.204.w1", "model.layers.39.block_sparse_moe.experts.205.w1", "model.layers.39.block_sparse_moe.experts.206.w1", "model.layers.39.block_sparse_moe.experts.207.w1", "model.layers.39.block_sparse_moe.experts.208.w1", "model.layers.39.block_sparse_moe.experts.209.w1", "model.layers.39.block_sparse_moe.experts.210.w1", "model.layers.39.block_sparse_moe.experts.211.w1", "model.layers.39.block_sparse_moe.experts.212.w1", "model.layers.39.block_sparse_moe.experts.213.w1", "model.layers.39.block_sparse_moe.experts.214.w1", "model.layers.39.block_sparse_moe.experts.215.w1", "model.layers.39.block_sparse_moe.experts.216.w1", "model.layers.39.block_sparse_moe.experts.217.w1", "model.layers.39.block_sparse_moe.experts.218.w1", "model.layers.39.block_sparse_moe.experts.219.w1", "model.layers.39.block_sparse_moe.experts.220.w1", "model.layers.39.block_sparse_moe.experts.221.w1", "model.layers.39.block_sparse_moe.experts.222.w1", "model.layers.39.block_sparse_moe.experts.223.w1", "model.layers.39.block_sparse_moe.experts.224.w1", "model.layers.39.block_sparse_moe.experts.225.w1", "model.layers.39.block_sparse_moe.experts.226.w1", "model.layers.39.block_sparse_moe.experts.227.w1", "model.layers.39.block_sparse_moe.experts.228.w1", "model.layers.39.block_sparse_moe.experts.229.w1", "model.layers.39.block_sparse_moe.experts.230.w1", "model.layers.39.block_sparse_moe.experts.231.w1", "model.layers.39.block_sparse_moe.experts.232.w1", "model.layers.39.block_sparse_moe.experts.233.w1", "model.layers.39.block_sparse_moe.experts.234.w1", "model.layers.39.block_sparse_moe.experts.235.w1", "model.layers.39.block_sparse_moe.experts.236.w1", "model.layers.39.block_sparse_moe.experts.237.w1", "model.layers.39.block_sparse_moe.experts.238.w1", "model.layers.39.block_sparse_moe.experts.239.w1", "model.layers.39.block_sparse_moe.experts.240.w1", "model.layers.39.block_sparse_moe.experts.241.w1", "model.layers.39.block_sparse_moe.experts.242.w1", "model.layers.39.block_sparse_moe.experts.243.w1", "model.layers.39.block_sparse_moe.experts.244.w1", "model.layers.39.block_sparse_moe.experts.245.w1", "model.layers.39.block_sparse_moe.experts.246.w1", "model.layers.39.block_sparse_moe.experts.247.w1", "model.layers.39.block_sparse_moe.experts.248.w1", "model.layers.39.block_sparse_moe.experts.249.w1", "model.layers.39.block_sparse_moe.experts.250.w1", "model.layers.39.block_sparse_moe.experts.251.w1", "model.layers.39.block_sparse_moe.experts.252.w1", "model.layers.39.block_sparse_moe.experts.253.w1", "model.layers.39.block_sparse_moe.experts.254.w1", "model.layers.39.block_sparse_moe.experts.255.w1", "model.layers.39.block_sparse_moe.experts.0.w3", "model.layers.39.block_sparse_moe.experts.1.w3", "model.layers.39.block_sparse_moe.experts.2.w3", "model.layers.39.block_sparse_moe.experts.3.w3", "model.layers.39.block_sparse_moe.experts.4.w3", "model.layers.39.block_sparse_moe.experts.5.w3", "model.layers.39.block_sparse_moe.experts.6.w3", "model.layers.39.block_sparse_moe.experts.7.w3", "model.layers.39.block_sparse_moe.experts.8.w3", "model.layers.39.block_sparse_moe.experts.9.w3", "model.layers.39.block_sparse_moe.experts.10.w3", "model.layers.39.block_sparse_moe.experts.11.w3", "model.layers.39.block_sparse_moe.experts.12.w3", "model.layers.39.block_sparse_moe.experts.13.w3", "model.layers.39.block_sparse_moe.experts.14.w3", "model.layers.39.block_sparse_moe.experts.15.w3", "model.layers.39.block_sparse_moe.experts.16.w3", "model.layers.39.block_sparse_moe.experts.17.w3", "model.layers.39.block_sparse_moe.experts.18.w3", "model.layers.39.block_sparse_moe.experts.19.w3", "model.layers.39.block_sparse_moe.experts.20.w3", "model.layers.39.block_sparse_moe.experts.21.w3", "model.layers.39.block_sparse_moe.experts.22.w3", "model.layers.39.block_sparse_moe.experts.23.w3", "model.layers.39.block_sparse_moe.experts.24.w3", "model.layers.39.block_sparse_moe.experts.25.w3", "model.layers.39.block_sparse_moe.experts.26.w3", "model.layers.39.block_sparse_moe.experts.27.w3", "model.layers.39.block_sparse_moe.experts.28.w3", "model.layers.39.block_sparse_moe.experts.29.w3", "model.layers.39.block_sparse_moe.experts.30.w3", "model.layers.39.block_sparse_moe.experts.31.w3", "model.layers.39.block_sparse_moe.experts.32.w3", "model.layers.39.block_sparse_moe.experts.33.w3", "model.layers.39.block_sparse_moe.experts.34.w3", "model.layers.39.block_sparse_moe.experts.35.w3", "model.layers.39.block_sparse_moe.experts.36.w3", "model.layers.39.block_sparse_moe.experts.37.w3", "model.layers.39.block_sparse_moe.experts.38.w3", "model.layers.39.block_sparse_moe.experts.39.w3", "model.layers.39.block_sparse_moe.experts.40.w3", "model.layers.39.block_sparse_moe.experts.41.w3", "model.layers.39.block_sparse_moe.experts.42.w3", "model.layers.39.block_sparse_moe.experts.43.w3", "model.layers.39.block_sparse_moe.experts.44.w3", "model.layers.39.block_sparse_moe.experts.45.w3", "model.layers.39.block_sparse_moe.experts.46.w3", "model.layers.39.block_sparse_moe.experts.47.w3", "model.layers.39.block_sparse_moe.experts.48.w3", "model.layers.39.block_sparse_moe.experts.49.w3", "model.layers.39.block_sparse_moe.experts.50.w3", "model.layers.39.block_sparse_moe.experts.51.w3", "model.layers.39.block_sparse_moe.experts.52.w3", "model.layers.39.block_sparse_moe.experts.53.w3", "model.layers.39.block_sparse_moe.experts.54.w3", "model.layers.39.block_sparse_moe.experts.55.w3", "model.layers.39.block_sparse_moe.experts.56.w3", "model.layers.39.block_sparse_moe.experts.57.w3", "model.layers.39.block_sparse_moe.experts.58.w3", "model.layers.39.block_sparse_moe.experts.59.w3", "model.layers.39.block_sparse_moe.experts.60.w3", "model.layers.39.block_sparse_moe.experts.61.w3", "model.layers.39.block_sparse_moe.experts.62.w3", "model.layers.39.block_sparse_moe.experts.63.w3", "model.layers.39.block_sparse_moe.experts.64.w3", "model.layers.39.block_sparse_moe.experts.65.w3", "model.layers.39.block_sparse_moe.experts.66.w3", "model.layers.39.block_sparse_moe.experts.67.w3", "model.layers.39.block_sparse_moe.experts.68.w3", "model.layers.39.block_sparse_moe.experts.69.w3", "model.layers.39.block_sparse_moe.experts.70.w3", "model.layers.39.block_sparse_moe.experts.71.w3", "model.layers.39.block_sparse_moe.experts.72.w3", "model.layers.39.block_sparse_moe.experts.73.w3", "model.layers.39.block_sparse_moe.experts.74.w3", "model.layers.39.block_sparse_moe.experts.75.w3", "model.layers.39.block_sparse_moe.experts.76.w3", "model.layers.39.block_sparse_moe.experts.77.w3", "model.layers.39.block_sparse_moe.experts.78.w3", "model.layers.39.block_sparse_moe.experts.79.w3", "model.layers.39.block_sparse_moe.experts.80.w3", "model.layers.39.block_sparse_moe.experts.81.w3", "model.layers.39.block_sparse_moe.experts.82.w3", "model.layers.39.block_sparse_moe.experts.83.w3", "model.layers.39.block_sparse_moe.experts.84.w3", "model.layers.39.block_sparse_moe.experts.85.w3", "model.layers.39.block_sparse_moe.experts.86.w3", "model.layers.39.block_sparse_moe.experts.87.w3", "model.layers.39.block_sparse_moe.experts.88.w3", "model.layers.39.block_sparse_moe.experts.89.w3", "model.layers.39.block_sparse_moe.experts.90.w3", "model.layers.39.block_sparse_moe.experts.91.w3", "model.layers.39.block_sparse_moe.experts.92.w3", "model.layers.39.block_sparse_moe.experts.93.w3", "model.layers.39.block_sparse_moe.experts.94.w3", "model.layers.39.block_sparse_moe.experts.95.w3", "model.layers.39.block_sparse_moe.experts.96.w3", "model.layers.39.block_sparse_moe.experts.97.w3", "model.layers.39.block_sparse_moe.experts.98.w3", "model.layers.39.block_sparse_moe.experts.99.w3", "model.layers.39.block_sparse_moe.experts.100.w3", "model.layers.39.block_sparse_moe.experts.101.w3", "model.layers.39.block_sparse_moe.experts.102.w3", "model.layers.39.block_sparse_moe.experts.103.w3", "model.layers.39.block_sparse_moe.experts.104.w3", "model.layers.39.block_sparse_moe.experts.105.w3", "model.layers.39.block_sparse_moe.experts.106.w3", "model.layers.39.block_sparse_moe.experts.107.w3", "model.layers.39.block_sparse_moe.experts.108.w3", "model.layers.39.block_sparse_moe.experts.109.w3", "model.layers.39.block_sparse_moe.experts.110.w3", "model.layers.39.block_sparse_moe.experts.111.w3", "model.layers.39.block_sparse_moe.experts.112.w3", "model.layers.39.block_sparse_moe.experts.113.w3", "model.layers.39.block_sparse_moe.experts.114.w3", "model.layers.39.block_sparse_moe.experts.115.w3", "model.layers.39.block_sparse_moe.experts.116.w3", "model.layers.39.block_sparse_moe.experts.117.w3", "model.layers.39.block_sparse_moe.experts.118.w3", "model.layers.39.block_sparse_moe.experts.119.w3", "model.layers.39.block_sparse_moe.experts.120.w3", "model.layers.39.block_sparse_moe.experts.121.w3", "model.layers.39.block_sparse_moe.experts.122.w3", "model.layers.39.block_sparse_moe.experts.123.w3", "model.layers.39.block_sparse_moe.experts.124.w3", "model.layers.39.block_sparse_moe.experts.125.w3", "model.layers.39.block_sparse_moe.experts.126.w3", "model.layers.39.block_sparse_moe.experts.127.w3", "model.layers.39.block_sparse_moe.experts.128.w3", "model.layers.39.block_sparse_moe.experts.129.w3", "model.layers.39.block_sparse_moe.experts.130.w3", "model.layers.39.block_sparse_moe.experts.131.w3", "model.layers.39.block_sparse_moe.experts.132.w3", "model.layers.39.block_sparse_moe.experts.133.w3", "model.layers.39.block_sparse_moe.experts.134.w3", "model.layers.39.block_sparse_moe.experts.135.w3", "model.layers.39.block_sparse_moe.experts.136.w3", "model.layers.39.block_sparse_moe.experts.137.w3", "model.layers.39.block_sparse_moe.experts.138.w3", "model.layers.39.block_sparse_moe.experts.139.w3", "model.layers.39.block_sparse_moe.experts.140.w3", "model.layers.39.block_sparse_moe.experts.141.w3", "model.layers.39.block_sparse_moe.experts.142.w3", "model.layers.39.block_sparse_moe.experts.143.w3", "model.layers.39.block_sparse_moe.experts.144.w3", "model.layers.39.block_sparse_moe.experts.145.w3", "model.layers.39.block_sparse_moe.experts.146.w3", "model.layers.39.block_sparse_moe.experts.147.w3", "model.layers.39.block_sparse_moe.experts.148.w3", "model.layers.39.block_sparse_moe.experts.149.w3", "model.layers.39.block_sparse_moe.experts.150.w3", "model.layers.39.block_sparse_moe.experts.151.w3", "model.layers.39.block_sparse_moe.experts.152.w3", "model.layers.39.block_sparse_moe.experts.153.w3", "model.layers.39.block_sparse_moe.experts.154.w3", "model.layers.39.block_sparse_moe.experts.155.w3", "model.layers.39.block_sparse_moe.experts.156.w3", "model.layers.39.block_sparse_moe.experts.157.w3", "model.layers.39.block_sparse_moe.experts.158.w3", "model.layers.39.block_sparse_moe.experts.159.w3", "model.layers.39.block_sparse_moe.experts.160.w3", "model.layers.39.block_sparse_moe.experts.161.w3", "model.layers.39.block_sparse_moe.experts.162.w3", "model.layers.39.block_sparse_moe.experts.163.w3", "model.layers.39.block_sparse_moe.experts.164.w3", "model.layers.39.block_sparse_moe.experts.165.w3", "model.layers.39.block_sparse_moe.experts.166.w3", "model.layers.39.block_sparse_moe.experts.167.w3", "model.layers.39.block_sparse_moe.experts.168.w3", "model.layers.39.block_sparse_moe.experts.169.w3", "model.layers.39.block_sparse_moe.experts.170.w3", "model.layers.39.block_sparse_moe.experts.171.w3", "model.layers.39.block_sparse_moe.experts.172.w3", "model.layers.39.block_sparse_moe.experts.173.w3", "model.layers.39.block_sparse_moe.experts.174.w3", "model.layers.39.block_sparse_moe.experts.175.w3", "model.layers.39.block_sparse_moe.experts.176.w3", "model.layers.39.block_sparse_moe.experts.177.w3", "model.layers.39.block_sparse_moe.experts.178.w3", "model.layers.39.block_sparse_moe.experts.179.w3", "model.layers.39.block_sparse_moe.experts.180.w3", "model.layers.39.block_sparse_moe.experts.181.w3", "model.layers.39.block_sparse_moe.experts.182.w3", "model.layers.39.block_sparse_moe.experts.183.w3", "model.layers.39.block_sparse_moe.experts.184.w3", "model.layers.39.block_sparse_moe.experts.185.w3", "model.layers.39.block_sparse_moe.experts.186.w3", "model.layers.39.block_sparse_moe.experts.187.w3", "model.layers.39.block_sparse_moe.experts.188.w3", "model.layers.39.block_sparse_moe.experts.189.w3", "model.layers.39.block_sparse_moe.experts.190.w3", "model.layers.39.block_sparse_moe.experts.191.w3", "model.layers.39.block_sparse_moe.experts.192.w3", "model.layers.39.block_sparse_moe.experts.193.w3", "model.layers.39.block_sparse_moe.experts.194.w3", "model.layers.39.block_sparse_moe.experts.195.w3", "model.layers.39.block_sparse_moe.experts.196.w3", "model.layers.39.block_sparse_moe.experts.197.w3", "model.layers.39.block_sparse_moe.experts.198.w3", "model.layers.39.block_sparse_moe.experts.199.w3", "model.layers.39.block_sparse_moe.experts.200.w3", "model.layers.39.block_sparse_moe.experts.201.w3", "model.layers.39.block_sparse_moe.experts.202.w3", "model.layers.39.block_sparse_moe.experts.203.w3", "model.layers.39.block_sparse_moe.experts.204.w3", "model.layers.39.block_sparse_moe.experts.205.w3", "model.layers.39.block_sparse_moe.experts.206.w3", "model.layers.39.block_sparse_moe.experts.207.w3", "model.layers.39.block_sparse_moe.experts.208.w3", "model.layers.39.block_sparse_moe.experts.209.w3", "model.layers.39.block_sparse_moe.experts.210.w3", "model.layers.39.block_sparse_moe.experts.211.w3", "model.layers.39.block_sparse_moe.experts.212.w3", "model.layers.39.block_sparse_moe.experts.213.w3", "model.layers.39.block_sparse_moe.experts.214.w3", "model.layers.39.block_sparse_moe.experts.215.w3", "model.layers.39.block_sparse_moe.experts.216.w3", "model.layers.39.block_sparse_moe.experts.217.w3", "model.layers.39.block_sparse_moe.experts.218.w3", "model.layers.39.block_sparse_moe.experts.219.w3", "model.layers.39.block_sparse_moe.experts.220.w3", "model.layers.39.block_sparse_moe.experts.221.w3", "model.layers.39.block_sparse_moe.experts.222.w3", "model.layers.39.block_sparse_moe.experts.223.w3", "model.layers.39.block_sparse_moe.experts.224.w3", "model.layers.39.block_sparse_moe.experts.225.w3", "model.layers.39.block_sparse_moe.experts.226.w3", "model.layers.39.block_sparse_moe.experts.227.w3", "model.layers.39.block_sparse_moe.experts.228.w3", "model.layers.39.block_sparse_moe.experts.229.w3", "model.layers.39.block_sparse_moe.experts.230.w3", "model.layers.39.block_sparse_moe.experts.231.w3", "model.layers.39.block_sparse_moe.experts.232.w3", "model.layers.39.block_sparse_moe.experts.233.w3", "model.layers.39.block_sparse_moe.experts.234.w3", "model.layers.39.block_sparse_moe.experts.235.w3", "model.layers.39.block_sparse_moe.experts.236.w3", "model.layers.39.block_sparse_moe.experts.237.w3", "model.layers.39.block_sparse_moe.experts.238.w3", "model.layers.39.block_sparse_moe.experts.239.w3", "model.layers.39.block_sparse_moe.experts.240.w3", "model.layers.39.block_sparse_moe.experts.241.w3", "model.layers.39.block_sparse_moe.experts.242.w3", "model.layers.39.block_sparse_moe.experts.243.w3", "model.layers.39.block_sparse_moe.experts.244.w3", "model.layers.39.block_sparse_moe.experts.245.w3", "model.layers.39.block_sparse_moe.experts.246.w3", "model.layers.39.block_sparse_moe.experts.247.w3", "model.layers.39.block_sparse_moe.experts.248.w3", "model.layers.39.block_sparse_moe.experts.249.w3", "model.layers.39.block_sparse_moe.experts.250.w3", "model.layers.39.block_sparse_moe.experts.251.w3", "model.layers.39.block_sparse_moe.experts.252.w3", "model.layers.39.block_sparse_moe.experts.253.w3", "model.layers.39.block_sparse_moe.experts.254.w3", "model.layers.39.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0014513045549392478, "dbits": 2415919104 } ] }, { "idx": 199, "layers": [ "model.layers.39.block_sparse_moe.experts.0.w2", "model.layers.39.block_sparse_moe.experts.1.w2", "model.layers.39.block_sparse_moe.experts.2.w2", "model.layers.39.block_sparse_moe.experts.3.w2", "model.layers.39.block_sparse_moe.experts.4.w2", "model.layers.39.block_sparse_moe.experts.5.w2", "model.layers.39.block_sparse_moe.experts.6.w2", "model.layers.39.block_sparse_moe.experts.7.w2", "model.layers.39.block_sparse_moe.experts.8.w2", "model.layers.39.block_sparse_moe.experts.9.w2", "model.layers.39.block_sparse_moe.experts.10.w2", "model.layers.39.block_sparse_moe.experts.11.w2", "model.layers.39.block_sparse_moe.experts.12.w2", "model.layers.39.block_sparse_moe.experts.13.w2", "model.layers.39.block_sparse_moe.experts.14.w2", "model.layers.39.block_sparse_moe.experts.15.w2", "model.layers.39.block_sparse_moe.experts.16.w2", "model.layers.39.block_sparse_moe.experts.17.w2", "model.layers.39.block_sparse_moe.experts.18.w2", "model.layers.39.block_sparse_moe.experts.19.w2", "model.layers.39.block_sparse_moe.experts.20.w2", "model.layers.39.block_sparse_moe.experts.21.w2", "model.layers.39.block_sparse_moe.experts.22.w2", "model.layers.39.block_sparse_moe.experts.23.w2", "model.layers.39.block_sparse_moe.experts.24.w2", "model.layers.39.block_sparse_moe.experts.25.w2", "model.layers.39.block_sparse_moe.experts.26.w2", "model.layers.39.block_sparse_moe.experts.27.w2", "model.layers.39.block_sparse_moe.experts.28.w2", "model.layers.39.block_sparse_moe.experts.29.w2", "model.layers.39.block_sparse_moe.experts.30.w2", "model.layers.39.block_sparse_moe.experts.31.w2", "model.layers.39.block_sparse_moe.experts.32.w2", "model.layers.39.block_sparse_moe.experts.33.w2", "model.layers.39.block_sparse_moe.experts.34.w2", "model.layers.39.block_sparse_moe.experts.35.w2", "model.layers.39.block_sparse_moe.experts.36.w2", "model.layers.39.block_sparse_moe.experts.37.w2", "model.layers.39.block_sparse_moe.experts.38.w2", "model.layers.39.block_sparse_moe.experts.39.w2", "model.layers.39.block_sparse_moe.experts.40.w2", "model.layers.39.block_sparse_moe.experts.41.w2", "model.layers.39.block_sparse_moe.experts.42.w2", "model.layers.39.block_sparse_moe.experts.43.w2", "model.layers.39.block_sparse_moe.experts.44.w2", "model.layers.39.block_sparse_moe.experts.45.w2", "model.layers.39.block_sparse_moe.experts.46.w2", "model.layers.39.block_sparse_moe.experts.47.w2", "model.layers.39.block_sparse_moe.experts.48.w2", "model.layers.39.block_sparse_moe.experts.49.w2", "model.layers.39.block_sparse_moe.experts.50.w2", "model.layers.39.block_sparse_moe.experts.51.w2", "model.layers.39.block_sparse_moe.experts.52.w2", "model.layers.39.block_sparse_moe.experts.53.w2", "model.layers.39.block_sparse_moe.experts.54.w2", "model.layers.39.block_sparse_moe.experts.55.w2", "model.layers.39.block_sparse_moe.experts.56.w2", "model.layers.39.block_sparse_moe.experts.57.w2", "model.layers.39.block_sparse_moe.experts.58.w2", "model.layers.39.block_sparse_moe.experts.59.w2", "model.layers.39.block_sparse_moe.experts.60.w2", "model.layers.39.block_sparse_moe.experts.61.w2", "model.layers.39.block_sparse_moe.experts.62.w2", "model.layers.39.block_sparse_moe.experts.63.w2", "model.layers.39.block_sparse_moe.experts.64.w2", "model.layers.39.block_sparse_moe.experts.65.w2", "model.layers.39.block_sparse_moe.experts.66.w2", "model.layers.39.block_sparse_moe.experts.67.w2", "model.layers.39.block_sparse_moe.experts.68.w2", "model.layers.39.block_sparse_moe.experts.69.w2", "model.layers.39.block_sparse_moe.experts.70.w2", "model.layers.39.block_sparse_moe.experts.71.w2", "model.layers.39.block_sparse_moe.experts.72.w2", "model.layers.39.block_sparse_moe.experts.73.w2", "model.layers.39.block_sparse_moe.experts.74.w2", "model.layers.39.block_sparse_moe.experts.75.w2", "model.layers.39.block_sparse_moe.experts.76.w2", "model.layers.39.block_sparse_moe.experts.77.w2", "model.layers.39.block_sparse_moe.experts.78.w2", "model.layers.39.block_sparse_moe.experts.79.w2", "model.layers.39.block_sparse_moe.experts.80.w2", "model.layers.39.block_sparse_moe.experts.81.w2", "model.layers.39.block_sparse_moe.experts.82.w2", "model.layers.39.block_sparse_moe.experts.83.w2", "model.layers.39.block_sparse_moe.experts.84.w2", "model.layers.39.block_sparse_moe.experts.85.w2", "model.layers.39.block_sparse_moe.experts.86.w2", "model.layers.39.block_sparse_moe.experts.87.w2", "model.layers.39.block_sparse_moe.experts.88.w2", "model.layers.39.block_sparse_moe.experts.89.w2", "model.layers.39.block_sparse_moe.experts.90.w2", "model.layers.39.block_sparse_moe.experts.91.w2", "model.layers.39.block_sparse_moe.experts.92.w2", "model.layers.39.block_sparse_moe.experts.93.w2", "model.layers.39.block_sparse_moe.experts.94.w2", "model.layers.39.block_sparse_moe.experts.95.w2", "model.layers.39.block_sparse_moe.experts.96.w2", "model.layers.39.block_sparse_moe.experts.97.w2", "model.layers.39.block_sparse_moe.experts.98.w2", "model.layers.39.block_sparse_moe.experts.99.w2", "model.layers.39.block_sparse_moe.experts.100.w2", "model.layers.39.block_sparse_moe.experts.101.w2", "model.layers.39.block_sparse_moe.experts.102.w2", "model.layers.39.block_sparse_moe.experts.103.w2", "model.layers.39.block_sparse_moe.experts.104.w2", "model.layers.39.block_sparse_moe.experts.105.w2", "model.layers.39.block_sparse_moe.experts.106.w2", "model.layers.39.block_sparse_moe.experts.107.w2", "model.layers.39.block_sparse_moe.experts.108.w2", "model.layers.39.block_sparse_moe.experts.109.w2", "model.layers.39.block_sparse_moe.experts.110.w2", "model.layers.39.block_sparse_moe.experts.111.w2", "model.layers.39.block_sparse_moe.experts.112.w2", "model.layers.39.block_sparse_moe.experts.113.w2", "model.layers.39.block_sparse_moe.experts.114.w2", "model.layers.39.block_sparse_moe.experts.115.w2", "model.layers.39.block_sparse_moe.experts.116.w2", "model.layers.39.block_sparse_moe.experts.117.w2", "model.layers.39.block_sparse_moe.experts.118.w2", "model.layers.39.block_sparse_moe.experts.119.w2", "model.layers.39.block_sparse_moe.experts.120.w2", "model.layers.39.block_sparse_moe.experts.121.w2", "model.layers.39.block_sparse_moe.experts.122.w2", "model.layers.39.block_sparse_moe.experts.123.w2", "model.layers.39.block_sparse_moe.experts.124.w2", "model.layers.39.block_sparse_moe.experts.125.w2", "model.layers.39.block_sparse_moe.experts.126.w2", "model.layers.39.block_sparse_moe.experts.127.w2", "model.layers.39.block_sparse_moe.experts.128.w2", "model.layers.39.block_sparse_moe.experts.129.w2", "model.layers.39.block_sparse_moe.experts.130.w2", "model.layers.39.block_sparse_moe.experts.131.w2", "model.layers.39.block_sparse_moe.experts.132.w2", "model.layers.39.block_sparse_moe.experts.133.w2", "model.layers.39.block_sparse_moe.experts.134.w2", "model.layers.39.block_sparse_moe.experts.135.w2", "model.layers.39.block_sparse_moe.experts.136.w2", "model.layers.39.block_sparse_moe.experts.137.w2", "model.layers.39.block_sparse_moe.experts.138.w2", "model.layers.39.block_sparse_moe.experts.139.w2", "model.layers.39.block_sparse_moe.experts.140.w2", "model.layers.39.block_sparse_moe.experts.141.w2", "model.layers.39.block_sparse_moe.experts.142.w2", "model.layers.39.block_sparse_moe.experts.143.w2", "model.layers.39.block_sparse_moe.experts.144.w2", "model.layers.39.block_sparse_moe.experts.145.w2", "model.layers.39.block_sparse_moe.experts.146.w2", "model.layers.39.block_sparse_moe.experts.147.w2", "model.layers.39.block_sparse_moe.experts.148.w2", "model.layers.39.block_sparse_moe.experts.149.w2", "model.layers.39.block_sparse_moe.experts.150.w2", "model.layers.39.block_sparse_moe.experts.151.w2", "model.layers.39.block_sparse_moe.experts.152.w2", "model.layers.39.block_sparse_moe.experts.153.w2", "model.layers.39.block_sparse_moe.experts.154.w2", "model.layers.39.block_sparse_moe.experts.155.w2", "model.layers.39.block_sparse_moe.experts.156.w2", "model.layers.39.block_sparse_moe.experts.157.w2", "model.layers.39.block_sparse_moe.experts.158.w2", "model.layers.39.block_sparse_moe.experts.159.w2", "model.layers.39.block_sparse_moe.experts.160.w2", "model.layers.39.block_sparse_moe.experts.161.w2", "model.layers.39.block_sparse_moe.experts.162.w2", "model.layers.39.block_sparse_moe.experts.163.w2", "model.layers.39.block_sparse_moe.experts.164.w2", "model.layers.39.block_sparse_moe.experts.165.w2", "model.layers.39.block_sparse_moe.experts.166.w2", "model.layers.39.block_sparse_moe.experts.167.w2", "model.layers.39.block_sparse_moe.experts.168.w2", "model.layers.39.block_sparse_moe.experts.169.w2", "model.layers.39.block_sparse_moe.experts.170.w2", "model.layers.39.block_sparse_moe.experts.171.w2", "model.layers.39.block_sparse_moe.experts.172.w2", "model.layers.39.block_sparse_moe.experts.173.w2", "model.layers.39.block_sparse_moe.experts.174.w2", "model.layers.39.block_sparse_moe.experts.175.w2", "model.layers.39.block_sparse_moe.experts.176.w2", "model.layers.39.block_sparse_moe.experts.177.w2", "model.layers.39.block_sparse_moe.experts.178.w2", "model.layers.39.block_sparse_moe.experts.179.w2", "model.layers.39.block_sparse_moe.experts.180.w2", "model.layers.39.block_sparse_moe.experts.181.w2", "model.layers.39.block_sparse_moe.experts.182.w2", "model.layers.39.block_sparse_moe.experts.183.w2", "model.layers.39.block_sparse_moe.experts.184.w2", "model.layers.39.block_sparse_moe.experts.185.w2", "model.layers.39.block_sparse_moe.experts.186.w2", "model.layers.39.block_sparse_moe.experts.187.w2", "model.layers.39.block_sparse_moe.experts.188.w2", "model.layers.39.block_sparse_moe.experts.189.w2", "model.layers.39.block_sparse_moe.experts.190.w2", "model.layers.39.block_sparse_moe.experts.191.w2", "model.layers.39.block_sparse_moe.experts.192.w2", "model.layers.39.block_sparse_moe.experts.193.w2", "model.layers.39.block_sparse_moe.experts.194.w2", "model.layers.39.block_sparse_moe.experts.195.w2", "model.layers.39.block_sparse_moe.experts.196.w2", "model.layers.39.block_sparse_moe.experts.197.w2", "model.layers.39.block_sparse_moe.experts.198.w2", "model.layers.39.block_sparse_moe.experts.199.w2", "model.layers.39.block_sparse_moe.experts.200.w2", "model.layers.39.block_sparse_moe.experts.201.w2", "model.layers.39.block_sparse_moe.experts.202.w2", "model.layers.39.block_sparse_moe.experts.203.w2", "model.layers.39.block_sparse_moe.experts.204.w2", "model.layers.39.block_sparse_moe.experts.205.w2", "model.layers.39.block_sparse_moe.experts.206.w2", "model.layers.39.block_sparse_moe.experts.207.w2", "model.layers.39.block_sparse_moe.experts.208.w2", "model.layers.39.block_sparse_moe.experts.209.w2", "model.layers.39.block_sparse_moe.experts.210.w2", "model.layers.39.block_sparse_moe.experts.211.w2", "model.layers.39.block_sparse_moe.experts.212.w2", "model.layers.39.block_sparse_moe.experts.213.w2", "model.layers.39.block_sparse_moe.experts.214.w2", "model.layers.39.block_sparse_moe.experts.215.w2", "model.layers.39.block_sparse_moe.experts.216.w2", "model.layers.39.block_sparse_moe.experts.217.w2", "model.layers.39.block_sparse_moe.experts.218.w2", "model.layers.39.block_sparse_moe.experts.219.w2", "model.layers.39.block_sparse_moe.experts.220.w2", "model.layers.39.block_sparse_moe.experts.221.w2", "model.layers.39.block_sparse_moe.experts.222.w2", "model.layers.39.block_sparse_moe.experts.223.w2", "model.layers.39.block_sparse_moe.experts.224.w2", "model.layers.39.block_sparse_moe.experts.225.w2", "model.layers.39.block_sparse_moe.experts.226.w2", "model.layers.39.block_sparse_moe.experts.227.w2", "model.layers.39.block_sparse_moe.experts.228.w2", "model.layers.39.block_sparse_moe.experts.229.w2", "model.layers.39.block_sparse_moe.experts.230.w2", "model.layers.39.block_sparse_moe.experts.231.w2", "model.layers.39.block_sparse_moe.experts.232.w2", "model.layers.39.block_sparse_moe.experts.233.w2", "model.layers.39.block_sparse_moe.experts.234.w2", "model.layers.39.block_sparse_moe.experts.235.w2", "model.layers.39.block_sparse_moe.experts.236.w2", "model.layers.39.block_sparse_moe.experts.237.w2", "model.layers.39.block_sparse_moe.experts.238.w2", "model.layers.39.block_sparse_moe.experts.239.w2", "model.layers.39.block_sparse_moe.experts.240.w2", "model.layers.39.block_sparse_moe.experts.241.w2", "model.layers.39.block_sparse_moe.experts.242.w2", "model.layers.39.block_sparse_moe.experts.243.w2", "model.layers.39.block_sparse_moe.experts.244.w2", "model.layers.39.block_sparse_moe.experts.245.w2", "model.layers.39.block_sparse_moe.experts.246.w2", "model.layers.39.block_sparse_moe.experts.247.w2", "model.layers.39.block_sparse_moe.experts.248.w2", "model.layers.39.block_sparse_moe.experts.249.w2", "model.layers.39.block_sparse_moe.experts.250.w2", "model.layers.39.block_sparse_moe.experts.251.w2", "model.layers.39.block_sparse_moe.experts.252.w2", "model.layers.39.block_sparse_moe.experts.253.w2", "model.layers.39.block_sparse_moe.experts.254.w2", "model.layers.39.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0020910918712615523, "dbits": 1207959552 } ] }, { "idx": 200, "layers": [ "model.layers.40.self_attn.q_proj" ], "candidates": [ { "dkld": 0.000884640216827437, "dbits": 18874368 } ] }, { "idx": 201, "layers": [ "model.layers.40.self_attn.k_proj", "model.layers.40.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0071451306343077725, "dbits": 6291456 } ] }, { "idx": 202, "layers": [ "model.layers.40.self_attn.o_proj" ], "candidates": [ { "dkld": -0.011784321069717385, "dbits": 18874368 } ] }, { "idx": 203, "layers": [ "model.layers.40.block_sparse_moe.experts.0.w1", "model.layers.40.block_sparse_moe.experts.1.w1", "model.layers.40.block_sparse_moe.experts.2.w1", "model.layers.40.block_sparse_moe.experts.3.w1", "model.layers.40.block_sparse_moe.experts.4.w1", "model.layers.40.block_sparse_moe.experts.5.w1", "model.layers.40.block_sparse_moe.experts.6.w1", "model.layers.40.block_sparse_moe.experts.7.w1", "model.layers.40.block_sparse_moe.experts.8.w1", "model.layers.40.block_sparse_moe.experts.9.w1", "model.layers.40.block_sparse_moe.experts.10.w1", "model.layers.40.block_sparse_moe.experts.11.w1", "model.layers.40.block_sparse_moe.experts.12.w1", "model.layers.40.block_sparse_moe.experts.13.w1", "model.layers.40.block_sparse_moe.experts.14.w1", "model.layers.40.block_sparse_moe.experts.15.w1", "model.layers.40.block_sparse_moe.experts.16.w1", "model.layers.40.block_sparse_moe.experts.17.w1", "model.layers.40.block_sparse_moe.experts.18.w1", "model.layers.40.block_sparse_moe.experts.19.w1", "model.layers.40.block_sparse_moe.experts.20.w1", "model.layers.40.block_sparse_moe.experts.21.w1", "model.layers.40.block_sparse_moe.experts.22.w1", "model.layers.40.block_sparse_moe.experts.23.w1", "model.layers.40.block_sparse_moe.experts.24.w1", "model.layers.40.block_sparse_moe.experts.25.w1", "model.layers.40.block_sparse_moe.experts.26.w1", "model.layers.40.block_sparse_moe.experts.27.w1", "model.layers.40.block_sparse_moe.experts.28.w1", "model.layers.40.block_sparse_moe.experts.29.w1", "model.layers.40.block_sparse_moe.experts.30.w1", "model.layers.40.block_sparse_moe.experts.31.w1", "model.layers.40.block_sparse_moe.experts.32.w1", "model.layers.40.block_sparse_moe.experts.33.w1", "model.layers.40.block_sparse_moe.experts.34.w1", "model.layers.40.block_sparse_moe.experts.35.w1", "model.layers.40.block_sparse_moe.experts.36.w1", "model.layers.40.block_sparse_moe.experts.37.w1", "model.layers.40.block_sparse_moe.experts.38.w1", "model.layers.40.block_sparse_moe.experts.39.w1", "model.layers.40.block_sparse_moe.experts.40.w1", "model.layers.40.block_sparse_moe.experts.41.w1", "model.layers.40.block_sparse_moe.experts.42.w1", "model.layers.40.block_sparse_moe.experts.43.w1", "model.layers.40.block_sparse_moe.experts.44.w1", "model.layers.40.block_sparse_moe.experts.45.w1", "model.layers.40.block_sparse_moe.experts.46.w1", "model.layers.40.block_sparse_moe.experts.47.w1", "model.layers.40.block_sparse_moe.experts.48.w1", "model.layers.40.block_sparse_moe.experts.49.w1", "model.layers.40.block_sparse_moe.experts.50.w1", "model.layers.40.block_sparse_moe.experts.51.w1", "model.layers.40.block_sparse_moe.experts.52.w1", "model.layers.40.block_sparse_moe.experts.53.w1", "model.layers.40.block_sparse_moe.experts.54.w1", "model.layers.40.block_sparse_moe.experts.55.w1", "model.layers.40.block_sparse_moe.experts.56.w1", "model.layers.40.block_sparse_moe.experts.57.w1", "model.layers.40.block_sparse_moe.experts.58.w1", "model.layers.40.block_sparse_moe.experts.59.w1", "model.layers.40.block_sparse_moe.experts.60.w1", "model.layers.40.block_sparse_moe.experts.61.w1", "model.layers.40.block_sparse_moe.experts.62.w1", "model.layers.40.block_sparse_moe.experts.63.w1", "model.layers.40.block_sparse_moe.experts.64.w1", "model.layers.40.block_sparse_moe.experts.65.w1", "model.layers.40.block_sparse_moe.experts.66.w1", "model.layers.40.block_sparse_moe.experts.67.w1", "model.layers.40.block_sparse_moe.experts.68.w1", "model.layers.40.block_sparse_moe.experts.69.w1", "model.layers.40.block_sparse_moe.experts.70.w1", "model.layers.40.block_sparse_moe.experts.71.w1", "model.layers.40.block_sparse_moe.experts.72.w1", "model.layers.40.block_sparse_moe.experts.73.w1", "model.layers.40.block_sparse_moe.experts.74.w1", "model.layers.40.block_sparse_moe.experts.75.w1", "model.layers.40.block_sparse_moe.experts.76.w1", "model.layers.40.block_sparse_moe.experts.77.w1", "model.layers.40.block_sparse_moe.experts.78.w1", "model.layers.40.block_sparse_moe.experts.79.w1", "model.layers.40.block_sparse_moe.experts.80.w1", "model.layers.40.block_sparse_moe.experts.81.w1", "model.layers.40.block_sparse_moe.experts.82.w1", "model.layers.40.block_sparse_moe.experts.83.w1", "model.layers.40.block_sparse_moe.experts.84.w1", "model.layers.40.block_sparse_moe.experts.85.w1", "model.layers.40.block_sparse_moe.experts.86.w1", "model.layers.40.block_sparse_moe.experts.87.w1", "model.layers.40.block_sparse_moe.experts.88.w1", "model.layers.40.block_sparse_moe.experts.89.w1", "model.layers.40.block_sparse_moe.experts.90.w1", "model.layers.40.block_sparse_moe.experts.91.w1", "model.layers.40.block_sparse_moe.experts.92.w1", "model.layers.40.block_sparse_moe.experts.93.w1", "model.layers.40.block_sparse_moe.experts.94.w1", "model.layers.40.block_sparse_moe.experts.95.w1", "model.layers.40.block_sparse_moe.experts.96.w1", "model.layers.40.block_sparse_moe.experts.97.w1", "model.layers.40.block_sparse_moe.experts.98.w1", "model.layers.40.block_sparse_moe.experts.99.w1", "model.layers.40.block_sparse_moe.experts.100.w1", "model.layers.40.block_sparse_moe.experts.101.w1", "model.layers.40.block_sparse_moe.experts.102.w1", "model.layers.40.block_sparse_moe.experts.103.w1", "model.layers.40.block_sparse_moe.experts.104.w1", "model.layers.40.block_sparse_moe.experts.105.w1", "model.layers.40.block_sparse_moe.experts.106.w1", "model.layers.40.block_sparse_moe.experts.107.w1", "model.layers.40.block_sparse_moe.experts.108.w1", "model.layers.40.block_sparse_moe.experts.109.w1", "model.layers.40.block_sparse_moe.experts.110.w1", "model.layers.40.block_sparse_moe.experts.111.w1", "model.layers.40.block_sparse_moe.experts.112.w1", "model.layers.40.block_sparse_moe.experts.113.w1", "model.layers.40.block_sparse_moe.experts.114.w1", "model.layers.40.block_sparse_moe.experts.115.w1", "model.layers.40.block_sparse_moe.experts.116.w1", "model.layers.40.block_sparse_moe.experts.117.w1", "model.layers.40.block_sparse_moe.experts.118.w1", "model.layers.40.block_sparse_moe.experts.119.w1", "model.layers.40.block_sparse_moe.experts.120.w1", "model.layers.40.block_sparse_moe.experts.121.w1", "model.layers.40.block_sparse_moe.experts.122.w1", "model.layers.40.block_sparse_moe.experts.123.w1", "model.layers.40.block_sparse_moe.experts.124.w1", "model.layers.40.block_sparse_moe.experts.125.w1", "model.layers.40.block_sparse_moe.experts.126.w1", "model.layers.40.block_sparse_moe.experts.127.w1", "model.layers.40.block_sparse_moe.experts.128.w1", "model.layers.40.block_sparse_moe.experts.129.w1", "model.layers.40.block_sparse_moe.experts.130.w1", "model.layers.40.block_sparse_moe.experts.131.w1", "model.layers.40.block_sparse_moe.experts.132.w1", "model.layers.40.block_sparse_moe.experts.133.w1", "model.layers.40.block_sparse_moe.experts.134.w1", "model.layers.40.block_sparse_moe.experts.135.w1", "model.layers.40.block_sparse_moe.experts.136.w1", "model.layers.40.block_sparse_moe.experts.137.w1", "model.layers.40.block_sparse_moe.experts.138.w1", "model.layers.40.block_sparse_moe.experts.139.w1", "model.layers.40.block_sparse_moe.experts.140.w1", "model.layers.40.block_sparse_moe.experts.141.w1", "model.layers.40.block_sparse_moe.experts.142.w1", "model.layers.40.block_sparse_moe.experts.143.w1", "model.layers.40.block_sparse_moe.experts.144.w1", "model.layers.40.block_sparse_moe.experts.145.w1", "model.layers.40.block_sparse_moe.experts.146.w1", "model.layers.40.block_sparse_moe.experts.147.w1", "model.layers.40.block_sparse_moe.experts.148.w1", "model.layers.40.block_sparse_moe.experts.149.w1", "model.layers.40.block_sparse_moe.experts.150.w1", "model.layers.40.block_sparse_moe.experts.151.w1", "model.layers.40.block_sparse_moe.experts.152.w1", "model.layers.40.block_sparse_moe.experts.153.w1", "model.layers.40.block_sparse_moe.experts.154.w1", "model.layers.40.block_sparse_moe.experts.155.w1", "model.layers.40.block_sparse_moe.experts.156.w1", "model.layers.40.block_sparse_moe.experts.157.w1", "model.layers.40.block_sparse_moe.experts.158.w1", "model.layers.40.block_sparse_moe.experts.159.w1", "model.layers.40.block_sparse_moe.experts.160.w1", "model.layers.40.block_sparse_moe.experts.161.w1", "model.layers.40.block_sparse_moe.experts.162.w1", "model.layers.40.block_sparse_moe.experts.163.w1", "model.layers.40.block_sparse_moe.experts.164.w1", "model.layers.40.block_sparse_moe.experts.165.w1", "model.layers.40.block_sparse_moe.experts.166.w1", "model.layers.40.block_sparse_moe.experts.167.w1", "model.layers.40.block_sparse_moe.experts.168.w1", "model.layers.40.block_sparse_moe.experts.169.w1", "model.layers.40.block_sparse_moe.experts.170.w1", "model.layers.40.block_sparse_moe.experts.171.w1", "model.layers.40.block_sparse_moe.experts.172.w1", "model.layers.40.block_sparse_moe.experts.173.w1", "model.layers.40.block_sparse_moe.experts.174.w1", "model.layers.40.block_sparse_moe.experts.175.w1", "model.layers.40.block_sparse_moe.experts.176.w1", "model.layers.40.block_sparse_moe.experts.177.w1", "model.layers.40.block_sparse_moe.experts.178.w1", "model.layers.40.block_sparse_moe.experts.179.w1", "model.layers.40.block_sparse_moe.experts.180.w1", "model.layers.40.block_sparse_moe.experts.181.w1", "model.layers.40.block_sparse_moe.experts.182.w1", "model.layers.40.block_sparse_moe.experts.183.w1", "model.layers.40.block_sparse_moe.experts.184.w1", "model.layers.40.block_sparse_moe.experts.185.w1", "model.layers.40.block_sparse_moe.experts.186.w1", "model.layers.40.block_sparse_moe.experts.187.w1", "model.layers.40.block_sparse_moe.experts.188.w1", "model.layers.40.block_sparse_moe.experts.189.w1", "model.layers.40.block_sparse_moe.experts.190.w1", "model.layers.40.block_sparse_moe.experts.191.w1", "model.layers.40.block_sparse_moe.experts.192.w1", "model.layers.40.block_sparse_moe.experts.193.w1", "model.layers.40.block_sparse_moe.experts.194.w1", "model.layers.40.block_sparse_moe.experts.195.w1", "model.layers.40.block_sparse_moe.experts.196.w1", "model.layers.40.block_sparse_moe.experts.197.w1", "model.layers.40.block_sparse_moe.experts.198.w1", "model.layers.40.block_sparse_moe.experts.199.w1", "model.layers.40.block_sparse_moe.experts.200.w1", "model.layers.40.block_sparse_moe.experts.201.w1", "model.layers.40.block_sparse_moe.experts.202.w1", "model.layers.40.block_sparse_moe.experts.203.w1", "model.layers.40.block_sparse_moe.experts.204.w1", "model.layers.40.block_sparse_moe.experts.205.w1", "model.layers.40.block_sparse_moe.experts.206.w1", "model.layers.40.block_sparse_moe.experts.207.w1", "model.layers.40.block_sparse_moe.experts.208.w1", "model.layers.40.block_sparse_moe.experts.209.w1", "model.layers.40.block_sparse_moe.experts.210.w1", "model.layers.40.block_sparse_moe.experts.211.w1", "model.layers.40.block_sparse_moe.experts.212.w1", "model.layers.40.block_sparse_moe.experts.213.w1", "model.layers.40.block_sparse_moe.experts.214.w1", "model.layers.40.block_sparse_moe.experts.215.w1", "model.layers.40.block_sparse_moe.experts.216.w1", "model.layers.40.block_sparse_moe.experts.217.w1", "model.layers.40.block_sparse_moe.experts.218.w1", "model.layers.40.block_sparse_moe.experts.219.w1", "model.layers.40.block_sparse_moe.experts.220.w1", "model.layers.40.block_sparse_moe.experts.221.w1", "model.layers.40.block_sparse_moe.experts.222.w1", "model.layers.40.block_sparse_moe.experts.223.w1", "model.layers.40.block_sparse_moe.experts.224.w1", "model.layers.40.block_sparse_moe.experts.225.w1", "model.layers.40.block_sparse_moe.experts.226.w1", "model.layers.40.block_sparse_moe.experts.227.w1", "model.layers.40.block_sparse_moe.experts.228.w1", "model.layers.40.block_sparse_moe.experts.229.w1", "model.layers.40.block_sparse_moe.experts.230.w1", "model.layers.40.block_sparse_moe.experts.231.w1", "model.layers.40.block_sparse_moe.experts.232.w1", "model.layers.40.block_sparse_moe.experts.233.w1", "model.layers.40.block_sparse_moe.experts.234.w1", "model.layers.40.block_sparse_moe.experts.235.w1", "model.layers.40.block_sparse_moe.experts.236.w1", "model.layers.40.block_sparse_moe.experts.237.w1", "model.layers.40.block_sparse_moe.experts.238.w1", "model.layers.40.block_sparse_moe.experts.239.w1", "model.layers.40.block_sparse_moe.experts.240.w1", "model.layers.40.block_sparse_moe.experts.241.w1", "model.layers.40.block_sparse_moe.experts.242.w1", "model.layers.40.block_sparse_moe.experts.243.w1", "model.layers.40.block_sparse_moe.experts.244.w1", "model.layers.40.block_sparse_moe.experts.245.w1", "model.layers.40.block_sparse_moe.experts.246.w1", "model.layers.40.block_sparse_moe.experts.247.w1", "model.layers.40.block_sparse_moe.experts.248.w1", "model.layers.40.block_sparse_moe.experts.249.w1", "model.layers.40.block_sparse_moe.experts.250.w1", "model.layers.40.block_sparse_moe.experts.251.w1", "model.layers.40.block_sparse_moe.experts.252.w1", "model.layers.40.block_sparse_moe.experts.253.w1", "model.layers.40.block_sparse_moe.experts.254.w1", "model.layers.40.block_sparse_moe.experts.255.w1", "model.layers.40.block_sparse_moe.experts.0.w3", "model.layers.40.block_sparse_moe.experts.1.w3", "model.layers.40.block_sparse_moe.experts.2.w3", "model.layers.40.block_sparse_moe.experts.3.w3", "model.layers.40.block_sparse_moe.experts.4.w3", "model.layers.40.block_sparse_moe.experts.5.w3", "model.layers.40.block_sparse_moe.experts.6.w3", "model.layers.40.block_sparse_moe.experts.7.w3", "model.layers.40.block_sparse_moe.experts.8.w3", "model.layers.40.block_sparse_moe.experts.9.w3", "model.layers.40.block_sparse_moe.experts.10.w3", "model.layers.40.block_sparse_moe.experts.11.w3", "model.layers.40.block_sparse_moe.experts.12.w3", "model.layers.40.block_sparse_moe.experts.13.w3", "model.layers.40.block_sparse_moe.experts.14.w3", "model.layers.40.block_sparse_moe.experts.15.w3", "model.layers.40.block_sparse_moe.experts.16.w3", "model.layers.40.block_sparse_moe.experts.17.w3", "model.layers.40.block_sparse_moe.experts.18.w3", "model.layers.40.block_sparse_moe.experts.19.w3", "model.layers.40.block_sparse_moe.experts.20.w3", "model.layers.40.block_sparse_moe.experts.21.w3", "model.layers.40.block_sparse_moe.experts.22.w3", "model.layers.40.block_sparse_moe.experts.23.w3", "model.layers.40.block_sparse_moe.experts.24.w3", "model.layers.40.block_sparse_moe.experts.25.w3", "model.layers.40.block_sparse_moe.experts.26.w3", "model.layers.40.block_sparse_moe.experts.27.w3", "model.layers.40.block_sparse_moe.experts.28.w3", "model.layers.40.block_sparse_moe.experts.29.w3", "model.layers.40.block_sparse_moe.experts.30.w3", "model.layers.40.block_sparse_moe.experts.31.w3", "model.layers.40.block_sparse_moe.experts.32.w3", "model.layers.40.block_sparse_moe.experts.33.w3", "model.layers.40.block_sparse_moe.experts.34.w3", "model.layers.40.block_sparse_moe.experts.35.w3", "model.layers.40.block_sparse_moe.experts.36.w3", "model.layers.40.block_sparse_moe.experts.37.w3", "model.layers.40.block_sparse_moe.experts.38.w3", "model.layers.40.block_sparse_moe.experts.39.w3", "model.layers.40.block_sparse_moe.experts.40.w3", "model.layers.40.block_sparse_moe.experts.41.w3", "model.layers.40.block_sparse_moe.experts.42.w3", "model.layers.40.block_sparse_moe.experts.43.w3", "model.layers.40.block_sparse_moe.experts.44.w3", "model.layers.40.block_sparse_moe.experts.45.w3", "model.layers.40.block_sparse_moe.experts.46.w3", "model.layers.40.block_sparse_moe.experts.47.w3", "model.layers.40.block_sparse_moe.experts.48.w3", "model.layers.40.block_sparse_moe.experts.49.w3", "model.layers.40.block_sparse_moe.experts.50.w3", "model.layers.40.block_sparse_moe.experts.51.w3", "model.layers.40.block_sparse_moe.experts.52.w3", "model.layers.40.block_sparse_moe.experts.53.w3", "model.layers.40.block_sparse_moe.experts.54.w3", "model.layers.40.block_sparse_moe.experts.55.w3", "model.layers.40.block_sparse_moe.experts.56.w3", "model.layers.40.block_sparse_moe.experts.57.w3", "model.layers.40.block_sparse_moe.experts.58.w3", "model.layers.40.block_sparse_moe.experts.59.w3", "model.layers.40.block_sparse_moe.experts.60.w3", "model.layers.40.block_sparse_moe.experts.61.w3", "model.layers.40.block_sparse_moe.experts.62.w3", "model.layers.40.block_sparse_moe.experts.63.w3", "model.layers.40.block_sparse_moe.experts.64.w3", "model.layers.40.block_sparse_moe.experts.65.w3", "model.layers.40.block_sparse_moe.experts.66.w3", "model.layers.40.block_sparse_moe.experts.67.w3", "model.layers.40.block_sparse_moe.experts.68.w3", "model.layers.40.block_sparse_moe.experts.69.w3", "model.layers.40.block_sparse_moe.experts.70.w3", "model.layers.40.block_sparse_moe.experts.71.w3", "model.layers.40.block_sparse_moe.experts.72.w3", "model.layers.40.block_sparse_moe.experts.73.w3", "model.layers.40.block_sparse_moe.experts.74.w3", "model.layers.40.block_sparse_moe.experts.75.w3", "model.layers.40.block_sparse_moe.experts.76.w3", "model.layers.40.block_sparse_moe.experts.77.w3", "model.layers.40.block_sparse_moe.experts.78.w3", "model.layers.40.block_sparse_moe.experts.79.w3", "model.layers.40.block_sparse_moe.experts.80.w3", "model.layers.40.block_sparse_moe.experts.81.w3", "model.layers.40.block_sparse_moe.experts.82.w3", "model.layers.40.block_sparse_moe.experts.83.w3", "model.layers.40.block_sparse_moe.experts.84.w3", "model.layers.40.block_sparse_moe.experts.85.w3", "model.layers.40.block_sparse_moe.experts.86.w3", "model.layers.40.block_sparse_moe.experts.87.w3", "model.layers.40.block_sparse_moe.experts.88.w3", "model.layers.40.block_sparse_moe.experts.89.w3", "model.layers.40.block_sparse_moe.experts.90.w3", "model.layers.40.block_sparse_moe.experts.91.w3", "model.layers.40.block_sparse_moe.experts.92.w3", "model.layers.40.block_sparse_moe.experts.93.w3", "model.layers.40.block_sparse_moe.experts.94.w3", "model.layers.40.block_sparse_moe.experts.95.w3", "model.layers.40.block_sparse_moe.experts.96.w3", "model.layers.40.block_sparse_moe.experts.97.w3", "model.layers.40.block_sparse_moe.experts.98.w3", "model.layers.40.block_sparse_moe.experts.99.w3", "model.layers.40.block_sparse_moe.experts.100.w3", "model.layers.40.block_sparse_moe.experts.101.w3", "model.layers.40.block_sparse_moe.experts.102.w3", "model.layers.40.block_sparse_moe.experts.103.w3", "model.layers.40.block_sparse_moe.experts.104.w3", "model.layers.40.block_sparse_moe.experts.105.w3", "model.layers.40.block_sparse_moe.experts.106.w3", "model.layers.40.block_sparse_moe.experts.107.w3", "model.layers.40.block_sparse_moe.experts.108.w3", "model.layers.40.block_sparse_moe.experts.109.w3", "model.layers.40.block_sparse_moe.experts.110.w3", "model.layers.40.block_sparse_moe.experts.111.w3", "model.layers.40.block_sparse_moe.experts.112.w3", "model.layers.40.block_sparse_moe.experts.113.w3", "model.layers.40.block_sparse_moe.experts.114.w3", "model.layers.40.block_sparse_moe.experts.115.w3", "model.layers.40.block_sparse_moe.experts.116.w3", "model.layers.40.block_sparse_moe.experts.117.w3", "model.layers.40.block_sparse_moe.experts.118.w3", "model.layers.40.block_sparse_moe.experts.119.w3", "model.layers.40.block_sparse_moe.experts.120.w3", "model.layers.40.block_sparse_moe.experts.121.w3", "model.layers.40.block_sparse_moe.experts.122.w3", "model.layers.40.block_sparse_moe.experts.123.w3", "model.layers.40.block_sparse_moe.experts.124.w3", "model.layers.40.block_sparse_moe.experts.125.w3", "model.layers.40.block_sparse_moe.experts.126.w3", "model.layers.40.block_sparse_moe.experts.127.w3", "model.layers.40.block_sparse_moe.experts.128.w3", "model.layers.40.block_sparse_moe.experts.129.w3", "model.layers.40.block_sparse_moe.experts.130.w3", "model.layers.40.block_sparse_moe.experts.131.w3", "model.layers.40.block_sparse_moe.experts.132.w3", "model.layers.40.block_sparse_moe.experts.133.w3", "model.layers.40.block_sparse_moe.experts.134.w3", "model.layers.40.block_sparse_moe.experts.135.w3", "model.layers.40.block_sparse_moe.experts.136.w3", "model.layers.40.block_sparse_moe.experts.137.w3", "model.layers.40.block_sparse_moe.experts.138.w3", "model.layers.40.block_sparse_moe.experts.139.w3", "model.layers.40.block_sparse_moe.experts.140.w3", "model.layers.40.block_sparse_moe.experts.141.w3", "model.layers.40.block_sparse_moe.experts.142.w3", "model.layers.40.block_sparse_moe.experts.143.w3", "model.layers.40.block_sparse_moe.experts.144.w3", "model.layers.40.block_sparse_moe.experts.145.w3", "model.layers.40.block_sparse_moe.experts.146.w3", "model.layers.40.block_sparse_moe.experts.147.w3", "model.layers.40.block_sparse_moe.experts.148.w3", "model.layers.40.block_sparse_moe.experts.149.w3", "model.layers.40.block_sparse_moe.experts.150.w3", "model.layers.40.block_sparse_moe.experts.151.w3", "model.layers.40.block_sparse_moe.experts.152.w3", "model.layers.40.block_sparse_moe.experts.153.w3", "model.layers.40.block_sparse_moe.experts.154.w3", "model.layers.40.block_sparse_moe.experts.155.w3", "model.layers.40.block_sparse_moe.experts.156.w3", "model.layers.40.block_sparse_moe.experts.157.w3", "model.layers.40.block_sparse_moe.experts.158.w3", "model.layers.40.block_sparse_moe.experts.159.w3", "model.layers.40.block_sparse_moe.experts.160.w3", "model.layers.40.block_sparse_moe.experts.161.w3", "model.layers.40.block_sparse_moe.experts.162.w3", "model.layers.40.block_sparse_moe.experts.163.w3", "model.layers.40.block_sparse_moe.experts.164.w3", "model.layers.40.block_sparse_moe.experts.165.w3", "model.layers.40.block_sparse_moe.experts.166.w3", "model.layers.40.block_sparse_moe.experts.167.w3", "model.layers.40.block_sparse_moe.experts.168.w3", "model.layers.40.block_sparse_moe.experts.169.w3", "model.layers.40.block_sparse_moe.experts.170.w3", "model.layers.40.block_sparse_moe.experts.171.w3", "model.layers.40.block_sparse_moe.experts.172.w3", "model.layers.40.block_sparse_moe.experts.173.w3", "model.layers.40.block_sparse_moe.experts.174.w3", "model.layers.40.block_sparse_moe.experts.175.w3", "model.layers.40.block_sparse_moe.experts.176.w3", "model.layers.40.block_sparse_moe.experts.177.w3", "model.layers.40.block_sparse_moe.experts.178.w3", "model.layers.40.block_sparse_moe.experts.179.w3", "model.layers.40.block_sparse_moe.experts.180.w3", "model.layers.40.block_sparse_moe.experts.181.w3", "model.layers.40.block_sparse_moe.experts.182.w3", "model.layers.40.block_sparse_moe.experts.183.w3", "model.layers.40.block_sparse_moe.experts.184.w3", "model.layers.40.block_sparse_moe.experts.185.w3", "model.layers.40.block_sparse_moe.experts.186.w3", "model.layers.40.block_sparse_moe.experts.187.w3", "model.layers.40.block_sparse_moe.experts.188.w3", "model.layers.40.block_sparse_moe.experts.189.w3", "model.layers.40.block_sparse_moe.experts.190.w3", "model.layers.40.block_sparse_moe.experts.191.w3", "model.layers.40.block_sparse_moe.experts.192.w3", "model.layers.40.block_sparse_moe.experts.193.w3", "model.layers.40.block_sparse_moe.experts.194.w3", "model.layers.40.block_sparse_moe.experts.195.w3", "model.layers.40.block_sparse_moe.experts.196.w3", "model.layers.40.block_sparse_moe.experts.197.w3", "model.layers.40.block_sparse_moe.experts.198.w3", "model.layers.40.block_sparse_moe.experts.199.w3", "model.layers.40.block_sparse_moe.experts.200.w3", "model.layers.40.block_sparse_moe.experts.201.w3", "model.layers.40.block_sparse_moe.experts.202.w3", "model.layers.40.block_sparse_moe.experts.203.w3", "model.layers.40.block_sparse_moe.experts.204.w3", "model.layers.40.block_sparse_moe.experts.205.w3", "model.layers.40.block_sparse_moe.experts.206.w3", "model.layers.40.block_sparse_moe.experts.207.w3", "model.layers.40.block_sparse_moe.experts.208.w3", "model.layers.40.block_sparse_moe.experts.209.w3", "model.layers.40.block_sparse_moe.experts.210.w3", "model.layers.40.block_sparse_moe.experts.211.w3", "model.layers.40.block_sparse_moe.experts.212.w3", "model.layers.40.block_sparse_moe.experts.213.w3", "model.layers.40.block_sparse_moe.experts.214.w3", "model.layers.40.block_sparse_moe.experts.215.w3", "model.layers.40.block_sparse_moe.experts.216.w3", "model.layers.40.block_sparse_moe.experts.217.w3", "model.layers.40.block_sparse_moe.experts.218.w3", "model.layers.40.block_sparse_moe.experts.219.w3", "model.layers.40.block_sparse_moe.experts.220.w3", "model.layers.40.block_sparse_moe.experts.221.w3", "model.layers.40.block_sparse_moe.experts.222.w3", "model.layers.40.block_sparse_moe.experts.223.w3", "model.layers.40.block_sparse_moe.experts.224.w3", "model.layers.40.block_sparse_moe.experts.225.w3", "model.layers.40.block_sparse_moe.experts.226.w3", "model.layers.40.block_sparse_moe.experts.227.w3", "model.layers.40.block_sparse_moe.experts.228.w3", "model.layers.40.block_sparse_moe.experts.229.w3", "model.layers.40.block_sparse_moe.experts.230.w3", "model.layers.40.block_sparse_moe.experts.231.w3", "model.layers.40.block_sparse_moe.experts.232.w3", "model.layers.40.block_sparse_moe.experts.233.w3", "model.layers.40.block_sparse_moe.experts.234.w3", "model.layers.40.block_sparse_moe.experts.235.w3", "model.layers.40.block_sparse_moe.experts.236.w3", "model.layers.40.block_sparse_moe.experts.237.w3", "model.layers.40.block_sparse_moe.experts.238.w3", "model.layers.40.block_sparse_moe.experts.239.w3", "model.layers.40.block_sparse_moe.experts.240.w3", "model.layers.40.block_sparse_moe.experts.241.w3", "model.layers.40.block_sparse_moe.experts.242.w3", "model.layers.40.block_sparse_moe.experts.243.w3", "model.layers.40.block_sparse_moe.experts.244.w3", "model.layers.40.block_sparse_moe.experts.245.w3", "model.layers.40.block_sparse_moe.experts.246.w3", "model.layers.40.block_sparse_moe.experts.247.w3", "model.layers.40.block_sparse_moe.experts.248.w3", "model.layers.40.block_sparse_moe.experts.249.w3", "model.layers.40.block_sparse_moe.experts.250.w3", "model.layers.40.block_sparse_moe.experts.251.w3", "model.layers.40.block_sparse_moe.experts.252.w3", "model.layers.40.block_sparse_moe.experts.253.w3", "model.layers.40.block_sparse_moe.experts.254.w3", "model.layers.40.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0002897739410401279, "dbits": 2415919104 } ] }, { "idx": 204, "layers": [ "model.layers.40.block_sparse_moe.experts.0.w2", "model.layers.40.block_sparse_moe.experts.1.w2", "model.layers.40.block_sparse_moe.experts.2.w2", "model.layers.40.block_sparse_moe.experts.3.w2", "model.layers.40.block_sparse_moe.experts.4.w2", "model.layers.40.block_sparse_moe.experts.5.w2", "model.layers.40.block_sparse_moe.experts.6.w2", "model.layers.40.block_sparse_moe.experts.7.w2", "model.layers.40.block_sparse_moe.experts.8.w2", "model.layers.40.block_sparse_moe.experts.9.w2", "model.layers.40.block_sparse_moe.experts.10.w2", "model.layers.40.block_sparse_moe.experts.11.w2", "model.layers.40.block_sparse_moe.experts.12.w2", "model.layers.40.block_sparse_moe.experts.13.w2", "model.layers.40.block_sparse_moe.experts.14.w2", "model.layers.40.block_sparse_moe.experts.15.w2", "model.layers.40.block_sparse_moe.experts.16.w2", "model.layers.40.block_sparse_moe.experts.17.w2", "model.layers.40.block_sparse_moe.experts.18.w2", "model.layers.40.block_sparse_moe.experts.19.w2", "model.layers.40.block_sparse_moe.experts.20.w2", "model.layers.40.block_sparse_moe.experts.21.w2", "model.layers.40.block_sparse_moe.experts.22.w2", "model.layers.40.block_sparse_moe.experts.23.w2", "model.layers.40.block_sparse_moe.experts.24.w2", "model.layers.40.block_sparse_moe.experts.25.w2", "model.layers.40.block_sparse_moe.experts.26.w2", "model.layers.40.block_sparse_moe.experts.27.w2", "model.layers.40.block_sparse_moe.experts.28.w2", "model.layers.40.block_sparse_moe.experts.29.w2", "model.layers.40.block_sparse_moe.experts.30.w2", "model.layers.40.block_sparse_moe.experts.31.w2", "model.layers.40.block_sparse_moe.experts.32.w2", "model.layers.40.block_sparse_moe.experts.33.w2", "model.layers.40.block_sparse_moe.experts.34.w2", "model.layers.40.block_sparse_moe.experts.35.w2", "model.layers.40.block_sparse_moe.experts.36.w2", "model.layers.40.block_sparse_moe.experts.37.w2", "model.layers.40.block_sparse_moe.experts.38.w2", "model.layers.40.block_sparse_moe.experts.39.w2", "model.layers.40.block_sparse_moe.experts.40.w2", "model.layers.40.block_sparse_moe.experts.41.w2", "model.layers.40.block_sparse_moe.experts.42.w2", "model.layers.40.block_sparse_moe.experts.43.w2", "model.layers.40.block_sparse_moe.experts.44.w2", "model.layers.40.block_sparse_moe.experts.45.w2", "model.layers.40.block_sparse_moe.experts.46.w2", "model.layers.40.block_sparse_moe.experts.47.w2", "model.layers.40.block_sparse_moe.experts.48.w2", "model.layers.40.block_sparse_moe.experts.49.w2", "model.layers.40.block_sparse_moe.experts.50.w2", "model.layers.40.block_sparse_moe.experts.51.w2", "model.layers.40.block_sparse_moe.experts.52.w2", "model.layers.40.block_sparse_moe.experts.53.w2", "model.layers.40.block_sparse_moe.experts.54.w2", "model.layers.40.block_sparse_moe.experts.55.w2", "model.layers.40.block_sparse_moe.experts.56.w2", "model.layers.40.block_sparse_moe.experts.57.w2", "model.layers.40.block_sparse_moe.experts.58.w2", "model.layers.40.block_sparse_moe.experts.59.w2", "model.layers.40.block_sparse_moe.experts.60.w2", "model.layers.40.block_sparse_moe.experts.61.w2", "model.layers.40.block_sparse_moe.experts.62.w2", "model.layers.40.block_sparse_moe.experts.63.w2", "model.layers.40.block_sparse_moe.experts.64.w2", "model.layers.40.block_sparse_moe.experts.65.w2", "model.layers.40.block_sparse_moe.experts.66.w2", "model.layers.40.block_sparse_moe.experts.67.w2", "model.layers.40.block_sparse_moe.experts.68.w2", "model.layers.40.block_sparse_moe.experts.69.w2", "model.layers.40.block_sparse_moe.experts.70.w2", "model.layers.40.block_sparse_moe.experts.71.w2", "model.layers.40.block_sparse_moe.experts.72.w2", "model.layers.40.block_sparse_moe.experts.73.w2", "model.layers.40.block_sparse_moe.experts.74.w2", "model.layers.40.block_sparse_moe.experts.75.w2", "model.layers.40.block_sparse_moe.experts.76.w2", "model.layers.40.block_sparse_moe.experts.77.w2", "model.layers.40.block_sparse_moe.experts.78.w2", "model.layers.40.block_sparse_moe.experts.79.w2", "model.layers.40.block_sparse_moe.experts.80.w2", "model.layers.40.block_sparse_moe.experts.81.w2", "model.layers.40.block_sparse_moe.experts.82.w2", "model.layers.40.block_sparse_moe.experts.83.w2", "model.layers.40.block_sparse_moe.experts.84.w2", "model.layers.40.block_sparse_moe.experts.85.w2", "model.layers.40.block_sparse_moe.experts.86.w2", "model.layers.40.block_sparse_moe.experts.87.w2", "model.layers.40.block_sparse_moe.experts.88.w2", "model.layers.40.block_sparse_moe.experts.89.w2", "model.layers.40.block_sparse_moe.experts.90.w2", "model.layers.40.block_sparse_moe.experts.91.w2", "model.layers.40.block_sparse_moe.experts.92.w2", "model.layers.40.block_sparse_moe.experts.93.w2", "model.layers.40.block_sparse_moe.experts.94.w2", "model.layers.40.block_sparse_moe.experts.95.w2", "model.layers.40.block_sparse_moe.experts.96.w2", "model.layers.40.block_sparse_moe.experts.97.w2", "model.layers.40.block_sparse_moe.experts.98.w2", "model.layers.40.block_sparse_moe.experts.99.w2", "model.layers.40.block_sparse_moe.experts.100.w2", "model.layers.40.block_sparse_moe.experts.101.w2", "model.layers.40.block_sparse_moe.experts.102.w2", "model.layers.40.block_sparse_moe.experts.103.w2", "model.layers.40.block_sparse_moe.experts.104.w2", "model.layers.40.block_sparse_moe.experts.105.w2", "model.layers.40.block_sparse_moe.experts.106.w2", "model.layers.40.block_sparse_moe.experts.107.w2", "model.layers.40.block_sparse_moe.experts.108.w2", "model.layers.40.block_sparse_moe.experts.109.w2", "model.layers.40.block_sparse_moe.experts.110.w2", "model.layers.40.block_sparse_moe.experts.111.w2", "model.layers.40.block_sparse_moe.experts.112.w2", "model.layers.40.block_sparse_moe.experts.113.w2", "model.layers.40.block_sparse_moe.experts.114.w2", "model.layers.40.block_sparse_moe.experts.115.w2", "model.layers.40.block_sparse_moe.experts.116.w2", "model.layers.40.block_sparse_moe.experts.117.w2", "model.layers.40.block_sparse_moe.experts.118.w2", "model.layers.40.block_sparse_moe.experts.119.w2", "model.layers.40.block_sparse_moe.experts.120.w2", "model.layers.40.block_sparse_moe.experts.121.w2", "model.layers.40.block_sparse_moe.experts.122.w2", "model.layers.40.block_sparse_moe.experts.123.w2", "model.layers.40.block_sparse_moe.experts.124.w2", "model.layers.40.block_sparse_moe.experts.125.w2", "model.layers.40.block_sparse_moe.experts.126.w2", "model.layers.40.block_sparse_moe.experts.127.w2", "model.layers.40.block_sparse_moe.experts.128.w2", "model.layers.40.block_sparse_moe.experts.129.w2", "model.layers.40.block_sparse_moe.experts.130.w2", "model.layers.40.block_sparse_moe.experts.131.w2", "model.layers.40.block_sparse_moe.experts.132.w2", "model.layers.40.block_sparse_moe.experts.133.w2", "model.layers.40.block_sparse_moe.experts.134.w2", "model.layers.40.block_sparse_moe.experts.135.w2", "model.layers.40.block_sparse_moe.experts.136.w2", "model.layers.40.block_sparse_moe.experts.137.w2", "model.layers.40.block_sparse_moe.experts.138.w2", "model.layers.40.block_sparse_moe.experts.139.w2", "model.layers.40.block_sparse_moe.experts.140.w2", "model.layers.40.block_sparse_moe.experts.141.w2", "model.layers.40.block_sparse_moe.experts.142.w2", "model.layers.40.block_sparse_moe.experts.143.w2", "model.layers.40.block_sparse_moe.experts.144.w2", "model.layers.40.block_sparse_moe.experts.145.w2", "model.layers.40.block_sparse_moe.experts.146.w2", "model.layers.40.block_sparse_moe.experts.147.w2", "model.layers.40.block_sparse_moe.experts.148.w2", "model.layers.40.block_sparse_moe.experts.149.w2", "model.layers.40.block_sparse_moe.experts.150.w2", "model.layers.40.block_sparse_moe.experts.151.w2", "model.layers.40.block_sparse_moe.experts.152.w2", "model.layers.40.block_sparse_moe.experts.153.w2", "model.layers.40.block_sparse_moe.experts.154.w2", "model.layers.40.block_sparse_moe.experts.155.w2", "model.layers.40.block_sparse_moe.experts.156.w2", "model.layers.40.block_sparse_moe.experts.157.w2", "model.layers.40.block_sparse_moe.experts.158.w2", "model.layers.40.block_sparse_moe.experts.159.w2", "model.layers.40.block_sparse_moe.experts.160.w2", "model.layers.40.block_sparse_moe.experts.161.w2", "model.layers.40.block_sparse_moe.experts.162.w2", "model.layers.40.block_sparse_moe.experts.163.w2", "model.layers.40.block_sparse_moe.experts.164.w2", "model.layers.40.block_sparse_moe.experts.165.w2", "model.layers.40.block_sparse_moe.experts.166.w2", "model.layers.40.block_sparse_moe.experts.167.w2", "model.layers.40.block_sparse_moe.experts.168.w2", "model.layers.40.block_sparse_moe.experts.169.w2", "model.layers.40.block_sparse_moe.experts.170.w2", "model.layers.40.block_sparse_moe.experts.171.w2", "model.layers.40.block_sparse_moe.experts.172.w2", "model.layers.40.block_sparse_moe.experts.173.w2", "model.layers.40.block_sparse_moe.experts.174.w2", "model.layers.40.block_sparse_moe.experts.175.w2", "model.layers.40.block_sparse_moe.experts.176.w2", "model.layers.40.block_sparse_moe.experts.177.w2", "model.layers.40.block_sparse_moe.experts.178.w2", "model.layers.40.block_sparse_moe.experts.179.w2", "model.layers.40.block_sparse_moe.experts.180.w2", "model.layers.40.block_sparse_moe.experts.181.w2", "model.layers.40.block_sparse_moe.experts.182.w2", "model.layers.40.block_sparse_moe.experts.183.w2", "model.layers.40.block_sparse_moe.experts.184.w2", "model.layers.40.block_sparse_moe.experts.185.w2", "model.layers.40.block_sparse_moe.experts.186.w2", "model.layers.40.block_sparse_moe.experts.187.w2", "model.layers.40.block_sparse_moe.experts.188.w2", "model.layers.40.block_sparse_moe.experts.189.w2", "model.layers.40.block_sparse_moe.experts.190.w2", "model.layers.40.block_sparse_moe.experts.191.w2", "model.layers.40.block_sparse_moe.experts.192.w2", "model.layers.40.block_sparse_moe.experts.193.w2", "model.layers.40.block_sparse_moe.experts.194.w2", "model.layers.40.block_sparse_moe.experts.195.w2", "model.layers.40.block_sparse_moe.experts.196.w2", "model.layers.40.block_sparse_moe.experts.197.w2", "model.layers.40.block_sparse_moe.experts.198.w2", "model.layers.40.block_sparse_moe.experts.199.w2", "model.layers.40.block_sparse_moe.experts.200.w2", "model.layers.40.block_sparse_moe.experts.201.w2", "model.layers.40.block_sparse_moe.experts.202.w2", "model.layers.40.block_sparse_moe.experts.203.w2", "model.layers.40.block_sparse_moe.experts.204.w2", "model.layers.40.block_sparse_moe.experts.205.w2", "model.layers.40.block_sparse_moe.experts.206.w2", "model.layers.40.block_sparse_moe.experts.207.w2", "model.layers.40.block_sparse_moe.experts.208.w2", "model.layers.40.block_sparse_moe.experts.209.w2", "model.layers.40.block_sparse_moe.experts.210.w2", "model.layers.40.block_sparse_moe.experts.211.w2", "model.layers.40.block_sparse_moe.experts.212.w2", "model.layers.40.block_sparse_moe.experts.213.w2", "model.layers.40.block_sparse_moe.experts.214.w2", "model.layers.40.block_sparse_moe.experts.215.w2", "model.layers.40.block_sparse_moe.experts.216.w2", "model.layers.40.block_sparse_moe.experts.217.w2", "model.layers.40.block_sparse_moe.experts.218.w2", "model.layers.40.block_sparse_moe.experts.219.w2", "model.layers.40.block_sparse_moe.experts.220.w2", "model.layers.40.block_sparse_moe.experts.221.w2", "model.layers.40.block_sparse_moe.experts.222.w2", "model.layers.40.block_sparse_moe.experts.223.w2", "model.layers.40.block_sparse_moe.experts.224.w2", "model.layers.40.block_sparse_moe.experts.225.w2", "model.layers.40.block_sparse_moe.experts.226.w2", "model.layers.40.block_sparse_moe.experts.227.w2", "model.layers.40.block_sparse_moe.experts.228.w2", "model.layers.40.block_sparse_moe.experts.229.w2", "model.layers.40.block_sparse_moe.experts.230.w2", "model.layers.40.block_sparse_moe.experts.231.w2", "model.layers.40.block_sparse_moe.experts.232.w2", "model.layers.40.block_sparse_moe.experts.233.w2", "model.layers.40.block_sparse_moe.experts.234.w2", "model.layers.40.block_sparse_moe.experts.235.w2", "model.layers.40.block_sparse_moe.experts.236.w2", "model.layers.40.block_sparse_moe.experts.237.w2", "model.layers.40.block_sparse_moe.experts.238.w2", "model.layers.40.block_sparse_moe.experts.239.w2", "model.layers.40.block_sparse_moe.experts.240.w2", "model.layers.40.block_sparse_moe.experts.241.w2", "model.layers.40.block_sparse_moe.experts.242.w2", "model.layers.40.block_sparse_moe.experts.243.w2", "model.layers.40.block_sparse_moe.experts.244.w2", "model.layers.40.block_sparse_moe.experts.245.w2", "model.layers.40.block_sparse_moe.experts.246.w2", "model.layers.40.block_sparse_moe.experts.247.w2", "model.layers.40.block_sparse_moe.experts.248.w2", "model.layers.40.block_sparse_moe.experts.249.w2", "model.layers.40.block_sparse_moe.experts.250.w2", "model.layers.40.block_sparse_moe.experts.251.w2", "model.layers.40.block_sparse_moe.experts.252.w2", "model.layers.40.block_sparse_moe.experts.253.w2", "model.layers.40.block_sparse_moe.experts.254.w2", "model.layers.40.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0005722612142562422, "dbits": 1207959552 } ] }, { "idx": 205, "layers": [ "model.layers.41.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0025116831064223577, "dbits": 18874368 } ] }, { "idx": 206, "layers": [ "model.layers.41.self_attn.k_proj", "model.layers.41.self_attn.v_proj" ], "candidates": [ { "dkld": -0.018566915392875583, "dbits": 6291456 } ] }, { "idx": 207, "layers": [ "model.layers.41.self_attn.o_proj" ], "candidates": [ { "dkld": -0.002394402027130038, "dbits": 18874368 } ] }, { "idx": 208, "layers": [ "model.layers.41.block_sparse_moe.experts.0.w1", "model.layers.41.block_sparse_moe.experts.1.w1", "model.layers.41.block_sparse_moe.experts.2.w1", "model.layers.41.block_sparse_moe.experts.3.w1", "model.layers.41.block_sparse_moe.experts.4.w1", "model.layers.41.block_sparse_moe.experts.5.w1", "model.layers.41.block_sparse_moe.experts.6.w1", "model.layers.41.block_sparse_moe.experts.7.w1", "model.layers.41.block_sparse_moe.experts.8.w1", "model.layers.41.block_sparse_moe.experts.9.w1", "model.layers.41.block_sparse_moe.experts.10.w1", "model.layers.41.block_sparse_moe.experts.11.w1", "model.layers.41.block_sparse_moe.experts.12.w1", "model.layers.41.block_sparse_moe.experts.13.w1", "model.layers.41.block_sparse_moe.experts.14.w1", "model.layers.41.block_sparse_moe.experts.15.w1", "model.layers.41.block_sparse_moe.experts.16.w1", "model.layers.41.block_sparse_moe.experts.17.w1", "model.layers.41.block_sparse_moe.experts.18.w1", "model.layers.41.block_sparse_moe.experts.19.w1", "model.layers.41.block_sparse_moe.experts.20.w1", "model.layers.41.block_sparse_moe.experts.21.w1", "model.layers.41.block_sparse_moe.experts.22.w1", "model.layers.41.block_sparse_moe.experts.23.w1", "model.layers.41.block_sparse_moe.experts.24.w1", "model.layers.41.block_sparse_moe.experts.25.w1", "model.layers.41.block_sparse_moe.experts.26.w1", "model.layers.41.block_sparse_moe.experts.27.w1", "model.layers.41.block_sparse_moe.experts.28.w1", "model.layers.41.block_sparse_moe.experts.29.w1", "model.layers.41.block_sparse_moe.experts.30.w1", "model.layers.41.block_sparse_moe.experts.31.w1", "model.layers.41.block_sparse_moe.experts.32.w1", "model.layers.41.block_sparse_moe.experts.33.w1", "model.layers.41.block_sparse_moe.experts.34.w1", "model.layers.41.block_sparse_moe.experts.35.w1", "model.layers.41.block_sparse_moe.experts.36.w1", "model.layers.41.block_sparse_moe.experts.37.w1", "model.layers.41.block_sparse_moe.experts.38.w1", "model.layers.41.block_sparse_moe.experts.39.w1", "model.layers.41.block_sparse_moe.experts.40.w1", "model.layers.41.block_sparse_moe.experts.41.w1", "model.layers.41.block_sparse_moe.experts.42.w1", "model.layers.41.block_sparse_moe.experts.43.w1", "model.layers.41.block_sparse_moe.experts.44.w1", "model.layers.41.block_sparse_moe.experts.45.w1", "model.layers.41.block_sparse_moe.experts.46.w1", "model.layers.41.block_sparse_moe.experts.47.w1", "model.layers.41.block_sparse_moe.experts.48.w1", "model.layers.41.block_sparse_moe.experts.49.w1", "model.layers.41.block_sparse_moe.experts.50.w1", "model.layers.41.block_sparse_moe.experts.51.w1", "model.layers.41.block_sparse_moe.experts.52.w1", "model.layers.41.block_sparse_moe.experts.53.w1", "model.layers.41.block_sparse_moe.experts.54.w1", "model.layers.41.block_sparse_moe.experts.55.w1", "model.layers.41.block_sparse_moe.experts.56.w1", "model.layers.41.block_sparse_moe.experts.57.w1", "model.layers.41.block_sparse_moe.experts.58.w1", "model.layers.41.block_sparse_moe.experts.59.w1", "model.layers.41.block_sparse_moe.experts.60.w1", "model.layers.41.block_sparse_moe.experts.61.w1", "model.layers.41.block_sparse_moe.experts.62.w1", "model.layers.41.block_sparse_moe.experts.63.w1", "model.layers.41.block_sparse_moe.experts.64.w1", "model.layers.41.block_sparse_moe.experts.65.w1", "model.layers.41.block_sparse_moe.experts.66.w1", "model.layers.41.block_sparse_moe.experts.67.w1", "model.layers.41.block_sparse_moe.experts.68.w1", "model.layers.41.block_sparse_moe.experts.69.w1", "model.layers.41.block_sparse_moe.experts.70.w1", "model.layers.41.block_sparse_moe.experts.71.w1", "model.layers.41.block_sparse_moe.experts.72.w1", "model.layers.41.block_sparse_moe.experts.73.w1", "model.layers.41.block_sparse_moe.experts.74.w1", "model.layers.41.block_sparse_moe.experts.75.w1", "model.layers.41.block_sparse_moe.experts.76.w1", "model.layers.41.block_sparse_moe.experts.77.w1", "model.layers.41.block_sparse_moe.experts.78.w1", "model.layers.41.block_sparse_moe.experts.79.w1", "model.layers.41.block_sparse_moe.experts.80.w1", "model.layers.41.block_sparse_moe.experts.81.w1", "model.layers.41.block_sparse_moe.experts.82.w1", "model.layers.41.block_sparse_moe.experts.83.w1", "model.layers.41.block_sparse_moe.experts.84.w1", "model.layers.41.block_sparse_moe.experts.85.w1", "model.layers.41.block_sparse_moe.experts.86.w1", "model.layers.41.block_sparse_moe.experts.87.w1", "model.layers.41.block_sparse_moe.experts.88.w1", "model.layers.41.block_sparse_moe.experts.89.w1", "model.layers.41.block_sparse_moe.experts.90.w1", "model.layers.41.block_sparse_moe.experts.91.w1", "model.layers.41.block_sparse_moe.experts.92.w1", "model.layers.41.block_sparse_moe.experts.93.w1", "model.layers.41.block_sparse_moe.experts.94.w1", "model.layers.41.block_sparse_moe.experts.95.w1", "model.layers.41.block_sparse_moe.experts.96.w1", "model.layers.41.block_sparse_moe.experts.97.w1", "model.layers.41.block_sparse_moe.experts.98.w1", "model.layers.41.block_sparse_moe.experts.99.w1", "model.layers.41.block_sparse_moe.experts.100.w1", "model.layers.41.block_sparse_moe.experts.101.w1", "model.layers.41.block_sparse_moe.experts.102.w1", "model.layers.41.block_sparse_moe.experts.103.w1", "model.layers.41.block_sparse_moe.experts.104.w1", "model.layers.41.block_sparse_moe.experts.105.w1", "model.layers.41.block_sparse_moe.experts.106.w1", "model.layers.41.block_sparse_moe.experts.107.w1", "model.layers.41.block_sparse_moe.experts.108.w1", "model.layers.41.block_sparse_moe.experts.109.w1", "model.layers.41.block_sparse_moe.experts.110.w1", "model.layers.41.block_sparse_moe.experts.111.w1", "model.layers.41.block_sparse_moe.experts.112.w1", "model.layers.41.block_sparse_moe.experts.113.w1", "model.layers.41.block_sparse_moe.experts.114.w1", "model.layers.41.block_sparse_moe.experts.115.w1", "model.layers.41.block_sparse_moe.experts.116.w1", "model.layers.41.block_sparse_moe.experts.117.w1", "model.layers.41.block_sparse_moe.experts.118.w1", "model.layers.41.block_sparse_moe.experts.119.w1", "model.layers.41.block_sparse_moe.experts.120.w1", "model.layers.41.block_sparse_moe.experts.121.w1", "model.layers.41.block_sparse_moe.experts.122.w1", "model.layers.41.block_sparse_moe.experts.123.w1", "model.layers.41.block_sparse_moe.experts.124.w1", "model.layers.41.block_sparse_moe.experts.125.w1", "model.layers.41.block_sparse_moe.experts.126.w1", "model.layers.41.block_sparse_moe.experts.127.w1", "model.layers.41.block_sparse_moe.experts.128.w1", "model.layers.41.block_sparse_moe.experts.129.w1", "model.layers.41.block_sparse_moe.experts.130.w1", "model.layers.41.block_sparse_moe.experts.131.w1", "model.layers.41.block_sparse_moe.experts.132.w1", "model.layers.41.block_sparse_moe.experts.133.w1", "model.layers.41.block_sparse_moe.experts.134.w1", "model.layers.41.block_sparse_moe.experts.135.w1", "model.layers.41.block_sparse_moe.experts.136.w1", "model.layers.41.block_sparse_moe.experts.137.w1", "model.layers.41.block_sparse_moe.experts.138.w1", "model.layers.41.block_sparse_moe.experts.139.w1", "model.layers.41.block_sparse_moe.experts.140.w1", "model.layers.41.block_sparse_moe.experts.141.w1", "model.layers.41.block_sparse_moe.experts.142.w1", "model.layers.41.block_sparse_moe.experts.143.w1", "model.layers.41.block_sparse_moe.experts.144.w1", "model.layers.41.block_sparse_moe.experts.145.w1", "model.layers.41.block_sparse_moe.experts.146.w1", "model.layers.41.block_sparse_moe.experts.147.w1", "model.layers.41.block_sparse_moe.experts.148.w1", "model.layers.41.block_sparse_moe.experts.149.w1", "model.layers.41.block_sparse_moe.experts.150.w1", "model.layers.41.block_sparse_moe.experts.151.w1", "model.layers.41.block_sparse_moe.experts.152.w1", "model.layers.41.block_sparse_moe.experts.153.w1", "model.layers.41.block_sparse_moe.experts.154.w1", "model.layers.41.block_sparse_moe.experts.155.w1", "model.layers.41.block_sparse_moe.experts.156.w1", "model.layers.41.block_sparse_moe.experts.157.w1", "model.layers.41.block_sparse_moe.experts.158.w1", "model.layers.41.block_sparse_moe.experts.159.w1", "model.layers.41.block_sparse_moe.experts.160.w1", "model.layers.41.block_sparse_moe.experts.161.w1", "model.layers.41.block_sparse_moe.experts.162.w1", "model.layers.41.block_sparse_moe.experts.163.w1", "model.layers.41.block_sparse_moe.experts.164.w1", "model.layers.41.block_sparse_moe.experts.165.w1", "model.layers.41.block_sparse_moe.experts.166.w1", "model.layers.41.block_sparse_moe.experts.167.w1", "model.layers.41.block_sparse_moe.experts.168.w1", "model.layers.41.block_sparse_moe.experts.169.w1", "model.layers.41.block_sparse_moe.experts.170.w1", "model.layers.41.block_sparse_moe.experts.171.w1", "model.layers.41.block_sparse_moe.experts.172.w1", "model.layers.41.block_sparse_moe.experts.173.w1", "model.layers.41.block_sparse_moe.experts.174.w1", "model.layers.41.block_sparse_moe.experts.175.w1", "model.layers.41.block_sparse_moe.experts.176.w1", "model.layers.41.block_sparse_moe.experts.177.w1", "model.layers.41.block_sparse_moe.experts.178.w1", "model.layers.41.block_sparse_moe.experts.179.w1", "model.layers.41.block_sparse_moe.experts.180.w1", "model.layers.41.block_sparse_moe.experts.181.w1", "model.layers.41.block_sparse_moe.experts.182.w1", "model.layers.41.block_sparse_moe.experts.183.w1", "model.layers.41.block_sparse_moe.experts.184.w1", "model.layers.41.block_sparse_moe.experts.185.w1", "model.layers.41.block_sparse_moe.experts.186.w1", "model.layers.41.block_sparse_moe.experts.187.w1", "model.layers.41.block_sparse_moe.experts.188.w1", "model.layers.41.block_sparse_moe.experts.189.w1", "model.layers.41.block_sparse_moe.experts.190.w1", "model.layers.41.block_sparse_moe.experts.191.w1", "model.layers.41.block_sparse_moe.experts.192.w1", "model.layers.41.block_sparse_moe.experts.193.w1", "model.layers.41.block_sparse_moe.experts.194.w1", "model.layers.41.block_sparse_moe.experts.195.w1", "model.layers.41.block_sparse_moe.experts.196.w1", "model.layers.41.block_sparse_moe.experts.197.w1", "model.layers.41.block_sparse_moe.experts.198.w1", "model.layers.41.block_sparse_moe.experts.199.w1", "model.layers.41.block_sparse_moe.experts.200.w1", "model.layers.41.block_sparse_moe.experts.201.w1", "model.layers.41.block_sparse_moe.experts.202.w1", "model.layers.41.block_sparse_moe.experts.203.w1", "model.layers.41.block_sparse_moe.experts.204.w1", "model.layers.41.block_sparse_moe.experts.205.w1", "model.layers.41.block_sparse_moe.experts.206.w1", "model.layers.41.block_sparse_moe.experts.207.w1", "model.layers.41.block_sparse_moe.experts.208.w1", "model.layers.41.block_sparse_moe.experts.209.w1", "model.layers.41.block_sparse_moe.experts.210.w1", "model.layers.41.block_sparse_moe.experts.211.w1", "model.layers.41.block_sparse_moe.experts.212.w1", "model.layers.41.block_sparse_moe.experts.213.w1", "model.layers.41.block_sparse_moe.experts.214.w1", "model.layers.41.block_sparse_moe.experts.215.w1", "model.layers.41.block_sparse_moe.experts.216.w1", "model.layers.41.block_sparse_moe.experts.217.w1", "model.layers.41.block_sparse_moe.experts.218.w1", "model.layers.41.block_sparse_moe.experts.219.w1", "model.layers.41.block_sparse_moe.experts.220.w1", "model.layers.41.block_sparse_moe.experts.221.w1", "model.layers.41.block_sparse_moe.experts.222.w1", "model.layers.41.block_sparse_moe.experts.223.w1", "model.layers.41.block_sparse_moe.experts.224.w1", "model.layers.41.block_sparse_moe.experts.225.w1", "model.layers.41.block_sparse_moe.experts.226.w1", "model.layers.41.block_sparse_moe.experts.227.w1", "model.layers.41.block_sparse_moe.experts.228.w1", "model.layers.41.block_sparse_moe.experts.229.w1", "model.layers.41.block_sparse_moe.experts.230.w1", "model.layers.41.block_sparse_moe.experts.231.w1", "model.layers.41.block_sparse_moe.experts.232.w1", "model.layers.41.block_sparse_moe.experts.233.w1", "model.layers.41.block_sparse_moe.experts.234.w1", "model.layers.41.block_sparse_moe.experts.235.w1", "model.layers.41.block_sparse_moe.experts.236.w1", "model.layers.41.block_sparse_moe.experts.237.w1", "model.layers.41.block_sparse_moe.experts.238.w1", "model.layers.41.block_sparse_moe.experts.239.w1", "model.layers.41.block_sparse_moe.experts.240.w1", "model.layers.41.block_sparse_moe.experts.241.w1", "model.layers.41.block_sparse_moe.experts.242.w1", "model.layers.41.block_sparse_moe.experts.243.w1", "model.layers.41.block_sparse_moe.experts.244.w1", "model.layers.41.block_sparse_moe.experts.245.w1", "model.layers.41.block_sparse_moe.experts.246.w1", "model.layers.41.block_sparse_moe.experts.247.w1", "model.layers.41.block_sparse_moe.experts.248.w1", "model.layers.41.block_sparse_moe.experts.249.w1", "model.layers.41.block_sparse_moe.experts.250.w1", "model.layers.41.block_sparse_moe.experts.251.w1", "model.layers.41.block_sparse_moe.experts.252.w1", "model.layers.41.block_sparse_moe.experts.253.w1", "model.layers.41.block_sparse_moe.experts.254.w1", "model.layers.41.block_sparse_moe.experts.255.w1", "model.layers.41.block_sparse_moe.experts.0.w3", "model.layers.41.block_sparse_moe.experts.1.w3", "model.layers.41.block_sparse_moe.experts.2.w3", "model.layers.41.block_sparse_moe.experts.3.w3", "model.layers.41.block_sparse_moe.experts.4.w3", "model.layers.41.block_sparse_moe.experts.5.w3", "model.layers.41.block_sparse_moe.experts.6.w3", "model.layers.41.block_sparse_moe.experts.7.w3", "model.layers.41.block_sparse_moe.experts.8.w3", "model.layers.41.block_sparse_moe.experts.9.w3", "model.layers.41.block_sparse_moe.experts.10.w3", "model.layers.41.block_sparse_moe.experts.11.w3", "model.layers.41.block_sparse_moe.experts.12.w3", "model.layers.41.block_sparse_moe.experts.13.w3", "model.layers.41.block_sparse_moe.experts.14.w3", "model.layers.41.block_sparse_moe.experts.15.w3", "model.layers.41.block_sparse_moe.experts.16.w3", "model.layers.41.block_sparse_moe.experts.17.w3", "model.layers.41.block_sparse_moe.experts.18.w3", "model.layers.41.block_sparse_moe.experts.19.w3", "model.layers.41.block_sparse_moe.experts.20.w3", "model.layers.41.block_sparse_moe.experts.21.w3", "model.layers.41.block_sparse_moe.experts.22.w3", "model.layers.41.block_sparse_moe.experts.23.w3", "model.layers.41.block_sparse_moe.experts.24.w3", "model.layers.41.block_sparse_moe.experts.25.w3", "model.layers.41.block_sparse_moe.experts.26.w3", "model.layers.41.block_sparse_moe.experts.27.w3", "model.layers.41.block_sparse_moe.experts.28.w3", "model.layers.41.block_sparse_moe.experts.29.w3", "model.layers.41.block_sparse_moe.experts.30.w3", "model.layers.41.block_sparse_moe.experts.31.w3", "model.layers.41.block_sparse_moe.experts.32.w3", "model.layers.41.block_sparse_moe.experts.33.w3", "model.layers.41.block_sparse_moe.experts.34.w3", "model.layers.41.block_sparse_moe.experts.35.w3", "model.layers.41.block_sparse_moe.experts.36.w3", "model.layers.41.block_sparse_moe.experts.37.w3", "model.layers.41.block_sparse_moe.experts.38.w3", "model.layers.41.block_sparse_moe.experts.39.w3", "model.layers.41.block_sparse_moe.experts.40.w3", "model.layers.41.block_sparse_moe.experts.41.w3", "model.layers.41.block_sparse_moe.experts.42.w3", "model.layers.41.block_sparse_moe.experts.43.w3", "model.layers.41.block_sparse_moe.experts.44.w3", "model.layers.41.block_sparse_moe.experts.45.w3", "model.layers.41.block_sparse_moe.experts.46.w3", "model.layers.41.block_sparse_moe.experts.47.w3", "model.layers.41.block_sparse_moe.experts.48.w3", "model.layers.41.block_sparse_moe.experts.49.w3", "model.layers.41.block_sparse_moe.experts.50.w3", "model.layers.41.block_sparse_moe.experts.51.w3", "model.layers.41.block_sparse_moe.experts.52.w3", "model.layers.41.block_sparse_moe.experts.53.w3", "model.layers.41.block_sparse_moe.experts.54.w3", "model.layers.41.block_sparse_moe.experts.55.w3", "model.layers.41.block_sparse_moe.experts.56.w3", "model.layers.41.block_sparse_moe.experts.57.w3", "model.layers.41.block_sparse_moe.experts.58.w3", "model.layers.41.block_sparse_moe.experts.59.w3", "model.layers.41.block_sparse_moe.experts.60.w3", "model.layers.41.block_sparse_moe.experts.61.w3", "model.layers.41.block_sparse_moe.experts.62.w3", "model.layers.41.block_sparse_moe.experts.63.w3", "model.layers.41.block_sparse_moe.experts.64.w3", "model.layers.41.block_sparse_moe.experts.65.w3", "model.layers.41.block_sparse_moe.experts.66.w3", "model.layers.41.block_sparse_moe.experts.67.w3", "model.layers.41.block_sparse_moe.experts.68.w3", "model.layers.41.block_sparse_moe.experts.69.w3", "model.layers.41.block_sparse_moe.experts.70.w3", "model.layers.41.block_sparse_moe.experts.71.w3", "model.layers.41.block_sparse_moe.experts.72.w3", "model.layers.41.block_sparse_moe.experts.73.w3", "model.layers.41.block_sparse_moe.experts.74.w3", "model.layers.41.block_sparse_moe.experts.75.w3", "model.layers.41.block_sparse_moe.experts.76.w3", "model.layers.41.block_sparse_moe.experts.77.w3", "model.layers.41.block_sparse_moe.experts.78.w3", "model.layers.41.block_sparse_moe.experts.79.w3", "model.layers.41.block_sparse_moe.experts.80.w3", "model.layers.41.block_sparse_moe.experts.81.w3", "model.layers.41.block_sparse_moe.experts.82.w3", "model.layers.41.block_sparse_moe.experts.83.w3", "model.layers.41.block_sparse_moe.experts.84.w3", "model.layers.41.block_sparse_moe.experts.85.w3", "model.layers.41.block_sparse_moe.experts.86.w3", "model.layers.41.block_sparse_moe.experts.87.w3", "model.layers.41.block_sparse_moe.experts.88.w3", "model.layers.41.block_sparse_moe.experts.89.w3", "model.layers.41.block_sparse_moe.experts.90.w3", "model.layers.41.block_sparse_moe.experts.91.w3", "model.layers.41.block_sparse_moe.experts.92.w3", "model.layers.41.block_sparse_moe.experts.93.w3", "model.layers.41.block_sparse_moe.experts.94.w3", "model.layers.41.block_sparse_moe.experts.95.w3", "model.layers.41.block_sparse_moe.experts.96.w3", "model.layers.41.block_sparse_moe.experts.97.w3", "model.layers.41.block_sparse_moe.experts.98.w3", "model.layers.41.block_sparse_moe.experts.99.w3", "model.layers.41.block_sparse_moe.experts.100.w3", "model.layers.41.block_sparse_moe.experts.101.w3", "model.layers.41.block_sparse_moe.experts.102.w3", "model.layers.41.block_sparse_moe.experts.103.w3", "model.layers.41.block_sparse_moe.experts.104.w3", "model.layers.41.block_sparse_moe.experts.105.w3", "model.layers.41.block_sparse_moe.experts.106.w3", "model.layers.41.block_sparse_moe.experts.107.w3", "model.layers.41.block_sparse_moe.experts.108.w3", "model.layers.41.block_sparse_moe.experts.109.w3", "model.layers.41.block_sparse_moe.experts.110.w3", "model.layers.41.block_sparse_moe.experts.111.w3", "model.layers.41.block_sparse_moe.experts.112.w3", "model.layers.41.block_sparse_moe.experts.113.w3", "model.layers.41.block_sparse_moe.experts.114.w3", "model.layers.41.block_sparse_moe.experts.115.w3", "model.layers.41.block_sparse_moe.experts.116.w3", "model.layers.41.block_sparse_moe.experts.117.w3", "model.layers.41.block_sparse_moe.experts.118.w3", "model.layers.41.block_sparse_moe.experts.119.w3", "model.layers.41.block_sparse_moe.experts.120.w3", "model.layers.41.block_sparse_moe.experts.121.w3", "model.layers.41.block_sparse_moe.experts.122.w3", "model.layers.41.block_sparse_moe.experts.123.w3", "model.layers.41.block_sparse_moe.experts.124.w3", "model.layers.41.block_sparse_moe.experts.125.w3", "model.layers.41.block_sparse_moe.experts.126.w3", "model.layers.41.block_sparse_moe.experts.127.w3", "model.layers.41.block_sparse_moe.experts.128.w3", "model.layers.41.block_sparse_moe.experts.129.w3", "model.layers.41.block_sparse_moe.experts.130.w3", "model.layers.41.block_sparse_moe.experts.131.w3", "model.layers.41.block_sparse_moe.experts.132.w3", "model.layers.41.block_sparse_moe.experts.133.w3", "model.layers.41.block_sparse_moe.experts.134.w3", "model.layers.41.block_sparse_moe.experts.135.w3", "model.layers.41.block_sparse_moe.experts.136.w3", "model.layers.41.block_sparse_moe.experts.137.w3", "model.layers.41.block_sparse_moe.experts.138.w3", "model.layers.41.block_sparse_moe.experts.139.w3", "model.layers.41.block_sparse_moe.experts.140.w3", "model.layers.41.block_sparse_moe.experts.141.w3", "model.layers.41.block_sparse_moe.experts.142.w3", "model.layers.41.block_sparse_moe.experts.143.w3", "model.layers.41.block_sparse_moe.experts.144.w3", "model.layers.41.block_sparse_moe.experts.145.w3", "model.layers.41.block_sparse_moe.experts.146.w3", "model.layers.41.block_sparse_moe.experts.147.w3", "model.layers.41.block_sparse_moe.experts.148.w3", "model.layers.41.block_sparse_moe.experts.149.w3", "model.layers.41.block_sparse_moe.experts.150.w3", "model.layers.41.block_sparse_moe.experts.151.w3", "model.layers.41.block_sparse_moe.experts.152.w3", "model.layers.41.block_sparse_moe.experts.153.w3", "model.layers.41.block_sparse_moe.experts.154.w3", "model.layers.41.block_sparse_moe.experts.155.w3", "model.layers.41.block_sparse_moe.experts.156.w3", "model.layers.41.block_sparse_moe.experts.157.w3", "model.layers.41.block_sparse_moe.experts.158.w3", "model.layers.41.block_sparse_moe.experts.159.w3", "model.layers.41.block_sparse_moe.experts.160.w3", "model.layers.41.block_sparse_moe.experts.161.w3", "model.layers.41.block_sparse_moe.experts.162.w3", "model.layers.41.block_sparse_moe.experts.163.w3", "model.layers.41.block_sparse_moe.experts.164.w3", "model.layers.41.block_sparse_moe.experts.165.w3", "model.layers.41.block_sparse_moe.experts.166.w3", "model.layers.41.block_sparse_moe.experts.167.w3", "model.layers.41.block_sparse_moe.experts.168.w3", "model.layers.41.block_sparse_moe.experts.169.w3", "model.layers.41.block_sparse_moe.experts.170.w3", "model.layers.41.block_sparse_moe.experts.171.w3", "model.layers.41.block_sparse_moe.experts.172.w3", "model.layers.41.block_sparse_moe.experts.173.w3", "model.layers.41.block_sparse_moe.experts.174.w3", "model.layers.41.block_sparse_moe.experts.175.w3", "model.layers.41.block_sparse_moe.experts.176.w3", "model.layers.41.block_sparse_moe.experts.177.w3", "model.layers.41.block_sparse_moe.experts.178.w3", "model.layers.41.block_sparse_moe.experts.179.w3", "model.layers.41.block_sparse_moe.experts.180.w3", "model.layers.41.block_sparse_moe.experts.181.w3", "model.layers.41.block_sparse_moe.experts.182.w3", "model.layers.41.block_sparse_moe.experts.183.w3", "model.layers.41.block_sparse_moe.experts.184.w3", "model.layers.41.block_sparse_moe.experts.185.w3", "model.layers.41.block_sparse_moe.experts.186.w3", "model.layers.41.block_sparse_moe.experts.187.w3", "model.layers.41.block_sparse_moe.experts.188.w3", "model.layers.41.block_sparse_moe.experts.189.w3", "model.layers.41.block_sparse_moe.experts.190.w3", "model.layers.41.block_sparse_moe.experts.191.w3", "model.layers.41.block_sparse_moe.experts.192.w3", "model.layers.41.block_sparse_moe.experts.193.w3", "model.layers.41.block_sparse_moe.experts.194.w3", "model.layers.41.block_sparse_moe.experts.195.w3", "model.layers.41.block_sparse_moe.experts.196.w3", "model.layers.41.block_sparse_moe.experts.197.w3", "model.layers.41.block_sparse_moe.experts.198.w3", "model.layers.41.block_sparse_moe.experts.199.w3", "model.layers.41.block_sparse_moe.experts.200.w3", "model.layers.41.block_sparse_moe.experts.201.w3", "model.layers.41.block_sparse_moe.experts.202.w3", "model.layers.41.block_sparse_moe.experts.203.w3", "model.layers.41.block_sparse_moe.experts.204.w3", "model.layers.41.block_sparse_moe.experts.205.w3", "model.layers.41.block_sparse_moe.experts.206.w3", "model.layers.41.block_sparse_moe.experts.207.w3", "model.layers.41.block_sparse_moe.experts.208.w3", "model.layers.41.block_sparse_moe.experts.209.w3", "model.layers.41.block_sparse_moe.experts.210.w3", "model.layers.41.block_sparse_moe.experts.211.w3", "model.layers.41.block_sparse_moe.experts.212.w3", "model.layers.41.block_sparse_moe.experts.213.w3", "model.layers.41.block_sparse_moe.experts.214.w3", "model.layers.41.block_sparse_moe.experts.215.w3", "model.layers.41.block_sparse_moe.experts.216.w3", "model.layers.41.block_sparse_moe.experts.217.w3", "model.layers.41.block_sparse_moe.experts.218.w3", "model.layers.41.block_sparse_moe.experts.219.w3", "model.layers.41.block_sparse_moe.experts.220.w3", "model.layers.41.block_sparse_moe.experts.221.w3", "model.layers.41.block_sparse_moe.experts.222.w3", "model.layers.41.block_sparse_moe.experts.223.w3", "model.layers.41.block_sparse_moe.experts.224.w3", "model.layers.41.block_sparse_moe.experts.225.w3", "model.layers.41.block_sparse_moe.experts.226.w3", "model.layers.41.block_sparse_moe.experts.227.w3", "model.layers.41.block_sparse_moe.experts.228.w3", "model.layers.41.block_sparse_moe.experts.229.w3", "model.layers.41.block_sparse_moe.experts.230.w3", "model.layers.41.block_sparse_moe.experts.231.w3", "model.layers.41.block_sparse_moe.experts.232.w3", "model.layers.41.block_sparse_moe.experts.233.w3", "model.layers.41.block_sparse_moe.experts.234.w3", "model.layers.41.block_sparse_moe.experts.235.w3", "model.layers.41.block_sparse_moe.experts.236.w3", "model.layers.41.block_sparse_moe.experts.237.w3", "model.layers.41.block_sparse_moe.experts.238.w3", "model.layers.41.block_sparse_moe.experts.239.w3", "model.layers.41.block_sparse_moe.experts.240.w3", "model.layers.41.block_sparse_moe.experts.241.w3", "model.layers.41.block_sparse_moe.experts.242.w3", "model.layers.41.block_sparse_moe.experts.243.w3", "model.layers.41.block_sparse_moe.experts.244.w3", "model.layers.41.block_sparse_moe.experts.245.w3", "model.layers.41.block_sparse_moe.experts.246.w3", "model.layers.41.block_sparse_moe.experts.247.w3", "model.layers.41.block_sparse_moe.experts.248.w3", "model.layers.41.block_sparse_moe.experts.249.w3", "model.layers.41.block_sparse_moe.experts.250.w3", "model.layers.41.block_sparse_moe.experts.251.w3", "model.layers.41.block_sparse_moe.experts.252.w3", "model.layers.41.block_sparse_moe.experts.253.w3", "model.layers.41.block_sparse_moe.experts.254.w3", "model.layers.41.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.001575046777725131, "dbits": 2415919104 } ] }, { "idx": 209, "layers": [ "model.layers.41.block_sparse_moe.experts.0.w2", "model.layers.41.block_sparse_moe.experts.1.w2", "model.layers.41.block_sparse_moe.experts.2.w2", "model.layers.41.block_sparse_moe.experts.3.w2", "model.layers.41.block_sparse_moe.experts.4.w2", "model.layers.41.block_sparse_moe.experts.5.w2", "model.layers.41.block_sparse_moe.experts.6.w2", "model.layers.41.block_sparse_moe.experts.7.w2", "model.layers.41.block_sparse_moe.experts.8.w2", "model.layers.41.block_sparse_moe.experts.9.w2", "model.layers.41.block_sparse_moe.experts.10.w2", "model.layers.41.block_sparse_moe.experts.11.w2", "model.layers.41.block_sparse_moe.experts.12.w2", "model.layers.41.block_sparse_moe.experts.13.w2", "model.layers.41.block_sparse_moe.experts.14.w2", "model.layers.41.block_sparse_moe.experts.15.w2", "model.layers.41.block_sparse_moe.experts.16.w2", "model.layers.41.block_sparse_moe.experts.17.w2", "model.layers.41.block_sparse_moe.experts.18.w2", "model.layers.41.block_sparse_moe.experts.19.w2", "model.layers.41.block_sparse_moe.experts.20.w2", "model.layers.41.block_sparse_moe.experts.21.w2", "model.layers.41.block_sparse_moe.experts.22.w2", "model.layers.41.block_sparse_moe.experts.23.w2", "model.layers.41.block_sparse_moe.experts.24.w2", "model.layers.41.block_sparse_moe.experts.25.w2", "model.layers.41.block_sparse_moe.experts.26.w2", "model.layers.41.block_sparse_moe.experts.27.w2", "model.layers.41.block_sparse_moe.experts.28.w2", "model.layers.41.block_sparse_moe.experts.29.w2", "model.layers.41.block_sparse_moe.experts.30.w2", "model.layers.41.block_sparse_moe.experts.31.w2", "model.layers.41.block_sparse_moe.experts.32.w2", "model.layers.41.block_sparse_moe.experts.33.w2", "model.layers.41.block_sparse_moe.experts.34.w2", "model.layers.41.block_sparse_moe.experts.35.w2", "model.layers.41.block_sparse_moe.experts.36.w2", "model.layers.41.block_sparse_moe.experts.37.w2", "model.layers.41.block_sparse_moe.experts.38.w2", "model.layers.41.block_sparse_moe.experts.39.w2", "model.layers.41.block_sparse_moe.experts.40.w2", "model.layers.41.block_sparse_moe.experts.41.w2", "model.layers.41.block_sparse_moe.experts.42.w2", "model.layers.41.block_sparse_moe.experts.43.w2", "model.layers.41.block_sparse_moe.experts.44.w2", "model.layers.41.block_sparse_moe.experts.45.w2", "model.layers.41.block_sparse_moe.experts.46.w2", "model.layers.41.block_sparse_moe.experts.47.w2", "model.layers.41.block_sparse_moe.experts.48.w2", "model.layers.41.block_sparse_moe.experts.49.w2", "model.layers.41.block_sparse_moe.experts.50.w2", "model.layers.41.block_sparse_moe.experts.51.w2", "model.layers.41.block_sparse_moe.experts.52.w2", "model.layers.41.block_sparse_moe.experts.53.w2", "model.layers.41.block_sparse_moe.experts.54.w2", "model.layers.41.block_sparse_moe.experts.55.w2", "model.layers.41.block_sparse_moe.experts.56.w2", "model.layers.41.block_sparse_moe.experts.57.w2", "model.layers.41.block_sparse_moe.experts.58.w2", "model.layers.41.block_sparse_moe.experts.59.w2", "model.layers.41.block_sparse_moe.experts.60.w2", "model.layers.41.block_sparse_moe.experts.61.w2", "model.layers.41.block_sparse_moe.experts.62.w2", "model.layers.41.block_sparse_moe.experts.63.w2", "model.layers.41.block_sparse_moe.experts.64.w2", "model.layers.41.block_sparse_moe.experts.65.w2", "model.layers.41.block_sparse_moe.experts.66.w2", "model.layers.41.block_sparse_moe.experts.67.w2", "model.layers.41.block_sparse_moe.experts.68.w2", "model.layers.41.block_sparse_moe.experts.69.w2", "model.layers.41.block_sparse_moe.experts.70.w2", "model.layers.41.block_sparse_moe.experts.71.w2", "model.layers.41.block_sparse_moe.experts.72.w2", "model.layers.41.block_sparse_moe.experts.73.w2", "model.layers.41.block_sparse_moe.experts.74.w2", "model.layers.41.block_sparse_moe.experts.75.w2", "model.layers.41.block_sparse_moe.experts.76.w2", "model.layers.41.block_sparse_moe.experts.77.w2", "model.layers.41.block_sparse_moe.experts.78.w2", "model.layers.41.block_sparse_moe.experts.79.w2", "model.layers.41.block_sparse_moe.experts.80.w2", "model.layers.41.block_sparse_moe.experts.81.w2", "model.layers.41.block_sparse_moe.experts.82.w2", "model.layers.41.block_sparse_moe.experts.83.w2", "model.layers.41.block_sparse_moe.experts.84.w2", "model.layers.41.block_sparse_moe.experts.85.w2", "model.layers.41.block_sparse_moe.experts.86.w2", "model.layers.41.block_sparse_moe.experts.87.w2", "model.layers.41.block_sparse_moe.experts.88.w2", "model.layers.41.block_sparse_moe.experts.89.w2", "model.layers.41.block_sparse_moe.experts.90.w2", "model.layers.41.block_sparse_moe.experts.91.w2", "model.layers.41.block_sparse_moe.experts.92.w2", "model.layers.41.block_sparse_moe.experts.93.w2", "model.layers.41.block_sparse_moe.experts.94.w2", "model.layers.41.block_sparse_moe.experts.95.w2", "model.layers.41.block_sparse_moe.experts.96.w2", "model.layers.41.block_sparse_moe.experts.97.w2", "model.layers.41.block_sparse_moe.experts.98.w2", "model.layers.41.block_sparse_moe.experts.99.w2", "model.layers.41.block_sparse_moe.experts.100.w2", "model.layers.41.block_sparse_moe.experts.101.w2", "model.layers.41.block_sparse_moe.experts.102.w2", "model.layers.41.block_sparse_moe.experts.103.w2", "model.layers.41.block_sparse_moe.experts.104.w2", "model.layers.41.block_sparse_moe.experts.105.w2", "model.layers.41.block_sparse_moe.experts.106.w2", "model.layers.41.block_sparse_moe.experts.107.w2", "model.layers.41.block_sparse_moe.experts.108.w2", "model.layers.41.block_sparse_moe.experts.109.w2", "model.layers.41.block_sparse_moe.experts.110.w2", "model.layers.41.block_sparse_moe.experts.111.w2", "model.layers.41.block_sparse_moe.experts.112.w2", "model.layers.41.block_sparse_moe.experts.113.w2", "model.layers.41.block_sparse_moe.experts.114.w2", "model.layers.41.block_sparse_moe.experts.115.w2", "model.layers.41.block_sparse_moe.experts.116.w2", "model.layers.41.block_sparse_moe.experts.117.w2", "model.layers.41.block_sparse_moe.experts.118.w2", "model.layers.41.block_sparse_moe.experts.119.w2", "model.layers.41.block_sparse_moe.experts.120.w2", "model.layers.41.block_sparse_moe.experts.121.w2", "model.layers.41.block_sparse_moe.experts.122.w2", "model.layers.41.block_sparse_moe.experts.123.w2", "model.layers.41.block_sparse_moe.experts.124.w2", "model.layers.41.block_sparse_moe.experts.125.w2", "model.layers.41.block_sparse_moe.experts.126.w2", "model.layers.41.block_sparse_moe.experts.127.w2", "model.layers.41.block_sparse_moe.experts.128.w2", "model.layers.41.block_sparse_moe.experts.129.w2", "model.layers.41.block_sparse_moe.experts.130.w2", "model.layers.41.block_sparse_moe.experts.131.w2", "model.layers.41.block_sparse_moe.experts.132.w2", "model.layers.41.block_sparse_moe.experts.133.w2", "model.layers.41.block_sparse_moe.experts.134.w2", "model.layers.41.block_sparse_moe.experts.135.w2", "model.layers.41.block_sparse_moe.experts.136.w2", "model.layers.41.block_sparse_moe.experts.137.w2", "model.layers.41.block_sparse_moe.experts.138.w2", "model.layers.41.block_sparse_moe.experts.139.w2", "model.layers.41.block_sparse_moe.experts.140.w2", "model.layers.41.block_sparse_moe.experts.141.w2", "model.layers.41.block_sparse_moe.experts.142.w2", "model.layers.41.block_sparse_moe.experts.143.w2", "model.layers.41.block_sparse_moe.experts.144.w2", "model.layers.41.block_sparse_moe.experts.145.w2", "model.layers.41.block_sparse_moe.experts.146.w2", "model.layers.41.block_sparse_moe.experts.147.w2", "model.layers.41.block_sparse_moe.experts.148.w2", "model.layers.41.block_sparse_moe.experts.149.w2", "model.layers.41.block_sparse_moe.experts.150.w2", "model.layers.41.block_sparse_moe.experts.151.w2", "model.layers.41.block_sparse_moe.experts.152.w2", "model.layers.41.block_sparse_moe.experts.153.w2", "model.layers.41.block_sparse_moe.experts.154.w2", "model.layers.41.block_sparse_moe.experts.155.w2", "model.layers.41.block_sparse_moe.experts.156.w2", "model.layers.41.block_sparse_moe.experts.157.w2", "model.layers.41.block_sparse_moe.experts.158.w2", "model.layers.41.block_sparse_moe.experts.159.w2", "model.layers.41.block_sparse_moe.experts.160.w2", "model.layers.41.block_sparse_moe.experts.161.w2", "model.layers.41.block_sparse_moe.experts.162.w2", "model.layers.41.block_sparse_moe.experts.163.w2", "model.layers.41.block_sparse_moe.experts.164.w2", "model.layers.41.block_sparse_moe.experts.165.w2", "model.layers.41.block_sparse_moe.experts.166.w2", "model.layers.41.block_sparse_moe.experts.167.w2", "model.layers.41.block_sparse_moe.experts.168.w2", "model.layers.41.block_sparse_moe.experts.169.w2", "model.layers.41.block_sparse_moe.experts.170.w2", "model.layers.41.block_sparse_moe.experts.171.w2", "model.layers.41.block_sparse_moe.experts.172.w2", "model.layers.41.block_sparse_moe.experts.173.w2", "model.layers.41.block_sparse_moe.experts.174.w2", "model.layers.41.block_sparse_moe.experts.175.w2", "model.layers.41.block_sparse_moe.experts.176.w2", "model.layers.41.block_sparse_moe.experts.177.w2", "model.layers.41.block_sparse_moe.experts.178.w2", "model.layers.41.block_sparse_moe.experts.179.w2", "model.layers.41.block_sparse_moe.experts.180.w2", "model.layers.41.block_sparse_moe.experts.181.w2", "model.layers.41.block_sparse_moe.experts.182.w2", "model.layers.41.block_sparse_moe.experts.183.w2", "model.layers.41.block_sparse_moe.experts.184.w2", "model.layers.41.block_sparse_moe.experts.185.w2", "model.layers.41.block_sparse_moe.experts.186.w2", "model.layers.41.block_sparse_moe.experts.187.w2", "model.layers.41.block_sparse_moe.experts.188.w2", "model.layers.41.block_sparse_moe.experts.189.w2", "model.layers.41.block_sparse_moe.experts.190.w2", "model.layers.41.block_sparse_moe.experts.191.w2", "model.layers.41.block_sparse_moe.experts.192.w2", "model.layers.41.block_sparse_moe.experts.193.w2", "model.layers.41.block_sparse_moe.experts.194.w2", "model.layers.41.block_sparse_moe.experts.195.w2", "model.layers.41.block_sparse_moe.experts.196.w2", "model.layers.41.block_sparse_moe.experts.197.w2", "model.layers.41.block_sparse_moe.experts.198.w2", "model.layers.41.block_sparse_moe.experts.199.w2", "model.layers.41.block_sparse_moe.experts.200.w2", "model.layers.41.block_sparse_moe.experts.201.w2", "model.layers.41.block_sparse_moe.experts.202.w2", "model.layers.41.block_sparse_moe.experts.203.w2", "model.layers.41.block_sparse_moe.experts.204.w2", "model.layers.41.block_sparse_moe.experts.205.w2", "model.layers.41.block_sparse_moe.experts.206.w2", "model.layers.41.block_sparse_moe.experts.207.w2", "model.layers.41.block_sparse_moe.experts.208.w2", "model.layers.41.block_sparse_moe.experts.209.w2", "model.layers.41.block_sparse_moe.experts.210.w2", "model.layers.41.block_sparse_moe.experts.211.w2", "model.layers.41.block_sparse_moe.experts.212.w2", "model.layers.41.block_sparse_moe.experts.213.w2", "model.layers.41.block_sparse_moe.experts.214.w2", "model.layers.41.block_sparse_moe.experts.215.w2", "model.layers.41.block_sparse_moe.experts.216.w2", "model.layers.41.block_sparse_moe.experts.217.w2", "model.layers.41.block_sparse_moe.experts.218.w2", "model.layers.41.block_sparse_moe.experts.219.w2", "model.layers.41.block_sparse_moe.experts.220.w2", "model.layers.41.block_sparse_moe.experts.221.w2", "model.layers.41.block_sparse_moe.experts.222.w2", "model.layers.41.block_sparse_moe.experts.223.w2", "model.layers.41.block_sparse_moe.experts.224.w2", "model.layers.41.block_sparse_moe.experts.225.w2", "model.layers.41.block_sparse_moe.experts.226.w2", "model.layers.41.block_sparse_moe.experts.227.w2", "model.layers.41.block_sparse_moe.experts.228.w2", "model.layers.41.block_sparse_moe.experts.229.w2", "model.layers.41.block_sparse_moe.experts.230.w2", "model.layers.41.block_sparse_moe.experts.231.w2", "model.layers.41.block_sparse_moe.experts.232.w2", "model.layers.41.block_sparse_moe.experts.233.w2", "model.layers.41.block_sparse_moe.experts.234.w2", "model.layers.41.block_sparse_moe.experts.235.w2", "model.layers.41.block_sparse_moe.experts.236.w2", "model.layers.41.block_sparse_moe.experts.237.w2", "model.layers.41.block_sparse_moe.experts.238.w2", "model.layers.41.block_sparse_moe.experts.239.w2", "model.layers.41.block_sparse_moe.experts.240.w2", "model.layers.41.block_sparse_moe.experts.241.w2", "model.layers.41.block_sparse_moe.experts.242.w2", "model.layers.41.block_sparse_moe.experts.243.w2", "model.layers.41.block_sparse_moe.experts.244.w2", "model.layers.41.block_sparse_moe.experts.245.w2", "model.layers.41.block_sparse_moe.experts.246.w2", "model.layers.41.block_sparse_moe.experts.247.w2", "model.layers.41.block_sparse_moe.experts.248.w2", "model.layers.41.block_sparse_moe.experts.249.w2", "model.layers.41.block_sparse_moe.experts.250.w2", "model.layers.41.block_sparse_moe.experts.251.w2", "model.layers.41.block_sparse_moe.experts.252.w2", "model.layers.41.block_sparse_moe.experts.253.w2", "model.layers.41.block_sparse_moe.experts.254.w2", "model.layers.41.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0011977314949035422, "dbits": 1207959552 } ] }, { "idx": 210, "layers": [ "model.layers.42.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0017031610012053777, "dbits": 18874368 } ] }, { "idx": 211, "layers": [ "model.layers.42.self_attn.k_proj", "model.layers.42.self_attn.v_proj" ], "candidates": [ { "dkld": -0.004702292382717133, "dbits": 6291456 } ] }, { "idx": 212, "layers": [ "model.layers.42.self_attn.o_proj" ], "candidates": [ { "dkld": -0.009453138709068276, "dbits": 18874368 } ] }, { "idx": 213, "layers": [ "model.layers.42.block_sparse_moe.experts.0.w1", "model.layers.42.block_sparse_moe.experts.1.w1", "model.layers.42.block_sparse_moe.experts.2.w1", "model.layers.42.block_sparse_moe.experts.3.w1", "model.layers.42.block_sparse_moe.experts.4.w1", "model.layers.42.block_sparse_moe.experts.5.w1", "model.layers.42.block_sparse_moe.experts.6.w1", "model.layers.42.block_sparse_moe.experts.7.w1", "model.layers.42.block_sparse_moe.experts.8.w1", "model.layers.42.block_sparse_moe.experts.9.w1", "model.layers.42.block_sparse_moe.experts.10.w1", "model.layers.42.block_sparse_moe.experts.11.w1", "model.layers.42.block_sparse_moe.experts.12.w1", "model.layers.42.block_sparse_moe.experts.13.w1", "model.layers.42.block_sparse_moe.experts.14.w1", "model.layers.42.block_sparse_moe.experts.15.w1", "model.layers.42.block_sparse_moe.experts.16.w1", "model.layers.42.block_sparse_moe.experts.17.w1", "model.layers.42.block_sparse_moe.experts.18.w1", "model.layers.42.block_sparse_moe.experts.19.w1", "model.layers.42.block_sparse_moe.experts.20.w1", "model.layers.42.block_sparse_moe.experts.21.w1", "model.layers.42.block_sparse_moe.experts.22.w1", "model.layers.42.block_sparse_moe.experts.23.w1", "model.layers.42.block_sparse_moe.experts.24.w1", "model.layers.42.block_sparse_moe.experts.25.w1", "model.layers.42.block_sparse_moe.experts.26.w1", "model.layers.42.block_sparse_moe.experts.27.w1", "model.layers.42.block_sparse_moe.experts.28.w1", "model.layers.42.block_sparse_moe.experts.29.w1", "model.layers.42.block_sparse_moe.experts.30.w1", "model.layers.42.block_sparse_moe.experts.31.w1", "model.layers.42.block_sparse_moe.experts.32.w1", "model.layers.42.block_sparse_moe.experts.33.w1", "model.layers.42.block_sparse_moe.experts.34.w1", "model.layers.42.block_sparse_moe.experts.35.w1", "model.layers.42.block_sparse_moe.experts.36.w1", "model.layers.42.block_sparse_moe.experts.37.w1", "model.layers.42.block_sparse_moe.experts.38.w1", "model.layers.42.block_sparse_moe.experts.39.w1", "model.layers.42.block_sparse_moe.experts.40.w1", "model.layers.42.block_sparse_moe.experts.41.w1", "model.layers.42.block_sparse_moe.experts.42.w1", "model.layers.42.block_sparse_moe.experts.43.w1", "model.layers.42.block_sparse_moe.experts.44.w1", "model.layers.42.block_sparse_moe.experts.45.w1", "model.layers.42.block_sparse_moe.experts.46.w1", "model.layers.42.block_sparse_moe.experts.47.w1", "model.layers.42.block_sparse_moe.experts.48.w1", "model.layers.42.block_sparse_moe.experts.49.w1", "model.layers.42.block_sparse_moe.experts.50.w1", "model.layers.42.block_sparse_moe.experts.51.w1", "model.layers.42.block_sparse_moe.experts.52.w1", "model.layers.42.block_sparse_moe.experts.53.w1", "model.layers.42.block_sparse_moe.experts.54.w1", "model.layers.42.block_sparse_moe.experts.55.w1", "model.layers.42.block_sparse_moe.experts.56.w1", "model.layers.42.block_sparse_moe.experts.57.w1", "model.layers.42.block_sparse_moe.experts.58.w1", "model.layers.42.block_sparse_moe.experts.59.w1", "model.layers.42.block_sparse_moe.experts.60.w1", "model.layers.42.block_sparse_moe.experts.61.w1", "model.layers.42.block_sparse_moe.experts.62.w1", "model.layers.42.block_sparse_moe.experts.63.w1", "model.layers.42.block_sparse_moe.experts.64.w1", "model.layers.42.block_sparse_moe.experts.65.w1", "model.layers.42.block_sparse_moe.experts.66.w1", "model.layers.42.block_sparse_moe.experts.67.w1", "model.layers.42.block_sparse_moe.experts.68.w1", "model.layers.42.block_sparse_moe.experts.69.w1", "model.layers.42.block_sparse_moe.experts.70.w1", "model.layers.42.block_sparse_moe.experts.71.w1", "model.layers.42.block_sparse_moe.experts.72.w1", "model.layers.42.block_sparse_moe.experts.73.w1", "model.layers.42.block_sparse_moe.experts.74.w1", "model.layers.42.block_sparse_moe.experts.75.w1", "model.layers.42.block_sparse_moe.experts.76.w1", "model.layers.42.block_sparse_moe.experts.77.w1", "model.layers.42.block_sparse_moe.experts.78.w1", "model.layers.42.block_sparse_moe.experts.79.w1", "model.layers.42.block_sparse_moe.experts.80.w1", "model.layers.42.block_sparse_moe.experts.81.w1", "model.layers.42.block_sparse_moe.experts.82.w1", "model.layers.42.block_sparse_moe.experts.83.w1", "model.layers.42.block_sparse_moe.experts.84.w1", "model.layers.42.block_sparse_moe.experts.85.w1", "model.layers.42.block_sparse_moe.experts.86.w1", "model.layers.42.block_sparse_moe.experts.87.w1", "model.layers.42.block_sparse_moe.experts.88.w1", "model.layers.42.block_sparse_moe.experts.89.w1", "model.layers.42.block_sparse_moe.experts.90.w1", "model.layers.42.block_sparse_moe.experts.91.w1", "model.layers.42.block_sparse_moe.experts.92.w1", "model.layers.42.block_sparse_moe.experts.93.w1", "model.layers.42.block_sparse_moe.experts.94.w1", "model.layers.42.block_sparse_moe.experts.95.w1", "model.layers.42.block_sparse_moe.experts.96.w1", "model.layers.42.block_sparse_moe.experts.97.w1", "model.layers.42.block_sparse_moe.experts.98.w1", "model.layers.42.block_sparse_moe.experts.99.w1", "model.layers.42.block_sparse_moe.experts.100.w1", "model.layers.42.block_sparse_moe.experts.101.w1", "model.layers.42.block_sparse_moe.experts.102.w1", "model.layers.42.block_sparse_moe.experts.103.w1", "model.layers.42.block_sparse_moe.experts.104.w1", "model.layers.42.block_sparse_moe.experts.105.w1", "model.layers.42.block_sparse_moe.experts.106.w1", "model.layers.42.block_sparse_moe.experts.107.w1", "model.layers.42.block_sparse_moe.experts.108.w1", "model.layers.42.block_sparse_moe.experts.109.w1", "model.layers.42.block_sparse_moe.experts.110.w1", "model.layers.42.block_sparse_moe.experts.111.w1", "model.layers.42.block_sparse_moe.experts.112.w1", "model.layers.42.block_sparse_moe.experts.113.w1", "model.layers.42.block_sparse_moe.experts.114.w1", "model.layers.42.block_sparse_moe.experts.115.w1", "model.layers.42.block_sparse_moe.experts.116.w1", "model.layers.42.block_sparse_moe.experts.117.w1", "model.layers.42.block_sparse_moe.experts.118.w1", "model.layers.42.block_sparse_moe.experts.119.w1", "model.layers.42.block_sparse_moe.experts.120.w1", "model.layers.42.block_sparse_moe.experts.121.w1", "model.layers.42.block_sparse_moe.experts.122.w1", "model.layers.42.block_sparse_moe.experts.123.w1", "model.layers.42.block_sparse_moe.experts.124.w1", "model.layers.42.block_sparse_moe.experts.125.w1", "model.layers.42.block_sparse_moe.experts.126.w1", "model.layers.42.block_sparse_moe.experts.127.w1", "model.layers.42.block_sparse_moe.experts.128.w1", "model.layers.42.block_sparse_moe.experts.129.w1", "model.layers.42.block_sparse_moe.experts.130.w1", "model.layers.42.block_sparse_moe.experts.131.w1", "model.layers.42.block_sparse_moe.experts.132.w1", "model.layers.42.block_sparse_moe.experts.133.w1", "model.layers.42.block_sparse_moe.experts.134.w1", "model.layers.42.block_sparse_moe.experts.135.w1", "model.layers.42.block_sparse_moe.experts.136.w1", "model.layers.42.block_sparse_moe.experts.137.w1", "model.layers.42.block_sparse_moe.experts.138.w1", "model.layers.42.block_sparse_moe.experts.139.w1", "model.layers.42.block_sparse_moe.experts.140.w1", "model.layers.42.block_sparse_moe.experts.141.w1", "model.layers.42.block_sparse_moe.experts.142.w1", "model.layers.42.block_sparse_moe.experts.143.w1", "model.layers.42.block_sparse_moe.experts.144.w1", "model.layers.42.block_sparse_moe.experts.145.w1", "model.layers.42.block_sparse_moe.experts.146.w1", "model.layers.42.block_sparse_moe.experts.147.w1", "model.layers.42.block_sparse_moe.experts.148.w1", "model.layers.42.block_sparse_moe.experts.149.w1", "model.layers.42.block_sparse_moe.experts.150.w1", "model.layers.42.block_sparse_moe.experts.151.w1", "model.layers.42.block_sparse_moe.experts.152.w1", "model.layers.42.block_sparse_moe.experts.153.w1", "model.layers.42.block_sparse_moe.experts.154.w1", "model.layers.42.block_sparse_moe.experts.155.w1", "model.layers.42.block_sparse_moe.experts.156.w1", "model.layers.42.block_sparse_moe.experts.157.w1", "model.layers.42.block_sparse_moe.experts.158.w1", "model.layers.42.block_sparse_moe.experts.159.w1", "model.layers.42.block_sparse_moe.experts.160.w1", "model.layers.42.block_sparse_moe.experts.161.w1", "model.layers.42.block_sparse_moe.experts.162.w1", "model.layers.42.block_sparse_moe.experts.163.w1", "model.layers.42.block_sparse_moe.experts.164.w1", "model.layers.42.block_sparse_moe.experts.165.w1", "model.layers.42.block_sparse_moe.experts.166.w1", "model.layers.42.block_sparse_moe.experts.167.w1", "model.layers.42.block_sparse_moe.experts.168.w1", "model.layers.42.block_sparse_moe.experts.169.w1", "model.layers.42.block_sparse_moe.experts.170.w1", "model.layers.42.block_sparse_moe.experts.171.w1", "model.layers.42.block_sparse_moe.experts.172.w1", "model.layers.42.block_sparse_moe.experts.173.w1", "model.layers.42.block_sparse_moe.experts.174.w1", "model.layers.42.block_sparse_moe.experts.175.w1", "model.layers.42.block_sparse_moe.experts.176.w1", "model.layers.42.block_sparse_moe.experts.177.w1", "model.layers.42.block_sparse_moe.experts.178.w1", "model.layers.42.block_sparse_moe.experts.179.w1", "model.layers.42.block_sparse_moe.experts.180.w1", "model.layers.42.block_sparse_moe.experts.181.w1", "model.layers.42.block_sparse_moe.experts.182.w1", "model.layers.42.block_sparse_moe.experts.183.w1", "model.layers.42.block_sparse_moe.experts.184.w1", "model.layers.42.block_sparse_moe.experts.185.w1", "model.layers.42.block_sparse_moe.experts.186.w1", "model.layers.42.block_sparse_moe.experts.187.w1", "model.layers.42.block_sparse_moe.experts.188.w1", "model.layers.42.block_sparse_moe.experts.189.w1", "model.layers.42.block_sparse_moe.experts.190.w1", "model.layers.42.block_sparse_moe.experts.191.w1", "model.layers.42.block_sparse_moe.experts.192.w1", "model.layers.42.block_sparse_moe.experts.193.w1", "model.layers.42.block_sparse_moe.experts.194.w1", "model.layers.42.block_sparse_moe.experts.195.w1", "model.layers.42.block_sparse_moe.experts.196.w1", "model.layers.42.block_sparse_moe.experts.197.w1", "model.layers.42.block_sparse_moe.experts.198.w1", "model.layers.42.block_sparse_moe.experts.199.w1", "model.layers.42.block_sparse_moe.experts.200.w1", "model.layers.42.block_sparse_moe.experts.201.w1", "model.layers.42.block_sparse_moe.experts.202.w1", "model.layers.42.block_sparse_moe.experts.203.w1", "model.layers.42.block_sparse_moe.experts.204.w1", "model.layers.42.block_sparse_moe.experts.205.w1", "model.layers.42.block_sparse_moe.experts.206.w1", "model.layers.42.block_sparse_moe.experts.207.w1", "model.layers.42.block_sparse_moe.experts.208.w1", "model.layers.42.block_sparse_moe.experts.209.w1", "model.layers.42.block_sparse_moe.experts.210.w1", "model.layers.42.block_sparse_moe.experts.211.w1", "model.layers.42.block_sparse_moe.experts.212.w1", "model.layers.42.block_sparse_moe.experts.213.w1", "model.layers.42.block_sparse_moe.experts.214.w1", "model.layers.42.block_sparse_moe.experts.215.w1", "model.layers.42.block_sparse_moe.experts.216.w1", "model.layers.42.block_sparse_moe.experts.217.w1", "model.layers.42.block_sparse_moe.experts.218.w1", "model.layers.42.block_sparse_moe.experts.219.w1", "model.layers.42.block_sparse_moe.experts.220.w1", "model.layers.42.block_sparse_moe.experts.221.w1", "model.layers.42.block_sparse_moe.experts.222.w1", "model.layers.42.block_sparse_moe.experts.223.w1", "model.layers.42.block_sparse_moe.experts.224.w1", "model.layers.42.block_sparse_moe.experts.225.w1", "model.layers.42.block_sparse_moe.experts.226.w1", "model.layers.42.block_sparse_moe.experts.227.w1", "model.layers.42.block_sparse_moe.experts.228.w1", "model.layers.42.block_sparse_moe.experts.229.w1", "model.layers.42.block_sparse_moe.experts.230.w1", "model.layers.42.block_sparse_moe.experts.231.w1", "model.layers.42.block_sparse_moe.experts.232.w1", "model.layers.42.block_sparse_moe.experts.233.w1", "model.layers.42.block_sparse_moe.experts.234.w1", "model.layers.42.block_sparse_moe.experts.235.w1", "model.layers.42.block_sparse_moe.experts.236.w1", "model.layers.42.block_sparse_moe.experts.237.w1", "model.layers.42.block_sparse_moe.experts.238.w1", "model.layers.42.block_sparse_moe.experts.239.w1", "model.layers.42.block_sparse_moe.experts.240.w1", "model.layers.42.block_sparse_moe.experts.241.w1", "model.layers.42.block_sparse_moe.experts.242.w1", "model.layers.42.block_sparse_moe.experts.243.w1", "model.layers.42.block_sparse_moe.experts.244.w1", "model.layers.42.block_sparse_moe.experts.245.w1", "model.layers.42.block_sparse_moe.experts.246.w1", "model.layers.42.block_sparse_moe.experts.247.w1", "model.layers.42.block_sparse_moe.experts.248.w1", "model.layers.42.block_sparse_moe.experts.249.w1", "model.layers.42.block_sparse_moe.experts.250.w1", "model.layers.42.block_sparse_moe.experts.251.w1", "model.layers.42.block_sparse_moe.experts.252.w1", "model.layers.42.block_sparse_moe.experts.253.w1", "model.layers.42.block_sparse_moe.experts.254.w1", "model.layers.42.block_sparse_moe.experts.255.w1", "model.layers.42.block_sparse_moe.experts.0.w3", "model.layers.42.block_sparse_moe.experts.1.w3", "model.layers.42.block_sparse_moe.experts.2.w3", "model.layers.42.block_sparse_moe.experts.3.w3", "model.layers.42.block_sparse_moe.experts.4.w3", "model.layers.42.block_sparse_moe.experts.5.w3", "model.layers.42.block_sparse_moe.experts.6.w3", "model.layers.42.block_sparse_moe.experts.7.w3", "model.layers.42.block_sparse_moe.experts.8.w3", "model.layers.42.block_sparse_moe.experts.9.w3", "model.layers.42.block_sparse_moe.experts.10.w3", "model.layers.42.block_sparse_moe.experts.11.w3", "model.layers.42.block_sparse_moe.experts.12.w3", "model.layers.42.block_sparse_moe.experts.13.w3", "model.layers.42.block_sparse_moe.experts.14.w3", "model.layers.42.block_sparse_moe.experts.15.w3", "model.layers.42.block_sparse_moe.experts.16.w3", "model.layers.42.block_sparse_moe.experts.17.w3", "model.layers.42.block_sparse_moe.experts.18.w3", "model.layers.42.block_sparse_moe.experts.19.w3", "model.layers.42.block_sparse_moe.experts.20.w3", "model.layers.42.block_sparse_moe.experts.21.w3", "model.layers.42.block_sparse_moe.experts.22.w3", "model.layers.42.block_sparse_moe.experts.23.w3", "model.layers.42.block_sparse_moe.experts.24.w3", "model.layers.42.block_sparse_moe.experts.25.w3", "model.layers.42.block_sparse_moe.experts.26.w3", "model.layers.42.block_sparse_moe.experts.27.w3", "model.layers.42.block_sparse_moe.experts.28.w3", "model.layers.42.block_sparse_moe.experts.29.w3", "model.layers.42.block_sparse_moe.experts.30.w3", "model.layers.42.block_sparse_moe.experts.31.w3", "model.layers.42.block_sparse_moe.experts.32.w3", "model.layers.42.block_sparse_moe.experts.33.w3", "model.layers.42.block_sparse_moe.experts.34.w3", "model.layers.42.block_sparse_moe.experts.35.w3", "model.layers.42.block_sparse_moe.experts.36.w3", "model.layers.42.block_sparse_moe.experts.37.w3", "model.layers.42.block_sparse_moe.experts.38.w3", "model.layers.42.block_sparse_moe.experts.39.w3", "model.layers.42.block_sparse_moe.experts.40.w3", "model.layers.42.block_sparse_moe.experts.41.w3", "model.layers.42.block_sparse_moe.experts.42.w3", "model.layers.42.block_sparse_moe.experts.43.w3", "model.layers.42.block_sparse_moe.experts.44.w3", "model.layers.42.block_sparse_moe.experts.45.w3", "model.layers.42.block_sparse_moe.experts.46.w3", "model.layers.42.block_sparse_moe.experts.47.w3", "model.layers.42.block_sparse_moe.experts.48.w3", "model.layers.42.block_sparse_moe.experts.49.w3", "model.layers.42.block_sparse_moe.experts.50.w3", "model.layers.42.block_sparse_moe.experts.51.w3", "model.layers.42.block_sparse_moe.experts.52.w3", "model.layers.42.block_sparse_moe.experts.53.w3", "model.layers.42.block_sparse_moe.experts.54.w3", "model.layers.42.block_sparse_moe.experts.55.w3", "model.layers.42.block_sparse_moe.experts.56.w3", "model.layers.42.block_sparse_moe.experts.57.w3", "model.layers.42.block_sparse_moe.experts.58.w3", "model.layers.42.block_sparse_moe.experts.59.w3", "model.layers.42.block_sparse_moe.experts.60.w3", "model.layers.42.block_sparse_moe.experts.61.w3", "model.layers.42.block_sparse_moe.experts.62.w3", "model.layers.42.block_sparse_moe.experts.63.w3", "model.layers.42.block_sparse_moe.experts.64.w3", "model.layers.42.block_sparse_moe.experts.65.w3", "model.layers.42.block_sparse_moe.experts.66.w3", "model.layers.42.block_sparse_moe.experts.67.w3", "model.layers.42.block_sparse_moe.experts.68.w3", "model.layers.42.block_sparse_moe.experts.69.w3", "model.layers.42.block_sparse_moe.experts.70.w3", "model.layers.42.block_sparse_moe.experts.71.w3", "model.layers.42.block_sparse_moe.experts.72.w3", "model.layers.42.block_sparse_moe.experts.73.w3", "model.layers.42.block_sparse_moe.experts.74.w3", "model.layers.42.block_sparse_moe.experts.75.w3", "model.layers.42.block_sparse_moe.experts.76.w3", "model.layers.42.block_sparse_moe.experts.77.w3", "model.layers.42.block_sparse_moe.experts.78.w3", "model.layers.42.block_sparse_moe.experts.79.w3", "model.layers.42.block_sparse_moe.experts.80.w3", "model.layers.42.block_sparse_moe.experts.81.w3", "model.layers.42.block_sparse_moe.experts.82.w3", "model.layers.42.block_sparse_moe.experts.83.w3", "model.layers.42.block_sparse_moe.experts.84.w3", "model.layers.42.block_sparse_moe.experts.85.w3", "model.layers.42.block_sparse_moe.experts.86.w3", "model.layers.42.block_sparse_moe.experts.87.w3", "model.layers.42.block_sparse_moe.experts.88.w3", "model.layers.42.block_sparse_moe.experts.89.w3", "model.layers.42.block_sparse_moe.experts.90.w3", "model.layers.42.block_sparse_moe.experts.91.w3", "model.layers.42.block_sparse_moe.experts.92.w3", "model.layers.42.block_sparse_moe.experts.93.w3", "model.layers.42.block_sparse_moe.experts.94.w3", "model.layers.42.block_sparse_moe.experts.95.w3", "model.layers.42.block_sparse_moe.experts.96.w3", "model.layers.42.block_sparse_moe.experts.97.w3", "model.layers.42.block_sparse_moe.experts.98.w3", "model.layers.42.block_sparse_moe.experts.99.w3", "model.layers.42.block_sparse_moe.experts.100.w3", "model.layers.42.block_sparse_moe.experts.101.w3", "model.layers.42.block_sparse_moe.experts.102.w3", "model.layers.42.block_sparse_moe.experts.103.w3", "model.layers.42.block_sparse_moe.experts.104.w3", "model.layers.42.block_sparse_moe.experts.105.w3", "model.layers.42.block_sparse_moe.experts.106.w3", "model.layers.42.block_sparse_moe.experts.107.w3", "model.layers.42.block_sparse_moe.experts.108.w3", "model.layers.42.block_sparse_moe.experts.109.w3", "model.layers.42.block_sparse_moe.experts.110.w3", "model.layers.42.block_sparse_moe.experts.111.w3", "model.layers.42.block_sparse_moe.experts.112.w3", "model.layers.42.block_sparse_moe.experts.113.w3", "model.layers.42.block_sparse_moe.experts.114.w3", "model.layers.42.block_sparse_moe.experts.115.w3", "model.layers.42.block_sparse_moe.experts.116.w3", "model.layers.42.block_sparse_moe.experts.117.w3", "model.layers.42.block_sparse_moe.experts.118.w3", "model.layers.42.block_sparse_moe.experts.119.w3", "model.layers.42.block_sparse_moe.experts.120.w3", "model.layers.42.block_sparse_moe.experts.121.w3", "model.layers.42.block_sparse_moe.experts.122.w3", "model.layers.42.block_sparse_moe.experts.123.w3", "model.layers.42.block_sparse_moe.experts.124.w3", "model.layers.42.block_sparse_moe.experts.125.w3", "model.layers.42.block_sparse_moe.experts.126.w3", "model.layers.42.block_sparse_moe.experts.127.w3", "model.layers.42.block_sparse_moe.experts.128.w3", "model.layers.42.block_sparse_moe.experts.129.w3", "model.layers.42.block_sparse_moe.experts.130.w3", "model.layers.42.block_sparse_moe.experts.131.w3", "model.layers.42.block_sparse_moe.experts.132.w3", "model.layers.42.block_sparse_moe.experts.133.w3", "model.layers.42.block_sparse_moe.experts.134.w3", "model.layers.42.block_sparse_moe.experts.135.w3", "model.layers.42.block_sparse_moe.experts.136.w3", "model.layers.42.block_sparse_moe.experts.137.w3", "model.layers.42.block_sparse_moe.experts.138.w3", "model.layers.42.block_sparse_moe.experts.139.w3", "model.layers.42.block_sparse_moe.experts.140.w3", "model.layers.42.block_sparse_moe.experts.141.w3", "model.layers.42.block_sparse_moe.experts.142.w3", "model.layers.42.block_sparse_moe.experts.143.w3", "model.layers.42.block_sparse_moe.experts.144.w3", "model.layers.42.block_sparse_moe.experts.145.w3", "model.layers.42.block_sparse_moe.experts.146.w3", "model.layers.42.block_sparse_moe.experts.147.w3", "model.layers.42.block_sparse_moe.experts.148.w3", "model.layers.42.block_sparse_moe.experts.149.w3", "model.layers.42.block_sparse_moe.experts.150.w3", "model.layers.42.block_sparse_moe.experts.151.w3", "model.layers.42.block_sparse_moe.experts.152.w3", "model.layers.42.block_sparse_moe.experts.153.w3", "model.layers.42.block_sparse_moe.experts.154.w3", "model.layers.42.block_sparse_moe.experts.155.w3", "model.layers.42.block_sparse_moe.experts.156.w3", "model.layers.42.block_sparse_moe.experts.157.w3", "model.layers.42.block_sparse_moe.experts.158.w3", "model.layers.42.block_sparse_moe.experts.159.w3", "model.layers.42.block_sparse_moe.experts.160.w3", "model.layers.42.block_sparse_moe.experts.161.w3", "model.layers.42.block_sparse_moe.experts.162.w3", "model.layers.42.block_sparse_moe.experts.163.w3", "model.layers.42.block_sparse_moe.experts.164.w3", "model.layers.42.block_sparse_moe.experts.165.w3", "model.layers.42.block_sparse_moe.experts.166.w3", "model.layers.42.block_sparse_moe.experts.167.w3", "model.layers.42.block_sparse_moe.experts.168.w3", "model.layers.42.block_sparse_moe.experts.169.w3", "model.layers.42.block_sparse_moe.experts.170.w3", "model.layers.42.block_sparse_moe.experts.171.w3", "model.layers.42.block_sparse_moe.experts.172.w3", "model.layers.42.block_sparse_moe.experts.173.w3", "model.layers.42.block_sparse_moe.experts.174.w3", "model.layers.42.block_sparse_moe.experts.175.w3", "model.layers.42.block_sparse_moe.experts.176.w3", "model.layers.42.block_sparse_moe.experts.177.w3", "model.layers.42.block_sparse_moe.experts.178.w3", "model.layers.42.block_sparse_moe.experts.179.w3", "model.layers.42.block_sparse_moe.experts.180.w3", "model.layers.42.block_sparse_moe.experts.181.w3", "model.layers.42.block_sparse_moe.experts.182.w3", "model.layers.42.block_sparse_moe.experts.183.w3", "model.layers.42.block_sparse_moe.experts.184.w3", "model.layers.42.block_sparse_moe.experts.185.w3", "model.layers.42.block_sparse_moe.experts.186.w3", "model.layers.42.block_sparse_moe.experts.187.w3", "model.layers.42.block_sparse_moe.experts.188.w3", "model.layers.42.block_sparse_moe.experts.189.w3", "model.layers.42.block_sparse_moe.experts.190.w3", "model.layers.42.block_sparse_moe.experts.191.w3", "model.layers.42.block_sparse_moe.experts.192.w3", "model.layers.42.block_sparse_moe.experts.193.w3", "model.layers.42.block_sparse_moe.experts.194.w3", "model.layers.42.block_sparse_moe.experts.195.w3", "model.layers.42.block_sparse_moe.experts.196.w3", "model.layers.42.block_sparse_moe.experts.197.w3", "model.layers.42.block_sparse_moe.experts.198.w3", "model.layers.42.block_sparse_moe.experts.199.w3", "model.layers.42.block_sparse_moe.experts.200.w3", "model.layers.42.block_sparse_moe.experts.201.w3", "model.layers.42.block_sparse_moe.experts.202.w3", "model.layers.42.block_sparse_moe.experts.203.w3", "model.layers.42.block_sparse_moe.experts.204.w3", "model.layers.42.block_sparse_moe.experts.205.w3", "model.layers.42.block_sparse_moe.experts.206.w3", "model.layers.42.block_sparse_moe.experts.207.w3", "model.layers.42.block_sparse_moe.experts.208.w3", "model.layers.42.block_sparse_moe.experts.209.w3", "model.layers.42.block_sparse_moe.experts.210.w3", "model.layers.42.block_sparse_moe.experts.211.w3", "model.layers.42.block_sparse_moe.experts.212.w3", "model.layers.42.block_sparse_moe.experts.213.w3", "model.layers.42.block_sparse_moe.experts.214.w3", "model.layers.42.block_sparse_moe.experts.215.w3", "model.layers.42.block_sparse_moe.experts.216.w3", "model.layers.42.block_sparse_moe.experts.217.w3", "model.layers.42.block_sparse_moe.experts.218.w3", "model.layers.42.block_sparse_moe.experts.219.w3", "model.layers.42.block_sparse_moe.experts.220.w3", "model.layers.42.block_sparse_moe.experts.221.w3", "model.layers.42.block_sparse_moe.experts.222.w3", "model.layers.42.block_sparse_moe.experts.223.w3", "model.layers.42.block_sparse_moe.experts.224.w3", "model.layers.42.block_sparse_moe.experts.225.w3", "model.layers.42.block_sparse_moe.experts.226.w3", "model.layers.42.block_sparse_moe.experts.227.w3", "model.layers.42.block_sparse_moe.experts.228.w3", "model.layers.42.block_sparse_moe.experts.229.w3", "model.layers.42.block_sparse_moe.experts.230.w3", "model.layers.42.block_sparse_moe.experts.231.w3", "model.layers.42.block_sparse_moe.experts.232.w3", "model.layers.42.block_sparse_moe.experts.233.w3", "model.layers.42.block_sparse_moe.experts.234.w3", "model.layers.42.block_sparse_moe.experts.235.w3", "model.layers.42.block_sparse_moe.experts.236.w3", "model.layers.42.block_sparse_moe.experts.237.w3", "model.layers.42.block_sparse_moe.experts.238.w3", "model.layers.42.block_sparse_moe.experts.239.w3", "model.layers.42.block_sparse_moe.experts.240.w3", "model.layers.42.block_sparse_moe.experts.241.w3", "model.layers.42.block_sparse_moe.experts.242.w3", "model.layers.42.block_sparse_moe.experts.243.w3", "model.layers.42.block_sparse_moe.experts.244.w3", "model.layers.42.block_sparse_moe.experts.245.w3", "model.layers.42.block_sparse_moe.experts.246.w3", "model.layers.42.block_sparse_moe.experts.247.w3", "model.layers.42.block_sparse_moe.experts.248.w3", "model.layers.42.block_sparse_moe.experts.249.w3", "model.layers.42.block_sparse_moe.experts.250.w3", "model.layers.42.block_sparse_moe.experts.251.w3", "model.layers.42.block_sparse_moe.experts.252.w3", "model.layers.42.block_sparse_moe.experts.253.w3", "model.layers.42.block_sparse_moe.experts.254.w3", "model.layers.42.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0021283119916915005, "dbits": 2415919104 } ] }, { "idx": 214, "layers": [ "model.layers.42.block_sparse_moe.experts.0.w2", "model.layers.42.block_sparse_moe.experts.1.w2", "model.layers.42.block_sparse_moe.experts.2.w2", "model.layers.42.block_sparse_moe.experts.3.w2", "model.layers.42.block_sparse_moe.experts.4.w2", "model.layers.42.block_sparse_moe.experts.5.w2", "model.layers.42.block_sparse_moe.experts.6.w2", "model.layers.42.block_sparse_moe.experts.7.w2", "model.layers.42.block_sparse_moe.experts.8.w2", "model.layers.42.block_sparse_moe.experts.9.w2", "model.layers.42.block_sparse_moe.experts.10.w2", "model.layers.42.block_sparse_moe.experts.11.w2", "model.layers.42.block_sparse_moe.experts.12.w2", "model.layers.42.block_sparse_moe.experts.13.w2", "model.layers.42.block_sparse_moe.experts.14.w2", "model.layers.42.block_sparse_moe.experts.15.w2", "model.layers.42.block_sparse_moe.experts.16.w2", "model.layers.42.block_sparse_moe.experts.17.w2", "model.layers.42.block_sparse_moe.experts.18.w2", "model.layers.42.block_sparse_moe.experts.19.w2", "model.layers.42.block_sparse_moe.experts.20.w2", "model.layers.42.block_sparse_moe.experts.21.w2", "model.layers.42.block_sparse_moe.experts.22.w2", "model.layers.42.block_sparse_moe.experts.23.w2", "model.layers.42.block_sparse_moe.experts.24.w2", "model.layers.42.block_sparse_moe.experts.25.w2", "model.layers.42.block_sparse_moe.experts.26.w2", "model.layers.42.block_sparse_moe.experts.27.w2", "model.layers.42.block_sparse_moe.experts.28.w2", "model.layers.42.block_sparse_moe.experts.29.w2", "model.layers.42.block_sparse_moe.experts.30.w2", "model.layers.42.block_sparse_moe.experts.31.w2", "model.layers.42.block_sparse_moe.experts.32.w2", "model.layers.42.block_sparse_moe.experts.33.w2", "model.layers.42.block_sparse_moe.experts.34.w2", "model.layers.42.block_sparse_moe.experts.35.w2", "model.layers.42.block_sparse_moe.experts.36.w2", "model.layers.42.block_sparse_moe.experts.37.w2", "model.layers.42.block_sparse_moe.experts.38.w2", "model.layers.42.block_sparse_moe.experts.39.w2", "model.layers.42.block_sparse_moe.experts.40.w2", "model.layers.42.block_sparse_moe.experts.41.w2", "model.layers.42.block_sparse_moe.experts.42.w2", "model.layers.42.block_sparse_moe.experts.43.w2", "model.layers.42.block_sparse_moe.experts.44.w2", "model.layers.42.block_sparse_moe.experts.45.w2", "model.layers.42.block_sparse_moe.experts.46.w2", "model.layers.42.block_sparse_moe.experts.47.w2", "model.layers.42.block_sparse_moe.experts.48.w2", "model.layers.42.block_sparse_moe.experts.49.w2", "model.layers.42.block_sparse_moe.experts.50.w2", "model.layers.42.block_sparse_moe.experts.51.w2", "model.layers.42.block_sparse_moe.experts.52.w2", "model.layers.42.block_sparse_moe.experts.53.w2", "model.layers.42.block_sparse_moe.experts.54.w2", "model.layers.42.block_sparse_moe.experts.55.w2", "model.layers.42.block_sparse_moe.experts.56.w2", "model.layers.42.block_sparse_moe.experts.57.w2", "model.layers.42.block_sparse_moe.experts.58.w2", "model.layers.42.block_sparse_moe.experts.59.w2", "model.layers.42.block_sparse_moe.experts.60.w2", "model.layers.42.block_sparse_moe.experts.61.w2", "model.layers.42.block_sparse_moe.experts.62.w2", "model.layers.42.block_sparse_moe.experts.63.w2", "model.layers.42.block_sparse_moe.experts.64.w2", "model.layers.42.block_sparse_moe.experts.65.w2", "model.layers.42.block_sparse_moe.experts.66.w2", "model.layers.42.block_sparse_moe.experts.67.w2", "model.layers.42.block_sparse_moe.experts.68.w2", "model.layers.42.block_sparse_moe.experts.69.w2", "model.layers.42.block_sparse_moe.experts.70.w2", "model.layers.42.block_sparse_moe.experts.71.w2", "model.layers.42.block_sparse_moe.experts.72.w2", "model.layers.42.block_sparse_moe.experts.73.w2", "model.layers.42.block_sparse_moe.experts.74.w2", "model.layers.42.block_sparse_moe.experts.75.w2", "model.layers.42.block_sparse_moe.experts.76.w2", "model.layers.42.block_sparse_moe.experts.77.w2", "model.layers.42.block_sparse_moe.experts.78.w2", "model.layers.42.block_sparse_moe.experts.79.w2", "model.layers.42.block_sparse_moe.experts.80.w2", "model.layers.42.block_sparse_moe.experts.81.w2", "model.layers.42.block_sparse_moe.experts.82.w2", "model.layers.42.block_sparse_moe.experts.83.w2", "model.layers.42.block_sparse_moe.experts.84.w2", "model.layers.42.block_sparse_moe.experts.85.w2", "model.layers.42.block_sparse_moe.experts.86.w2", "model.layers.42.block_sparse_moe.experts.87.w2", "model.layers.42.block_sparse_moe.experts.88.w2", "model.layers.42.block_sparse_moe.experts.89.w2", "model.layers.42.block_sparse_moe.experts.90.w2", "model.layers.42.block_sparse_moe.experts.91.w2", "model.layers.42.block_sparse_moe.experts.92.w2", "model.layers.42.block_sparse_moe.experts.93.w2", "model.layers.42.block_sparse_moe.experts.94.w2", "model.layers.42.block_sparse_moe.experts.95.w2", "model.layers.42.block_sparse_moe.experts.96.w2", "model.layers.42.block_sparse_moe.experts.97.w2", "model.layers.42.block_sparse_moe.experts.98.w2", "model.layers.42.block_sparse_moe.experts.99.w2", "model.layers.42.block_sparse_moe.experts.100.w2", "model.layers.42.block_sparse_moe.experts.101.w2", "model.layers.42.block_sparse_moe.experts.102.w2", "model.layers.42.block_sparse_moe.experts.103.w2", "model.layers.42.block_sparse_moe.experts.104.w2", "model.layers.42.block_sparse_moe.experts.105.w2", "model.layers.42.block_sparse_moe.experts.106.w2", "model.layers.42.block_sparse_moe.experts.107.w2", "model.layers.42.block_sparse_moe.experts.108.w2", "model.layers.42.block_sparse_moe.experts.109.w2", "model.layers.42.block_sparse_moe.experts.110.w2", "model.layers.42.block_sparse_moe.experts.111.w2", "model.layers.42.block_sparse_moe.experts.112.w2", "model.layers.42.block_sparse_moe.experts.113.w2", "model.layers.42.block_sparse_moe.experts.114.w2", "model.layers.42.block_sparse_moe.experts.115.w2", "model.layers.42.block_sparse_moe.experts.116.w2", "model.layers.42.block_sparse_moe.experts.117.w2", "model.layers.42.block_sparse_moe.experts.118.w2", "model.layers.42.block_sparse_moe.experts.119.w2", "model.layers.42.block_sparse_moe.experts.120.w2", "model.layers.42.block_sparse_moe.experts.121.w2", "model.layers.42.block_sparse_moe.experts.122.w2", "model.layers.42.block_sparse_moe.experts.123.w2", "model.layers.42.block_sparse_moe.experts.124.w2", "model.layers.42.block_sparse_moe.experts.125.w2", "model.layers.42.block_sparse_moe.experts.126.w2", "model.layers.42.block_sparse_moe.experts.127.w2", "model.layers.42.block_sparse_moe.experts.128.w2", "model.layers.42.block_sparse_moe.experts.129.w2", "model.layers.42.block_sparse_moe.experts.130.w2", "model.layers.42.block_sparse_moe.experts.131.w2", "model.layers.42.block_sparse_moe.experts.132.w2", "model.layers.42.block_sparse_moe.experts.133.w2", "model.layers.42.block_sparse_moe.experts.134.w2", "model.layers.42.block_sparse_moe.experts.135.w2", "model.layers.42.block_sparse_moe.experts.136.w2", "model.layers.42.block_sparse_moe.experts.137.w2", "model.layers.42.block_sparse_moe.experts.138.w2", "model.layers.42.block_sparse_moe.experts.139.w2", "model.layers.42.block_sparse_moe.experts.140.w2", "model.layers.42.block_sparse_moe.experts.141.w2", "model.layers.42.block_sparse_moe.experts.142.w2", "model.layers.42.block_sparse_moe.experts.143.w2", "model.layers.42.block_sparse_moe.experts.144.w2", "model.layers.42.block_sparse_moe.experts.145.w2", "model.layers.42.block_sparse_moe.experts.146.w2", "model.layers.42.block_sparse_moe.experts.147.w2", "model.layers.42.block_sparse_moe.experts.148.w2", "model.layers.42.block_sparse_moe.experts.149.w2", "model.layers.42.block_sparse_moe.experts.150.w2", "model.layers.42.block_sparse_moe.experts.151.w2", "model.layers.42.block_sparse_moe.experts.152.w2", "model.layers.42.block_sparse_moe.experts.153.w2", "model.layers.42.block_sparse_moe.experts.154.w2", "model.layers.42.block_sparse_moe.experts.155.w2", "model.layers.42.block_sparse_moe.experts.156.w2", "model.layers.42.block_sparse_moe.experts.157.w2", "model.layers.42.block_sparse_moe.experts.158.w2", "model.layers.42.block_sparse_moe.experts.159.w2", "model.layers.42.block_sparse_moe.experts.160.w2", "model.layers.42.block_sparse_moe.experts.161.w2", "model.layers.42.block_sparse_moe.experts.162.w2", "model.layers.42.block_sparse_moe.experts.163.w2", "model.layers.42.block_sparse_moe.experts.164.w2", "model.layers.42.block_sparse_moe.experts.165.w2", "model.layers.42.block_sparse_moe.experts.166.w2", "model.layers.42.block_sparse_moe.experts.167.w2", "model.layers.42.block_sparse_moe.experts.168.w2", "model.layers.42.block_sparse_moe.experts.169.w2", "model.layers.42.block_sparse_moe.experts.170.w2", "model.layers.42.block_sparse_moe.experts.171.w2", "model.layers.42.block_sparse_moe.experts.172.w2", "model.layers.42.block_sparse_moe.experts.173.w2", "model.layers.42.block_sparse_moe.experts.174.w2", "model.layers.42.block_sparse_moe.experts.175.w2", "model.layers.42.block_sparse_moe.experts.176.w2", "model.layers.42.block_sparse_moe.experts.177.w2", "model.layers.42.block_sparse_moe.experts.178.w2", "model.layers.42.block_sparse_moe.experts.179.w2", "model.layers.42.block_sparse_moe.experts.180.w2", "model.layers.42.block_sparse_moe.experts.181.w2", "model.layers.42.block_sparse_moe.experts.182.w2", "model.layers.42.block_sparse_moe.experts.183.w2", "model.layers.42.block_sparse_moe.experts.184.w2", "model.layers.42.block_sparse_moe.experts.185.w2", "model.layers.42.block_sparse_moe.experts.186.w2", "model.layers.42.block_sparse_moe.experts.187.w2", "model.layers.42.block_sparse_moe.experts.188.w2", "model.layers.42.block_sparse_moe.experts.189.w2", "model.layers.42.block_sparse_moe.experts.190.w2", "model.layers.42.block_sparse_moe.experts.191.w2", "model.layers.42.block_sparse_moe.experts.192.w2", "model.layers.42.block_sparse_moe.experts.193.w2", "model.layers.42.block_sparse_moe.experts.194.w2", "model.layers.42.block_sparse_moe.experts.195.w2", "model.layers.42.block_sparse_moe.experts.196.w2", "model.layers.42.block_sparse_moe.experts.197.w2", "model.layers.42.block_sparse_moe.experts.198.w2", "model.layers.42.block_sparse_moe.experts.199.w2", "model.layers.42.block_sparse_moe.experts.200.w2", "model.layers.42.block_sparse_moe.experts.201.w2", "model.layers.42.block_sparse_moe.experts.202.w2", "model.layers.42.block_sparse_moe.experts.203.w2", "model.layers.42.block_sparse_moe.experts.204.w2", "model.layers.42.block_sparse_moe.experts.205.w2", "model.layers.42.block_sparse_moe.experts.206.w2", "model.layers.42.block_sparse_moe.experts.207.w2", "model.layers.42.block_sparse_moe.experts.208.w2", "model.layers.42.block_sparse_moe.experts.209.w2", "model.layers.42.block_sparse_moe.experts.210.w2", "model.layers.42.block_sparse_moe.experts.211.w2", "model.layers.42.block_sparse_moe.experts.212.w2", "model.layers.42.block_sparse_moe.experts.213.w2", "model.layers.42.block_sparse_moe.experts.214.w2", "model.layers.42.block_sparse_moe.experts.215.w2", "model.layers.42.block_sparse_moe.experts.216.w2", "model.layers.42.block_sparse_moe.experts.217.w2", "model.layers.42.block_sparse_moe.experts.218.w2", "model.layers.42.block_sparse_moe.experts.219.w2", "model.layers.42.block_sparse_moe.experts.220.w2", "model.layers.42.block_sparse_moe.experts.221.w2", "model.layers.42.block_sparse_moe.experts.222.w2", "model.layers.42.block_sparse_moe.experts.223.w2", "model.layers.42.block_sparse_moe.experts.224.w2", "model.layers.42.block_sparse_moe.experts.225.w2", "model.layers.42.block_sparse_moe.experts.226.w2", "model.layers.42.block_sparse_moe.experts.227.w2", "model.layers.42.block_sparse_moe.experts.228.w2", "model.layers.42.block_sparse_moe.experts.229.w2", "model.layers.42.block_sparse_moe.experts.230.w2", "model.layers.42.block_sparse_moe.experts.231.w2", "model.layers.42.block_sparse_moe.experts.232.w2", "model.layers.42.block_sparse_moe.experts.233.w2", "model.layers.42.block_sparse_moe.experts.234.w2", "model.layers.42.block_sparse_moe.experts.235.w2", "model.layers.42.block_sparse_moe.experts.236.w2", "model.layers.42.block_sparse_moe.experts.237.w2", "model.layers.42.block_sparse_moe.experts.238.w2", "model.layers.42.block_sparse_moe.experts.239.w2", "model.layers.42.block_sparse_moe.experts.240.w2", "model.layers.42.block_sparse_moe.experts.241.w2", "model.layers.42.block_sparse_moe.experts.242.w2", "model.layers.42.block_sparse_moe.experts.243.w2", "model.layers.42.block_sparse_moe.experts.244.w2", "model.layers.42.block_sparse_moe.experts.245.w2", "model.layers.42.block_sparse_moe.experts.246.w2", "model.layers.42.block_sparse_moe.experts.247.w2", "model.layers.42.block_sparse_moe.experts.248.w2", "model.layers.42.block_sparse_moe.experts.249.w2", "model.layers.42.block_sparse_moe.experts.250.w2", "model.layers.42.block_sparse_moe.experts.251.w2", "model.layers.42.block_sparse_moe.experts.252.w2", "model.layers.42.block_sparse_moe.experts.253.w2", "model.layers.42.block_sparse_moe.experts.254.w2", "model.layers.42.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00031574368476861014, "dbits": 1207959552 } ] }, { "idx": 215, "layers": [ "model.layers.43.self_attn.q_proj" ], "candidates": [ { "dkld": 0.002353951334953308, "dbits": 18874368 } ] }, { "idx": 216, "layers": [ "model.layers.43.self_attn.k_proj", "model.layers.43.self_attn.v_proj" ], "candidates": [ { "dkld": -0.001551491022109941, "dbits": 6291456 } ] }, { "idx": 217, "layers": [ "model.layers.43.self_attn.o_proj" ], "candidates": [ { "dkld": 0.005973055958747864, "dbits": 18874368 } ] }, { "idx": 218, "layers": [ "model.layers.43.block_sparse_moe.experts.0.w1", "model.layers.43.block_sparse_moe.experts.1.w1", "model.layers.43.block_sparse_moe.experts.2.w1", "model.layers.43.block_sparse_moe.experts.3.w1", "model.layers.43.block_sparse_moe.experts.4.w1", "model.layers.43.block_sparse_moe.experts.5.w1", "model.layers.43.block_sparse_moe.experts.6.w1", "model.layers.43.block_sparse_moe.experts.7.w1", "model.layers.43.block_sparse_moe.experts.8.w1", "model.layers.43.block_sparse_moe.experts.9.w1", "model.layers.43.block_sparse_moe.experts.10.w1", "model.layers.43.block_sparse_moe.experts.11.w1", "model.layers.43.block_sparse_moe.experts.12.w1", "model.layers.43.block_sparse_moe.experts.13.w1", "model.layers.43.block_sparse_moe.experts.14.w1", "model.layers.43.block_sparse_moe.experts.15.w1", "model.layers.43.block_sparse_moe.experts.16.w1", "model.layers.43.block_sparse_moe.experts.17.w1", "model.layers.43.block_sparse_moe.experts.18.w1", "model.layers.43.block_sparse_moe.experts.19.w1", "model.layers.43.block_sparse_moe.experts.20.w1", "model.layers.43.block_sparse_moe.experts.21.w1", "model.layers.43.block_sparse_moe.experts.22.w1", "model.layers.43.block_sparse_moe.experts.23.w1", "model.layers.43.block_sparse_moe.experts.24.w1", "model.layers.43.block_sparse_moe.experts.25.w1", "model.layers.43.block_sparse_moe.experts.26.w1", "model.layers.43.block_sparse_moe.experts.27.w1", "model.layers.43.block_sparse_moe.experts.28.w1", "model.layers.43.block_sparse_moe.experts.29.w1", "model.layers.43.block_sparse_moe.experts.30.w1", "model.layers.43.block_sparse_moe.experts.31.w1", "model.layers.43.block_sparse_moe.experts.32.w1", "model.layers.43.block_sparse_moe.experts.33.w1", "model.layers.43.block_sparse_moe.experts.34.w1", "model.layers.43.block_sparse_moe.experts.35.w1", "model.layers.43.block_sparse_moe.experts.36.w1", "model.layers.43.block_sparse_moe.experts.37.w1", "model.layers.43.block_sparse_moe.experts.38.w1", "model.layers.43.block_sparse_moe.experts.39.w1", "model.layers.43.block_sparse_moe.experts.40.w1", "model.layers.43.block_sparse_moe.experts.41.w1", "model.layers.43.block_sparse_moe.experts.42.w1", "model.layers.43.block_sparse_moe.experts.43.w1", "model.layers.43.block_sparse_moe.experts.44.w1", "model.layers.43.block_sparse_moe.experts.45.w1", "model.layers.43.block_sparse_moe.experts.46.w1", "model.layers.43.block_sparse_moe.experts.47.w1", "model.layers.43.block_sparse_moe.experts.48.w1", "model.layers.43.block_sparse_moe.experts.49.w1", "model.layers.43.block_sparse_moe.experts.50.w1", "model.layers.43.block_sparse_moe.experts.51.w1", "model.layers.43.block_sparse_moe.experts.52.w1", "model.layers.43.block_sparse_moe.experts.53.w1", "model.layers.43.block_sparse_moe.experts.54.w1", "model.layers.43.block_sparse_moe.experts.55.w1", "model.layers.43.block_sparse_moe.experts.56.w1", "model.layers.43.block_sparse_moe.experts.57.w1", "model.layers.43.block_sparse_moe.experts.58.w1", "model.layers.43.block_sparse_moe.experts.59.w1", "model.layers.43.block_sparse_moe.experts.60.w1", "model.layers.43.block_sparse_moe.experts.61.w1", "model.layers.43.block_sparse_moe.experts.62.w1", "model.layers.43.block_sparse_moe.experts.63.w1", "model.layers.43.block_sparse_moe.experts.64.w1", "model.layers.43.block_sparse_moe.experts.65.w1", "model.layers.43.block_sparse_moe.experts.66.w1", "model.layers.43.block_sparse_moe.experts.67.w1", "model.layers.43.block_sparse_moe.experts.68.w1", "model.layers.43.block_sparse_moe.experts.69.w1", "model.layers.43.block_sparse_moe.experts.70.w1", "model.layers.43.block_sparse_moe.experts.71.w1", "model.layers.43.block_sparse_moe.experts.72.w1", "model.layers.43.block_sparse_moe.experts.73.w1", "model.layers.43.block_sparse_moe.experts.74.w1", "model.layers.43.block_sparse_moe.experts.75.w1", "model.layers.43.block_sparse_moe.experts.76.w1", "model.layers.43.block_sparse_moe.experts.77.w1", "model.layers.43.block_sparse_moe.experts.78.w1", "model.layers.43.block_sparse_moe.experts.79.w1", "model.layers.43.block_sparse_moe.experts.80.w1", "model.layers.43.block_sparse_moe.experts.81.w1", "model.layers.43.block_sparse_moe.experts.82.w1", "model.layers.43.block_sparse_moe.experts.83.w1", "model.layers.43.block_sparse_moe.experts.84.w1", "model.layers.43.block_sparse_moe.experts.85.w1", "model.layers.43.block_sparse_moe.experts.86.w1", "model.layers.43.block_sparse_moe.experts.87.w1", "model.layers.43.block_sparse_moe.experts.88.w1", "model.layers.43.block_sparse_moe.experts.89.w1", "model.layers.43.block_sparse_moe.experts.90.w1", "model.layers.43.block_sparse_moe.experts.91.w1", "model.layers.43.block_sparse_moe.experts.92.w1", "model.layers.43.block_sparse_moe.experts.93.w1", "model.layers.43.block_sparse_moe.experts.94.w1", "model.layers.43.block_sparse_moe.experts.95.w1", "model.layers.43.block_sparse_moe.experts.96.w1", "model.layers.43.block_sparse_moe.experts.97.w1", "model.layers.43.block_sparse_moe.experts.98.w1", "model.layers.43.block_sparse_moe.experts.99.w1", "model.layers.43.block_sparse_moe.experts.100.w1", "model.layers.43.block_sparse_moe.experts.101.w1", "model.layers.43.block_sparse_moe.experts.102.w1", "model.layers.43.block_sparse_moe.experts.103.w1", "model.layers.43.block_sparse_moe.experts.104.w1", "model.layers.43.block_sparse_moe.experts.105.w1", "model.layers.43.block_sparse_moe.experts.106.w1", "model.layers.43.block_sparse_moe.experts.107.w1", "model.layers.43.block_sparse_moe.experts.108.w1", "model.layers.43.block_sparse_moe.experts.109.w1", "model.layers.43.block_sparse_moe.experts.110.w1", "model.layers.43.block_sparse_moe.experts.111.w1", "model.layers.43.block_sparse_moe.experts.112.w1", "model.layers.43.block_sparse_moe.experts.113.w1", "model.layers.43.block_sparse_moe.experts.114.w1", "model.layers.43.block_sparse_moe.experts.115.w1", "model.layers.43.block_sparse_moe.experts.116.w1", "model.layers.43.block_sparse_moe.experts.117.w1", "model.layers.43.block_sparse_moe.experts.118.w1", "model.layers.43.block_sparse_moe.experts.119.w1", "model.layers.43.block_sparse_moe.experts.120.w1", "model.layers.43.block_sparse_moe.experts.121.w1", "model.layers.43.block_sparse_moe.experts.122.w1", "model.layers.43.block_sparse_moe.experts.123.w1", "model.layers.43.block_sparse_moe.experts.124.w1", "model.layers.43.block_sparse_moe.experts.125.w1", "model.layers.43.block_sparse_moe.experts.126.w1", "model.layers.43.block_sparse_moe.experts.127.w1", "model.layers.43.block_sparse_moe.experts.128.w1", "model.layers.43.block_sparse_moe.experts.129.w1", "model.layers.43.block_sparse_moe.experts.130.w1", "model.layers.43.block_sparse_moe.experts.131.w1", "model.layers.43.block_sparse_moe.experts.132.w1", "model.layers.43.block_sparse_moe.experts.133.w1", "model.layers.43.block_sparse_moe.experts.134.w1", "model.layers.43.block_sparse_moe.experts.135.w1", "model.layers.43.block_sparse_moe.experts.136.w1", "model.layers.43.block_sparse_moe.experts.137.w1", "model.layers.43.block_sparse_moe.experts.138.w1", "model.layers.43.block_sparse_moe.experts.139.w1", "model.layers.43.block_sparse_moe.experts.140.w1", "model.layers.43.block_sparse_moe.experts.141.w1", "model.layers.43.block_sparse_moe.experts.142.w1", "model.layers.43.block_sparse_moe.experts.143.w1", "model.layers.43.block_sparse_moe.experts.144.w1", "model.layers.43.block_sparse_moe.experts.145.w1", "model.layers.43.block_sparse_moe.experts.146.w1", "model.layers.43.block_sparse_moe.experts.147.w1", "model.layers.43.block_sparse_moe.experts.148.w1", "model.layers.43.block_sparse_moe.experts.149.w1", "model.layers.43.block_sparse_moe.experts.150.w1", "model.layers.43.block_sparse_moe.experts.151.w1", "model.layers.43.block_sparse_moe.experts.152.w1", "model.layers.43.block_sparse_moe.experts.153.w1", "model.layers.43.block_sparse_moe.experts.154.w1", "model.layers.43.block_sparse_moe.experts.155.w1", "model.layers.43.block_sparse_moe.experts.156.w1", "model.layers.43.block_sparse_moe.experts.157.w1", "model.layers.43.block_sparse_moe.experts.158.w1", "model.layers.43.block_sparse_moe.experts.159.w1", "model.layers.43.block_sparse_moe.experts.160.w1", "model.layers.43.block_sparse_moe.experts.161.w1", "model.layers.43.block_sparse_moe.experts.162.w1", "model.layers.43.block_sparse_moe.experts.163.w1", "model.layers.43.block_sparse_moe.experts.164.w1", "model.layers.43.block_sparse_moe.experts.165.w1", "model.layers.43.block_sparse_moe.experts.166.w1", "model.layers.43.block_sparse_moe.experts.167.w1", "model.layers.43.block_sparse_moe.experts.168.w1", "model.layers.43.block_sparse_moe.experts.169.w1", "model.layers.43.block_sparse_moe.experts.170.w1", "model.layers.43.block_sparse_moe.experts.171.w1", "model.layers.43.block_sparse_moe.experts.172.w1", "model.layers.43.block_sparse_moe.experts.173.w1", "model.layers.43.block_sparse_moe.experts.174.w1", "model.layers.43.block_sparse_moe.experts.175.w1", "model.layers.43.block_sparse_moe.experts.176.w1", "model.layers.43.block_sparse_moe.experts.177.w1", "model.layers.43.block_sparse_moe.experts.178.w1", "model.layers.43.block_sparse_moe.experts.179.w1", "model.layers.43.block_sparse_moe.experts.180.w1", "model.layers.43.block_sparse_moe.experts.181.w1", "model.layers.43.block_sparse_moe.experts.182.w1", "model.layers.43.block_sparse_moe.experts.183.w1", "model.layers.43.block_sparse_moe.experts.184.w1", "model.layers.43.block_sparse_moe.experts.185.w1", "model.layers.43.block_sparse_moe.experts.186.w1", "model.layers.43.block_sparse_moe.experts.187.w1", "model.layers.43.block_sparse_moe.experts.188.w1", "model.layers.43.block_sparse_moe.experts.189.w1", "model.layers.43.block_sparse_moe.experts.190.w1", "model.layers.43.block_sparse_moe.experts.191.w1", "model.layers.43.block_sparse_moe.experts.192.w1", "model.layers.43.block_sparse_moe.experts.193.w1", "model.layers.43.block_sparse_moe.experts.194.w1", "model.layers.43.block_sparse_moe.experts.195.w1", "model.layers.43.block_sparse_moe.experts.196.w1", "model.layers.43.block_sparse_moe.experts.197.w1", "model.layers.43.block_sparse_moe.experts.198.w1", "model.layers.43.block_sparse_moe.experts.199.w1", "model.layers.43.block_sparse_moe.experts.200.w1", "model.layers.43.block_sparse_moe.experts.201.w1", "model.layers.43.block_sparse_moe.experts.202.w1", "model.layers.43.block_sparse_moe.experts.203.w1", "model.layers.43.block_sparse_moe.experts.204.w1", "model.layers.43.block_sparse_moe.experts.205.w1", "model.layers.43.block_sparse_moe.experts.206.w1", "model.layers.43.block_sparse_moe.experts.207.w1", "model.layers.43.block_sparse_moe.experts.208.w1", "model.layers.43.block_sparse_moe.experts.209.w1", "model.layers.43.block_sparse_moe.experts.210.w1", "model.layers.43.block_sparse_moe.experts.211.w1", "model.layers.43.block_sparse_moe.experts.212.w1", "model.layers.43.block_sparse_moe.experts.213.w1", "model.layers.43.block_sparse_moe.experts.214.w1", "model.layers.43.block_sparse_moe.experts.215.w1", "model.layers.43.block_sparse_moe.experts.216.w1", "model.layers.43.block_sparse_moe.experts.217.w1", "model.layers.43.block_sparse_moe.experts.218.w1", "model.layers.43.block_sparse_moe.experts.219.w1", "model.layers.43.block_sparse_moe.experts.220.w1", "model.layers.43.block_sparse_moe.experts.221.w1", "model.layers.43.block_sparse_moe.experts.222.w1", "model.layers.43.block_sparse_moe.experts.223.w1", "model.layers.43.block_sparse_moe.experts.224.w1", "model.layers.43.block_sparse_moe.experts.225.w1", "model.layers.43.block_sparse_moe.experts.226.w1", "model.layers.43.block_sparse_moe.experts.227.w1", "model.layers.43.block_sparse_moe.experts.228.w1", "model.layers.43.block_sparse_moe.experts.229.w1", "model.layers.43.block_sparse_moe.experts.230.w1", "model.layers.43.block_sparse_moe.experts.231.w1", "model.layers.43.block_sparse_moe.experts.232.w1", "model.layers.43.block_sparse_moe.experts.233.w1", "model.layers.43.block_sparse_moe.experts.234.w1", "model.layers.43.block_sparse_moe.experts.235.w1", "model.layers.43.block_sparse_moe.experts.236.w1", "model.layers.43.block_sparse_moe.experts.237.w1", "model.layers.43.block_sparse_moe.experts.238.w1", "model.layers.43.block_sparse_moe.experts.239.w1", "model.layers.43.block_sparse_moe.experts.240.w1", "model.layers.43.block_sparse_moe.experts.241.w1", "model.layers.43.block_sparse_moe.experts.242.w1", "model.layers.43.block_sparse_moe.experts.243.w1", "model.layers.43.block_sparse_moe.experts.244.w1", "model.layers.43.block_sparse_moe.experts.245.w1", "model.layers.43.block_sparse_moe.experts.246.w1", "model.layers.43.block_sparse_moe.experts.247.w1", "model.layers.43.block_sparse_moe.experts.248.w1", "model.layers.43.block_sparse_moe.experts.249.w1", "model.layers.43.block_sparse_moe.experts.250.w1", "model.layers.43.block_sparse_moe.experts.251.w1", "model.layers.43.block_sparse_moe.experts.252.w1", "model.layers.43.block_sparse_moe.experts.253.w1", "model.layers.43.block_sparse_moe.experts.254.w1", "model.layers.43.block_sparse_moe.experts.255.w1", "model.layers.43.block_sparse_moe.experts.0.w3", "model.layers.43.block_sparse_moe.experts.1.w3", "model.layers.43.block_sparse_moe.experts.2.w3", "model.layers.43.block_sparse_moe.experts.3.w3", "model.layers.43.block_sparse_moe.experts.4.w3", "model.layers.43.block_sparse_moe.experts.5.w3", "model.layers.43.block_sparse_moe.experts.6.w3", "model.layers.43.block_sparse_moe.experts.7.w3", "model.layers.43.block_sparse_moe.experts.8.w3", "model.layers.43.block_sparse_moe.experts.9.w3", "model.layers.43.block_sparse_moe.experts.10.w3", "model.layers.43.block_sparse_moe.experts.11.w3", "model.layers.43.block_sparse_moe.experts.12.w3", "model.layers.43.block_sparse_moe.experts.13.w3", "model.layers.43.block_sparse_moe.experts.14.w3", "model.layers.43.block_sparse_moe.experts.15.w3", "model.layers.43.block_sparse_moe.experts.16.w3", "model.layers.43.block_sparse_moe.experts.17.w3", "model.layers.43.block_sparse_moe.experts.18.w3", "model.layers.43.block_sparse_moe.experts.19.w3", "model.layers.43.block_sparse_moe.experts.20.w3", "model.layers.43.block_sparse_moe.experts.21.w3", "model.layers.43.block_sparse_moe.experts.22.w3", "model.layers.43.block_sparse_moe.experts.23.w3", "model.layers.43.block_sparse_moe.experts.24.w3", "model.layers.43.block_sparse_moe.experts.25.w3", "model.layers.43.block_sparse_moe.experts.26.w3", "model.layers.43.block_sparse_moe.experts.27.w3", "model.layers.43.block_sparse_moe.experts.28.w3", "model.layers.43.block_sparse_moe.experts.29.w3", "model.layers.43.block_sparse_moe.experts.30.w3", "model.layers.43.block_sparse_moe.experts.31.w3", "model.layers.43.block_sparse_moe.experts.32.w3", "model.layers.43.block_sparse_moe.experts.33.w3", "model.layers.43.block_sparse_moe.experts.34.w3", "model.layers.43.block_sparse_moe.experts.35.w3", "model.layers.43.block_sparse_moe.experts.36.w3", "model.layers.43.block_sparse_moe.experts.37.w3", "model.layers.43.block_sparse_moe.experts.38.w3", "model.layers.43.block_sparse_moe.experts.39.w3", "model.layers.43.block_sparse_moe.experts.40.w3", "model.layers.43.block_sparse_moe.experts.41.w3", "model.layers.43.block_sparse_moe.experts.42.w3", "model.layers.43.block_sparse_moe.experts.43.w3", "model.layers.43.block_sparse_moe.experts.44.w3", "model.layers.43.block_sparse_moe.experts.45.w3", "model.layers.43.block_sparse_moe.experts.46.w3", "model.layers.43.block_sparse_moe.experts.47.w3", "model.layers.43.block_sparse_moe.experts.48.w3", "model.layers.43.block_sparse_moe.experts.49.w3", "model.layers.43.block_sparse_moe.experts.50.w3", "model.layers.43.block_sparse_moe.experts.51.w3", "model.layers.43.block_sparse_moe.experts.52.w3", "model.layers.43.block_sparse_moe.experts.53.w3", "model.layers.43.block_sparse_moe.experts.54.w3", "model.layers.43.block_sparse_moe.experts.55.w3", "model.layers.43.block_sparse_moe.experts.56.w3", "model.layers.43.block_sparse_moe.experts.57.w3", "model.layers.43.block_sparse_moe.experts.58.w3", "model.layers.43.block_sparse_moe.experts.59.w3", "model.layers.43.block_sparse_moe.experts.60.w3", "model.layers.43.block_sparse_moe.experts.61.w3", "model.layers.43.block_sparse_moe.experts.62.w3", "model.layers.43.block_sparse_moe.experts.63.w3", "model.layers.43.block_sparse_moe.experts.64.w3", "model.layers.43.block_sparse_moe.experts.65.w3", "model.layers.43.block_sparse_moe.experts.66.w3", "model.layers.43.block_sparse_moe.experts.67.w3", "model.layers.43.block_sparse_moe.experts.68.w3", "model.layers.43.block_sparse_moe.experts.69.w3", "model.layers.43.block_sparse_moe.experts.70.w3", "model.layers.43.block_sparse_moe.experts.71.w3", "model.layers.43.block_sparse_moe.experts.72.w3", "model.layers.43.block_sparse_moe.experts.73.w3", "model.layers.43.block_sparse_moe.experts.74.w3", "model.layers.43.block_sparse_moe.experts.75.w3", "model.layers.43.block_sparse_moe.experts.76.w3", "model.layers.43.block_sparse_moe.experts.77.w3", "model.layers.43.block_sparse_moe.experts.78.w3", "model.layers.43.block_sparse_moe.experts.79.w3", "model.layers.43.block_sparse_moe.experts.80.w3", "model.layers.43.block_sparse_moe.experts.81.w3", "model.layers.43.block_sparse_moe.experts.82.w3", "model.layers.43.block_sparse_moe.experts.83.w3", "model.layers.43.block_sparse_moe.experts.84.w3", "model.layers.43.block_sparse_moe.experts.85.w3", "model.layers.43.block_sparse_moe.experts.86.w3", "model.layers.43.block_sparse_moe.experts.87.w3", "model.layers.43.block_sparse_moe.experts.88.w3", "model.layers.43.block_sparse_moe.experts.89.w3", "model.layers.43.block_sparse_moe.experts.90.w3", "model.layers.43.block_sparse_moe.experts.91.w3", "model.layers.43.block_sparse_moe.experts.92.w3", "model.layers.43.block_sparse_moe.experts.93.w3", "model.layers.43.block_sparse_moe.experts.94.w3", "model.layers.43.block_sparse_moe.experts.95.w3", "model.layers.43.block_sparse_moe.experts.96.w3", "model.layers.43.block_sparse_moe.experts.97.w3", "model.layers.43.block_sparse_moe.experts.98.w3", "model.layers.43.block_sparse_moe.experts.99.w3", "model.layers.43.block_sparse_moe.experts.100.w3", "model.layers.43.block_sparse_moe.experts.101.w3", "model.layers.43.block_sparse_moe.experts.102.w3", "model.layers.43.block_sparse_moe.experts.103.w3", "model.layers.43.block_sparse_moe.experts.104.w3", "model.layers.43.block_sparse_moe.experts.105.w3", "model.layers.43.block_sparse_moe.experts.106.w3", "model.layers.43.block_sparse_moe.experts.107.w3", "model.layers.43.block_sparse_moe.experts.108.w3", "model.layers.43.block_sparse_moe.experts.109.w3", "model.layers.43.block_sparse_moe.experts.110.w3", "model.layers.43.block_sparse_moe.experts.111.w3", "model.layers.43.block_sparse_moe.experts.112.w3", "model.layers.43.block_sparse_moe.experts.113.w3", "model.layers.43.block_sparse_moe.experts.114.w3", "model.layers.43.block_sparse_moe.experts.115.w3", "model.layers.43.block_sparse_moe.experts.116.w3", "model.layers.43.block_sparse_moe.experts.117.w3", "model.layers.43.block_sparse_moe.experts.118.w3", "model.layers.43.block_sparse_moe.experts.119.w3", "model.layers.43.block_sparse_moe.experts.120.w3", "model.layers.43.block_sparse_moe.experts.121.w3", "model.layers.43.block_sparse_moe.experts.122.w3", "model.layers.43.block_sparse_moe.experts.123.w3", "model.layers.43.block_sparse_moe.experts.124.w3", "model.layers.43.block_sparse_moe.experts.125.w3", "model.layers.43.block_sparse_moe.experts.126.w3", "model.layers.43.block_sparse_moe.experts.127.w3", "model.layers.43.block_sparse_moe.experts.128.w3", "model.layers.43.block_sparse_moe.experts.129.w3", "model.layers.43.block_sparse_moe.experts.130.w3", "model.layers.43.block_sparse_moe.experts.131.w3", "model.layers.43.block_sparse_moe.experts.132.w3", "model.layers.43.block_sparse_moe.experts.133.w3", "model.layers.43.block_sparse_moe.experts.134.w3", "model.layers.43.block_sparse_moe.experts.135.w3", "model.layers.43.block_sparse_moe.experts.136.w3", "model.layers.43.block_sparse_moe.experts.137.w3", "model.layers.43.block_sparse_moe.experts.138.w3", "model.layers.43.block_sparse_moe.experts.139.w3", "model.layers.43.block_sparse_moe.experts.140.w3", "model.layers.43.block_sparse_moe.experts.141.w3", "model.layers.43.block_sparse_moe.experts.142.w3", "model.layers.43.block_sparse_moe.experts.143.w3", "model.layers.43.block_sparse_moe.experts.144.w3", "model.layers.43.block_sparse_moe.experts.145.w3", "model.layers.43.block_sparse_moe.experts.146.w3", "model.layers.43.block_sparse_moe.experts.147.w3", "model.layers.43.block_sparse_moe.experts.148.w3", "model.layers.43.block_sparse_moe.experts.149.w3", "model.layers.43.block_sparse_moe.experts.150.w3", "model.layers.43.block_sparse_moe.experts.151.w3", "model.layers.43.block_sparse_moe.experts.152.w3", "model.layers.43.block_sparse_moe.experts.153.w3", "model.layers.43.block_sparse_moe.experts.154.w3", "model.layers.43.block_sparse_moe.experts.155.w3", "model.layers.43.block_sparse_moe.experts.156.w3", "model.layers.43.block_sparse_moe.experts.157.w3", "model.layers.43.block_sparse_moe.experts.158.w3", "model.layers.43.block_sparse_moe.experts.159.w3", "model.layers.43.block_sparse_moe.experts.160.w3", "model.layers.43.block_sparse_moe.experts.161.w3", "model.layers.43.block_sparse_moe.experts.162.w3", "model.layers.43.block_sparse_moe.experts.163.w3", "model.layers.43.block_sparse_moe.experts.164.w3", "model.layers.43.block_sparse_moe.experts.165.w3", "model.layers.43.block_sparse_moe.experts.166.w3", "model.layers.43.block_sparse_moe.experts.167.w3", "model.layers.43.block_sparse_moe.experts.168.w3", "model.layers.43.block_sparse_moe.experts.169.w3", "model.layers.43.block_sparse_moe.experts.170.w3", "model.layers.43.block_sparse_moe.experts.171.w3", "model.layers.43.block_sparse_moe.experts.172.w3", "model.layers.43.block_sparse_moe.experts.173.w3", "model.layers.43.block_sparse_moe.experts.174.w3", "model.layers.43.block_sparse_moe.experts.175.w3", "model.layers.43.block_sparse_moe.experts.176.w3", "model.layers.43.block_sparse_moe.experts.177.w3", "model.layers.43.block_sparse_moe.experts.178.w3", "model.layers.43.block_sparse_moe.experts.179.w3", "model.layers.43.block_sparse_moe.experts.180.w3", "model.layers.43.block_sparse_moe.experts.181.w3", "model.layers.43.block_sparse_moe.experts.182.w3", "model.layers.43.block_sparse_moe.experts.183.w3", "model.layers.43.block_sparse_moe.experts.184.w3", "model.layers.43.block_sparse_moe.experts.185.w3", "model.layers.43.block_sparse_moe.experts.186.w3", "model.layers.43.block_sparse_moe.experts.187.w3", "model.layers.43.block_sparse_moe.experts.188.w3", "model.layers.43.block_sparse_moe.experts.189.w3", "model.layers.43.block_sparse_moe.experts.190.w3", "model.layers.43.block_sparse_moe.experts.191.w3", "model.layers.43.block_sparse_moe.experts.192.w3", "model.layers.43.block_sparse_moe.experts.193.w3", "model.layers.43.block_sparse_moe.experts.194.w3", "model.layers.43.block_sparse_moe.experts.195.w3", "model.layers.43.block_sparse_moe.experts.196.w3", "model.layers.43.block_sparse_moe.experts.197.w3", "model.layers.43.block_sparse_moe.experts.198.w3", "model.layers.43.block_sparse_moe.experts.199.w3", "model.layers.43.block_sparse_moe.experts.200.w3", "model.layers.43.block_sparse_moe.experts.201.w3", "model.layers.43.block_sparse_moe.experts.202.w3", "model.layers.43.block_sparse_moe.experts.203.w3", "model.layers.43.block_sparse_moe.experts.204.w3", "model.layers.43.block_sparse_moe.experts.205.w3", "model.layers.43.block_sparse_moe.experts.206.w3", "model.layers.43.block_sparse_moe.experts.207.w3", "model.layers.43.block_sparse_moe.experts.208.w3", "model.layers.43.block_sparse_moe.experts.209.w3", "model.layers.43.block_sparse_moe.experts.210.w3", "model.layers.43.block_sparse_moe.experts.211.w3", "model.layers.43.block_sparse_moe.experts.212.w3", "model.layers.43.block_sparse_moe.experts.213.w3", "model.layers.43.block_sparse_moe.experts.214.w3", "model.layers.43.block_sparse_moe.experts.215.w3", "model.layers.43.block_sparse_moe.experts.216.w3", "model.layers.43.block_sparse_moe.experts.217.w3", "model.layers.43.block_sparse_moe.experts.218.w3", "model.layers.43.block_sparse_moe.experts.219.w3", "model.layers.43.block_sparse_moe.experts.220.w3", "model.layers.43.block_sparse_moe.experts.221.w3", "model.layers.43.block_sparse_moe.experts.222.w3", "model.layers.43.block_sparse_moe.experts.223.w3", "model.layers.43.block_sparse_moe.experts.224.w3", "model.layers.43.block_sparse_moe.experts.225.w3", "model.layers.43.block_sparse_moe.experts.226.w3", "model.layers.43.block_sparse_moe.experts.227.w3", "model.layers.43.block_sparse_moe.experts.228.w3", "model.layers.43.block_sparse_moe.experts.229.w3", "model.layers.43.block_sparse_moe.experts.230.w3", "model.layers.43.block_sparse_moe.experts.231.w3", "model.layers.43.block_sparse_moe.experts.232.w3", "model.layers.43.block_sparse_moe.experts.233.w3", "model.layers.43.block_sparse_moe.experts.234.w3", "model.layers.43.block_sparse_moe.experts.235.w3", "model.layers.43.block_sparse_moe.experts.236.w3", "model.layers.43.block_sparse_moe.experts.237.w3", "model.layers.43.block_sparse_moe.experts.238.w3", "model.layers.43.block_sparse_moe.experts.239.w3", "model.layers.43.block_sparse_moe.experts.240.w3", "model.layers.43.block_sparse_moe.experts.241.w3", "model.layers.43.block_sparse_moe.experts.242.w3", "model.layers.43.block_sparse_moe.experts.243.w3", "model.layers.43.block_sparse_moe.experts.244.w3", "model.layers.43.block_sparse_moe.experts.245.w3", "model.layers.43.block_sparse_moe.experts.246.w3", "model.layers.43.block_sparse_moe.experts.247.w3", "model.layers.43.block_sparse_moe.experts.248.w3", "model.layers.43.block_sparse_moe.experts.249.w3", "model.layers.43.block_sparse_moe.experts.250.w3", "model.layers.43.block_sparse_moe.experts.251.w3", "model.layers.43.block_sparse_moe.experts.252.w3", "model.layers.43.block_sparse_moe.experts.253.w3", "model.layers.43.block_sparse_moe.experts.254.w3", "model.layers.43.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00010602176189422607, "dbits": 2415919104 } ] }, { "idx": 219, "layers": [ "model.layers.43.block_sparse_moe.experts.0.w2", "model.layers.43.block_sparse_moe.experts.1.w2", "model.layers.43.block_sparse_moe.experts.2.w2", "model.layers.43.block_sparse_moe.experts.3.w2", "model.layers.43.block_sparse_moe.experts.4.w2", "model.layers.43.block_sparse_moe.experts.5.w2", "model.layers.43.block_sparse_moe.experts.6.w2", "model.layers.43.block_sparse_moe.experts.7.w2", "model.layers.43.block_sparse_moe.experts.8.w2", "model.layers.43.block_sparse_moe.experts.9.w2", "model.layers.43.block_sparse_moe.experts.10.w2", "model.layers.43.block_sparse_moe.experts.11.w2", "model.layers.43.block_sparse_moe.experts.12.w2", "model.layers.43.block_sparse_moe.experts.13.w2", "model.layers.43.block_sparse_moe.experts.14.w2", "model.layers.43.block_sparse_moe.experts.15.w2", "model.layers.43.block_sparse_moe.experts.16.w2", "model.layers.43.block_sparse_moe.experts.17.w2", "model.layers.43.block_sparse_moe.experts.18.w2", "model.layers.43.block_sparse_moe.experts.19.w2", "model.layers.43.block_sparse_moe.experts.20.w2", "model.layers.43.block_sparse_moe.experts.21.w2", "model.layers.43.block_sparse_moe.experts.22.w2", "model.layers.43.block_sparse_moe.experts.23.w2", "model.layers.43.block_sparse_moe.experts.24.w2", "model.layers.43.block_sparse_moe.experts.25.w2", "model.layers.43.block_sparse_moe.experts.26.w2", "model.layers.43.block_sparse_moe.experts.27.w2", "model.layers.43.block_sparse_moe.experts.28.w2", "model.layers.43.block_sparse_moe.experts.29.w2", "model.layers.43.block_sparse_moe.experts.30.w2", "model.layers.43.block_sparse_moe.experts.31.w2", "model.layers.43.block_sparse_moe.experts.32.w2", "model.layers.43.block_sparse_moe.experts.33.w2", "model.layers.43.block_sparse_moe.experts.34.w2", "model.layers.43.block_sparse_moe.experts.35.w2", "model.layers.43.block_sparse_moe.experts.36.w2", "model.layers.43.block_sparse_moe.experts.37.w2", "model.layers.43.block_sparse_moe.experts.38.w2", "model.layers.43.block_sparse_moe.experts.39.w2", "model.layers.43.block_sparse_moe.experts.40.w2", "model.layers.43.block_sparse_moe.experts.41.w2", "model.layers.43.block_sparse_moe.experts.42.w2", "model.layers.43.block_sparse_moe.experts.43.w2", "model.layers.43.block_sparse_moe.experts.44.w2", "model.layers.43.block_sparse_moe.experts.45.w2", "model.layers.43.block_sparse_moe.experts.46.w2", "model.layers.43.block_sparse_moe.experts.47.w2", "model.layers.43.block_sparse_moe.experts.48.w2", "model.layers.43.block_sparse_moe.experts.49.w2", "model.layers.43.block_sparse_moe.experts.50.w2", "model.layers.43.block_sparse_moe.experts.51.w2", "model.layers.43.block_sparse_moe.experts.52.w2", "model.layers.43.block_sparse_moe.experts.53.w2", "model.layers.43.block_sparse_moe.experts.54.w2", "model.layers.43.block_sparse_moe.experts.55.w2", "model.layers.43.block_sparse_moe.experts.56.w2", "model.layers.43.block_sparse_moe.experts.57.w2", "model.layers.43.block_sparse_moe.experts.58.w2", "model.layers.43.block_sparse_moe.experts.59.w2", "model.layers.43.block_sparse_moe.experts.60.w2", "model.layers.43.block_sparse_moe.experts.61.w2", "model.layers.43.block_sparse_moe.experts.62.w2", "model.layers.43.block_sparse_moe.experts.63.w2", "model.layers.43.block_sparse_moe.experts.64.w2", "model.layers.43.block_sparse_moe.experts.65.w2", "model.layers.43.block_sparse_moe.experts.66.w2", "model.layers.43.block_sparse_moe.experts.67.w2", "model.layers.43.block_sparse_moe.experts.68.w2", "model.layers.43.block_sparse_moe.experts.69.w2", "model.layers.43.block_sparse_moe.experts.70.w2", "model.layers.43.block_sparse_moe.experts.71.w2", "model.layers.43.block_sparse_moe.experts.72.w2", "model.layers.43.block_sparse_moe.experts.73.w2", "model.layers.43.block_sparse_moe.experts.74.w2", "model.layers.43.block_sparse_moe.experts.75.w2", "model.layers.43.block_sparse_moe.experts.76.w2", "model.layers.43.block_sparse_moe.experts.77.w2", "model.layers.43.block_sparse_moe.experts.78.w2", "model.layers.43.block_sparse_moe.experts.79.w2", "model.layers.43.block_sparse_moe.experts.80.w2", "model.layers.43.block_sparse_moe.experts.81.w2", "model.layers.43.block_sparse_moe.experts.82.w2", "model.layers.43.block_sparse_moe.experts.83.w2", "model.layers.43.block_sparse_moe.experts.84.w2", "model.layers.43.block_sparse_moe.experts.85.w2", "model.layers.43.block_sparse_moe.experts.86.w2", "model.layers.43.block_sparse_moe.experts.87.w2", "model.layers.43.block_sparse_moe.experts.88.w2", "model.layers.43.block_sparse_moe.experts.89.w2", "model.layers.43.block_sparse_moe.experts.90.w2", "model.layers.43.block_sparse_moe.experts.91.w2", "model.layers.43.block_sparse_moe.experts.92.w2", "model.layers.43.block_sparse_moe.experts.93.w2", "model.layers.43.block_sparse_moe.experts.94.w2", "model.layers.43.block_sparse_moe.experts.95.w2", "model.layers.43.block_sparse_moe.experts.96.w2", "model.layers.43.block_sparse_moe.experts.97.w2", "model.layers.43.block_sparse_moe.experts.98.w2", "model.layers.43.block_sparse_moe.experts.99.w2", "model.layers.43.block_sparse_moe.experts.100.w2", "model.layers.43.block_sparse_moe.experts.101.w2", "model.layers.43.block_sparse_moe.experts.102.w2", "model.layers.43.block_sparse_moe.experts.103.w2", "model.layers.43.block_sparse_moe.experts.104.w2", "model.layers.43.block_sparse_moe.experts.105.w2", "model.layers.43.block_sparse_moe.experts.106.w2", "model.layers.43.block_sparse_moe.experts.107.w2", "model.layers.43.block_sparse_moe.experts.108.w2", "model.layers.43.block_sparse_moe.experts.109.w2", "model.layers.43.block_sparse_moe.experts.110.w2", "model.layers.43.block_sparse_moe.experts.111.w2", "model.layers.43.block_sparse_moe.experts.112.w2", "model.layers.43.block_sparse_moe.experts.113.w2", "model.layers.43.block_sparse_moe.experts.114.w2", "model.layers.43.block_sparse_moe.experts.115.w2", "model.layers.43.block_sparse_moe.experts.116.w2", "model.layers.43.block_sparse_moe.experts.117.w2", "model.layers.43.block_sparse_moe.experts.118.w2", "model.layers.43.block_sparse_moe.experts.119.w2", "model.layers.43.block_sparse_moe.experts.120.w2", "model.layers.43.block_sparse_moe.experts.121.w2", "model.layers.43.block_sparse_moe.experts.122.w2", "model.layers.43.block_sparse_moe.experts.123.w2", "model.layers.43.block_sparse_moe.experts.124.w2", "model.layers.43.block_sparse_moe.experts.125.w2", "model.layers.43.block_sparse_moe.experts.126.w2", "model.layers.43.block_sparse_moe.experts.127.w2", "model.layers.43.block_sparse_moe.experts.128.w2", "model.layers.43.block_sparse_moe.experts.129.w2", "model.layers.43.block_sparse_moe.experts.130.w2", "model.layers.43.block_sparse_moe.experts.131.w2", "model.layers.43.block_sparse_moe.experts.132.w2", "model.layers.43.block_sparse_moe.experts.133.w2", "model.layers.43.block_sparse_moe.experts.134.w2", "model.layers.43.block_sparse_moe.experts.135.w2", "model.layers.43.block_sparse_moe.experts.136.w2", "model.layers.43.block_sparse_moe.experts.137.w2", "model.layers.43.block_sparse_moe.experts.138.w2", "model.layers.43.block_sparse_moe.experts.139.w2", "model.layers.43.block_sparse_moe.experts.140.w2", "model.layers.43.block_sparse_moe.experts.141.w2", "model.layers.43.block_sparse_moe.experts.142.w2", "model.layers.43.block_sparse_moe.experts.143.w2", "model.layers.43.block_sparse_moe.experts.144.w2", "model.layers.43.block_sparse_moe.experts.145.w2", "model.layers.43.block_sparse_moe.experts.146.w2", "model.layers.43.block_sparse_moe.experts.147.w2", "model.layers.43.block_sparse_moe.experts.148.w2", "model.layers.43.block_sparse_moe.experts.149.w2", "model.layers.43.block_sparse_moe.experts.150.w2", "model.layers.43.block_sparse_moe.experts.151.w2", "model.layers.43.block_sparse_moe.experts.152.w2", "model.layers.43.block_sparse_moe.experts.153.w2", "model.layers.43.block_sparse_moe.experts.154.w2", "model.layers.43.block_sparse_moe.experts.155.w2", "model.layers.43.block_sparse_moe.experts.156.w2", "model.layers.43.block_sparse_moe.experts.157.w2", "model.layers.43.block_sparse_moe.experts.158.w2", "model.layers.43.block_sparse_moe.experts.159.w2", "model.layers.43.block_sparse_moe.experts.160.w2", "model.layers.43.block_sparse_moe.experts.161.w2", "model.layers.43.block_sparse_moe.experts.162.w2", "model.layers.43.block_sparse_moe.experts.163.w2", "model.layers.43.block_sparse_moe.experts.164.w2", "model.layers.43.block_sparse_moe.experts.165.w2", "model.layers.43.block_sparse_moe.experts.166.w2", "model.layers.43.block_sparse_moe.experts.167.w2", "model.layers.43.block_sparse_moe.experts.168.w2", "model.layers.43.block_sparse_moe.experts.169.w2", "model.layers.43.block_sparse_moe.experts.170.w2", "model.layers.43.block_sparse_moe.experts.171.w2", "model.layers.43.block_sparse_moe.experts.172.w2", "model.layers.43.block_sparse_moe.experts.173.w2", "model.layers.43.block_sparse_moe.experts.174.w2", "model.layers.43.block_sparse_moe.experts.175.w2", "model.layers.43.block_sparse_moe.experts.176.w2", "model.layers.43.block_sparse_moe.experts.177.w2", "model.layers.43.block_sparse_moe.experts.178.w2", "model.layers.43.block_sparse_moe.experts.179.w2", "model.layers.43.block_sparse_moe.experts.180.w2", "model.layers.43.block_sparse_moe.experts.181.w2", "model.layers.43.block_sparse_moe.experts.182.w2", "model.layers.43.block_sparse_moe.experts.183.w2", "model.layers.43.block_sparse_moe.experts.184.w2", "model.layers.43.block_sparse_moe.experts.185.w2", "model.layers.43.block_sparse_moe.experts.186.w2", "model.layers.43.block_sparse_moe.experts.187.w2", "model.layers.43.block_sparse_moe.experts.188.w2", "model.layers.43.block_sparse_moe.experts.189.w2", "model.layers.43.block_sparse_moe.experts.190.w2", "model.layers.43.block_sparse_moe.experts.191.w2", "model.layers.43.block_sparse_moe.experts.192.w2", "model.layers.43.block_sparse_moe.experts.193.w2", "model.layers.43.block_sparse_moe.experts.194.w2", "model.layers.43.block_sparse_moe.experts.195.w2", "model.layers.43.block_sparse_moe.experts.196.w2", "model.layers.43.block_sparse_moe.experts.197.w2", "model.layers.43.block_sparse_moe.experts.198.w2", "model.layers.43.block_sparse_moe.experts.199.w2", "model.layers.43.block_sparse_moe.experts.200.w2", "model.layers.43.block_sparse_moe.experts.201.w2", "model.layers.43.block_sparse_moe.experts.202.w2", "model.layers.43.block_sparse_moe.experts.203.w2", "model.layers.43.block_sparse_moe.experts.204.w2", "model.layers.43.block_sparse_moe.experts.205.w2", "model.layers.43.block_sparse_moe.experts.206.w2", "model.layers.43.block_sparse_moe.experts.207.w2", "model.layers.43.block_sparse_moe.experts.208.w2", "model.layers.43.block_sparse_moe.experts.209.w2", "model.layers.43.block_sparse_moe.experts.210.w2", "model.layers.43.block_sparse_moe.experts.211.w2", "model.layers.43.block_sparse_moe.experts.212.w2", "model.layers.43.block_sparse_moe.experts.213.w2", "model.layers.43.block_sparse_moe.experts.214.w2", "model.layers.43.block_sparse_moe.experts.215.w2", "model.layers.43.block_sparse_moe.experts.216.w2", "model.layers.43.block_sparse_moe.experts.217.w2", "model.layers.43.block_sparse_moe.experts.218.w2", "model.layers.43.block_sparse_moe.experts.219.w2", "model.layers.43.block_sparse_moe.experts.220.w2", "model.layers.43.block_sparse_moe.experts.221.w2", "model.layers.43.block_sparse_moe.experts.222.w2", "model.layers.43.block_sparse_moe.experts.223.w2", "model.layers.43.block_sparse_moe.experts.224.w2", "model.layers.43.block_sparse_moe.experts.225.w2", "model.layers.43.block_sparse_moe.experts.226.w2", "model.layers.43.block_sparse_moe.experts.227.w2", "model.layers.43.block_sparse_moe.experts.228.w2", "model.layers.43.block_sparse_moe.experts.229.w2", "model.layers.43.block_sparse_moe.experts.230.w2", "model.layers.43.block_sparse_moe.experts.231.w2", "model.layers.43.block_sparse_moe.experts.232.w2", "model.layers.43.block_sparse_moe.experts.233.w2", "model.layers.43.block_sparse_moe.experts.234.w2", "model.layers.43.block_sparse_moe.experts.235.w2", "model.layers.43.block_sparse_moe.experts.236.w2", "model.layers.43.block_sparse_moe.experts.237.w2", "model.layers.43.block_sparse_moe.experts.238.w2", "model.layers.43.block_sparse_moe.experts.239.w2", "model.layers.43.block_sparse_moe.experts.240.w2", "model.layers.43.block_sparse_moe.experts.241.w2", "model.layers.43.block_sparse_moe.experts.242.w2", "model.layers.43.block_sparse_moe.experts.243.w2", "model.layers.43.block_sparse_moe.experts.244.w2", "model.layers.43.block_sparse_moe.experts.245.w2", "model.layers.43.block_sparse_moe.experts.246.w2", "model.layers.43.block_sparse_moe.experts.247.w2", "model.layers.43.block_sparse_moe.experts.248.w2", "model.layers.43.block_sparse_moe.experts.249.w2", "model.layers.43.block_sparse_moe.experts.250.w2", "model.layers.43.block_sparse_moe.experts.251.w2", "model.layers.43.block_sparse_moe.experts.252.w2", "model.layers.43.block_sparse_moe.experts.253.w2", "model.layers.43.block_sparse_moe.experts.254.w2", "model.layers.43.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00037914514541625977, "dbits": 1207959552 } ] }, { "idx": 220, "layers": [ "model.layers.44.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0003267437219620639, "dbits": 18874368 } ] }, { "idx": 221, "layers": [ "model.layers.44.self_attn.k_proj", "model.layers.44.self_attn.v_proj" ], "candidates": [ { "dkld": -0.012880323827266671, "dbits": 6291456 } ] }, { "idx": 222, "layers": [ "model.layers.44.self_attn.o_proj" ], "candidates": [ { "dkld": -0.020288893580436618, "dbits": 18874368 } ] }, { "idx": 223, "layers": [ "model.layers.44.block_sparse_moe.experts.0.w1", "model.layers.44.block_sparse_moe.experts.1.w1", "model.layers.44.block_sparse_moe.experts.2.w1", "model.layers.44.block_sparse_moe.experts.3.w1", "model.layers.44.block_sparse_moe.experts.4.w1", "model.layers.44.block_sparse_moe.experts.5.w1", "model.layers.44.block_sparse_moe.experts.6.w1", "model.layers.44.block_sparse_moe.experts.7.w1", "model.layers.44.block_sparse_moe.experts.8.w1", "model.layers.44.block_sparse_moe.experts.9.w1", "model.layers.44.block_sparse_moe.experts.10.w1", "model.layers.44.block_sparse_moe.experts.11.w1", "model.layers.44.block_sparse_moe.experts.12.w1", "model.layers.44.block_sparse_moe.experts.13.w1", "model.layers.44.block_sparse_moe.experts.14.w1", "model.layers.44.block_sparse_moe.experts.15.w1", "model.layers.44.block_sparse_moe.experts.16.w1", "model.layers.44.block_sparse_moe.experts.17.w1", "model.layers.44.block_sparse_moe.experts.18.w1", "model.layers.44.block_sparse_moe.experts.19.w1", "model.layers.44.block_sparse_moe.experts.20.w1", "model.layers.44.block_sparse_moe.experts.21.w1", "model.layers.44.block_sparse_moe.experts.22.w1", "model.layers.44.block_sparse_moe.experts.23.w1", "model.layers.44.block_sparse_moe.experts.24.w1", "model.layers.44.block_sparse_moe.experts.25.w1", "model.layers.44.block_sparse_moe.experts.26.w1", "model.layers.44.block_sparse_moe.experts.27.w1", "model.layers.44.block_sparse_moe.experts.28.w1", "model.layers.44.block_sparse_moe.experts.29.w1", "model.layers.44.block_sparse_moe.experts.30.w1", "model.layers.44.block_sparse_moe.experts.31.w1", "model.layers.44.block_sparse_moe.experts.32.w1", "model.layers.44.block_sparse_moe.experts.33.w1", "model.layers.44.block_sparse_moe.experts.34.w1", "model.layers.44.block_sparse_moe.experts.35.w1", "model.layers.44.block_sparse_moe.experts.36.w1", "model.layers.44.block_sparse_moe.experts.37.w1", "model.layers.44.block_sparse_moe.experts.38.w1", "model.layers.44.block_sparse_moe.experts.39.w1", "model.layers.44.block_sparse_moe.experts.40.w1", "model.layers.44.block_sparse_moe.experts.41.w1", "model.layers.44.block_sparse_moe.experts.42.w1", "model.layers.44.block_sparse_moe.experts.43.w1", "model.layers.44.block_sparse_moe.experts.44.w1", "model.layers.44.block_sparse_moe.experts.45.w1", "model.layers.44.block_sparse_moe.experts.46.w1", "model.layers.44.block_sparse_moe.experts.47.w1", "model.layers.44.block_sparse_moe.experts.48.w1", "model.layers.44.block_sparse_moe.experts.49.w1", "model.layers.44.block_sparse_moe.experts.50.w1", "model.layers.44.block_sparse_moe.experts.51.w1", "model.layers.44.block_sparse_moe.experts.52.w1", "model.layers.44.block_sparse_moe.experts.53.w1", "model.layers.44.block_sparse_moe.experts.54.w1", "model.layers.44.block_sparse_moe.experts.55.w1", "model.layers.44.block_sparse_moe.experts.56.w1", "model.layers.44.block_sparse_moe.experts.57.w1", "model.layers.44.block_sparse_moe.experts.58.w1", "model.layers.44.block_sparse_moe.experts.59.w1", "model.layers.44.block_sparse_moe.experts.60.w1", "model.layers.44.block_sparse_moe.experts.61.w1", "model.layers.44.block_sparse_moe.experts.62.w1", "model.layers.44.block_sparse_moe.experts.63.w1", "model.layers.44.block_sparse_moe.experts.64.w1", "model.layers.44.block_sparse_moe.experts.65.w1", "model.layers.44.block_sparse_moe.experts.66.w1", "model.layers.44.block_sparse_moe.experts.67.w1", "model.layers.44.block_sparse_moe.experts.68.w1", "model.layers.44.block_sparse_moe.experts.69.w1", "model.layers.44.block_sparse_moe.experts.70.w1", "model.layers.44.block_sparse_moe.experts.71.w1", "model.layers.44.block_sparse_moe.experts.72.w1", "model.layers.44.block_sparse_moe.experts.73.w1", "model.layers.44.block_sparse_moe.experts.74.w1", "model.layers.44.block_sparse_moe.experts.75.w1", "model.layers.44.block_sparse_moe.experts.76.w1", "model.layers.44.block_sparse_moe.experts.77.w1", "model.layers.44.block_sparse_moe.experts.78.w1", "model.layers.44.block_sparse_moe.experts.79.w1", "model.layers.44.block_sparse_moe.experts.80.w1", "model.layers.44.block_sparse_moe.experts.81.w1", "model.layers.44.block_sparse_moe.experts.82.w1", "model.layers.44.block_sparse_moe.experts.83.w1", "model.layers.44.block_sparse_moe.experts.84.w1", "model.layers.44.block_sparse_moe.experts.85.w1", "model.layers.44.block_sparse_moe.experts.86.w1", "model.layers.44.block_sparse_moe.experts.87.w1", "model.layers.44.block_sparse_moe.experts.88.w1", "model.layers.44.block_sparse_moe.experts.89.w1", "model.layers.44.block_sparse_moe.experts.90.w1", "model.layers.44.block_sparse_moe.experts.91.w1", "model.layers.44.block_sparse_moe.experts.92.w1", "model.layers.44.block_sparse_moe.experts.93.w1", "model.layers.44.block_sparse_moe.experts.94.w1", "model.layers.44.block_sparse_moe.experts.95.w1", "model.layers.44.block_sparse_moe.experts.96.w1", "model.layers.44.block_sparse_moe.experts.97.w1", "model.layers.44.block_sparse_moe.experts.98.w1", "model.layers.44.block_sparse_moe.experts.99.w1", "model.layers.44.block_sparse_moe.experts.100.w1", "model.layers.44.block_sparse_moe.experts.101.w1", "model.layers.44.block_sparse_moe.experts.102.w1", "model.layers.44.block_sparse_moe.experts.103.w1", "model.layers.44.block_sparse_moe.experts.104.w1", "model.layers.44.block_sparse_moe.experts.105.w1", "model.layers.44.block_sparse_moe.experts.106.w1", "model.layers.44.block_sparse_moe.experts.107.w1", "model.layers.44.block_sparse_moe.experts.108.w1", "model.layers.44.block_sparse_moe.experts.109.w1", "model.layers.44.block_sparse_moe.experts.110.w1", "model.layers.44.block_sparse_moe.experts.111.w1", "model.layers.44.block_sparse_moe.experts.112.w1", "model.layers.44.block_sparse_moe.experts.113.w1", "model.layers.44.block_sparse_moe.experts.114.w1", "model.layers.44.block_sparse_moe.experts.115.w1", "model.layers.44.block_sparse_moe.experts.116.w1", "model.layers.44.block_sparse_moe.experts.117.w1", "model.layers.44.block_sparse_moe.experts.118.w1", "model.layers.44.block_sparse_moe.experts.119.w1", "model.layers.44.block_sparse_moe.experts.120.w1", "model.layers.44.block_sparse_moe.experts.121.w1", "model.layers.44.block_sparse_moe.experts.122.w1", "model.layers.44.block_sparse_moe.experts.123.w1", "model.layers.44.block_sparse_moe.experts.124.w1", "model.layers.44.block_sparse_moe.experts.125.w1", "model.layers.44.block_sparse_moe.experts.126.w1", "model.layers.44.block_sparse_moe.experts.127.w1", "model.layers.44.block_sparse_moe.experts.128.w1", "model.layers.44.block_sparse_moe.experts.129.w1", "model.layers.44.block_sparse_moe.experts.130.w1", "model.layers.44.block_sparse_moe.experts.131.w1", "model.layers.44.block_sparse_moe.experts.132.w1", "model.layers.44.block_sparse_moe.experts.133.w1", "model.layers.44.block_sparse_moe.experts.134.w1", "model.layers.44.block_sparse_moe.experts.135.w1", "model.layers.44.block_sparse_moe.experts.136.w1", "model.layers.44.block_sparse_moe.experts.137.w1", "model.layers.44.block_sparse_moe.experts.138.w1", "model.layers.44.block_sparse_moe.experts.139.w1", "model.layers.44.block_sparse_moe.experts.140.w1", "model.layers.44.block_sparse_moe.experts.141.w1", "model.layers.44.block_sparse_moe.experts.142.w1", "model.layers.44.block_sparse_moe.experts.143.w1", "model.layers.44.block_sparse_moe.experts.144.w1", "model.layers.44.block_sparse_moe.experts.145.w1", "model.layers.44.block_sparse_moe.experts.146.w1", "model.layers.44.block_sparse_moe.experts.147.w1", "model.layers.44.block_sparse_moe.experts.148.w1", "model.layers.44.block_sparse_moe.experts.149.w1", "model.layers.44.block_sparse_moe.experts.150.w1", "model.layers.44.block_sparse_moe.experts.151.w1", "model.layers.44.block_sparse_moe.experts.152.w1", "model.layers.44.block_sparse_moe.experts.153.w1", "model.layers.44.block_sparse_moe.experts.154.w1", "model.layers.44.block_sparse_moe.experts.155.w1", "model.layers.44.block_sparse_moe.experts.156.w1", "model.layers.44.block_sparse_moe.experts.157.w1", "model.layers.44.block_sparse_moe.experts.158.w1", "model.layers.44.block_sparse_moe.experts.159.w1", "model.layers.44.block_sparse_moe.experts.160.w1", "model.layers.44.block_sparse_moe.experts.161.w1", "model.layers.44.block_sparse_moe.experts.162.w1", "model.layers.44.block_sparse_moe.experts.163.w1", "model.layers.44.block_sparse_moe.experts.164.w1", "model.layers.44.block_sparse_moe.experts.165.w1", "model.layers.44.block_sparse_moe.experts.166.w1", "model.layers.44.block_sparse_moe.experts.167.w1", "model.layers.44.block_sparse_moe.experts.168.w1", "model.layers.44.block_sparse_moe.experts.169.w1", "model.layers.44.block_sparse_moe.experts.170.w1", "model.layers.44.block_sparse_moe.experts.171.w1", "model.layers.44.block_sparse_moe.experts.172.w1", "model.layers.44.block_sparse_moe.experts.173.w1", "model.layers.44.block_sparse_moe.experts.174.w1", "model.layers.44.block_sparse_moe.experts.175.w1", "model.layers.44.block_sparse_moe.experts.176.w1", "model.layers.44.block_sparse_moe.experts.177.w1", "model.layers.44.block_sparse_moe.experts.178.w1", "model.layers.44.block_sparse_moe.experts.179.w1", "model.layers.44.block_sparse_moe.experts.180.w1", "model.layers.44.block_sparse_moe.experts.181.w1", "model.layers.44.block_sparse_moe.experts.182.w1", "model.layers.44.block_sparse_moe.experts.183.w1", "model.layers.44.block_sparse_moe.experts.184.w1", "model.layers.44.block_sparse_moe.experts.185.w1", "model.layers.44.block_sparse_moe.experts.186.w1", "model.layers.44.block_sparse_moe.experts.187.w1", "model.layers.44.block_sparse_moe.experts.188.w1", "model.layers.44.block_sparse_moe.experts.189.w1", "model.layers.44.block_sparse_moe.experts.190.w1", "model.layers.44.block_sparse_moe.experts.191.w1", "model.layers.44.block_sparse_moe.experts.192.w1", "model.layers.44.block_sparse_moe.experts.193.w1", "model.layers.44.block_sparse_moe.experts.194.w1", "model.layers.44.block_sparse_moe.experts.195.w1", "model.layers.44.block_sparse_moe.experts.196.w1", "model.layers.44.block_sparse_moe.experts.197.w1", "model.layers.44.block_sparse_moe.experts.198.w1", "model.layers.44.block_sparse_moe.experts.199.w1", "model.layers.44.block_sparse_moe.experts.200.w1", "model.layers.44.block_sparse_moe.experts.201.w1", "model.layers.44.block_sparse_moe.experts.202.w1", "model.layers.44.block_sparse_moe.experts.203.w1", "model.layers.44.block_sparse_moe.experts.204.w1", "model.layers.44.block_sparse_moe.experts.205.w1", "model.layers.44.block_sparse_moe.experts.206.w1", "model.layers.44.block_sparse_moe.experts.207.w1", "model.layers.44.block_sparse_moe.experts.208.w1", "model.layers.44.block_sparse_moe.experts.209.w1", "model.layers.44.block_sparse_moe.experts.210.w1", "model.layers.44.block_sparse_moe.experts.211.w1", "model.layers.44.block_sparse_moe.experts.212.w1", "model.layers.44.block_sparse_moe.experts.213.w1", "model.layers.44.block_sparse_moe.experts.214.w1", "model.layers.44.block_sparse_moe.experts.215.w1", "model.layers.44.block_sparse_moe.experts.216.w1", "model.layers.44.block_sparse_moe.experts.217.w1", "model.layers.44.block_sparse_moe.experts.218.w1", "model.layers.44.block_sparse_moe.experts.219.w1", "model.layers.44.block_sparse_moe.experts.220.w1", "model.layers.44.block_sparse_moe.experts.221.w1", "model.layers.44.block_sparse_moe.experts.222.w1", "model.layers.44.block_sparse_moe.experts.223.w1", "model.layers.44.block_sparse_moe.experts.224.w1", "model.layers.44.block_sparse_moe.experts.225.w1", "model.layers.44.block_sparse_moe.experts.226.w1", "model.layers.44.block_sparse_moe.experts.227.w1", "model.layers.44.block_sparse_moe.experts.228.w1", "model.layers.44.block_sparse_moe.experts.229.w1", "model.layers.44.block_sparse_moe.experts.230.w1", "model.layers.44.block_sparse_moe.experts.231.w1", "model.layers.44.block_sparse_moe.experts.232.w1", "model.layers.44.block_sparse_moe.experts.233.w1", "model.layers.44.block_sparse_moe.experts.234.w1", "model.layers.44.block_sparse_moe.experts.235.w1", "model.layers.44.block_sparse_moe.experts.236.w1", "model.layers.44.block_sparse_moe.experts.237.w1", "model.layers.44.block_sparse_moe.experts.238.w1", "model.layers.44.block_sparse_moe.experts.239.w1", "model.layers.44.block_sparse_moe.experts.240.w1", "model.layers.44.block_sparse_moe.experts.241.w1", "model.layers.44.block_sparse_moe.experts.242.w1", "model.layers.44.block_sparse_moe.experts.243.w1", "model.layers.44.block_sparse_moe.experts.244.w1", "model.layers.44.block_sparse_moe.experts.245.w1", "model.layers.44.block_sparse_moe.experts.246.w1", "model.layers.44.block_sparse_moe.experts.247.w1", "model.layers.44.block_sparse_moe.experts.248.w1", "model.layers.44.block_sparse_moe.experts.249.w1", "model.layers.44.block_sparse_moe.experts.250.w1", "model.layers.44.block_sparse_moe.experts.251.w1", "model.layers.44.block_sparse_moe.experts.252.w1", "model.layers.44.block_sparse_moe.experts.253.w1", "model.layers.44.block_sparse_moe.experts.254.w1", "model.layers.44.block_sparse_moe.experts.255.w1", "model.layers.44.block_sparse_moe.experts.0.w3", "model.layers.44.block_sparse_moe.experts.1.w3", "model.layers.44.block_sparse_moe.experts.2.w3", "model.layers.44.block_sparse_moe.experts.3.w3", "model.layers.44.block_sparse_moe.experts.4.w3", "model.layers.44.block_sparse_moe.experts.5.w3", "model.layers.44.block_sparse_moe.experts.6.w3", "model.layers.44.block_sparse_moe.experts.7.w3", "model.layers.44.block_sparse_moe.experts.8.w3", "model.layers.44.block_sparse_moe.experts.9.w3", "model.layers.44.block_sparse_moe.experts.10.w3", "model.layers.44.block_sparse_moe.experts.11.w3", "model.layers.44.block_sparse_moe.experts.12.w3", "model.layers.44.block_sparse_moe.experts.13.w3", "model.layers.44.block_sparse_moe.experts.14.w3", "model.layers.44.block_sparse_moe.experts.15.w3", "model.layers.44.block_sparse_moe.experts.16.w3", "model.layers.44.block_sparse_moe.experts.17.w3", "model.layers.44.block_sparse_moe.experts.18.w3", "model.layers.44.block_sparse_moe.experts.19.w3", "model.layers.44.block_sparse_moe.experts.20.w3", "model.layers.44.block_sparse_moe.experts.21.w3", "model.layers.44.block_sparse_moe.experts.22.w3", "model.layers.44.block_sparse_moe.experts.23.w3", "model.layers.44.block_sparse_moe.experts.24.w3", "model.layers.44.block_sparse_moe.experts.25.w3", "model.layers.44.block_sparse_moe.experts.26.w3", "model.layers.44.block_sparse_moe.experts.27.w3", "model.layers.44.block_sparse_moe.experts.28.w3", "model.layers.44.block_sparse_moe.experts.29.w3", "model.layers.44.block_sparse_moe.experts.30.w3", "model.layers.44.block_sparse_moe.experts.31.w3", "model.layers.44.block_sparse_moe.experts.32.w3", "model.layers.44.block_sparse_moe.experts.33.w3", "model.layers.44.block_sparse_moe.experts.34.w3", "model.layers.44.block_sparse_moe.experts.35.w3", "model.layers.44.block_sparse_moe.experts.36.w3", "model.layers.44.block_sparse_moe.experts.37.w3", "model.layers.44.block_sparse_moe.experts.38.w3", "model.layers.44.block_sparse_moe.experts.39.w3", "model.layers.44.block_sparse_moe.experts.40.w3", "model.layers.44.block_sparse_moe.experts.41.w3", "model.layers.44.block_sparse_moe.experts.42.w3", "model.layers.44.block_sparse_moe.experts.43.w3", "model.layers.44.block_sparse_moe.experts.44.w3", "model.layers.44.block_sparse_moe.experts.45.w3", "model.layers.44.block_sparse_moe.experts.46.w3", "model.layers.44.block_sparse_moe.experts.47.w3", "model.layers.44.block_sparse_moe.experts.48.w3", "model.layers.44.block_sparse_moe.experts.49.w3", "model.layers.44.block_sparse_moe.experts.50.w3", "model.layers.44.block_sparse_moe.experts.51.w3", "model.layers.44.block_sparse_moe.experts.52.w3", "model.layers.44.block_sparse_moe.experts.53.w3", "model.layers.44.block_sparse_moe.experts.54.w3", "model.layers.44.block_sparse_moe.experts.55.w3", "model.layers.44.block_sparse_moe.experts.56.w3", "model.layers.44.block_sparse_moe.experts.57.w3", "model.layers.44.block_sparse_moe.experts.58.w3", "model.layers.44.block_sparse_moe.experts.59.w3", "model.layers.44.block_sparse_moe.experts.60.w3", "model.layers.44.block_sparse_moe.experts.61.w3", "model.layers.44.block_sparse_moe.experts.62.w3", "model.layers.44.block_sparse_moe.experts.63.w3", "model.layers.44.block_sparse_moe.experts.64.w3", "model.layers.44.block_sparse_moe.experts.65.w3", "model.layers.44.block_sparse_moe.experts.66.w3", "model.layers.44.block_sparse_moe.experts.67.w3", "model.layers.44.block_sparse_moe.experts.68.w3", "model.layers.44.block_sparse_moe.experts.69.w3", "model.layers.44.block_sparse_moe.experts.70.w3", "model.layers.44.block_sparse_moe.experts.71.w3", "model.layers.44.block_sparse_moe.experts.72.w3", "model.layers.44.block_sparse_moe.experts.73.w3", "model.layers.44.block_sparse_moe.experts.74.w3", "model.layers.44.block_sparse_moe.experts.75.w3", "model.layers.44.block_sparse_moe.experts.76.w3", "model.layers.44.block_sparse_moe.experts.77.w3", "model.layers.44.block_sparse_moe.experts.78.w3", "model.layers.44.block_sparse_moe.experts.79.w3", "model.layers.44.block_sparse_moe.experts.80.w3", "model.layers.44.block_sparse_moe.experts.81.w3", "model.layers.44.block_sparse_moe.experts.82.w3", "model.layers.44.block_sparse_moe.experts.83.w3", "model.layers.44.block_sparse_moe.experts.84.w3", "model.layers.44.block_sparse_moe.experts.85.w3", "model.layers.44.block_sparse_moe.experts.86.w3", "model.layers.44.block_sparse_moe.experts.87.w3", "model.layers.44.block_sparse_moe.experts.88.w3", "model.layers.44.block_sparse_moe.experts.89.w3", "model.layers.44.block_sparse_moe.experts.90.w3", "model.layers.44.block_sparse_moe.experts.91.w3", "model.layers.44.block_sparse_moe.experts.92.w3", "model.layers.44.block_sparse_moe.experts.93.w3", "model.layers.44.block_sparse_moe.experts.94.w3", "model.layers.44.block_sparse_moe.experts.95.w3", "model.layers.44.block_sparse_moe.experts.96.w3", "model.layers.44.block_sparse_moe.experts.97.w3", "model.layers.44.block_sparse_moe.experts.98.w3", "model.layers.44.block_sparse_moe.experts.99.w3", "model.layers.44.block_sparse_moe.experts.100.w3", "model.layers.44.block_sparse_moe.experts.101.w3", "model.layers.44.block_sparse_moe.experts.102.w3", "model.layers.44.block_sparse_moe.experts.103.w3", "model.layers.44.block_sparse_moe.experts.104.w3", "model.layers.44.block_sparse_moe.experts.105.w3", "model.layers.44.block_sparse_moe.experts.106.w3", "model.layers.44.block_sparse_moe.experts.107.w3", "model.layers.44.block_sparse_moe.experts.108.w3", "model.layers.44.block_sparse_moe.experts.109.w3", "model.layers.44.block_sparse_moe.experts.110.w3", "model.layers.44.block_sparse_moe.experts.111.w3", "model.layers.44.block_sparse_moe.experts.112.w3", "model.layers.44.block_sparse_moe.experts.113.w3", "model.layers.44.block_sparse_moe.experts.114.w3", "model.layers.44.block_sparse_moe.experts.115.w3", "model.layers.44.block_sparse_moe.experts.116.w3", "model.layers.44.block_sparse_moe.experts.117.w3", "model.layers.44.block_sparse_moe.experts.118.w3", "model.layers.44.block_sparse_moe.experts.119.w3", "model.layers.44.block_sparse_moe.experts.120.w3", "model.layers.44.block_sparse_moe.experts.121.w3", "model.layers.44.block_sparse_moe.experts.122.w3", "model.layers.44.block_sparse_moe.experts.123.w3", "model.layers.44.block_sparse_moe.experts.124.w3", "model.layers.44.block_sparse_moe.experts.125.w3", "model.layers.44.block_sparse_moe.experts.126.w3", "model.layers.44.block_sparse_moe.experts.127.w3", "model.layers.44.block_sparse_moe.experts.128.w3", "model.layers.44.block_sparse_moe.experts.129.w3", "model.layers.44.block_sparse_moe.experts.130.w3", "model.layers.44.block_sparse_moe.experts.131.w3", "model.layers.44.block_sparse_moe.experts.132.w3", "model.layers.44.block_sparse_moe.experts.133.w3", "model.layers.44.block_sparse_moe.experts.134.w3", "model.layers.44.block_sparse_moe.experts.135.w3", "model.layers.44.block_sparse_moe.experts.136.w3", "model.layers.44.block_sparse_moe.experts.137.w3", "model.layers.44.block_sparse_moe.experts.138.w3", "model.layers.44.block_sparse_moe.experts.139.w3", "model.layers.44.block_sparse_moe.experts.140.w3", "model.layers.44.block_sparse_moe.experts.141.w3", "model.layers.44.block_sparse_moe.experts.142.w3", "model.layers.44.block_sparse_moe.experts.143.w3", "model.layers.44.block_sparse_moe.experts.144.w3", "model.layers.44.block_sparse_moe.experts.145.w3", "model.layers.44.block_sparse_moe.experts.146.w3", "model.layers.44.block_sparse_moe.experts.147.w3", "model.layers.44.block_sparse_moe.experts.148.w3", "model.layers.44.block_sparse_moe.experts.149.w3", "model.layers.44.block_sparse_moe.experts.150.w3", "model.layers.44.block_sparse_moe.experts.151.w3", "model.layers.44.block_sparse_moe.experts.152.w3", "model.layers.44.block_sparse_moe.experts.153.w3", "model.layers.44.block_sparse_moe.experts.154.w3", "model.layers.44.block_sparse_moe.experts.155.w3", "model.layers.44.block_sparse_moe.experts.156.w3", "model.layers.44.block_sparse_moe.experts.157.w3", "model.layers.44.block_sparse_moe.experts.158.w3", "model.layers.44.block_sparse_moe.experts.159.w3", "model.layers.44.block_sparse_moe.experts.160.w3", "model.layers.44.block_sparse_moe.experts.161.w3", "model.layers.44.block_sparse_moe.experts.162.w3", "model.layers.44.block_sparse_moe.experts.163.w3", "model.layers.44.block_sparse_moe.experts.164.w3", "model.layers.44.block_sparse_moe.experts.165.w3", "model.layers.44.block_sparse_moe.experts.166.w3", "model.layers.44.block_sparse_moe.experts.167.w3", "model.layers.44.block_sparse_moe.experts.168.w3", "model.layers.44.block_sparse_moe.experts.169.w3", "model.layers.44.block_sparse_moe.experts.170.w3", "model.layers.44.block_sparse_moe.experts.171.w3", "model.layers.44.block_sparse_moe.experts.172.w3", "model.layers.44.block_sparse_moe.experts.173.w3", "model.layers.44.block_sparse_moe.experts.174.w3", "model.layers.44.block_sparse_moe.experts.175.w3", "model.layers.44.block_sparse_moe.experts.176.w3", "model.layers.44.block_sparse_moe.experts.177.w3", "model.layers.44.block_sparse_moe.experts.178.w3", "model.layers.44.block_sparse_moe.experts.179.w3", "model.layers.44.block_sparse_moe.experts.180.w3", "model.layers.44.block_sparse_moe.experts.181.w3", "model.layers.44.block_sparse_moe.experts.182.w3", "model.layers.44.block_sparse_moe.experts.183.w3", "model.layers.44.block_sparse_moe.experts.184.w3", "model.layers.44.block_sparse_moe.experts.185.w3", "model.layers.44.block_sparse_moe.experts.186.w3", "model.layers.44.block_sparse_moe.experts.187.w3", "model.layers.44.block_sparse_moe.experts.188.w3", "model.layers.44.block_sparse_moe.experts.189.w3", "model.layers.44.block_sparse_moe.experts.190.w3", "model.layers.44.block_sparse_moe.experts.191.w3", "model.layers.44.block_sparse_moe.experts.192.w3", "model.layers.44.block_sparse_moe.experts.193.w3", "model.layers.44.block_sparse_moe.experts.194.w3", "model.layers.44.block_sparse_moe.experts.195.w3", "model.layers.44.block_sparse_moe.experts.196.w3", "model.layers.44.block_sparse_moe.experts.197.w3", "model.layers.44.block_sparse_moe.experts.198.w3", "model.layers.44.block_sparse_moe.experts.199.w3", "model.layers.44.block_sparse_moe.experts.200.w3", "model.layers.44.block_sparse_moe.experts.201.w3", "model.layers.44.block_sparse_moe.experts.202.w3", "model.layers.44.block_sparse_moe.experts.203.w3", "model.layers.44.block_sparse_moe.experts.204.w3", "model.layers.44.block_sparse_moe.experts.205.w3", "model.layers.44.block_sparse_moe.experts.206.w3", "model.layers.44.block_sparse_moe.experts.207.w3", "model.layers.44.block_sparse_moe.experts.208.w3", "model.layers.44.block_sparse_moe.experts.209.w3", "model.layers.44.block_sparse_moe.experts.210.w3", "model.layers.44.block_sparse_moe.experts.211.w3", "model.layers.44.block_sparse_moe.experts.212.w3", "model.layers.44.block_sparse_moe.experts.213.w3", "model.layers.44.block_sparse_moe.experts.214.w3", "model.layers.44.block_sparse_moe.experts.215.w3", "model.layers.44.block_sparse_moe.experts.216.w3", "model.layers.44.block_sparse_moe.experts.217.w3", "model.layers.44.block_sparse_moe.experts.218.w3", "model.layers.44.block_sparse_moe.experts.219.w3", "model.layers.44.block_sparse_moe.experts.220.w3", "model.layers.44.block_sparse_moe.experts.221.w3", "model.layers.44.block_sparse_moe.experts.222.w3", "model.layers.44.block_sparse_moe.experts.223.w3", "model.layers.44.block_sparse_moe.experts.224.w3", "model.layers.44.block_sparse_moe.experts.225.w3", "model.layers.44.block_sparse_moe.experts.226.w3", "model.layers.44.block_sparse_moe.experts.227.w3", "model.layers.44.block_sparse_moe.experts.228.w3", "model.layers.44.block_sparse_moe.experts.229.w3", "model.layers.44.block_sparse_moe.experts.230.w3", "model.layers.44.block_sparse_moe.experts.231.w3", "model.layers.44.block_sparse_moe.experts.232.w3", "model.layers.44.block_sparse_moe.experts.233.w3", "model.layers.44.block_sparse_moe.experts.234.w3", "model.layers.44.block_sparse_moe.experts.235.w3", "model.layers.44.block_sparse_moe.experts.236.w3", "model.layers.44.block_sparse_moe.experts.237.w3", "model.layers.44.block_sparse_moe.experts.238.w3", "model.layers.44.block_sparse_moe.experts.239.w3", "model.layers.44.block_sparse_moe.experts.240.w3", "model.layers.44.block_sparse_moe.experts.241.w3", "model.layers.44.block_sparse_moe.experts.242.w3", "model.layers.44.block_sparse_moe.experts.243.w3", "model.layers.44.block_sparse_moe.experts.244.w3", "model.layers.44.block_sparse_moe.experts.245.w3", "model.layers.44.block_sparse_moe.experts.246.w3", "model.layers.44.block_sparse_moe.experts.247.w3", "model.layers.44.block_sparse_moe.experts.248.w3", "model.layers.44.block_sparse_moe.experts.249.w3", "model.layers.44.block_sparse_moe.experts.250.w3", "model.layers.44.block_sparse_moe.experts.251.w3", "model.layers.44.block_sparse_moe.experts.252.w3", "model.layers.44.block_sparse_moe.experts.253.w3", "model.layers.44.block_sparse_moe.experts.254.w3", "model.layers.44.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.002359485626220681, "dbits": 2415919104 } ] }, { "idx": 224, "layers": [ "model.layers.44.block_sparse_moe.experts.0.w2", "model.layers.44.block_sparse_moe.experts.1.w2", "model.layers.44.block_sparse_moe.experts.2.w2", "model.layers.44.block_sparse_moe.experts.3.w2", "model.layers.44.block_sparse_moe.experts.4.w2", "model.layers.44.block_sparse_moe.experts.5.w2", "model.layers.44.block_sparse_moe.experts.6.w2", "model.layers.44.block_sparse_moe.experts.7.w2", "model.layers.44.block_sparse_moe.experts.8.w2", "model.layers.44.block_sparse_moe.experts.9.w2", "model.layers.44.block_sparse_moe.experts.10.w2", "model.layers.44.block_sparse_moe.experts.11.w2", "model.layers.44.block_sparse_moe.experts.12.w2", "model.layers.44.block_sparse_moe.experts.13.w2", "model.layers.44.block_sparse_moe.experts.14.w2", "model.layers.44.block_sparse_moe.experts.15.w2", "model.layers.44.block_sparse_moe.experts.16.w2", "model.layers.44.block_sparse_moe.experts.17.w2", "model.layers.44.block_sparse_moe.experts.18.w2", "model.layers.44.block_sparse_moe.experts.19.w2", "model.layers.44.block_sparse_moe.experts.20.w2", "model.layers.44.block_sparse_moe.experts.21.w2", "model.layers.44.block_sparse_moe.experts.22.w2", "model.layers.44.block_sparse_moe.experts.23.w2", "model.layers.44.block_sparse_moe.experts.24.w2", "model.layers.44.block_sparse_moe.experts.25.w2", "model.layers.44.block_sparse_moe.experts.26.w2", "model.layers.44.block_sparse_moe.experts.27.w2", "model.layers.44.block_sparse_moe.experts.28.w2", "model.layers.44.block_sparse_moe.experts.29.w2", "model.layers.44.block_sparse_moe.experts.30.w2", "model.layers.44.block_sparse_moe.experts.31.w2", "model.layers.44.block_sparse_moe.experts.32.w2", "model.layers.44.block_sparse_moe.experts.33.w2", "model.layers.44.block_sparse_moe.experts.34.w2", "model.layers.44.block_sparse_moe.experts.35.w2", "model.layers.44.block_sparse_moe.experts.36.w2", "model.layers.44.block_sparse_moe.experts.37.w2", "model.layers.44.block_sparse_moe.experts.38.w2", "model.layers.44.block_sparse_moe.experts.39.w2", "model.layers.44.block_sparse_moe.experts.40.w2", "model.layers.44.block_sparse_moe.experts.41.w2", "model.layers.44.block_sparse_moe.experts.42.w2", "model.layers.44.block_sparse_moe.experts.43.w2", "model.layers.44.block_sparse_moe.experts.44.w2", "model.layers.44.block_sparse_moe.experts.45.w2", "model.layers.44.block_sparse_moe.experts.46.w2", "model.layers.44.block_sparse_moe.experts.47.w2", "model.layers.44.block_sparse_moe.experts.48.w2", "model.layers.44.block_sparse_moe.experts.49.w2", "model.layers.44.block_sparse_moe.experts.50.w2", "model.layers.44.block_sparse_moe.experts.51.w2", "model.layers.44.block_sparse_moe.experts.52.w2", "model.layers.44.block_sparse_moe.experts.53.w2", "model.layers.44.block_sparse_moe.experts.54.w2", "model.layers.44.block_sparse_moe.experts.55.w2", "model.layers.44.block_sparse_moe.experts.56.w2", "model.layers.44.block_sparse_moe.experts.57.w2", "model.layers.44.block_sparse_moe.experts.58.w2", "model.layers.44.block_sparse_moe.experts.59.w2", "model.layers.44.block_sparse_moe.experts.60.w2", "model.layers.44.block_sparse_moe.experts.61.w2", "model.layers.44.block_sparse_moe.experts.62.w2", "model.layers.44.block_sparse_moe.experts.63.w2", "model.layers.44.block_sparse_moe.experts.64.w2", "model.layers.44.block_sparse_moe.experts.65.w2", "model.layers.44.block_sparse_moe.experts.66.w2", "model.layers.44.block_sparse_moe.experts.67.w2", "model.layers.44.block_sparse_moe.experts.68.w2", "model.layers.44.block_sparse_moe.experts.69.w2", "model.layers.44.block_sparse_moe.experts.70.w2", "model.layers.44.block_sparse_moe.experts.71.w2", "model.layers.44.block_sparse_moe.experts.72.w2", "model.layers.44.block_sparse_moe.experts.73.w2", "model.layers.44.block_sparse_moe.experts.74.w2", "model.layers.44.block_sparse_moe.experts.75.w2", "model.layers.44.block_sparse_moe.experts.76.w2", "model.layers.44.block_sparse_moe.experts.77.w2", "model.layers.44.block_sparse_moe.experts.78.w2", "model.layers.44.block_sparse_moe.experts.79.w2", "model.layers.44.block_sparse_moe.experts.80.w2", "model.layers.44.block_sparse_moe.experts.81.w2", "model.layers.44.block_sparse_moe.experts.82.w2", "model.layers.44.block_sparse_moe.experts.83.w2", "model.layers.44.block_sparse_moe.experts.84.w2", "model.layers.44.block_sparse_moe.experts.85.w2", "model.layers.44.block_sparse_moe.experts.86.w2", "model.layers.44.block_sparse_moe.experts.87.w2", "model.layers.44.block_sparse_moe.experts.88.w2", "model.layers.44.block_sparse_moe.experts.89.w2", "model.layers.44.block_sparse_moe.experts.90.w2", "model.layers.44.block_sparse_moe.experts.91.w2", "model.layers.44.block_sparse_moe.experts.92.w2", "model.layers.44.block_sparse_moe.experts.93.w2", "model.layers.44.block_sparse_moe.experts.94.w2", "model.layers.44.block_sparse_moe.experts.95.w2", "model.layers.44.block_sparse_moe.experts.96.w2", "model.layers.44.block_sparse_moe.experts.97.w2", "model.layers.44.block_sparse_moe.experts.98.w2", "model.layers.44.block_sparse_moe.experts.99.w2", "model.layers.44.block_sparse_moe.experts.100.w2", "model.layers.44.block_sparse_moe.experts.101.w2", "model.layers.44.block_sparse_moe.experts.102.w2", "model.layers.44.block_sparse_moe.experts.103.w2", "model.layers.44.block_sparse_moe.experts.104.w2", "model.layers.44.block_sparse_moe.experts.105.w2", "model.layers.44.block_sparse_moe.experts.106.w2", "model.layers.44.block_sparse_moe.experts.107.w2", "model.layers.44.block_sparse_moe.experts.108.w2", "model.layers.44.block_sparse_moe.experts.109.w2", "model.layers.44.block_sparse_moe.experts.110.w2", "model.layers.44.block_sparse_moe.experts.111.w2", "model.layers.44.block_sparse_moe.experts.112.w2", "model.layers.44.block_sparse_moe.experts.113.w2", "model.layers.44.block_sparse_moe.experts.114.w2", "model.layers.44.block_sparse_moe.experts.115.w2", "model.layers.44.block_sparse_moe.experts.116.w2", "model.layers.44.block_sparse_moe.experts.117.w2", "model.layers.44.block_sparse_moe.experts.118.w2", "model.layers.44.block_sparse_moe.experts.119.w2", "model.layers.44.block_sparse_moe.experts.120.w2", "model.layers.44.block_sparse_moe.experts.121.w2", "model.layers.44.block_sparse_moe.experts.122.w2", "model.layers.44.block_sparse_moe.experts.123.w2", "model.layers.44.block_sparse_moe.experts.124.w2", "model.layers.44.block_sparse_moe.experts.125.w2", "model.layers.44.block_sparse_moe.experts.126.w2", "model.layers.44.block_sparse_moe.experts.127.w2", "model.layers.44.block_sparse_moe.experts.128.w2", "model.layers.44.block_sparse_moe.experts.129.w2", "model.layers.44.block_sparse_moe.experts.130.w2", "model.layers.44.block_sparse_moe.experts.131.w2", "model.layers.44.block_sparse_moe.experts.132.w2", "model.layers.44.block_sparse_moe.experts.133.w2", "model.layers.44.block_sparse_moe.experts.134.w2", "model.layers.44.block_sparse_moe.experts.135.w2", "model.layers.44.block_sparse_moe.experts.136.w2", "model.layers.44.block_sparse_moe.experts.137.w2", "model.layers.44.block_sparse_moe.experts.138.w2", "model.layers.44.block_sparse_moe.experts.139.w2", "model.layers.44.block_sparse_moe.experts.140.w2", "model.layers.44.block_sparse_moe.experts.141.w2", "model.layers.44.block_sparse_moe.experts.142.w2", "model.layers.44.block_sparse_moe.experts.143.w2", "model.layers.44.block_sparse_moe.experts.144.w2", "model.layers.44.block_sparse_moe.experts.145.w2", "model.layers.44.block_sparse_moe.experts.146.w2", "model.layers.44.block_sparse_moe.experts.147.w2", "model.layers.44.block_sparse_moe.experts.148.w2", "model.layers.44.block_sparse_moe.experts.149.w2", "model.layers.44.block_sparse_moe.experts.150.w2", "model.layers.44.block_sparse_moe.experts.151.w2", "model.layers.44.block_sparse_moe.experts.152.w2", "model.layers.44.block_sparse_moe.experts.153.w2", "model.layers.44.block_sparse_moe.experts.154.w2", "model.layers.44.block_sparse_moe.experts.155.w2", "model.layers.44.block_sparse_moe.experts.156.w2", "model.layers.44.block_sparse_moe.experts.157.w2", "model.layers.44.block_sparse_moe.experts.158.w2", "model.layers.44.block_sparse_moe.experts.159.w2", "model.layers.44.block_sparse_moe.experts.160.w2", "model.layers.44.block_sparse_moe.experts.161.w2", "model.layers.44.block_sparse_moe.experts.162.w2", "model.layers.44.block_sparse_moe.experts.163.w2", "model.layers.44.block_sparse_moe.experts.164.w2", "model.layers.44.block_sparse_moe.experts.165.w2", "model.layers.44.block_sparse_moe.experts.166.w2", "model.layers.44.block_sparse_moe.experts.167.w2", "model.layers.44.block_sparse_moe.experts.168.w2", "model.layers.44.block_sparse_moe.experts.169.w2", "model.layers.44.block_sparse_moe.experts.170.w2", "model.layers.44.block_sparse_moe.experts.171.w2", "model.layers.44.block_sparse_moe.experts.172.w2", "model.layers.44.block_sparse_moe.experts.173.w2", "model.layers.44.block_sparse_moe.experts.174.w2", "model.layers.44.block_sparse_moe.experts.175.w2", "model.layers.44.block_sparse_moe.experts.176.w2", "model.layers.44.block_sparse_moe.experts.177.w2", "model.layers.44.block_sparse_moe.experts.178.w2", "model.layers.44.block_sparse_moe.experts.179.w2", "model.layers.44.block_sparse_moe.experts.180.w2", "model.layers.44.block_sparse_moe.experts.181.w2", "model.layers.44.block_sparse_moe.experts.182.w2", "model.layers.44.block_sparse_moe.experts.183.w2", "model.layers.44.block_sparse_moe.experts.184.w2", "model.layers.44.block_sparse_moe.experts.185.w2", "model.layers.44.block_sparse_moe.experts.186.w2", "model.layers.44.block_sparse_moe.experts.187.w2", "model.layers.44.block_sparse_moe.experts.188.w2", "model.layers.44.block_sparse_moe.experts.189.w2", "model.layers.44.block_sparse_moe.experts.190.w2", "model.layers.44.block_sparse_moe.experts.191.w2", "model.layers.44.block_sparse_moe.experts.192.w2", "model.layers.44.block_sparse_moe.experts.193.w2", "model.layers.44.block_sparse_moe.experts.194.w2", "model.layers.44.block_sparse_moe.experts.195.w2", "model.layers.44.block_sparse_moe.experts.196.w2", "model.layers.44.block_sparse_moe.experts.197.w2", "model.layers.44.block_sparse_moe.experts.198.w2", "model.layers.44.block_sparse_moe.experts.199.w2", "model.layers.44.block_sparse_moe.experts.200.w2", "model.layers.44.block_sparse_moe.experts.201.w2", "model.layers.44.block_sparse_moe.experts.202.w2", "model.layers.44.block_sparse_moe.experts.203.w2", "model.layers.44.block_sparse_moe.experts.204.w2", "model.layers.44.block_sparse_moe.experts.205.w2", "model.layers.44.block_sparse_moe.experts.206.w2", "model.layers.44.block_sparse_moe.experts.207.w2", "model.layers.44.block_sparse_moe.experts.208.w2", "model.layers.44.block_sparse_moe.experts.209.w2", "model.layers.44.block_sparse_moe.experts.210.w2", "model.layers.44.block_sparse_moe.experts.211.w2", "model.layers.44.block_sparse_moe.experts.212.w2", "model.layers.44.block_sparse_moe.experts.213.w2", "model.layers.44.block_sparse_moe.experts.214.w2", "model.layers.44.block_sparse_moe.experts.215.w2", "model.layers.44.block_sparse_moe.experts.216.w2", "model.layers.44.block_sparse_moe.experts.217.w2", "model.layers.44.block_sparse_moe.experts.218.w2", "model.layers.44.block_sparse_moe.experts.219.w2", "model.layers.44.block_sparse_moe.experts.220.w2", "model.layers.44.block_sparse_moe.experts.221.w2", "model.layers.44.block_sparse_moe.experts.222.w2", "model.layers.44.block_sparse_moe.experts.223.w2", "model.layers.44.block_sparse_moe.experts.224.w2", "model.layers.44.block_sparse_moe.experts.225.w2", "model.layers.44.block_sparse_moe.experts.226.w2", "model.layers.44.block_sparse_moe.experts.227.w2", "model.layers.44.block_sparse_moe.experts.228.w2", "model.layers.44.block_sparse_moe.experts.229.w2", "model.layers.44.block_sparse_moe.experts.230.w2", "model.layers.44.block_sparse_moe.experts.231.w2", "model.layers.44.block_sparse_moe.experts.232.w2", "model.layers.44.block_sparse_moe.experts.233.w2", "model.layers.44.block_sparse_moe.experts.234.w2", "model.layers.44.block_sparse_moe.experts.235.w2", "model.layers.44.block_sparse_moe.experts.236.w2", "model.layers.44.block_sparse_moe.experts.237.w2", "model.layers.44.block_sparse_moe.experts.238.w2", "model.layers.44.block_sparse_moe.experts.239.w2", "model.layers.44.block_sparse_moe.experts.240.w2", "model.layers.44.block_sparse_moe.experts.241.w2", "model.layers.44.block_sparse_moe.experts.242.w2", "model.layers.44.block_sparse_moe.experts.243.w2", "model.layers.44.block_sparse_moe.experts.244.w2", "model.layers.44.block_sparse_moe.experts.245.w2", "model.layers.44.block_sparse_moe.experts.246.w2", "model.layers.44.block_sparse_moe.experts.247.w2", "model.layers.44.block_sparse_moe.experts.248.w2", "model.layers.44.block_sparse_moe.experts.249.w2", "model.layers.44.block_sparse_moe.experts.250.w2", "model.layers.44.block_sparse_moe.experts.251.w2", "model.layers.44.block_sparse_moe.experts.252.w2", "model.layers.44.block_sparse_moe.experts.253.w2", "model.layers.44.block_sparse_moe.experts.254.w2", "model.layers.44.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0009127765893935935, "dbits": 1207959552 } ] }, { "idx": 225, "layers": [ "model.layers.45.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0008314013481141025, "dbits": 18874368 } ] }, { "idx": 226, "layers": [ "model.layers.45.self_attn.k_proj", "model.layers.45.self_attn.v_proj" ], "candidates": [ { "dkld": 0.003140833973884649, "dbits": 6291456 } ] }, { "idx": 227, "layers": [ "model.layers.45.self_attn.o_proj" ], "candidates": [ { "dkld": -0.020256379246711642, "dbits": 18874368 } ] }, { "idx": 228, "layers": [ "model.layers.45.block_sparse_moe.experts.0.w1", "model.layers.45.block_sparse_moe.experts.1.w1", "model.layers.45.block_sparse_moe.experts.2.w1", "model.layers.45.block_sparse_moe.experts.3.w1", "model.layers.45.block_sparse_moe.experts.4.w1", "model.layers.45.block_sparse_moe.experts.5.w1", "model.layers.45.block_sparse_moe.experts.6.w1", "model.layers.45.block_sparse_moe.experts.7.w1", "model.layers.45.block_sparse_moe.experts.8.w1", "model.layers.45.block_sparse_moe.experts.9.w1", "model.layers.45.block_sparse_moe.experts.10.w1", "model.layers.45.block_sparse_moe.experts.11.w1", "model.layers.45.block_sparse_moe.experts.12.w1", "model.layers.45.block_sparse_moe.experts.13.w1", "model.layers.45.block_sparse_moe.experts.14.w1", "model.layers.45.block_sparse_moe.experts.15.w1", "model.layers.45.block_sparse_moe.experts.16.w1", "model.layers.45.block_sparse_moe.experts.17.w1", "model.layers.45.block_sparse_moe.experts.18.w1", "model.layers.45.block_sparse_moe.experts.19.w1", "model.layers.45.block_sparse_moe.experts.20.w1", "model.layers.45.block_sparse_moe.experts.21.w1", "model.layers.45.block_sparse_moe.experts.22.w1", "model.layers.45.block_sparse_moe.experts.23.w1", "model.layers.45.block_sparse_moe.experts.24.w1", "model.layers.45.block_sparse_moe.experts.25.w1", "model.layers.45.block_sparse_moe.experts.26.w1", "model.layers.45.block_sparse_moe.experts.27.w1", "model.layers.45.block_sparse_moe.experts.28.w1", "model.layers.45.block_sparse_moe.experts.29.w1", "model.layers.45.block_sparse_moe.experts.30.w1", "model.layers.45.block_sparse_moe.experts.31.w1", "model.layers.45.block_sparse_moe.experts.32.w1", "model.layers.45.block_sparse_moe.experts.33.w1", "model.layers.45.block_sparse_moe.experts.34.w1", "model.layers.45.block_sparse_moe.experts.35.w1", "model.layers.45.block_sparse_moe.experts.36.w1", "model.layers.45.block_sparse_moe.experts.37.w1", "model.layers.45.block_sparse_moe.experts.38.w1", "model.layers.45.block_sparse_moe.experts.39.w1", "model.layers.45.block_sparse_moe.experts.40.w1", "model.layers.45.block_sparse_moe.experts.41.w1", "model.layers.45.block_sparse_moe.experts.42.w1", "model.layers.45.block_sparse_moe.experts.43.w1", "model.layers.45.block_sparse_moe.experts.44.w1", "model.layers.45.block_sparse_moe.experts.45.w1", "model.layers.45.block_sparse_moe.experts.46.w1", "model.layers.45.block_sparse_moe.experts.47.w1", "model.layers.45.block_sparse_moe.experts.48.w1", "model.layers.45.block_sparse_moe.experts.49.w1", "model.layers.45.block_sparse_moe.experts.50.w1", "model.layers.45.block_sparse_moe.experts.51.w1", "model.layers.45.block_sparse_moe.experts.52.w1", "model.layers.45.block_sparse_moe.experts.53.w1", "model.layers.45.block_sparse_moe.experts.54.w1", "model.layers.45.block_sparse_moe.experts.55.w1", "model.layers.45.block_sparse_moe.experts.56.w1", "model.layers.45.block_sparse_moe.experts.57.w1", "model.layers.45.block_sparse_moe.experts.58.w1", "model.layers.45.block_sparse_moe.experts.59.w1", "model.layers.45.block_sparse_moe.experts.60.w1", "model.layers.45.block_sparse_moe.experts.61.w1", "model.layers.45.block_sparse_moe.experts.62.w1", "model.layers.45.block_sparse_moe.experts.63.w1", "model.layers.45.block_sparse_moe.experts.64.w1", "model.layers.45.block_sparse_moe.experts.65.w1", "model.layers.45.block_sparse_moe.experts.66.w1", "model.layers.45.block_sparse_moe.experts.67.w1", "model.layers.45.block_sparse_moe.experts.68.w1", "model.layers.45.block_sparse_moe.experts.69.w1", "model.layers.45.block_sparse_moe.experts.70.w1", "model.layers.45.block_sparse_moe.experts.71.w1", "model.layers.45.block_sparse_moe.experts.72.w1", "model.layers.45.block_sparse_moe.experts.73.w1", "model.layers.45.block_sparse_moe.experts.74.w1", "model.layers.45.block_sparse_moe.experts.75.w1", "model.layers.45.block_sparse_moe.experts.76.w1", "model.layers.45.block_sparse_moe.experts.77.w1", "model.layers.45.block_sparse_moe.experts.78.w1", "model.layers.45.block_sparse_moe.experts.79.w1", "model.layers.45.block_sparse_moe.experts.80.w1", "model.layers.45.block_sparse_moe.experts.81.w1", "model.layers.45.block_sparse_moe.experts.82.w1", "model.layers.45.block_sparse_moe.experts.83.w1", "model.layers.45.block_sparse_moe.experts.84.w1", "model.layers.45.block_sparse_moe.experts.85.w1", "model.layers.45.block_sparse_moe.experts.86.w1", "model.layers.45.block_sparse_moe.experts.87.w1", "model.layers.45.block_sparse_moe.experts.88.w1", "model.layers.45.block_sparse_moe.experts.89.w1", "model.layers.45.block_sparse_moe.experts.90.w1", "model.layers.45.block_sparse_moe.experts.91.w1", "model.layers.45.block_sparse_moe.experts.92.w1", "model.layers.45.block_sparse_moe.experts.93.w1", "model.layers.45.block_sparse_moe.experts.94.w1", "model.layers.45.block_sparse_moe.experts.95.w1", "model.layers.45.block_sparse_moe.experts.96.w1", "model.layers.45.block_sparse_moe.experts.97.w1", "model.layers.45.block_sparse_moe.experts.98.w1", "model.layers.45.block_sparse_moe.experts.99.w1", "model.layers.45.block_sparse_moe.experts.100.w1", "model.layers.45.block_sparse_moe.experts.101.w1", "model.layers.45.block_sparse_moe.experts.102.w1", "model.layers.45.block_sparse_moe.experts.103.w1", "model.layers.45.block_sparse_moe.experts.104.w1", "model.layers.45.block_sparse_moe.experts.105.w1", "model.layers.45.block_sparse_moe.experts.106.w1", "model.layers.45.block_sparse_moe.experts.107.w1", "model.layers.45.block_sparse_moe.experts.108.w1", "model.layers.45.block_sparse_moe.experts.109.w1", "model.layers.45.block_sparse_moe.experts.110.w1", "model.layers.45.block_sparse_moe.experts.111.w1", "model.layers.45.block_sparse_moe.experts.112.w1", "model.layers.45.block_sparse_moe.experts.113.w1", "model.layers.45.block_sparse_moe.experts.114.w1", "model.layers.45.block_sparse_moe.experts.115.w1", "model.layers.45.block_sparse_moe.experts.116.w1", "model.layers.45.block_sparse_moe.experts.117.w1", "model.layers.45.block_sparse_moe.experts.118.w1", "model.layers.45.block_sparse_moe.experts.119.w1", "model.layers.45.block_sparse_moe.experts.120.w1", "model.layers.45.block_sparse_moe.experts.121.w1", "model.layers.45.block_sparse_moe.experts.122.w1", "model.layers.45.block_sparse_moe.experts.123.w1", "model.layers.45.block_sparse_moe.experts.124.w1", "model.layers.45.block_sparse_moe.experts.125.w1", "model.layers.45.block_sparse_moe.experts.126.w1", "model.layers.45.block_sparse_moe.experts.127.w1", "model.layers.45.block_sparse_moe.experts.128.w1", "model.layers.45.block_sparse_moe.experts.129.w1", "model.layers.45.block_sparse_moe.experts.130.w1", "model.layers.45.block_sparse_moe.experts.131.w1", "model.layers.45.block_sparse_moe.experts.132.w1", "model.layers.45.block_sparse_moe.experts.133.w1", "model.layers.45.block_sparse_moe.experts.134.w1", "model.layers.45.block_sparse_moe.experts.135.w1", "model.layers.45.block_sparse_moe.experts.136.w1", "model.layers.45.block_sparse_moe.experts.137.w1", "model.layers.45.block_sparse_moe.experts.138.w1", "model.layers.45.block_sparse_moe.experts.139.w1", "model.layers.45.block_sparse_moe.experts.140.w1", "model.layers.45.block_sparse_moe.experts.141.w1", "model.layers.45.block_sparse_moe.experts.142.w1", "model.layers.45.block_sparse_moe.experts.143.w1", "model.layers.45.block_sparse_moe.experts.144.w1", "model.layers.45.block_sparse_moe.experts.145.w1", "model.layers.45.block_sparse_moe.experts.146.w1", "model.layers.45.block_sparse_moe.experts.147.w1", "model.layers.45.block_sparse_moe.experts.148.w1", "model.layers.45.block_sparse_moe.experts.149.w1", "model.layers.45.block_sparse_moe.experts.150.w1", "model.layers.45.block_sparse_moe.experts.151.w1", "model.layers.45.block_sparse_moe.experts.152.w1", "model.layers.45.block_sparse_moe.experts.153.w1", "model.layers.45.block_sparse_moe.experts.154.w1", "model.layers.45.block_sparse_moe.experts.155.w1", "model.layers.45.block_sparse_moe.experts.156.w1", "model.layers.45.block_sparse_moe.experts.157.w1", "model.layers.45.block_sparse_moe.experts.158.w1", "model.layers.45.block_sparse_moe.experts.159.w1", "model.layers.45.block_sparse_moe.experts.160.w1", "model.layers.45.block_sparse_moe.experts.161.w1", "model.layers.45.block_sparse_moe.experts.162.w1", "model.layers.45.block_sparse_moe.experts.163.w1", "model.layers.45.block_sparse_moe.experts.164.w1", "model.layers.45.block_sparse_moe.experts.165.w1", "model.layers.45.block_sparse_moe.experts.166.w1", "model.layers.45.block_sparse_moe.experts.167.w1", "model.layers.45.block_sparse_moe.experts.168.w1", "model.layers.45.block_sparse_moe.experts.169.w1", "model.layers.45.block_sparse_moe.experts.170.w1", "model.layers.45.block_sparse_moe.experts.171.w1", "model.layers.45.block_sparse_moe.experts.172.w1", "model.layers.45.block_sparse_moe.experts.173.w1", "model.layers.45.block_sparse_moe.experts.174.w1", "model.layers.45.block_sparse_moe.experts.175.w1", "model.layers.45.block_sparse_moe.experts.176.w1", "model.layers.45.block_sparse_moe.experts.177.w1", "model.layers.45.block_sparse_moe.experts.178.w1", "model.layers.45.block_sparse_moe.experts.179.w1", "model.layers.45.block_sparse_moe.experts.180.w1", "model.layers.45.block_sparse_moe.experts.181.w1", "model.layers.45.block_sparse_moe.experts.182.w1", "model.layers.45.block_sparse_moe.experts.183.w1", "model.layers.45.block_sparse_moe.experts.184.w1", "model.layers.45.block_sparse_moe.experts.185.w1", "model.layers.45.block_sparse_moe.experts.186.w1", "model.layers.45.block_sparse_moe.experts.187.w1", "model.layers.45.block_sparse_moe.experts.188.w1", "model.layers.45.block_sparse_moe.experts.189.w1", "model.layers.45.block_sparse_moe.experts.190.w1", "model.layers.45.block_sparse_moe.experts.191.w1", "model.layers.45.block_sparse_moe.experts.192.w1", "model.layers.45.block_sparse_moe.experts.193.w1", "model.layers.45.block_sparse_moe.experts.194.w1", "model.layers.45.block_sparse_moe.experts.195.w1", "model.layers.45.block_sparse_moe.experts.196.w1", "model.layers.45.block_sparse_moe.experts.197.w1", "model.layers.45.block_sparse_moe.experts.198.w1", "model.layers.45.block_sparse_moe.experts.199.w1", "model.layers.45.block_sparse_moe.experts.200.w1", "model.layers.45.block_sparse_moe.experts.201.w1", "model.layers.45.block_sparse_moe.experts.202.w1", "model.layers.45.block_sparse_moe.experts.203.w1", "model.layers.45.block_sparse_moe.experts.204.w1", "model.layers.45.block_sparse_moe.experts.205.w1", "model.layers.45.block_sparse_moe.experts.206.w1", "model.layers.45.block_sparse_moe.experts.207.w1", "model.layers.45.block_sparse_moe.experts.208.w1", "model.layers.45.block_sparse_moe.experts.209.w1", "model.layers.45.block_sparse_moe.experts.210.w1", "model.layers.45.block_sparse_moe.experts.211.w1", "model.layers.45.block_sparse_moe.experts.212.w1", "model.layers.45.block_sparse_moe.experts.213.w1", "model.layers.45.block_sparse_moe.experts.214.w1", "model.layers.45.block_sparse_moe.experts.215.w1", "model.layers.45.block_sparse_moe.experts.216.w1", "model.layers.45.block_sparse_moe.experts.217.w1", "model.layers.45.block_sparse_moe.experts.218.w1", "model.layers.45.block_sparse_moe.experts.219.w1", "model.layers.45.block_sparse_moe.experts.220.w1", "model.layers.45.block_sparse_moe.experts.221.w1", "model.layers.45.block_sparse_moe.experts.222.w1", "model.layers.45.block_sparse_moe.experts.223.w1", "model.layers.45.block_sparse_moe.experts.224.w1", "model.layers.45.block_sparse_moe.experts.225.w1", "model.layers.45.block_sparse_moe.experts.226.w1", "model.layers.45.block_sparse_moe.experts.227.w1", "model.layers.45.block_sparse_moe.experts.228.w1", "model.layers.45.block_sparse_moe.experts.229.w1", "model.layers.45.block_sparse_moe.experts.230.w1", "model.layers.45.block_sparse_moe.experts.231.w1", "model.layers.45.block_sparse_moe.experts.232.w1", "model.layers.45.block_sparse_moe.experts.233.w1", "model.layers.45.block_sparse_moe.experts.234.w1", "model.layers.45.block_sparse_moe.experts.235.w1", "model.layers.45.block_sparse_moe.experts.236.w1", "model.layers.45.block_sparse_moe.experts.237.w1", "model.layers.45.block_sparse_moe.experts.238.w1", "model.layers.45.block_sparse_moe.experts.239.w1", "model.layers.45.block_sparse_moe.experts.240.w1", "model.layers.45.block_sparse_moe.experts.241.w1", "model.layers.45.block_sparse_moe.experts.242.w1", "model.layers.45.block_sparse_moe.experts.243.w1", "model.layers.45.block_sparse_moe.experts.244.w1", "model.layers.45.block_sparse_moe.experts.245.w1", "model.layers.45.block_sparse_moe.experts.246.w1", "model.layers.45.block_sparse_moe.experts.247.w1", "model.layers.45.block_sparse_moe.experts.248.w1", "model.layers.45.block_sparse_moe.experts.249.w1", "model.layers.45.block_sparse_moe.experts.250.w1", "model.layers.45.block_sparse_moe.experts.251.w1", "model.layers.45.block_sparse_moe.experts.252.w1", "model.layers.45.block_sparse_moe.experts.253.w1", "model.layers.45.block_sparse_moe.experts.254.w1", "model.layers.45.block_sparse_moe.experts.255.w1", "model.layers.45.block_sparse_moe.experts.0.w3", "model.layers.45.block_sparse_moe.experts.1.w3", "model.layers.45.block_sparse_moe.experts.2.w3", "model.layers.45.block_sparse_moe.experts.3.w3", "model.layers.45.block_sparse_moe.experts.4.w3", "model.layers.45.block_sparse_moe.experts.5.w3", "model.layers.45.block_sparse_moe.experts.6.w3", "model.layers.45.block_sparse_moe.experts.7.w3", "model.layers.45.block_sparse_moe.experts.8.w3", "model.layers.45.block_sparse_moe.experts.9.w3", "model.layers.45.block_sparse_moe.experts.10.w3", "model.layers.45.block_sparse_moe.experts.11.w3", "model.layers.45.block_sparse_moe.experts.12.w3", "model.layers.45.block_sparse_moe.experts.13.w3", "model.layers.45.block_sparse_moe.experts.14.w3", "model.layers.45.block_sparse_moe.experts.15.w3", "model.layers.45.block_sparse_moe.experts.16.w3", "model.layers.45.block_sparse_moe.experts.17.w3", "model.layers.45.block_sparse_moe.experts.18.w3", "model.layers.45.block_sparse_moe.experts.19.w3", "model.layers.45.block_sparse_moe.experts.20.w3", "model.layers.45.block_sparse_moe.experts.21.w3", "model.layers.45.block_sparse_moe.experts.22.w3", "model.layers.45.block_sparse_moe.experts.23.w3", "model.layers.45.block_sparse_moe.experts.24.w3", "model.layers.45.block_sparse_moe.experts.25.w3", "model.layers.45.block_sparse_moe.experts.26.w3", "model.layers.45.block_sparse_moe.experts.27.w3", "model.layers.45.block_sparse_moe.experts.28.w3", "model.layers.45.block_sparse_moe.experts.29.w3", "model.layers.45.block_sparse_moe.experts.30.w3", "model.layers.45.block_sparse_moe.experts.31.w3", "model.layers.45.block_sparse_moe.experts.32.w3", "model.layers.45.block_sparse_moe.experts.33.w3", "model.layers.45.block_sparse_moe.experts.34.w3", "model.layers.45.block_sparse_moe.experts.35.w3", "model.layers.45.block_sparse_moe.experts.36.w3", "model.layers.45.block_sparse_moe.experts.37.w3", "model.layers.45.block_sparse_moe.experts.38.w3", "model.layers.45.block_sparse_moe.experts.39.w3", "model.layers.45.block_sparse_moe.experts.40.w3", "model.layers.45.block_sparse_moe.experts.41.w3", "model.layers.45.block_sparse_moe.experts.42.w3", "model.layers.45.block_sparse_moe.experts.43.w3", "model.layers.45.block_sparse_moe.experts.44.w3", "model.layers.45.block_sparse_moe.experts.45.w3", "model.layers.45.block_sparse_moe.experts.46.w3", "model.layers.45.block_sparse_moe.experts.47.w3", "model.layers.45.block_sparse_moe.experts.48.w3", "model.layers.45.block_sparse_moe.experts.49.w3", "model.layers.45.block_sparse_moe.experts.50.w3", "model.layers.45.block_sparse_moe.experts.51.w3", "model.layers.45.block_sparse_moe.experts.52.w3", "model.layers.45.block_sparse_moe.experts.53.w3", "model.layers.45.block_sparse_moe.experts.54.w3", "model.layers.45.block_sparse_moe.experts.55.w3", "model.layers.45.block_sparse_moe.experts.56.w3", "model.layers.45.block_sparse_moe.experts.57.w3", "model.layers.45.block_sparse_moe.experts.58.w3", "model.layers.45.block_sparse_moe.experts.59.w3", "model.layers.45.block_sparse_moe.experts.60.w3", "model.layers.45.block_sparse_moe.experts.61.w3", "model.layers.45.block_sparse_moe.experts.62.w3", "model.layers.45.block_sparse_moe.experts.63.w3", "model.layers.45.block_sparse_moe.experts.64.w3", "model.layers.45.block_sparse_moe.experts.65.w3", "model.layers.45.block_sparse_moe.experts.66.w3", "model.layers.45.block_sparse_moe.experts.67.w3", "model.layers.45.block_sparse_moe.experts.68.w3", "model.layers.45.block_sparse_moe.experts.69.w3", "model.layers.45.block_sparse_moe.experts.70.w3", "model.layers.45.block_sparse_moe.experts.71.w3", "model.layers.45.block_sparse_moe.experts.72.w3", "model.layers.45.block_sparse_moe.experts.73.w3", "model.layers.45.block_sparse_moe.experts.74.w3", "model.layers.45.block_sparse_moe.experts.75.w3", "model.layers.45.block_sparse_moe.experts.76.w3", "model.layers.45.block_sparse_moe.experts.77.w3", "model.layers.45.block_sparse_moe.experts.78.w3", "model.layers.45.block_sparse_moe.experts.79.w3", "model.layers.45.block_sparse_moe.experts.80.w3", "model.layers.45.block_sparse_moe.experts.81.w3", "model.layers.45.block_sparse_moe.experts.82.w3", "model.layers.45.block_sparse_moe.experts.83.w3", "model.layers.45.block_sparse_moe.experts.84.w3", "model.layers.45.block_sparse_moe.experts.85.w3", "model.layers.45.block_sparse_moe.experts.86.w3", "model.layers.45.block_sparse_moe.experts.87.w3", "model.layers.45.block_sparse_moe.experts.88.w3", "model.layers.45.block_sparse_moe.experts.89.w3", "model.layers.45.block_sparse_moe.experts.90.w3", "model.layers.45.block_sparse_moe.experts.91.w3", "model.layers.45.block_sparse_moe.experts.92.w3", "model.layers.45.block_sparse_moe.experts.93.w3", "model.layers.45.block_sparse_moe.experts.94.w3", "model.layers.45.block_sparse_moe.experts.95.w3", "model.layers.45.block_sparse_moe.experts.96.w3", "model.layers.45.block_sparse_moe.experts.97.w3", "model.layers.45.block_sparse_moe.experts.98.w3", "model.layers.45.block_sparse_moe.experts.99.w3", "model.layers.45.block_sparse_moe.experts.100.w3", "model.layers.45.block_sparse_moe.experts.101.w3", "model.layers.45.block_sparse_moe.experts.102.w3", "model.layers.45.block_sparse_moe.experts.103.w3", "model.layers.45.block_sparse_moe.experts.104.w3", "model.layers.45.block_sparse_moe.experts.105.w3", "model.layers.45.block_sparse_moe.experts.106.w3", "model.layers.45.block_sparse_moe.experts.107.w3", "model.layers.45.block_sparse_moe.experts.108.w3", "model.layers.45.block_sparse_moe.experts.109.w3", "model.layers.45.block_sparse_moe.experts.110.w3", "model.layers.45.block_sparse_moe.experts.111.w3", "model.layers.45.block_sparse_moe.experts.112.w3", "model.layers.45.block_sparse_moe.experts.113.w3", "model.layers.45.block_sparse_moe.experts.114.w3", "model.layers.45.block_sparse_moe.experts.115.w3", "model.layers.45.block_sparse_moe.experts.116.w3", "model.layers.45.block_sparse_moe.experts.117.w3", "model.layers.45.block_sparse_moe.experts.118.w3", "model.layers.45.block_sparse_moe.experts.119.w3", "model.layers.45.block_sparse_moe.experts.120.w3", "model.layers.45.block_sparse_moe.experts.121.w3", "model.layers.45.block_sparse_moe.experts.122.w3", "model.layers.45.block_sparse_moe.experts.123.w3", "model.layers.45.block_sparse_moe.experts.124.w3", "model.layers.45.block_sparse_moe.experts.125.w3", "model.layers.45.block_sparse_moe.experts.126.w3", "model.layers.45.block_sparse_moe.experts.127.w3", "model.layers.45.block_sparse_moe.experts.128.w3", "model.layers.45.block_sparse_moe.experts.129.w3", "model.layers.45.block_sparse_moe.experts.130.w3", "model.layers.45.block_sparse_moe.experts.131.w3", "model.layers.45.block_sparse_moe.experts.132.w3", "model.layers.45.block_sparse_moe.experts.133.w3", "model.layers.45.block_sparse_moe.experts.134.w3", "model.layers.45.block_sparse_moe.experts.135.w3", "model.layers.45.block_sparse_moe.experts.136.w3", "model.layers.45.block_sparse_moe.experts.137.w3", "model.layers.45.block_sparse_moe.experts.138.w3", "model.layers.45.block_sparse_moe.experts.139.w3", "model.layers.45.block_sparse_moe.experts.140.w3", "model.layers.45.block_sparse_moe.experts.141.w3", "model.layers.45.block_sparse_moe.experts.142.w3", "model.layers.45.block_sparse_moe.experts.143.w3", "model.layers.45.block_sparse_moe.experts.144.w3", "model.layers.45.block_sparse_moe.experts.145.w3", "model.layers.45.block_sparse_moe.experts.146.w3", "model.layers.45.block_sparse_moe.experts.147.w3", "model.layers.45.block_sparse_moe.experts.148.w3", "model.layers.45.block_sparse_moe.experts.149.w3", "model.layers.45.block_sparse_moe.experts.150.w3", "model.layers.45.block_sparse_moe.experts.151.w3", "model.layers.45.block_sparse_moe.experts.152.w3", "model.layers.45.block_sparse_moe.experts.153.w3", "model.layers.45.block_sparse_moe.experts.154.w3", "model.layers.45.block_sparse_moe.experts.155.w3", "model.layers.45.block_sparse_moe.experts.156.w3", "model.layers.45.block_sparse_moe.experts.157.w3", "model.layers.45.block_sparse_moe.experts.158.w3", "model.layers.45.block_sparse_moe.experts.159.w3", "model.layers.45.block_sparse_moe.experts.160.w3", "model.layers.45.block_sparse_moe.experts.161.w3", "model.layers.45.block_sparse_moe.experts.162.w3", "model.layers.45.block_sparse_moe.experts.163.w3", "model.layers.45.block_sparse_moe.experts.164.w3", "model.layers.45.block_sparse_moe.experts.165.w3", "model.layers.45.block_sparse_moe.experts.166.w3", "model.layers.45.block_sparse_moe.experts.167.w3", "model.layers.45.block_sparse_moe.experts.168.w3", "model.layers.45.block_sparse_moe.experts.169.w3", "model.layers.45.block_sparse_moe.experts.170.w3", "model.layers.45.block_sparse_moe.experts.171.w3", "model.layers.45.block_sparse_moe.experts.172.w3", "model.layers.45.block_sparse_moe.experts.173.w3", "model.layers.45.block_sparse_moe.experts.174.w3", "model.layers.45.block_sparse_moe.experts.175.w3", "model.layers.45.block_sparse_moe.experts.176.w3", "model.layers.45.block_sparse_moe.experts.177.w3", "model.layers.45.block_sparse_moe.experts.178.w3", "model.layers.45.block_sparse_moe.experts.179.w3", "model.layers.45.block_sparse_moe.experts.180.w3", "model.layers.45.block_sparse_moe.experts.181.w3", "model.layers.45.block_sparse_moe.experts.182.w3", "model.layers.45.block_sparse_moe.experts.183.w3", "model.layers.45.block_sparse_moe.experts.184.w3", "model.layers.45.block_sparse_moe.experts.185.w3", "model.layers.45.block_sparse_moe.experts.186.w3", "model.layers.45.block_sparse_moe.experts.187.w3", "model.layers.45.block_sparse_moe.experts.188.w3", "model.layers.45.block_sparse_moe.experts.189.w3", "model.layers.45.block_sparse_moe.experts.190.w3", "model.layers.45.block_sparse_moe.experts.191.w3", "model.layers.45.block_sparse_moe.experts.192.w3", "model.layers.45.block_sparse_moe.experts.193.w3", "model.layers.45.block_sparse_moe.experts.194.w3", "model.layers.45.block_sparse_moe.experts.195.w3", "model.layers.45.block_sparse_moe.experts.196.w3", "model.layers.45.block_sparse_moe.experts.197.w3", "model.layers.45.block_sparse_moe.experts.198.w3", "model.layers.45.block_sparse_moe.experts.199.w3", "model.layers.45.block_sparse_moe.experts.200.w3", "model.layers.45.block_sparse_moe.experts.201.w3", "model.layers.45.block_sparse_moe.experts.202.w3", "model.layers.45.block_sparse_moe.experts.203.w3", "model.layers.45.block_sparse_moe.experts.204.w3", "model.layers.45.block_sparse_moe.experts.205.w3", "model.layers.45.block_sparse_moe.experts.206.w3", "model.layers.45.block_sparse_moe.experts.207.w3", "model.layers.45.block_sparse_moe.experts.208.w3", "model.layers.45.block_sparse_moe.experts.209.w3", "model.layers.45.block_sparse_moe.experts.210.w3", "model.layers.45.block_sparse_moe.experts.211.w3", "model.layers.45.block_sparse_moe.experts.212.w3", "model.layers.45.block_sparse_moe.experts.213.w3", "model.layers.45.block_sparse_moe.experts.214.w3", "model.layers.45.block_sparse_moe.experts.215.w3", "model.layers.45.block_sparse_moe.experts.216.w3", "model.layers.45.block_sparse_moe.experts.217.w3", "model.layers.45.block_sparse_moe.experts.218.w3", "model.layers.45.block_sparse_moe.experts.219.w3", "model.layers.45.block_sparse_moe.experts.220.w3", "model.layers.45.block_sparse_moe.experts.221.w3", "model.layers.45.block_sparse_moe.experts.222.w3", "model.layers.45.block_sparse_moe.experts.223.w3", "model.layers.45.block_sparse_moe.experts.224.w3", "model.layers.45.block_sparse_moe.experts.225.w3", "model.layers.45.block_sparse_moe.experts.226.w3", "model.layers.45.block_sparse_moe.experts.227.w3", "model.layers.45.block_sparse_moe.experts.228.w3", "model.layers.45.block_sparse_moe.experts.229.w3", "model.layers.45.block_sparse_moe.experts.230.w3", "model.layers.45.block_sparse_moe.experts.231.w3", "model.layers.45.block_sparse_moe.experts.232.w3", "model.layers.45.block_sparse_moe.experts.233.w3", "model.layers.45.block_sparse_moe.experts.234.w3", "model.layers.45.block_sparse_moe.experts.235.w3", "model.layers.45.block_sparse_moe.experts.236.w3", "model.layers.45.block_sparse_moe.experts.237.w3", "model.layers.45.block_sparse_moe.experts.238.w3", "model.layers.45.block_sparse_moe.experts.239.w3", "model.layers.45.block_sparse_moe.experts.240.w3", "model.layers.45.block_sparse_moe.experts.241.w3", "model.layers.45.block_sparse_moe.experts.242.w3", "model.layers.45.block_sparse_moe.experts.243.w3", "model.layers.45.block_sparse_moe.experts.244.w3", "model.layers.45.block_sparse_moe.experts.245.w3", "model.layers.45.block_sparse_moe.experts.246.w3", "model.layers.45.block_sparse_moe.experts.247.w3", "model.layers.45.block_sparse_moe.experts.248.w3", "model.layers.45.block_sparse_moe.experts.249.w3", "model.layers.45.block_sparse_moe.experts.250.w3", "model.layers.45.block_sparse_moe.experts.251.w3", "model.layers.45.block_sparse_moe.experts.252.w3", "model.layers.45.block_sparse_moe.experts.253.w3", "model.layers.45.block_sparse_moe.experts.254.w3", "model.layers.45.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0013523519039154719, "dbits": 2415919104 } ] }, { "idx": 229, "layers": [ "model.layers.45.block_sparse_moe.experts.0.w2", "model.layers.45.block_sparse_moe.experts.1.w2", "model.layers.45.block_sparse_moe.experts.2.w2", "model.layers.45.block_sparse_moe.experts.3.w2", "model.layers.45.block_sparse_moe.experts.4.w2", "model.layers.45.block_sparse_moe.experts.5.w2", "model.layers.45.block_sparse_moe.experts.6.w2", "model.layers.45.block_sparse_moe.experts.7.w2", "model.layers.45.block_sparse_moe.experts.8.w2", "model.layers.45.block_sparse_moe.experts.9.w2", "model.layers.45.block_sparse_moe.experts.10.w2", "model.layers.45.block_sparse_moe.experts.11.w2", "model.layers.45.block_sparse_moe.experts.12.w2", "model.layers.45.block_sparse_moe.experts.13.w2", "model.layers.45.block_sparse_moe.experts.14.w2", "model.layers.45.block_sparse_moe.experts.15.w2", "model.layers.45.block_sparse_moe.experts.16.w2", "model.layers.45.block_sparse_moe.experts.17.w2", "model.layers.45.block_sparse_moe.experts.18.w2", "model.layers.45.block_sparse_moe.experts.19.w2", "model.layers.45.block_sparse_moe.experts.20.w2", "model.layers.45.block_sparse_moe.experts.21.w2", "model.layers.45.block_sparse_moe.experts.22.w2", "model.layers.45.block_sparse_moe.experts.23.w2", "model.layers.45.block_sparse_moe.experts.24.w2", "model.layers.45.block_sparse_moe.experts.25.w2", "model.layers.45.block_sparse_moe.experts.26.w2", "model.layers.45.block_sparse_moe.experts.27.w2", "model.layers.45.block_sparse_moe.experts.28.w2", "model.layers.45.block_sparse_moe.experts.29.w2", "model.layers.45.block_sparse_moe.experts.30.w2", "model.layers.45.block_sparse_moe.experts.31.w2", "model.layers.45.block_sparse_moe.experts.32.w2", "model.layers.45.block_sparse_moe.experts.33.w2", "model.layers.45.block_sparse_moe.experts.34.w2", "model.layers.45.block_sparse_moe.experts.35.w2", "model.layers.45.block_sparse_moe.experts.36.w2", "model.layers.45.block_sparse_moe.experts.37.w2", "model.layers.45.block_sparse_moe.experts.38.w2", "model.layers.45.block_sparse_moe.experts.39.w2", "model.layers.45.block_sparse_moe.experts.40.w2", "model.layers.45.block_sparse_moe.experts.41.w2", "model.layers.45.block_sparse_moe.experts.42.w2", "model.layers.45.block_sparse_moe.experts.43.w2", "model.layers.45.block_sparse_moe.experts.44.w2", "model.layers.45.block_sparse_moe.experts.45.w2", "model.layers.45.block_sparse_moe.experts.46.w2", "model.layers.45.block_sparse_moe.experts.47.w2", "model.layers.45.block_sparse_moe.experts.48.w2", "model.layers.45.block_sparse_moe.experts.49.w2", "model.layers.45.block_sparse_moe.experts.50.w2", "model.layers.45.block_sparse_moe.experts.51.w2", "model.layers.45.block_sparse_moe.experts.52.w2", "model.layers.45.block_sparse_moe.experts.53.w2", "model.layers.45.block_sparse_moe.experts.54.w2", "model.layers.45.block_sparse_moe.experts.55.w2", "model.layers.45.block_sparse_moe.experts.56.w2", "model.layers.45.block_sparse_moe.experts.57.w2", "model.layers.45.block_sparse_moe.experts.58.w2", "model.layers.45.block_sparse_moe.experts.59.w2", "model.layers.45.block_sparse_moe.experts.60.w2", "model.layers.45.block_sparse_moe.experts.61.w2", "model.layers.45.block_sparse_moe.experts.62.w2", "model.layers.45.block_sparse_moe.experts.63.w2", "model.layers.45.block_sparse_moe.experts.64.w2", "model.layers.45.block_sparse_moe.experts.65.w2", "model.layers.45.block_sparse_moe.experts.66.w2", "model.layers.45.block_sparse_moe.experts.67.w2", "model.layers.45.block_sparse_moe.experts.68.w2", "model.layers.45.block_sparse_moe.experts.69.w2", "model.layers.45.block_sparse_moe.experts.70.w2", "model.layers.45.block_sparse_moe.experts.71.w2", "model.layers.45.block_sparse_moe.experts.72.w2", "model.layers.45.block_sparse_moe.experts.73.w2", "model.layers.45.block_sparse_moe.experts.74.w2", "model.layers.45.block_sparse_moe.experts.75.w2", "model.layers.45.block_sparse_moe.experts.76.w2", "model.layers.45.block_sparse_moe.experts.77.w2", "model.layers.45.block_sparse_moe.experts.78.w2", "model.layers.45.block_sparse_moe.experts.79.w2", "model.layers.45.block_sparse_moe.experts.80.w2", "model.layers.45.block_sparse_moe.experts.81.w2", "model.layers.45.block_sparse_moe.experts.82.w2", "model.layers.45.block_sparse_moe.experts.83.w2", "model.layers.45.block_sparse_moe.experts.84.w2", "model.layers.45.block_sparse_moe.experts.85.w2", "model.layers.45.block_sparse_moe.experts.86.w2", "model.layers.45.block_sparse_moe.experts.87.w2", "model.layers.45.block_sparse_moe.experts.88.w2", "model.layers.45.block_sparse_moe.experts.89.w2", "model.layers.45.block_sparse_moe.experts.90.w2", "model.layers.45.block_sparse_moe.experts.91.w2", "model.layers.45.block_sparse_moe.experts.92.w2", "model.layers.45.block_sparse_moe.experts.93.w2", "model.layers.45.block_sparse_moe.experts.94.w2", "model.layers.45.block_sparse_moe.experts.95.w2", "model.layers.45.block_sparse_moe.experts.96.w2", "model.layers.45.block_sparse_moe.experts.97.w2", "model.layers.45.block_sparse_moe.experts.98.w2", "model.layers.45.block_sparse_moe.experts.99.w2", "model.layers.45.block_sparse_moe.experts.100.w2", "model.layers.45.block_sparse_moe.experts.101.w2", "model.layers.45.block_sparse_moe.experts.102.w2", "model.layers.45.block_sparse_moe.experts.103.w2", "model.layers.45.block_sparse_moe.experts.104.w2", "model.layers.45.block_sparse_moe.experts.105.w2", "model.layers.45.block_sparse_moe.experts.106.w2", "model.layers.45.block_sparse_moe.experts.107.w2", "model.layers.45.block_sparse_moe.experts.108.w2", "model.layers.45.block_sparse_moe.experts.109.w2", "model.layers.45.block_sparse_moe.experts.110.w2", "model.layers.45.block_sparse_moe.experts.111.w2", "model.layers.45.block_sparse_moe.experts.112.w2", "model.layers.45.block_sparse_moe.experts.113.w2", "model.layers.45.block_sparse_moe.experts.114.w2", "model.layers.45.block_sparse_moe.experts.115.w2", "model.layers.45.block_sparse_moe.experts.116.w2", "model.layers.45.block_sparse_moe.experts.117.w2", "model.layers.45.block_sparse_moe.experts.118.w2", "model.layers.45.block_sparse_moe.experts.119.w2", "model.layers.45.block_sparse_moe.experts.120.w2", "model.layers.45.block_sparse_moe.experts.121.w2", "model.layers.45.block_sparse_moe.experts.122.w2", "model.layers.45.block_sparse_moe.experts.123.w2", "model.layers.45.block_sparse_moe.experts.124.w2", "model.layers.45.block_sparse_moe.experts.125.w2", "model.layers.45.block_sparse_moe.experts.126.w2", "model.layers.45.block_sparse_moe.experts.127.w2", "model.layers.45.block_sparse_moe.experts.128.w2", "model.layers.45.block_sparse_moe.experts.129.w2", "model.layers.45.block_sparse_moe.experts.130.w2", "model.layers.45.block_sparse_moe.experts.131.w2", "model.layers.45.block_sparse_moe.experts.132.w2", "model.layers.45.block_sparse_moe.experts.133.w2", "model.layers.45.block_sparse_moe.experts.134.w2", "model.layers.45.block_sparse_moe.experts.135.w2", "model.layers.45.block_sparse_moe.experts.136.w2", "model.layers.45.block_sparse_moe.experts.137.w2", "model.layers.45.block_sparse_moe.experts.138.w2", "model.layers.45.block_sparse_moe.experts.139.w2", "model.layers.45.block_sparse_moe.experts.140.w2", "model.layers.45.block_sparse_moe.experts.141.w2", "model.layers.45.block_sparse_moe.experts.142.w2", "model.layers.45.block_sparse_moe.experts.143.w2", "model.layers.45.block_sparse_moe.experts.144.w2", "model.layers.45.block_sparse_moe.experts.145.w2", "model.layers.45.block_sparse_moe.experts.146.w2", "model.layers.45.block_sparse_moe.experts.147.w2", "model.layers.45.block_sparse_moe.experts.148.w2", "model.layers.45.block_sparse_moe.experts.149.w2", "model.layers.45.block_sparse_moe.experts.150.w2", "model.layers.45.block_sparse_moe.experts.151.w2", "model.layers.45.block_sparse_moe.experts.152.w2", "model.layers.45.block_sparse_moe.experts.153.w2", "model.layers.45.block_sparse_moe.experts.154.w2", "model.layers.45.block_sparse_moe.experts.155.w2", "model.layers.45.block_sparse_moe.experts.156.w2", "model.layers.45.block_sparse_moe.experts.157.w2", "model.layers.45.block_sparse_moe.experts.158.w2", "model.layers.45.block_sparse_moe.experts.159.w2", "model.layers.45.block_sparse_moe.experts.160.w2", "model.layers.45.block_sparse_moe.experts.161.w2", "model.layers.45.block_sparse_moe.experts.162.w2", "model.layers.45.block_sparse_moe.experts.163.w2", "model.layers.45.block_sparse_moe.experts.164.w2", "model.layers.45.block_sparse_moe.experts.165.w2", "model.layers.45.block_sparse_moe.experts.166.w2", "model.layers.45.block_sparse_moe.experts.167.w2", "model.layers.45.block_sparse_moe.experts.168.w2", "model.layers.45.block_sparse_moe.experts.169.w2", "model.layers.45.block_sparse_moe.experts.170.w2", "model.layers.45.block_sparse_moe.experts.171.w2", "model.layers.45.block_sparse_moe.experts.172.w2", "model.layers.45.block_sparse_moe.experts.173.w2", "model.layers.45.block_sparse_moe.experts.174.w2", "model.layers.45.block_sparse_moe.experts.175.w2", "model.layers.45.block_sparse_moe.experts.176.w2", "model.layers.45.block_sparse_moe.experts.177.w2", "model.layers.45.block_sparse_moe.experts.178.w2", "model.layers.45.block_sparse_moe.experts.179.w2", "model.layers.45.block_sparse_moe.experts.180.w2", "model.layers.45.block_sparse_moe.experts.181.w2", "model.layers.45.block_sparse_moe.experts.182.w2", "model.layers.45.block_sparse_moe.experts.183.w2", "model.layers.45.block_sparse_moe.experts.184.w2", "model.layers.45.block_sparse_moe.experts.185.w2", "model.layers.45.block_sparse_moe.experts.186.w2", "model.layers.45.block_sparse_moe.experts.187.w2", "model.layers.45.block_sparse_moe.experts.188.w2", "model.layers.45.block_sparse_moe.experts.189.w2", "model.layers.45.block_sparse_moe.experts.190.w2", "model.layers.45.block_sparse_moe.experts.191.w2", "model.layers.45.block_sparse_moe.experts.192.w2", "model.layers.45.block_sparse_moe.experts.193.w2", "model.layers.45.block_sparse_moe.experts.194.w2", "model.layers.45.block_sparse_moe.experts.195.w2", "model.layers.45.block_sparse_moe.experts.196.w2", "model.layers.45.block_sparse_moe.experts.197.w2", "model.layers.45.block_sparse_moe.experts.198.w2", "model.layers.45.block_sparse_moe.experts.199.w2", "model.layers.45.block_sparse_moe.experts.200.w2", "model.layers.45.block_sparse_moe.experts.201.w2", "model.layers.45.block_sparse_moe.experts.202.w2", "model.layers.45.block_sparse_moe.experts.203.w2", "model.layers.45.block_sparse_moe.experts.204.w2", "model.layers.45.block_sparse_moe.experts.205.w2", "model.layers.45.block_sparse_moe.experts.206.w2", "model.layers.45.block_sparse_moe.experts.207.w2", "model.layers.45.block_sparse_moe.experts.208.w2", "model.layers.45.block_sparse_moe.experts.209.w2", "model.layers.45.block_sparse_moe.experts.210.w2", "model.layers.45.block_sparse_moe.experts.211.w2", "model.layers.45.block_sparse_moe.experts.212.w2", "model.layers.45.block_sparse_moe.experts.213.w2", "model.layers.45.block_sparse_moe.experts.214.w2", "model.layers.45.block_sparse_moe.experts.215.w2", "model.layers.45.block_sparse_moe.experts.216.w2", "model.layers.45.block_sparse_moe.experts.217.w2", "model.layers.45.block_sparse_moe.experts.218.w2", "model.layers.45.block_sparse_moe.experts.219.w2", "model.layers.45.block_sparse_moe.experts.220.w2", "model.layers.45.block_sparse_moe.experts.221.w2", "model.layers.45.block_sparse_moe.experts.222.w2", "model.layers.45.block_sparse_moe.experts.223.w2", "model.layers.45.block_sparse_moe.experts.224.w2", "model.layers.45.block_sparse_moe.experts.225.w2", "model.layers.45.block_sparse_moe.experts.226.w2", "model.layers.45.block_sparse_moe.experts.227.w2", "model.layers.45.block_sparse_moe.experts.228.w2", "model.layers.45.block_sparse_moe.experts.229.w2", "model.layers.45.block_sparse_moe.experts.230.w2", "model.layers.45.block_sparse_moe.experts.231.w2", "model.layers.45.block_sparse_moe.experts.232.w2", "model.layers.45.block_sparse_moe.experts.233.w2", "model.layers.45.block_sparse_moe.experts.234.w2", "model.layers.45.block_sparse_moe.experts.235.w2", "model.layers.45.block_sparse_moe.experts.236.w2", "model.layers.45.block_sparse_moe.experts.237.w2", "model.layers.45.block_sparse_moe.experts.238.w2", "model.layers.45.block_sparse_moe.experts.239.w2", "model.layers.45.block_sparse_moe.experts.240.w2", "model.layers.45.block_sparse_moe.experts.241.w2", "model.layers.45.block_sparse_moe.experts.242.w2", "model.layers.45.block_sparse_moe.experts.243.w2", "model.layers.45.block_sparse_moe.experts.244.w2", "model.layers.45.block_sparse_moe.experts.245.w2", "model.layers.45.block_sparse_moe.experts.246.w2", "model.layers.45.block_sparse_moe.experts.247.w2", "model.layers.45.block_sparse_moe.experts.248.w2", "model.layers.45.block_sparse_moe.experts.249.w2", "model.layers.45.block_sparse_moe.experts.250.w2", "model.layers.45.block_sparse_moe.experts.251.w2", "model.layers.45.block_sparse_moe.experts.252.w2", "model.layers.45.block_sparse_moe.experts.253.w2", "model.layers.45.block_sparse_moe.experts.254.w2", "model.layers.45.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0023768424987792303, "dbits": 1207959552 } ] }, { "idx": 230, "layers": [ "model.layers.46.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0008905112743377019, "dbits": 18874368 } ] }, { "idx": 231, "layers": [ "model.layers.46.self_attn.k_proj", "model.layers.46.self_attn.v_proj" ], "candidates": [ { "dkld": 0.008187428116798401, "dbits": 6291456 } ] }, { "idx": 232, "layers": [ "model.layers.46.self_attn.o_proj" ], "candidates": [ { "dkld": -0.008838728070259094, "dbits": 18874368 } ] }, { "idx": 233, "layers": [ "model.layers.46.block_sparse_moe.experts.0.w1", "model.layers.46.block_sparse_moe.experts.1.w1", "model.layers.46.block_sparse_moe.experts.2.w1", "model.layers.46.block_sparse_moe.experts.3.w1", "model.layers.46.block_sparse_moe.experts.4.w1", "model.layers.46.block_sparse_moe.experts.5.w1", "model.layers.46.block_sparse_moe.experts.6.w1", "model.layers.46.block_sparse_moe.experts.7.w1", "model.layers.46.block_sparse_moe.experts.8.w1", "model.layers.46.block_sparse_moe.experts.9.w1", "model.layers.46.block_sparse_moe.experts.10.w1", "model.layers.46.block_sparse_moe.experts.11.w1", "model.layers.46.block_sparse_moe.experts.12.w1", "model.layers.46.block_sparse_moe.experts.13.w1", "model.layers.46.block_sparse_moe.experts.14.w1", "model.layers.46.block_sparse_moe.experts.15.w1", "model.layers.46.block_sparse_moe.experts.16.w1", "model.layers.46.block_sparse_moe.experts.17.w1", "model.layers.46.block_sparse_moe.experts.18.w1", "model.layers.46.block_sparse_moe.experts.19.w1", "model.layers.46.block_sparse_moe.experts.20.w1", "model.layers.46.block_sparse_moe.experts.21.w1", "model.layers.46.block_sparse_moe.experts.22.w1", "model.layers.46.block_sparse_moe.experts.23.w1", "model.layers.46.block_sparse_moe.experts.24.w1", "model.layers.46.block_sparse_moe.experts.25.w1", "model.layers.46.block_sparse_moe.experts.26.w1", "model.layers.46.block_sparse_moe.experts.27.w1", "model.layers.46.block_sparse_moe.experts.28.w1", "model.layers.46.block_sparse_moe.experts.29.w1", "model.layers.46.block_sparse_moe.experts.30.w1", "model.layers.46.block_sparse_moe.experts.31.w1", "model.layers.46.block_sparse_moe.experts.32.w1", "model.layers.46.block_sparse_moe.experts.33.w1", "model.layers.46.block_sparse_moe.experts.34.w1", "model.layers.46.block_sparse_moe.experts.35.w1", "model.layers.46.block_sparse_moe.experts.36.w1", "model.layers.46.block_sparse_moe.experts.37.w1", "model.layers.46.block_sparse_moe.experts.38.w1", "model.layers.46.block_sparse_moe.experts.39.w1", "model.layers.46.block_sparse_moe.experts.40.w1", "model.layers.46.block_sparse_moe.experts.41.w1", "model.layers.46.block_sparse_moe.experts.42.w1", "model.layers.46.block_sparse_moe.experts.43.w1", "model.layers.46.block_sparse_moe.experts.44.w1", "model.layers.46.block_sparse_moe.experts.45.w1", "model.layers.46.block_sparse_moe.experts.46.w1", "model.layers.46.block_sparse_moe.experts.47.w1", "model.layers.46.block_sparse_moe.experts.48.w1", "model.layers.46.block_sparse_moe.experts.49.w1", "model.layers.46.block_sparse_moe.experts.50.w1", "model.layers.46.block_sparse_moe.experts.51.w1", "model.layers.46.block_sparse_moe.experts.52.w1", "model.layers.46.block_sparse_moe.experts.53.w1", "model.layers.46.block_sparse_moe.experts.54.w1", "model.layers.46.block_sparse_moe.experts.55.w1", "model.layers.46.block_sparse_moe.experts.56.w1", "model.layers.46.block_sparse_moe.experts.57.w1", "model.layers.46.block_sparse_moe.experts.58.w1", "model.layers.46.block_sparse_moe.experts.59.w1", "model.layers.46.block_sparse_moe.experts.60.w1", "model.layers.46.block_sparse_moe.experts.61.w1", "model.layers.46.block_sparse_moe.experts.62.w1", "model.layers.46.block_sparse_moe.experts.63.w1", "model.layers.46.block_sparse_moe.experts.64.w1", "model.layers.46.block_sparse_moe.experts.65.w1", "model.layers.46.block_sparse_moe.experts.66.w1", "model.layers.46.block_sparse_moe.experts.67.w1", "model.layers.46.block_sparse_moe.experts.68.w1", "model.layers.46.block_sparse_moe.experts.69.w1", "model.layers.46.block_sparse_moe.experts.70.w1", "model.layers.46.block_sparse_moe.experts.71.w1", "model.layers.46.block_sparse_moe.experts.72.w1", "model.layers.46.block_sparse_moe.experts.73.w1", "model.layers.46.block_sparse_moe.experts.74.w1", "model.layers.46.block_sparse_moe.experts.75.w1", "model.layers.46.block_sparse_moe.experts.76.w1", "model.layers.46.block_sparse_moe.experts.77.w1", "model.layers.46.block_sparse_moe.experts.78.w1", "model.layers.46.block_sparse_moe.experts.79.w1", "model.layers.46.block_sparse_moe.experts.80.w1", "model.layers.46.block_sparse_moe.experts.81.w1", "model.layers.46.block_sparse_moe.experts.82.w1", "model.layers.46.block_sparse_moe.experts.83.w1", "model.layers.46.block_sparse_moe.experts.84.w1", "model.layers.46.block_sparse_moe.experts.85.w1", "model.layers.46.block_sparse_moe.experts.86.w1", "model.layers.46.block_sparse_moe.experts.87.w1", "model.layers.46.block_sparse_moe.experts.88.w1", "model.layers.46.block_sparse_moe.experts.89.w1", "model.layers.46.block_sparse_moe.experts.90.w1", "model.layers.46.block_sparse_moe.experts.91.w1", "model.layers.46.block_sparse_moe.experts.92.w1", "model.layers.46.block_sparse_moe.experts.93.w1", "model.layers.46.block_sparse_moe.experts.94.w1", "model.layers.46.block_sparse_moe.experts.95.w1", "model.layers.46.block_sparse_moe.experts.96.w1", "model.layers.46.block_sparse_moe.experts.97.w1", "model.layers.46.block_sparse_moe.experts.98.w1", "model.layers.46.block_sparse_moe.experts.99.w1", "model.layers.46.block_sparse_moe.experts.100.w1", "model.layers.46.block_sparse_moe.experts.101.w1", "model.layers.46.block_sparse_moe.experts.102.w1", "model.layers.46.block_sparse_moe.experts.103.w1", "model.layers.46.block_sparse_moe.experts.104.w1", "model.layers.46.block_sparse_moe.experts.105.w1", "model.layers.46.block_sparse_moe.experts.106.w1", "model.layers.46.block_sparse_moe.experts.107.w1", "model.layers.46.block_sparse_moe.experts.108.w1", "model.layers.46.block_sparse_moe.experts.109.w1", "model.layers.46.block_sparse_moe.experts.110.w1", "model.layers.46.block_sparse_moe.experts.111.w1", "model.layers.46.block_sparse_moe.experts.112.w1", "model.layers.46.block_sparse_moe.experts.113.w1", "model.layers.46.block_sparse_moe.experts.114.w1", "model.layers.46.block_sparse_moe.experts.115.w1", "model.layers.46.block_sparse_moe.experts.116.w1", "model.layers.46.block_sparse_moe.experts.117.w1", "model.layers.46.block_sparse_moe.experts.118.w1", "model.layers.46.block_sparse_moe.experts.119.w1", "model.layers.46.block_sparse_moe.experts.120.w1", "model.layers.46.block_sparse_moe.experts.121.w1", "model.layers.46.block_sparse_moe.experts.122.w1", "model.layers.46.block_sparse_moe.experts.123.w1", "model.layers.46.block_sparse_moe.experts.124.w1", "model.layers.46.block_sparse_moe.experts.125.w1", "model.layers.46.block_sparse_moe.experts.126.w1", "model.layers.46.block_sparse_moe.experts.127.w1", "model.layers.46.block_sparse_moe.experts.128.w1", "model.layers.46.block_sparse_moe.experts.129.w1", "model.layers.46.block_sparse_moe.experts.130.w1", "model.layers.46.block_sparse_moe.experts.131.w1", "model.layers.46.block_sparse_moe.experts.132.w1", "model.layers.46.block_sparse_moe.experts.133.w1", "model.layers.46.block_sparse_moe.experts.134.w1", "model.layers.46.block_sparse_moe.experts.135.w1", "model.layers.46.block_sparse_moe.experts.136.w1", "model.layers.46.block_sparse_moe.experts.137.w1", "model.layers.46.block_sparse_moe.experts.138.w1", "model.layers.46.block_sparse_moe.experts.139.w1", "model.layers.46.block_sparse_moe.experts.140.w1", "model.layers.46.block_sparse_moe.experts.141.w1", "model.layers.46.block_sparse_moe.experts.142.w1", "model.layers.46.block_sparse_moe.experts.143.w1", "model.layers.46.block_sparse_moe.experts.144.w1", "model.layers.46.block_sparse_moe.experts.145.w1", "model.layers.46.block_sparse_moe.experts.146.w1", "model.layers.46.block_sparse_moe.experts.147.w1", "model.layers.46.block_sparse_moe.experts.148.w1", "model.layers.46.block_sparse_moe.experts.149.w1", "model.layers.46.block_sparse_moe.experts.150.w1", "model.layers.46.block_sparse_moe.experts.151.w1", "model.layers.46.block_sparse_moe.experts.152.w1", "model.layers.46.block_sparse_moe.experts.153.w1", "model.layers.46.block_sparse_moe.experts.154.w1", "model.layers.46.block_sparse_moe.experts.155.w1", "model.layers.46.block_sparse_moe.experts.156.w1", "model.layers.46.block_sparse_moe.experts.157.w1", "model.layers.46.block_sparse_moe.experts.158.w1", "model.layers.46.block_sparse_moe.experts.159.w1", "model.layers.46.block_sparse_moe.experts.160.w1", "model.layers.46.block_sparse_moe.experts.161.w1", "model.layers.46.block_sparse_moe.experts.162.w1", "model.layers.46.block_sparse_moe.experts.163.w1", "model.layers.46.block_sparse_moe.experts.164.w1", "model.layers.46.block_sparse_moe.experts.165.w1", "model.layers.46.block_sparse_moe.experts.166.w1", "model.layers.46.block_sparse_moe.experts.167.w1", "model.layers.46.block_sparse_moe.experts.168.w1", "model.layers.46.block_sparse_moe.experts.169.w1", "model.layers.46.block_sparse_moe.experts.170.w1", "model.layers.46.block_sparse_moe.experts.171.w1", "model.layers.46.block_sparse_moe.experts.172.w1", "model.layers.46.block_sparse_moe.experts.173.w1", "model.layers.46.block_sparse_moe.experts.174.w1", "model.layers.46.block_sparse_moe.experts.175.w1", "model.layers.46.block_sparse_moe.experts.176.w1", "model.layers.46.block_sparse_moe.experts.177.w1", "model.layers.46.block_sparse_moe.experts.178.w1", "model.layers.46.block_sparse_moe.experts.179.w1", "model.layers.46.block_sparse_moe.experts.180.w1", "model.layers.46.block_sparse_moe.experts.181.w1", "model.layers.46.block_sparse_moe.experts.182.w1", "model.layers.46.block_sparse_moe.experts.183.w1", "model.layers.46.block_sparse_moe.experts.184.w1", "model.layers.46.block_sparse_moe.experts.185.w1", "model.layers.46.block_sparse_moe.experts.186.w1", "model.layers.46.block_sparse_moe.experts.187.w1", "model.layers.46.block_sparse_moe.experts.188.w1", "model.layers.46.block_sparse_moe.experts.189.w1", "model.layers.46.block_sparse_moe.experts.190.w1", "model.layers.46.block_sparse_moe.experts.191.w1", "model.layers.46.block_sparse_moe.experts.192.w1", "model.layers.46.block_sparse_moe.experts.193.w1", "model.layers.46.block_sparse_moe.experts.194.w1", "model.layers.46.block_sparse_moe.experts.195.w1", "model.layers.46.block_sparse_moe.experts.196.w1", "model.layers.46.block_sparse_moe.experts.197.w1", "model.layers.46.block_sparse_moe.experts.198.w1", "model.layers.46.block_sparse_moe.experts.199.w1", "model.layers.46.block_sparse_moe.experts.200.w1", "model.layers.46.block_sparse_moe.experts.201.w1", "model.layers.46.block_sparse_moe.experts.202.w1", "model.layers.46.block_sparse_moe.experts.203.w1", "model.layers.46.block_sparse_moe.experts.204.w1", "model.layers.46.block_sparse_moe.experts.205.w1", "model.layers.46.block_sparse_moe.experts.206.w1", "model.layers.46.block_sparse_moe.experts.207.w1", "model.layers.46.block_sparse_moe.experts.208.w1", "model.layers.46.block_sparse_moe.experts.209.w1", "model.layers.46.block_sparse_moe.experts.210.w1", "model.layers.46.block_sparse_moe.experts.211.w1", "model.layers.46.block_sparse_moe.experts.212.w1", "model.layers.46.block_sparse_moe.experts.213.w1", "model.layers.46.block_sparse_moe.experts.214.w1", "model.layers.46.block_sparse_moe.experts.215.w1", "model.layers.46.block_sparse_moe.experts.216.w1", "model.layers.46.block_sparse_moe.experts.217.w1", "model.layers.46.block_sparse_moe.experts.218.w1", "model.layers.46.block_sparse_moe.experts.219.w1", "model.layers.46.block_sparse_moe.experts.220.w1", "model.layers.46.block_sparse_moe.experts.221.w1", "model.layers.46.block_sparse_moe.experts.222.w1", "model.layers.46.block_sparse_moe.experts.223.w1", "model.layers.46.block_sparse_moe.experts.224.w1", "model.layers.46.block_sparse_moe.experts.225.w1", "model.layers.46.block_sparse_moe.experts.226.w1", "model.layers.46.block_sparse_moe.experts.227.w1", "model.layers.46.block_sparse_moe.experts.228.w1", "model.layers.46.block_sparse_moe.experts.229.w1", "model.layers.46.block_sparse_moe.experts.230.w1", "model.layers.46.block_sparse_moe.experts.231.w1", "model.layers.46.block_sparse_moe.experts.232.w1", "model.layers.46.block_sparse_moe.experts.233.w1", "model.layers.46.block_sparse_moe.experts.234.w1", "model.layers.46.block_sparse_moe.experts.235.w1", "model.layers.46.block_sparse_moe.experts.236.w1", "model.layers.46.block_sparse_moe.experts.237.w1", "model.layers.46.block_sparse_moe.experts.238.w1", "model.layers.46.block_sparse_moe.experts.239.w1", "model.layers.46.block_sparse_moe.experts.240.w1", "model.layers.46.block_sparse_moe.experts.241.w1", "model.layers.46.block_sparse_moe.experts.242.w1", "model.layers.46.block_sparse_moe.experts.243.w1", "model.layers.46.block_sparse_moe.experts.244.w1", "model.layers.46.block_sparse_moe.experts.245.w1", "model.layers.46.block_sparse_moe.experts.246.w1", "model.layers.46.block_sparse_moe.experts.247.w1", "model.layers.46.block_sparse_moe.experts.248.w1", "model.layers.46.block_sparse_moe.experts.249.w1", "model.layers.46.block_sparse_moe.experts.250.w1", "model.layers.46.block_sparse_moe.experts.251.w1", "model.layers.46.block_sparse_moe.experts.252.w1", "model.layers.46.block_sparse_moe.experts.253.w1", "model.layers.46.block_sparse_moe.experts.254.w1", "model.layers.46.block_sparse_moe.experts.255.w1", "model.layers.46.block_sparse_moe.experts.0.w3", "model.layers.46.block_sparse_moe.experts.1.w3", "model.layers.46.block_sparse_moe.experts.2.w3", "model.layers.46.block_sparse_moe.experts.3.w3", "model.layers.46.block_sparse_moe.experts.4.w3", "model.layers.46.block_sparse_moe.experts.5.w3", "model.layers.46.block_sparse_moe.experts.6.w3", "model.layers.46.block_sparse_moe.experts.7.w3", "model.layers.46.block_sparse_moe.experts.8.w3", "model.layers.46.block_sparse_moe.experts.9.w3", "model.layers.46.block_sparse_moe.experts.10.w3", "model.layers.46.block_sparse_moe.experts.11.w3", "model.layers.46.block_sparse_moe.experts.12.w3", "model.layers.46.block_sparse_moe.experts.13.w3", "model.layers.46.block_sparse_moe.experts.14.w3", "model.layers.46.block_sparse_moe.experts.15.w3", "model.layers.46.block_sparse_moe.experts.16.w3", "model.layers.46.block_sparse_moe.experts.17.w3", "model.layers.46.block_sparse_moe.experts.18.w3", "model.layers.46.block_sparse_moe.experts.19.w3", "model.layers.46.block_sparse_moe.experts.20.w3", "model.layers.46.block_sparse_moe.experts.21.w3", "model.layers.46.block_sparse_moe.experts.22.w3", "model.layers.46.block_sparse_moe.experts.23.w3", "model.layers.46.block_sparse_moe.experts.24.w3", "model.layers.46.block_sparse_moe.experts.25.w3", "model.layers.46.block_sparse_moe.experts.26.w3", "model.layers.46.block_sparse_moe.experts.27.w3", "model.layers.46.block_sparse_moe.experts.28.w3", "model.layers.46.block_sparse_moe.experts.29.w3", "model.layers.46.block_sparse_moe.experts.30.w3", "model.layers.46.block_sparse_moe.experts.31.w3", "model.layers.46.block_sparse_moe.experts.32.w3", "model.layers.46.block_sparse_moe.experts.33.w3", "model.layers.46.block_sparse_moe.experts.34.w3", "model.layers.46.block_sparse_moe.experts.35.w3", "model.layers.46.block_sparse_moe.experts.36.w3", "model.layers.46.block_sparse_moe.experts.37.w3", "model.layers.46.block_sparse_moe.experts.38.w3", "model.layers.46.block_sparse_moe.experts.39.w3", "model.layers.46.block_sparse_moe.experts.40.w3", "model.layers.46.block_sparse_moe.experts.41.w3", "model.layers.46.block_sparse_moe.experts.42.w3", "model.layers.46.block_sparse_moe.experts.43.w3", "model.layers.46.block_sparse_moe.experts.44.w3", "model.layers.46.block_sparse_moe.experts.45.w3", "model.layers.46.block_sparse_moe.experts.46.w3", "model.layers.46.block_sparse_moe.experts.47.w3", "model.layers.46.block_sparse_moe.experts.48.w3", "model.layers.46.block_sparse_moe.experts.49.w3", "model.layers.46.block_sparse_moe.experts.50.w3", "model.layers.46.block_sparse_moe.experts.51.w3", "model.layers.46.block_sparse_moe.experts.52.w3", "model.layers.46.block_sparse_moe.experts.53.w3", "model.layers.46.block_sparse_moe.experts.54.w3", "model.layers.46.block_sparse_moe.experts.55.w3", "model.layers.46.block_sparse_moe.experts.56.w3", "model.layers.46.block_sparse_moe.experts.57.w3", "model.layers.46.block_sparse_moe.experts.58.w3", "model.layers.46.block_sparse_moe.experts.59.w3", "model.layers.46.block_sparse_moe.experts.60.w3", "model.layers.46.block_sparse_moe.experts.61.w3", "model.layers.46.block_sparse_moe.experts.62.w3", "model.layers.46.block_sparse_moe.experts.63.w3", "model.layers.46.block_sparse_moe.experts.64.w3", "model.layers.46.block_sparse_moe.experts.65.w3", "model.layers.46.block_sparse_moe.experts.66.w3", "model.layers.46.block_sparse_moe.experts.67.w3", "model.layers.46.block_sparse_moe.experts.68.w3", "model.layers.46.block_sparse_moe.experts.69.w3", "model.layers.46.block_sparse_moe.experts.70.w3", "model.layers.46.block_sparse_moe.experts.71.w3", "model.layers.46.block_sparse_moe.experts.72.w3", "model.layers.46.block_sparse_moe.experts.73.w3", "model.layers.46.block_sparse_moe.experts.74.w3", "model.layers.46.block_sparse_moe.experts.75.w3", "model.layers.46.block_sparse_moe.experts.76.w3", "model.layers.46.block_sparse_moe.experts.77.w3", "model.layers.46.block_sparse_moe.experts.78.w3", "model.layers.46.block_sparse_moe.experts.79.w3", "model.layers.46.block_sparse_moe.experts.80.w3", "model.layers.46.block_sparse_moe.experts.81.w3", "model.layers.46.block_sparse_moe.experts.82.w3", "model.layers.46.block_sparse_moe.experts.83.w3", "model.layers.46.block_sparse_moe.experts.84.w3", "model.layers.46.block_sparse_moe.experts.85.w3", "model.layers.46.block_sparse_moe.experts.86.w3", "model.layers.46.block_sparse_moe.experts.87.w3", "model.layers.46.block_sparse_moe.experts.88.w3", "model.layers.46.block_sparse_moe.experts.89.w3", "model.layers.46.block_sparse_moe.experts.90.w3", "model.layers.46.block_sparse_moe.experts.91.w3", "model.layers.46.block_sparse_moe.experts.92.w3", "model.layers.46.block_sparse_moe.experts.93.w3", "model.layers.46.block_sparse_moe.experts.94.w3", "model.layers.46.block_sparse_moe.experts.95.w3", "model.layers.46.block_sparse_moe.experts.96.w3", "model.layers.46.block_sparse_moe.experts.97.w3", "model.layers.46.block_sparse_moe.experts.98.w3", "model.layers.46.block_sparse_moe.experts.99.w3", "model.layers.46.block_sparse_moe.experts.100.w3", "model.layers.46.block_sparse_moe.experts.101.w3", "model.layers.46.block_sparse_moe.experts.102.w3", "model.layers.46.block_sparse_moe.experts.103.w3", "model.layers.46.block_sparse_moe.experts.104.w3", "model.layers.46.block_sparse_moe.experts.105.w3", "model.layers.46.block_sparse_moe.experts.106.w3", "model.layers.46.block_sparse_moe.experts.107.w3", "model.layers.46.block_sparse_moe.experts.108.w3", "model.layers.46.block_sparse_moe.experts.109.w3", "model.layers.46.block_sparse_moe.experts.110.w3", "model.layers.46.block_sparse_moe.experts.111.w3", "model.layers.46.block_sparse_moe.experts.112.w3", "model.layers.46.block_sparse_moe.experts.113.w3", "model.layers.46.block_sparse_moe.experts.114.w3", "model.layers.46.block_sparse_moe.experts.115.w3", "model.layers.46.block_sparse_moe.experts.116.w3", "model.layers.46.block_sparse_moe.experts.117.w3", "model.layers.46.block_sparse_moe.experts.118.w3", "model.layers.46.block_sparse_moe.experts.119.w3", "model.layers.46.block_sparse_moe.experts.120.w3", "model.layers.46.block_sparse_moe.experts.121.w3", "model.layers.46.block_sparse_moe.experts.122.w3", "model.layers.46.block_sparse_moe.experts.123.w3", "model.layers.46.block_sparse_moe.experts.124.w3", "model.layers.46.block_sparse_moe.experts.125.w3", "model.layers.46.block_sparse_moe.experts.126.w3", "model.layers.46.block_sparse_moe.experts.127.w3", "model.layers.46.block_sparse_moe.experts.128.w3", "model.layers.46.block_sparse_moe.experts.129.w3", "model.layers.46.block_sparse_moe.experts.130.w3", "model.layers.46.block_sparse_moe.experts.131.w3", "model.layers.46.block_sparse_moe.experts.132.w3", "model.layers.46.block_sparse_moe.experts.133.w3", "model.layers.46.block_sparse_moe.experts.134.w3", "model.layers.46.block_sparse_moe.experts.135.w3", "model.layers.46.block_sparse_moe.experts.136.w3", "model.layers.46.block_sparse_moe.experts.137.w3", "model.layers.46.block_sparse_moe.experts.138.w3", "model.layers.46.block_sparse_moe.experts.139.w3", "model.layers.46.block_sparse_moe.experts.140.w3", "model.layers.46.block_sparse_moe.experts.141.w3", "model.layers.46.block_sparse_moe.experts.142.w3", "model.layers.46.block_sparse_moe.experts.143.w3", "model.layers.46.block_sparse_moe.experts.144.w3", "model.layers.46.block_sparse_moe.experts.145.w3", "model.layers.46.block_sparse_moe.experts.146.w3", "model.layers.46.block_sparse_moe.experts.147.w3", "model.layers.46.block_sparse_moe.experts.148.w3", "model.layers.46.block_sparse_moe.experts.149.w3", "model.layers.46.block_sparse_moe.experts.150.w3", "model.layers.46.block_sparse_moe.experts.151.w3", "model.layers.46.block_sparse_moe.experts.152.w3", "model.layers.46.block_sparse_moe.experts.153.w3", "model.layers.46.block_sparse_moe.experts.154.w3", "model.layers.46.block_sparse_moe.experts.155.w3", "model.layers.46.block_sparse_moe.experts.156.w3", "model.layers.46.block_sparse_moe.experts.157.w3", "model.layers.46.block_sparse_moe.experts.158.w3", "model.layers.46.block_sparse_moe.experts.159.w3", "model.layers.46.block_sparse_moe.experts.160.w3", "model.layers.46.block_sparse_moe.experts.161.w3", "model.layers.46.block_sparse_moe.experts.162.w3", "model.layers.46.block_sparse_moe.experts.163.w3", "model.layers.46.block_sparse_moe.experts.164.w3", "model.layers.46.block_sparse_moe.experts.165.w3", "model.layers.46.block_sparse_moe.experts.166.w3", "model.layers.46.block_sparse_moe.experts.167.w3", "model.layers.46.block_sparse_moe.experts.168.w3", "model.layers.46.block_sparse_moe.experts.169.w3", "model.layers.46.block_sparse_moe.experts.170.w3", "model.layers.46.block_sparse_moe.experts.171.w3", "model.layers.46.block_sparse_moe.experts.172.w3", "model.layers.46.block_sparse_moe.experts.173.w3", "model.layers.46.block_sparse_moe.experts.174.w3", "model.layers.46.block_sparse_moe.experts.175.w3", "model.layers.46.block_sparse_moe.experts.176.w3", "model.layers.46.block_sparse_moe.experts.177.w3", "model.layers.46.block_sparse_moe.experts.178.w3", "model.layers.46.block_sparse_moe.experts.179.w3", "model.layers.46.block_sparse_moe.experts.180.w3", "model.layers.46.block_sparse_moe.experts.181.w3", "model.layers.46.block_sparse_moe.experts.182.w3", "model.layers.46.block_sparse_moe.experts.183.w3", "model.layers.46.block_sparse_moe.experts.184.w3", "model.layers.46.block_sparse_moe.experts.185.w3", "model.layers.46.block_sparse_moe.experts.186.w3", "model.layers.46.block_sparse_moe.experts.187.w3", "model.layers.46.block_sparse_moe.experts.188.w3", "model.layers.46.block_sparse_moe.experts.189.w3", "model.layers.46.block_sparse_moe.experts.190.w3", "model.layers.46.block_sparse_moe.experts.191.w3", "model.layers.46.block_sparse_moe.experts.192.w3", "model.layers.46.block_sparse_moe.experts.193.w3", "model.layers.46.block_sparse_moe.experts.194.w3", "model.layers.46.block_sparse_moe.experts.195.w3", "model.layers.46.block_sparse_moe.experts.196.w3", "model.layers.46.block_sparse_moe.experts.197.w3", "model.layers.46.block_sparse_moe.experts.198.w3", "model.layers.46.block_sparse_moe.experts.199.w3", "model.layers.46.block_sparse_moe.experts.200.w3", "model.layers.46.block_sparse_moe.experts.201.w3", "model.layers.46.block_sparse_moe.experts.202.w3", "model.layers.46.block_sparse_moe.experts.203.w3", "model.layers.46.block_sparse_moe.experts.204.w3", "model.layers.46.block_sparse_moe.experts.205.w3", "model.layers.46.block_sparse_moe.experts.206.w3", "model.layers.46.block_sparse_moe.experts.207.w3", "model.layers.46.block_sparse_moe.experts.208.w3", "model.layers.46.block_sparse_moe.experts.209.w3", "model.layers.46.block_sparse_moe.experts.210.w3", "model.layers.46.block_sparse_moe.experts.211.w3", "model.layers.46.block_sparse_moe.experts.212.w3", "model.layers.46.block_sparse_moe.experts.213.w3", "model.layers.46.block_sparse_moe.experts.214.w3", "model.layers.46.block_sparse_moe.experts.215.w3", "model.layers.46.block_sparse_moe.experts.216.w3", "model.layers.46.block_sparse_moe.experts.217.w3", "model.layers.46.block_sparse_moe.experts.218.w3", "model.layers.46.block_sparse_moe.experts.219.w3", "model.layers.46.block_sparse_moe.experts.220.w3", "model.layers.46.block_sparse_moe.experts.221.w3", "model.layers.46.block_sparse_moe.experts.222.w3", "model.layers.46.block_sparse_moe.experts.223.w3", "model.layers.46.block_sparse_moe.experts.224.w3", "model.layers.46.block_sparse_moe.experts.225.w3", "model.layers.46.block_sparse_moe.experts.226.w3", "model.layers.46.block_sparse_moe.experts.227.w3", "model.layers.46.block_sparse_moe.experts.228.w3", "model.layers.46.block_sparse_moe.experts.229.w3", "model.layers.46.block_sparse_moe.experts.230.w3", "model.layers.46.block_sparse_moe.experts.231.w3", "model.layers.46.block_sparse_moe.experts.232.w3", "model.layers.46.block_sparse_moe.experts.233.w3", "model.layers.46.block_sparse_moe.experts.234.w3", "model.layers.46.block_sparse_moe.experts.235.w3", "model.layers.46.block_sparse_moe.experts.236.w3", "model.layers.46.block_sparse_moe.experts.237.w3", "model.layers.46.block_sparse_moe.experts.238.w3", "model.layers.46.block_sparse_moe.experts.239.w3", "model.layers.46.block_sparse_moe.experts.240.w3", "model.layers.46.block_sparse_moe.experts.241.w3", "model.layers.46.block_sparse_moe.experts.242.w3", "model.layers.46.block_sparse_moe.experts.243.w3", "model.layers.46.block_sparse_moe.experts.244.w3", "model.layers.46.block_sparse_moe.experts.245.w3", "model.layers.46.block_sparse_moe.experts.246.w3", "model.layers.46.block_sparse_moe.experts.247.w3", "model.layers.46.block_sparse_moe.experts.248.w3", "model.layers.46.block_sparse_moe.experts.249.w3", "model.layers.46.block_sparse_moe.experts.250.w3", "model.layers.46.block_sparse_moe.experts.251.w3", "model.layers.46.block_sparse_moe.experts.252.w3", "model.layers.46.block_sparse_moe.experts.253.w3", "model.layers.46.block_sparse_moe.experts.254.w3", "model.layers.46.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0015209972858428733, "dbits": 2415919104 } ] }, { "idx": 234, "layers": [ "model.layers.46.block_sparse_moe.experts.0.w2", "model.layers.46.block_sparse_moe.experts.1.w2", "model.layers.46.block_sparse_moe.experts.2.w2", "model.layers.46.block_sparse_moe.experts.3.w2", "model.layers.46.block_sparse_moe.experts.4.w2", "model.layers.46.block_sparse_moe.experts.5.w2", "model.layers.46.block_sparse_moe.experts.6.w2", "model.layers.46.block_sparse_moe.experts.7.w2", "model.layers.46.block_sparse_moe.experts.8.w2", "model.layers.46.block_sparse_moe.experts.9.w2", "model.layers.46.block_sparse_moe.experts.10.w2", "model.layers.46.block_sparse_moe.experts.11.w2", "model.layers.46.block_sparse_moe.experts.12.w2", "model.layers.46.block_sparse_moe.experts.13.w2", "model.layers.46.block_sparse_moe.experts.14.w2", "model.layers.46.block_sparse_moe.experts.15.w2", "model.layers.46.block_sparse_moe.experts.16.w2", "model.layers.46.block_sparse_moe.experts.17.w2", "model.layers.46.block_sparse_moe.experts.18.w2", "model.layers.46.block_sparse_moe.experts.19.w2", "model.layers.46.block_sparse_moe.experts.20.w2", "model.layers.46.block_sparse_moe.experts.21.w2", "model.layers.46.block_sparse_moe.experts.22.w2", "model.layers.46.block_sparse_moe.experts.23.w2", "model.layers.46.block_sparse_moe.experts.24.w2", "model.layers.46.block_sparse_moe.experts.25.w2", "model.layers.46.block_sparse_moe.experts.26.w2", "model.layers.46.block_sparse_moe.experts.27.w2", "model.layers.46.block_sparse_moe.experts.28.w2", "model.layers.46.block_sparse_moe.experts.29.w2", "model.layers.46.block_sparse_moe.experts.30.w2", "model.layers.46.block_sparse_moe.experts.31.w2", "model.layers.46.block_sparse_moe.experts.32.w2", "model.layers.46.block_sparse_moe.experts.33.w2", "model.layers.46.block_sparse_moe.experts.34.w2", "model.layers.46.block_sparse_moe.experts.35.w2", "model.layers.46.block_sparse_moe.experts.36.w2", "model.layers.46.block_sparse_moe.experts.37.w2", "model.layers.46.block_sparse_moe.experts.38.w2", "model.layers.46.block_sparse_moe.experts.39.w2", "model.layers.46.block_sparse_moe.experts.40.w2", "model.layers.46.block_sparse_moe.experts.41.w2", "model.layers.46.block_sparse_moe.experts.42.w2", "model.layers.46.block_sparse_moe.experts.43.w2", "model.layers.46.block_sparse_moe.experts.44.w2", "model.layers.46.block_sparse_moe.experts.45.w2", "model.layers.46.block_sparse_moe.experts.46.w2", "model.layers.46.block_sparse_moe.experts.47.w2", "model.layers.46.block_sparse_moe.experts.48.w2", "model.layers.46.block_sparse_moe.experts.49.w2", "model.layers.46.block_sparse_moe.experts.50.w2", "model.layers.46.block_sparse_moe.experts.51.w2", "model.layers.46.block_sparse_moe.experts.52.w2", "model.layers.46.block_sparse_moe.experts.53.w2", "model.layers.46.block_sparse_moe.experts.54.w2", "model.layers.46.block_sparse_moe.experts.55.w2", "model.layers.46.block_sparse_moe.experts.56.w2", "model.layers.46.block_sparse_moe.experts.57.w2", "model.layers.46.block_sparse_moe.experts.58.w2", "model.layers.46.block_sparse_moe.experts.59.w2", "model.layers.46.block_sparse_moe.experts.60.w2", "model.layers.46.block_sparse_moe.experts.61.w2", "model.layers.46.block_sparse_moe.experts.62.w2", "model.layers.46.block_sparse_moe.experts.63.w2", "model.layers.46.block_sparse_moe.experts.64.w2", "model.layers.46.block_sparse_moe.experts.65.w2", "model.layers.46.block_sparse_moe.experts.66.w2", "model.layers.46.block_sparse_moe.experts.67.w2", "model.layers.46.block_sparse_moe.experts.68.w2", "model.layers.46.block_sparse_moe.experts.69.w2", "model.layers.46.block_sparse_moe.experts.70.w2", "model.layers.46.block_sparse_moe.experts.71.w2", "model.layers.46.block_sparse_moe.experts.72.w2", "model.layers.46.block_sparse_moe.experts.73.w2", "model.layers.46.block_sparse_moe.experts.74.w2", "model.layers.46.block_sparse_moe.experts.75.w2", "model.layers.46.block_sparse_moe.experts.76.w2", "model.layers.46.block_sparse_moe.experts.77.w2", "model.layers.46.block_sparse_moe.experts.78.w2", "model.layers.46.block_sparse_moe.experts.79.w2", "model.layers.46.block_sparse_moe.experts.80.w2", "model.layers.46.block_sparse_moe.experts.81.w2", "model.layers.46.block_sparse_moe.experts.82.w2", "model.layers.46.block_sparse_moe.experts.83.w2", "model.layers.46.block_sparse_moe.experts.84.w2", "model.layers.46.block_sparse_moe.experts.85.w2", "model.layers.46.block_sparse_moe.experts.86.w2", "model.layers.46.block_sparse_moe.experts.87.w2", "model.layers.46.block_sparse_moe.experts.88.w2", "model.layers.46.block_sparse_moe.experts.89.w2", "model.layers.46.block_sparse_moe.experts.90.w2", "model.layers.46.block_sparse_moe.experts.91.w2", "model.layers.46.block_sparse_moe.experts.92.w2", "model.layers.46.block_sparse_moe.experts.93.w2", "model.layers.46.block_sparse_moe.experts.94.w2", "model.layers.46.block_sparse_moe.experts.95.w2", "model.layers.46.block_sparse_moe.experts.96.w2", "model.layers.46.block_sparse_moe.experts.97.w2", "model.layers.46.block_sparse_moe.experts.98.w2", "model.layers.46.block_sparse_moe.experts.99.w2", "model.layers.46.block_sparse_moe.experts.100.w2", "model.layers.46.block_sparse_moe.experts.101.w2", "model.layers.46.block_sparse_moe.experts.102.w2", "model.layers.46.block_sparse_moe.experts.103.w2", "model.layers.46.block_sparse_moe.experts.104.w2", "model.layers.46.block_sparse_moe.experts.105.w2", "model.layers.46.block_sparse_moe.experts.106.w2", "model.layers.46.block_sparse_moe.experts.107.w2", "model.layers.46.block_sparse_moe.experts.108.w2", "model.layers.46.block_sparse_moe.experts.109.w2", "model.layers.46.block_sparse_moe.experts.110.w2", "model.layers.46.block_sparse_moe.experts.111.w2", "model.layers.46.block_sparse_moe.experts.112.w2", "model.layers.46.block_sparse_moe.experts.113.w2", "model.layers.46.block_sparse_moe.experts.114.w2", "model.layers.46.block_sparse_moe.experts.115.w2", "model.layers.46.block_sparse_moe.experts.116.w2", "model.layers.46.block_sparse_moe.experts.117.w2", "model.layers.46.block_sparse_moe.experts.118.w2", "model.layers.46.block_sparse_moe.experts.119.w2", "model.layers.46.block_sparse_moe.experts.120.w2", "model.layers.46.block_sparse_moe.experts.121.w2", "model.layers.46.block_sparse_moe.experts.122.w2", "model.layers.46.block_sparse_moe.experts.123.w2", "model.layers.46.block_sparse_moe.experts.124.w2", "model.layers.46.block_sparse_moe.experts.125.w2", "model.layers.46.block_sparse_moe.experts.126.w2", "model.layers.46.block_sparse_moe.experts.127.w2", "model.layers.46.block_sparse_moe.experts.128.w2", "model.layers.46.block_sparse_moe.experts.129.w2", "model.layers.46.block_sparse_moe.experts.130.w2", "model.layers.46.block_sparse_moe.experts.131.w2", "model.layers.46.block_sparse_moe.experts.132.w2", "model.layers.46.block_sparse_moe.experts.133.w2", "model.layers.46.block_sparse_moe.experts.134.w2", "model.layers.46.block_sparse_moe.experts.135.w2", "model.layers.46.block_sparse_moe.experts.136.w2", "model.layers.46.block_sparse_moe.experts.137.w2", "model.layers.46.block_sparse_moe.experts.138.w2", "model.layers.46.block_sparse_moe.experts.139.w2", "model.layers.46.block_sparse_moe.experts.140.w2", "model.layers.46.block_sparse_moe.experts.141.w2", "model.layers.46.block_sparse_moe.experts.142.w2", "model.layers.46.block_sparse_moe.experts.143.w2", "model.layers.46.block_sparse_moe.experts.144.w2", "model.layers.46.block_sparse_moe.experts.145.w2", "model.layers.46.block_sparse_moe.experts.146.w2", "model.layers.46.block_sparse_moe.experts.147.w2", "model.layers.46.block_sparse_moe.experts.148.w2", "model.layers.46.block_sparse_moe.experts.149.w2", "model.layers.46.block_sparse_moe.experts.150.w2", "model.layers.46.block_sparse_moe.experts.151.w2", "model.layers.46.block_sparse_moe.experts.152.w2", "model.layers.46.block_sparse_moe.experts.153.w2", "model.layers.46.block_sparse_moe.experts.154.w2", "model.layers.46.block_sparse_moe.experts.155.w2", "model.layers.46.block_sparse_moe.experts.156.w2", "model.layers.46.block_sparse_moe.experts.157.w2", "model.layers.46.block_sparse_moe.experts.158.w2", "model.layers.46.block_sparse_moe.experts.159.w2", "model.layers.46.block_sparse_moe.experts.160.w2", "model.layers.46.block_sparse_moe.experts.161.w2", "model.layers.46.block_sparse_moe.experts.162.w2", "model.layers.46.block_sparse_moe.experts.163.w2", "model.layers.46.block_sparse_moe.experts.164.w2", "model.layers.46.block_sparse_moe.experts.165.w2", "model.layers.46.block_sparse_moe.experts.166.w2", "model.layers.46.block_sparse_moe.experts.167.w2", "model.layers.46.block_sparse_moe.experts.168.w2", "model.layers.46.block_sparse_moe.experts.169.w2", "model.layers.46.block_sparse_moe.experts.170.w2", "model.layers.46.block_sparse_moe.experts.171.w2", "model.layers.46.block_sparse_moe.experts.172.w2", "model.layers.46.block_sparse_moe.experts.173.w2", "model.layers.46.block_sparse_moe.experts.174.w2", "model.layers.46.block_sparse_moe.experts.175.w2", "model.layers.46.block_sparse_moe.experts.176.w2", "model.layers.46.block_sparse_moe.experts.177.w2", "model.layers.46.block_sparse_moe.experts.178.w2", "model.layers.46.block_sparse_moe.experts.179.w2", "model.layers.46.block_sparse_moe.experts.180.w2", "model.layers.46.block_sparse_moe.experts.181.w2", "model.layers.46.block_sparse_moe.experts.182.w2", "model.layers.46.block_sparse_moe.experts.183.w2", "model.layers.46.block_sparse_moe.experts.184.w2", "model.layers.46.block_sparse_moe.experts.185.w2", "model.layers.46.block_sparse_moe.experts.186.w2", "model.layers.46.block_sparse_moe.experts.187.w2", "model.layers.46.block_sparse_moe.experts.188.w2", "model.layers.46.block_sparse_moe.experts.189.w2", "model.layers.46.block_sparse_moe.experts.190.w2", "model.layers.46.block_sparse_moe.experts.191.w2", "model.layers.46.block_sparse_moe.experts.192.w2", "model.layers.46.block_sparse_moe.experts.193.w2", "model.layers.46.block_sparse_moe.experts.194.w2", "model.layers.46.block_sparse_moe.experts.195.w2", "model.layers.46.block_sparse_moe.experts.196.w2", "model.layers.46.block_sparse_moe.experts.197.w2", "model.layers.46.block_sparse_moe.experts.198.w2", "model.layers.46.block_sparse_moe.experts.199.w2", "model.layers.46.block_sparse_moe.experts.200.w2", "model.layers.46.block_sparse_moe.experts.201.w2", "model.layers.46.block_sparse_moe.experts.202.w2", "model.layers.46.block_sparse_moe.experts.203.w2", "model.layers.46.block_sparse_moe.experts.204.w2", "model.layers.46.block_sparse_moe.experts.205.w2", "model.layers.46.block_sparse_moe.experts.206.w2", "model.layers.46.block_sparse_moe.experts.207.w2", "model.layers.46.block_sparse_moe.experts.208.w2", "model.layers.46.block_sparse_moe.experts.209.w2", "model.layers.46.block_sparse_moe.experts.210.w2", "model.layers.46.block_sparse_moe.experts.211.w2", "model.layers.46.block_sparse_moe.experts.212.w2", "model.layers.46.block_sparse_moe.experts.213.w2", "model.layers.46.block_sparse_moe.experts.214.w2", "model.layers.46.block_sparse_moe.experts.215.w2", "model.layers.46.block_sparse_moe.experts.216.w2", "model.layers.46.block_sparse_moe.experts.217.w2", "model.layers.46.block_sparse_moe.experts.218.w2", "model.layers.46.block_sparse_moe.experts.219.w2", "model.layers.46.block_sparse_moe.experts.220.w2", "model.layers.46.block_sparse_moe.experts.221.w2", "model.layers.46.block_sparse_moe.experts.222.w2", "model.layers.46.block_sparse_moe.experts.223.w2", "model.layers.46.block_sparse_moe.experts.224.w2", "model.layers.46.block_sparse_moe.experts.225.w2", "model.layers.46.block_sparse_moe.experts.226.w2", "model.layers.46.block_sparse_moe.experts.227.w2", "model.layers.46.block_sparse_moe.experts.228.w2", "model.layers.46.block_sparse_moe.experts.229.w2", "model.layers.46.block_sparse_moe.experts.230.w2", "model.layers.46.block_sparse_moe.experts.231.w2", "model.layers.46.block_sparse_moe.experts.232.w2", "model.layers.46.block_sparse_moe.experts.233.w2", "model.layers.46.block_sparse_moe.experts.234.w2", "model.layers.46.block_sparse_moe.experts.235.w2", "model.layers.46.block_sparse_moe.experts.236.w2", "model.layers.46.block_sparse_moe.experts.237.w2", "model.layers.46.block_sparse_moe.experts.238.w2", "model.layers.46.block_sparse_moe.experts.239.w2", "model.layers.46.block_sparse_moe.experts.240.w2", "model.layers.46.block_sparse_moe.experts.241.w2", "model.layers.46.block_sparse_moe.experts.242.w2", "model.layers.46.block_sparse_moe.experts.243.w2", "model.layers.46.block_sparse_moe.experts.244.w2", "model.layers.46.block_sparse_moe.experts.245.w2", "model.layers.46.block_sparse_moe.experts.246.w2", "model.layers.46.block_sparse_moe.experts.247.w2", "model.layers.46.block_sparse_moe.experts.248.w2", "model.layers.46.block_sparse_moe.experts.249.w2", "model.layers.46.block_sparse_moe.experts.250.w2", "model.layers.46.block_sparse_moe.experts.251.w2", "model.layers.46.block_sparse_moe.experts.252.w2", "model.layers.46.block_sparse_moe.experts.253.w2", "model.layers.46.block_sparse_moe.experts.254.w2", "model.layers.46.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0008784443140029685, "dbits": 1207959552 } ] }, { "idx": 235, "layers": [ "model.layers.47.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0018350780010223389, "dbits": 18874368 } ] }, { "idx": 236, "layers": [ "model.layers.47.self_attn.k_proj", "model.layers.47.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00040260255336765916, "dbits": 6291456 } ] }, { "idx": 237, "layers": [ "model.layers.47.self_attn.o_proj" ], "candidates": [ { "dkld": -0.007858583331108004, "dbits": 18874368 } ] }, { "idx": 238, "layers": [ "model.layers.47.block_sparse_moe.experts.0.w1", "model.layers.47.block_sparse_moe.experts.1.w1", "model.layers.47.block_sparse_moe.experts.2.w1", "model.layers.47.block_sparse_moe.experts.3.w1", "model.layers.47.block_sparse_moe.experts.4.w1", "model.layers.47.block_sparse_moe.experts.5.w1", "model.layers.47.block_sparse_moe.experts.6.w1", "model.layers.47.block_sparse_moe.experts.7.w1", "model.layers.47.block_sparse_moe.experts.8.w1", "model.layers.47.block_sparse_moe.experts.9.w1", "model.layers.47.block_sparse_moe.experts.10.w1", "model.layers.47.block_sparse_moe.experts.11.w1", "model.layers.47.block_sparse_moe.experts.12.w1", "model.layers.47.block_sparse_moe.experts.13.w1", "model.layers.47.block_sparse_moe.experts.14.w1", "model.layers.47.block_sparse_moe.experts.15.w1", "model.layers.47.block_sparse_moe.experts.16.w1", "model.layers.47.block_sparse_moe.experts.17.w1", "model.layers.47.block_sparse_moe.experts.18.w1", "model.layers.47.block_sparse_moe.experts.19.w1", "model.layers.47.block_sparse_moe.experts.20.w1", "model.layers.47.block_sparse_moe.experts.21.w1", "model.layers.47.block_sparse_moe.experts.22.w1", "model.layers.47.block_sparse_moe.experts.23.w1", "model.layers.47.block_sparse_moe.experts.24.w1", "model.layers.47.block_sparse_moe.experts.25.w1", "model.layers.47.block_sparse_moe.experts.26.w1", "model.layers.47.block_sparse_moe.experts.27.w1", "model.layers.47.block_sparse_moe.experts.28.w1", "model.layers.47.block_sparse_moe.experts.29.w1", "model.layers.47.block_sparse_moe.experts.30.w1", "model.layers.47.block_sparse_moe.experts.31.w1", "model.layers.47.block_sparse_moe.experts.32.w1", "model.layers.47.block_sparse_moe.experts.33.w1", "model.layers.47.block_sparse_moe.experts.34.w1", "model.layers.47.block_sparse_moe.experts.35.w1", "model.layers.47.block_sparse_moe.experts.36.w1", "model.layers.47.block_sparse_moe.experts.37.w1", "model.layers.47.block_sparse_moe.experts.38.w1", "model.layers.47.block_sparse_moe.experts.39.w1", "model.layers.47.block_sparse_moe.experts.40.w1", "model.layers.47.block_sparse_moe.experts.41.w1", "model.layers.47.block_sparse_moe.experts.42.w1", "model.layers.47.block_sparse_moe.experts.43.w1", "model.layers.47.block_sparse_moe.experts.44.w1", "model.layers.47.block_sparse_moe.experts.45.w1", "model.layers.47.block_sparse_moe.experts.46.w1", "model.layers.47.block_sparse_moe.experts.47.w1", "model.layers.47.block_sparse_moe.experts.48.w1", "model.layers.47.block_sparse_moe.experts.49.w1", "model.layers.47.block_sparse_moe.experts.50.w1", "model.layers.47.block_sparse_moe.experts.51.w1", "model.layers.47.block_sparse_moe.experts.52.w1", "model.layers.47.block_sparse_moe.experts.53.w1", "model.layers.47.block_sparse_moe.experts.54.w1", "model.layers.47.block_sparse_moe.experts.55.w1", "model.layers.47.block_sparse_moe.experts.56.w1", "model.layers.47.block_sparse_moe.experts.57.w1", "model.layers.47.block_sparse_moe.experts.58.w1", "model.layers.47.block_sparse_moe.experts.59.w1", "model.layers.47.block_sparse_moe.experts.60.w1", "model.layers.47.block_sparse_moe.experts.61.w1", "model.layers.47.block_sparse_moe.experts.62.w1", "model.layers.47.block_sparse_moe.experts.63.w1", "model.layers.47.block_sparse_moe.experts.64.w1", "model.layers.47.block_sparse_moe.experts.65.w1", "model.layers.47.block_sparse_moe.experts.66.w1", "model.layers.47.block_sparse_moe.experts.67.w1", "model.layers.47.block_sparse_moe.experts.68.w1", "model.layers.47.block_sparse_moe.experts.69.w1", "model.layers.47.block_sparse_moe.experts.70.w1", "model.layers.47.block_sparse_moe.experts.71.w1", "model.layers.47.block_sparse_moe.experts.72.w1", "model.layers.47.block_sparse_moe.experts.73.w1", "model.layers.47.block_sparse_moe.experts.74.w1", "model.layers.47.block_sparse_moe.experts.75.w1", "model.layers.47.block_sparse_moe.experts.76.w1", "model.layers.47.block_sparse_moe.experts.77.w1", "model.layers.47.block_sparse_moe.experts.78.w1", "model.layers.47.block_sparse_moe.experts.79.w1", "model.layers.47.block_sparse_moe.experts.80.w1", "model.layers.47.block_sparse_moe.experts.81.w1", "model.layers.47.block_sparse_moe.experts.82.w1", "model.layers.47.block_sparse_moe.experts.83.w1", "model.layers.47.block_sparse_moe.experts.84.w1", "model.layers.47.block_sparse_moe.experts.85.w1", "model.layers.47.block_sparse_moe.experts.86.w1", "model.layers.47.block_sparse_moe.experts.87.w1", "model.layers.47.block_sparse_moe.experts.88.w1", "model.layers.47.block_sparse_moe.experts.89.w1", "model.layers.47.block_sparse_moe.experts.90.w1", "model.layers.47.block_sparse_moe.experts.91.w1", "model.layers.47.block_sparse_moe.experts.92.w1", "model.layers.47.block_sparse_moe.experts.93.w1", "model.layers.47.block_sparse_moe.experts.94.w1", "model.layers.47.block_sparse_moe.experts.95.w1", "model.layers.47.block_sparse_moe.experts.96.w1", "model.layers.47.block_sparse_moe.experts.97.w1", "model.layers.47.block_sparse_moe.experts.98.w1", "model.layers.47.block_sparse_moe.experts.99.w1", "model.layers.47.block_sparse_moe.experts.100.w1", "model.layers.47.block_sparse_moe.experts.101.w1", "model.layers.47.block_sparse_moe.experts.102.w1", "model.layers.47.block_sparse_moe.experts.103.w1", "model.layers.47.block_sparse_moe.experts.104.w1", "model.layers.47.block_sparse_moe.experts.105.w1", "model.layers.47.block_sparse_moe.experts.106.w1", "model.layers.47.block_sparse_moe.experts.107.w1", "model.layers.47.block_sparse_moe.experts.108.w1", "model.layers.47.block_sparse_moe.experts.109.w1", "model.layers.47.block_sparse_moe.experts.110.w1", "model.layers.47.block_sparse_moe.experts.111.w1", "model.layers.47.block_sparse_moe.experts.112.w1", "model.layers.47.block_sparse_moe.experts.113.w1", "model.layers.47.block_sparse_moe.experts.114.w1", "model.layers.47.block_sparse_moe.experts.115.w1", "model.layers.47.block_sparse_moe.experts.116.w1", "model.layers.47.block_sparse_moe.experts.117.w1", "model.layers.47.block_sparse_moe.experts.118.w1", "model.layers.47.block_sparse_moe.experts.119.w1", "model.layers.47.block_sparse_moe.experts.120.w1", "model.layers.47.block_sparse_moe.experts.121.w1", "model.layers.47.block_sparse_moe.experts.122.w1", "model.layers.47.block_sparse_moe.experts.123.w1", "model.layers.47.block_sparse_moe.experts.124.w1", "model.layers.47.block_sparse_moe.experts.125.w1", "model.layers.47.block_sparse_moe.experts.126.w1", "model.layers.47.block_sparse_moe.experts.127.w1", "model.layers.47.block_sparse_moe.experts.128.w1", "model.layers.47.block_sparse_moe.experts.129.w1", "model.layers.47.block_sparse_moe.experts.130.w1", "model.layers.47.block_sparse_moe.experts.131.w1", "model.layers.47.block_sparse_moe.experts.132.w1", "model.layers.47.block_sparse_moe.experts.133.w1", "model.layers.47.block_sparse_moe.experts.134.w1", "model.layers.47.block_sparse_moe.experts.135.w1", "model.layers.47.block_sparse_moe.experts.136.w1", "model.layers.47.block_sparse_moe.experts.137.w1", "model.layers.47.block_sparse_moe.experts.138.w1", "model.layers.47.block_sparse_moe.experts.139.w1", "model.layers.47.block_sparse_moe.experts.140.w1", "model.layers.47.block_sparse_moe.experts.141.w1", "model.layers.47.block_sparse_moe.experts.142.w1", "model.layers.47.block_sparse_moe.experts.143.w1", "model.layers.47.block_sparse_moe.experts.144.w1", "model.layers.47.block_sparse_moe.experts.145.w1", "model.layers.47.block_sparse_moe.experts.146.w1", "model.layers.47.block_sparse_moe.experts.147.w1", "model.layers.47.block_sparse_moe.experts.148.w1", "model.layers.47.block_sparse_moe.experts.149.w1", "model.layers.47.block_sparse_moe.experts.150.w1", "model.layers.47.block_sparse_moe.experts.151.w1", "model.layers.47.block_sparse_moe.experts.152.w1", "model.layers.47.block_sparse_moe.experts.153.w1", "model.layers.47.block_sparse_moe.experts.154.w1", "model.layers.47.block_sparse_moe.experts.155.w1", "model.layers.47.block_sparse_moe.experts.156.w1", "model.layers.47.block_sparse_moe.experts.157.w1", "model.layers.47.block_sparse_moe.experts.158.w1", "model.layers.47.block_sparse_moe.experts.159.w1", "model.layers.47.block_sparse_moe.experts.160.w1", "model.layers.47.block_sparse_moe.experts.161.w1", "model.layers.47.block_sparse_moe.experts.162.w1", "model.layers.47.block_sparse_moe.experts.163.w1", "model.layers.47.block_sparse_moe.experts.164.w1", "model.layers.47.block_sparse_moe.experts.165.w1", "model.layers.47.block_sparse_moe.experts.166.w1", "model.layers.47.block_sparse_moe.experts.167.w1", "model.layers.47.block_sparse_moe.experts.168.w1", "model.layers.47.block_sparse_moe.experts.169.w1", "model.layers.47.block_sparse_moe.experts.170.w1", "model.layers.47.block_sparse_moe.experts.171.w1", "model.layers.47.block_sparse_moe.experts.172.w1", "model.layers.47.block_sparse_moe.experts.173.w1", "model.layers.47.block_sparse_moe.experts.174.w1", "model.layers.47.block_sparse_moe.experts.175.w1", "model.layers.47.block_sparse_moe.experts.176.w1", "model.layers.47.block_sparse_moe.experts.177.w1", "model.layers.47.block_sparse_moe.experts.178.w1", "model.layers.47.block_sparse_moe.experts.179.w1", "model.layers.47.block_sparse_moe.experts.180.w1", "model.layers.47.block_sparse_moe.experts.181.w1", "model.layers.47.block_sparse_moe.experts.182.w1", "model.layers.47.block_sparse_moe.experts.183.w1", "model.layers.47.block_sparse_moe.experts.184.w1", "model.layers.47.block_sparse_moe.experts.185.w1", "model.layers.47.block_sparse_moe.experts.186.w1", "model.layers.47.block_sparse_moe.experts.187.w1", "model.layers.47.block_sparse_moe.experts.188.w1", "model.layers.47.block_sparse_moe.experts.189.w1", "model.layers.47.block_sparse_moe.experts.190.w1", "model.layers.47.block_sparse_moe.experts.191.w1", "model.layers.47.block_sparse_moe.experts.192.w1", "model.layers.47.block_sparse_moe.experts.193.w1", "model.layers.47.block_sparse_moe.experts.194.w1", "model.layers.47.block_sparse_moe.experts.195.w1", "model.layers.47.block_sparse_moe.experts.196.w1", "model.layers.47.block_sparse_moe.experts.197.w1", "model.layers.47.block_sparse_moe.experts.198.w1", "model.layers.47.block_sparse_moe.experts.199.w1", "model.layers.47.block_sparse_moe.experts.200.w1", "model.layers.47.block_sparse_moe.experts.201.w1", "model.layers.47.block_sparse_moe.experts.202.w1", "model.layers.47.block_sparse_moe.experts.203.w1", "model.layers.47.block_sparse_moe.experts.204.w1", "model.layers.47.block_sparse_moe.experts.205.w1", "model.layers.47.block_sparse_moe.experts.206.w1", "model.layers.47.block_sparse_moe.experts.207.w1", "model.layers.47.block_sparse_moe.experts.208.w1", "model.layers.47.block_sparse_moe.experts.209.w1", "model.layers.47.block_sparse_moe.experts.210.w1", "model.layers.47.block_sparse_moe.experts.211.w1", "model.layers.47.block_sparse_moe.experts.212.w1", "model.layers.47.block_sparse_moe.experts.213.w1", "model.layers.47.block_sparse_moe.experts.214.w1", "model.layers.47.block_sparse_moe.experts.215.w1", "model.layers.47.block_sparse_moe.experts.216.w1", "model.layers.47.block_sparse_moe.experts.217.w1", "model.layers.47.block_sparse_moe.experts.218.w1", "model.layers.47.block_sparse_moe.experts.219.w1", "model.layers.47.block_sparse_moe.experts.220.w1", "model.layers.47.block_sparse_moe.experts.221.w1", "model.layers.47.block_sparse_moe.experts.222.w1", "model.layers.47.block_sparse_moe.experts.223.w1", "model.layers.47.block_sparse_moe.experts.224.w1", "model.layers.47.block_sparse_moe.experts.225.w1", "model.layers.47.block_sparse_moe.experts.226.w1", "model.layers.47.block_sparse_moe.experts.227.w1", "model.layers.47.block_sparse_moe.experts.228.w1", "model.layers.47.block_sparse_moe.experts.229.w1", "model.layers.47.block_sparse_moe.experts.230.w1", "model.layers.47.block_sparse_moe.experts.231.w1", "model.layers.47.block_sparse_moe.experts.232.w1", "model.layers.47.block_sparse_moe.experts.233.w1", "model.layers.47.block_sparse_moe.experts.234.w1", "model.layers.47.block_sparse_moe.experts.235.w1", "model.layers.47.block_sparse_moe.experts.236.w1", "model.layers.47.block_sparse_moe.experts.237.w1", "model.layers.47.block_sparse_moe.experts.238.w1", "model.layers.47.block_sparse_moe.experts.239.w1", "model.layers.47.block_sparse_moe.experts.240.w1", "model.layers.47.block_sparse_moe.experts.241.w1", "model.layers.47.block_sparse_moe.experts.242.w1", "model.layers.47.block_sparse_moe.experts.243.w1", "model.layers.47.block_sparse_moe.experts.244.w1", "model.layers.47.block_sparse_moe.experts.245.w1", "model.layers.47.block_sparse_moe.experts.246.w1", "model.layers.47.block_sparse_moe.experts.247.w1", "model.layers.47.block_sparse_moe.experts.248.w1", "model.layers.47.block_sparse_moe.experts.249.w1", "model.layers.47.block_sparse_moe.experts.250.w1", "model.layers.47.block_sparse_moe.experts.251.w1", "model.layers.47.block_sparse_moe.experts.252.w1", "model.layers.47.block_sparse_moe.experts.253.w1", "model.layers.47.block_sparse_moe.experts.254.w1", "model.layers.47.block_sparse_moe.experts.255.w1", "model.layers.47.block_sparse_moe.experts.0.w3", "model.layers.47.block_sparse_moe.experts.1.w3", "model.layers.47.block_sparse_moe.experts.2.w3", "model.layers.47.block_sparse_moe.experts.3.w3", "model.layers.47.block_sparse_moe.experts.4.w3", "model.layers.47.block_sparse_moe.experts.5.w3", "model.layers.47.block_sparse_moe.experts.6.w3", "model.layers.47.block_sparse_moe.experts.7.w3", "model.layers.47.block_sparse_moe.experts.8.w3", "model.layers.47.block_sparse_moe.experts.9.w3", "model.layers.47.block_sparse_moe.experts.10.w3", "model.layers.47.block_sparse_moe.experts.11.w3", "model.layers.47.block_sparse_moe.experts.12.w3", "model.layers.47.block_sparse_moe.experts.13.w3", "model.layers.47.block_sparse_moe.experts.14.w3", "model.layers.47.block_sparse_moe.experts.15.w3", "model.layers.47.block_sparse_moe.experts.16.w3", "model.layers.47.block_sparse_moe.experts.17.w3", "model.layers.47.block_sparse_moe.experts.18.w3", "model.layers.47.block_sparse_moe.experts.19.w3", "model.layers.47.block_sparse_moe.experts.20.w3", "model.layers.47.block_sparse_moe.experts.21.w3", "model.layers.47.block_sparse_moe.experts.22.w3", "model.layers.47.block_sparse_moe.experts.23.w3", "model.layers.47.block_sparse_moe.experts.24.w3", "model.layers.47.block_sparse_moe.experts.25.w3", "model.layers.47.block_sparse_moe.experts.26.w3", "model.layers.47.block_sparse_moe.experts.27.w3", "model.layers.47.block_sparse_moe.experts.28.w3", "model.layers.47.block_sparse_moe.experts.29.w3", "model.layers.47.block_sparse_moe.experts.30.w3", "model.layers.47.block_sparse_moe.experts.31.w3", "model.layers.47.block_sparse_moe.experts.32.w3", "model.layers.47.block_sparse_moe.experts.33.w3", "model.layers.47.block_sparse_moe.experts.34.w3", "model.layers.47.block_sparse_moe.experts.35.w3", "model.layers.47.block_sparse_moe.experts.36.w3", "model.layers.47.block_sparse_moe.experts.37.w3", "model.layers.47.block_sparse_moe.experts.38.w3", "model.layers.47.block_sparse_moe.experts.39.w3", "model.layers.47.block_sparse_moe.experts.40.w3", "model.layers.47.block_sparse_moe.experts.41.w3", "model.layers.47.block_sparse_moe.experts.42.w3", "model.layers.47.block_sparse_moe.experts.43.w3", "model.layers.47.block_sparse_moe.experts.44.w3", "model.layers.47.block_sparse_moe.experts.45.w3", "model.layers.47.block_sparse_moe.experts.46.w3", "model.layers.47.block_sparse_moe.experts.47.w3", "model.layers.47.block_sparse_moe.experts.48.w3", "model.layers.47.block_sparse_moe.experts.49.w3", "model.layers.47.block_sparse_moe.experts.50.w3", "model.layers.47.block_sparse_moe.experts.51.w3", "model.layers.47.block_sparse_moe.experts.52.w3", "model.layers.47.block_sparse_moe.experts.53.w3", "model.layers.47.block_sparse_moe.experts.54.w3", "model.layers.47.block_sparse_moe.experts.55.w3", "model.layers.47.block_sparse_moe.experts.56.w3", "model.layers.47.block_sparse_moe.experts.57.w3", "model.layers.47.block_sparse_moe.experts.58.w3", "model.layers.47.block_sparse_moe.experts.59.w3", "model.layers.47.block_sparse_moe.experts.60.w3", "model.layers.47.block_sparse_moe.experts.61.w3", "model.layers.47.block_sparse_moe.experts.62.w3", "model.layers.47.block_sparse_moe.experts.63.w3", "model.layers.47.block_sparse_moe.experts.64.w3", "model.layers.47.block_sparse_moe.experts.65.w3", "model.layers.47.block_sparse_moe.experts.66.w3", "model.layers.47.block_sparse_moe.experts.67.w3", "model.layers.47.block_sparse_moe.experts.68.w3", "model.layers.47.block_sparse_moe.experts.69.w3", "model.layers.47.block_sparse_moe.experts.70.w3", "model.layers.47.block_sparse_moe.experts.71.w3", "model.layers.47.block_sparse_moe.experts.72.w3", "model.layers.47.block_sparse_moe.experts.73.w3", "model.layers.47.block_sparse_moe.experts.74.w3", "model.layers.47.block_sparse_moe.experts.75.w3", "model.layers.47.block_sparse_moe.experts.76.w3", "model.layers.47.block_sparse_moe.experts.77.w3", "model.layers.47.block_sparse_moe.experts.78.w3", "model.layers.47.block_sparse_moe.experts.79.w3", "model.layers.47.block_sparse_moe.experts.80.w3", "model.layers.47.block_sparse_moe.experts.81.w3", "model.layers.47.block_sparse_moe.experts.82.w3", "model.layers.47.block_sparse_moe.experts.83.w3", "model.layers.47.block_sparse_moe.experts.84.w3", "model.layers.47.block_sparse_moe.experts.85.w3", "model.layers.47.block_sparse_moe.experts.86.w3", "model.layers.47.block_sparse_moe.experts.87.w3", "model.layers.47.block_sparse_moe.experts.88.w3", "model.layers.47.block_sparse_moe.experts.89.w3", "model.layers.47.block_sparse_moe.experts.90.w3", "model.layers.47.block_sparse_moe.experts.91.w3", "model.layers.47.block_sparse_moe.experts.92.w3", "model.layers.47.block_sparse_moe.experts.93.w3", "model.layers.47.block_sparse_moe.experts.94.w3", "model.layers.47.block_sparse_moe.experts.95.w3", "model.layers.47.block_sparse_moe.experts.96.w3", "model.layers.47.block_sparse_moe.experts.97.w3", "model.layers.47.block_sparse_moe.experts.98.w3", "model.layers.47.block_sparse_moe.experts.99.w3", "model.layers.47.block_sparse_moe.experts.100.w3", "model.layers.47.block_sparse_moe.experts.101.w3", "model.layers.47.block_sparse_moe.experts.102.w3", "model.layers.47.block_sparse_moe.experts.103.w3", "model.layers.47.block_sparse_moe.experts.104.w3", "model.layers.47.block_sparse_moe.experts.105.w3", "model.layers.47.block_sparse_moe.experts.106.w3", "model.layers.47.block_sparse_moe.experts.107.w3", "model.layers.47.block_sparse_moe.experts.108.w3", "model.layers.47.block_sparse_moe.experts.109.w3", "model.layers.47.block_sparse_moe.experts.110.w3", "model.layers.47.block_sparse_moe.experts.111.w3", "model.layers.47.block_sparse_moe.experts.112.w3", "model.layers.47.block_sparse_moe.experts.113.w3", "model.layers.47.block_sparse_moe.experts.114.w3", "model.layers.47.block_sparse_moe.experts.115.w3", "model.layers.47.block_sparse_moe.experts.116.w3", "model.layers.47.block_sparse_moe.experts.117.w3", "model.layers.47.block_sparse_moe.experts.118.w3", "model.layers.47.block_sparse_moe.experts.119.w3", "model.layers.47.block_sparse_moe.experts.120.w3", "model.layers.47.block_sparse_moe.experts.121.w3", "model.layers.47.block_sparse_moe.experts.122.w3", "model.layers.47.block_sparse_moe.experts.123.w3", "model.layers.47.block_sparse_moe.experts.124.w3", "model.layers.47.block_sparse_moe.experts.125.w3", "model.layers.47.block_sparse_moe.experts.126.w3", "model.layers.47.block_sparse_moe.experts.127.w3", "model.layers.47.block_sparse_moe.experts.128.w3", "model.layers.47.block_sparse_moe.experts.129.w3", "model.layers.47.block_sparse_moe.experts.130.w3", "model.layers.47.block_sparse_moe.experts.131.w3", "model.layers.47.block_sparse_moe.experts.132.w3", "model.layers.47.block_sparse_moe.experts.133.w3", "model.layers.47.block_sparse_moe.experts.134.w3", "model.layers.47.block_sparse_moe.experts.135.w3", "model.layers.47.block_sparse_moe.experts.136.w3", "model.layers.47.block_sparse_moe.experts.137.w3", "model.layers.47.block_sparse_moe.experts.138.w3", "model.layers.47.block_sparse_moe.experts.139.w3", "model.layers.47.block_sparse_moe.experts.140.w3", "model.layers.47.block_sparse_moe.experts.141.w3", "model.layers.47.block_sparse_moe.experts.142.w3", "model.layers.47.block_sparse_moe.experts.143.w3", "model.layers.47.block_sparse_moe.experts.144.w3", "model.layers.47.block_sparse_moe.experts.145.w3", "model.layers.47.block_sparse_moe.experts.146.w3", "model.layers.47.block_sparse_moe.experts.147.w3", "model.layers.47.block_sparse_moe.experts.148.w3", "model.layers.47.block_sparse_moe.experts.149.w3", "model.layers.47.block_sparse_moe.experts.150.w3", "model.layers.47.block_sparse_moe.experts.151.w3", "model.layers.47.block_sparse_moe.experts.152.w3", "model.layers.47.block_sparse_moe.experts.153.w3", "model.layers.47.block_sparse_moe.experts.154.w3", "model.layers.47.block_sparse_moe.experts.155.w3", "model.layers.47.block_sparse_moe.experts.156.w3", "model.layers.47.block_sparse_moe.experts.157.w3", "model.layers.47.block_sparse_moe.experts.158.w3", "model.layers.47.block_sparse_moe.experts.159.w3", "model.layers.47.block_sparse_moe.experts.160.w3", "model.layers.47.block_sparse_moe.experts.161.w3", "model.layers.47.block_sparse_moe.experts.162.w3", "model.layers.47.block_sparse_moe.experts.163.w3", "model.layers.47.block_sparse_moe.experts.164.w3", "model.layers.47.block_sparse_moe.experts.165.w3", "model.layers.47.block_sparse_moe.experts.166.w3", "model.layers.47.block_sparse_moe.experts.167.w3", "model.layers.47.block_sparse_moe.experts.168.w3", "model.layers.47.block_sparse_moe.experts.169.w3", "model.layers.47.block_sparse_moe.experts.170.w3", "model.layers.47.block_sparse_moe.experts.171.w3", "model.layers.47.block_sparse_moe.experts.172.w3", "model.layers.47.block_sparse_moe.experts.173.w3", "model.layers.47.block_sparse_moe.experts.174.w3", "model.layers.47.block_sparse_moe.experts.175.w3", "model.layers.47.block_sparse_moe.experts.176.w3", "model.layers.47.block_sparse_moe.experts.177.w3", "model.layers.47.block_sparse_moe.experts.178.w3", "model.layers.47.block_sparse_moe.experts.179.w3", "model.layers.47.block_sparse_moe.experts.180.w3", "model.layers.47.block_sparse_moe.experts.181.w3", "model.layers.47.block_sparse_moe.experts.182.w3", "model.layers.47.block_sparse_moe.experts.183.w3", "model.layers.47.block_sparse_moe.experts.184.w3", "model.layers.47.block_sparse_moe.experts.185.w3", "model.layers.47.block_sparse_moe.experts.186.w3", "model.layers.47.block_sparse_moe.experts.187.w3", "model.layers.47.block_sparse_moe.experts.188.w3", "model.layers.47.block_sparse_moe.experts.189.w3", "model.layers.47.block_sparse_moe.experts.190.w3", "model.layers.47.block_sparse_moe.experts.191.w3", "model.layers.47.block_sparse_moe.experts.192.w3", "model.layers.47.block_sparse_moe.experts.193.w3", "model.layers.47.block_sparse_moe.experts.194.w3", "model.layers.47.block_sparse_moe.experts.195.w3", "model.layers.47.block_sparse_moe.experts.196.w3", "model.layers.47.block_sparse_moe.experts.197.w3", "model.layers.47.block_sparse_moe.experts.198.w3", "model.layers.47.block_sparse_moe.experts.199.w3", "model.layers.47.block_sparse_moe.experts.200.w3", "model.layers.47.block_sparse_moe.experts.201.w3", "model.layers.47.block_sparse_moe.experts.202.w3", "model.layers.47.block_sparse_moe.experts.203.w3", "model.layers.47.block_sparse_moe.experts.204.w3", "model.layers.47.block_sparse_moe.experts.205.w3", "model.layers.47.block_sparse_moe.experts.206.w3", "model.layers.47.block_sparse_moe.experts.207.w3", "model.layers.47.block_sparse_moe.experts.208.w3", "model.layers.47.block_sparse_moe.experts.209.w3", "model.layers.47.block_sparse_moe.experts.210.w3", "model.layers.47.block_sparse_moe.experts.211.w3", "model.layers.47.block_sparse_moe.experts.212.w3", "model.layers.47.block_sparse_moe.experts.213.w3", "model.layers.47.block_sparse_moe.experts.214.w3", "model.layers.47.block_sparse_moe.experts.215.w3", "model.layers.47.block_sparse_moe.experts.216.w3", "model.layers.47.block_sparse_moe.experts.217.w3", "model.layers.47.block_sparse_moe.experts.218.w3", "model.layers.47.block_sparse_moe.experts.219.w3", "model.layers.47.block_sparse_moe.experts.220.w3", "model.layers.47.block_sparse_moe.experts.221.w3", "model.layers.47.block_sparse_moe.experts.222.w3", "model.layers.47.block_sparse_moe.experts.223.w3", "model.layers.47.block_sparse_moe.experts.224.w3", "model.layers.47.block_sparse_moe.experts.225.w3", "model.layers.47.block_sparse_moe.experts.226.w3", "model.layers.47.block_sparse_moe.experts.227.w3", "model.layers.47.block_sparse_moe.experts.228.w3", "model.layers.47.block_sparse_moe.experts.229.w3", "model.layers.47.block_sparse_moe.experts.230.w3", "model.layers.47.block_sparse_moe.experts.231.w3", "model.layers.47.block_sparse_moe.experts.232.w3", "model.layers.47.block_sparse_moe.experts.233.w3", "model.layers.47.block_sparse_moe.experts.234.w3", "model.layers.47.block_sparse_moe.experts.235.w3", "model.layers.47.block_sparse_moe.experts.236.w3", "model.layers.47.block_sparse_moe.experts.237.w3", "model.layers.47.block_sparse_moe.experts.238.w3", "model.layers.47.block_sparse_moe.experts.239.w3", "model.layers.47.block_sparse_moe.experts.240.w3", "model.layers.47.block_sparse_moe.experts.241.w3", "model.layers.47.block_sparse_moe.experts.242.w3", "model.layers.47.block_sparse_moe.experts.243.w3", "model.layers.47.block_sparse_moe.experts.244.w3", "model.layers.47.block_sparse_moe.experts.245.w3", "model.layers.47.block_sparse_moe.experts.246.w3", "model.layers.47.block_sparse_moe.experts.247.w3", "model.layers.47.block_sparse_moe.experts.248.w3", "model.layers.47.block_sparse_moe.experts.249.w3", "model.layers.47.block_sparse_moe.experts.250.w3", "model.layers.47.block_sparse_moe.experts.251.w3", "model.layers.47.block_sparse_moe.experts.252.w3", "model.layers.47.block_sparse_moe.experts.253.w3", "model.layers.47.block_sparse_moe.experts.254.w3", "model.layers.47.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00047829747200012207, "dbits": 2415919104 } ] }, { "idx": 239, "layers": [ "model.layers.47.block_sparse_moe.experts.0.w2", "model.layers.47.block_sparse_moe.experts.1.w2", "model.layers.47.block_sparse_moe.experts.2.w2", "model.layers.47.block_sparse_moe.experts.3.w2", "model.layers.47.block_sparse_moe.experts.4.w2", "model.layers.47.block_sparse_moe.experts.5.w2", "model.layers.47.block_sparse_moe.experts.6.w2", "model.layers.47.block_sparse_moe.experts.7.w2", "model.layers.47.block_sparse_moe.experts.8.w2", "model.layers.47.block_sparse_moe.experts.9.w2", "model.layers.47.block_sparse_moe.experts.10.w2", "model.layers.47.block_sparse_moe.experts.11.w2", "model.layers.47.block_sparse_moe.experts.12.w2", "model.layers.47.block_sparse_moe.experts.13.w2", "model.layers.47.block_sparse_moe.experts.14.w2", "model.layers.47.block_sparse_moe.experts.15.w2", "model.layers.47.block_sparse_moe.experts.16.w2", "model.layers.47.block_sparse_moe.experts.17.w2", "model.layers.47.block_sparse_moe.experts.18.w2", "model.layers.47.block_sparse_moe.experts.19.w2", "model.layers.47.block_sparse_moe.experts.20.w2", "model.layers.47.block_sparse_moe.experts.21.w2", "model.layers.47.block_sparse_moe.experts.22.w2", "model.layers.47.block_sparse_moe.experts.23.w2", "model.layers.47.block_sparse_moe.experts.24.w2", "model.layers.47.block_sparse_moe.experts.25.w2", "model.layers.47.block_sparse_moe.experts.26.w2", "model.layers.47.block_sparse_moe.experts.27.w2", "model.layers.47.block_sparse_moe.experts.28.w2", "model.layers.47.block_sparse_moe.experts.29.w2", "model.layers.47.block_sparse_moe.experts.30.w2", "model.layers.47.block_sparse_moe.experts.31.w2", "model.layers.47.block_sparse_moe.experts.32.w2", "model.layers.47.block_sparse_moe.experts.33.w2", "model.layers.47.block_sparse_moe.experts.34.w2", "model.layers.47.block_sparse_moe.experts.35.w2", "model.layers.47.block_sparse_moe.experts.36.w2", "model.layers.47.block_sparse_moe.experts.37.w2", "model.layers.47.block_sparse_moe.experts.38.w2", "model.layers.47.block_sparse_moe.experts.39.w2", "model.layers.47.block_sparse_moe.experts.40.w2", "model.layers.47.block_sparse_moe.experts.41.w2", "model.layers.47.block_sparse_moe.experts.42.w2", "model.layers.47.block_sparse_moe.experts.43.w2", "model.layers.47.block_sparse_moe.experts.44.w2", "model.layers.47.block_sparse_moe.experts.45.w2", "model.layers.47.block_sparse_moe.experts.46.w2", "model.layers.47.block_sparse_moe.experts.47.w2", "model.layers.47.block_sparse_moe.experts.48.w2", "model.layers.47.block_sparse_moe.experts.49.w2", "model.layers.47.block_sparse_moe.experts.50.w2", "model.layers.47.block_sparse_moe.experts.51.w2", "model.layers.47.block_sparse_moe.experts.52.w2", "model.layers.47.block_sparse_moe.experts.53.w2", "model.layers.47.block_sparse_moe.experts.54.w2", "model.layers.47.block_sparse_moe.experts.55.w2", "model.layers.47.block_sparse_moe.experts.56.w2", "model.layers.47.block_sparse_moe.experts.57.w2", "model.layers.47.block_sparse_moe.experts.58.w2", "model.layers.47.block_sparse_moe.experts.59.w2", "model.layers.47.block_sparse_moe.experts.60.w2", "model.layers.47.block_sparse_moe.experts.61.w2", "model.layers.47.block_sparse_moe.experts.62.w2", "model.layers.47.block_sparse_moe.experts.63.w2", "model.layers.47.block_sparse_moe.experts.64.w2", "model.layers.47.block_sparse_moe.experts.65.w2", "model.layers.47.block_sparse_moe.experts.66.w2", "model.layers.47.block_sparse_moe.experts.67.w2", "model.layers.47.block_sparse_moe.experts.68.w2", "model.layers.47.block_sparse_moe.experts.69.w2", "model.layers.47.block_sparse_moe.experts.70.w2", "model.layers.47.block_sparse_moe.experts.71.w2", "model.layers.47.block_sparse_moe.experts.72.w2", "model.layers.47.block_sparse_moe.experts.73.w2", "model.layers.47.block_sparse_moe.experts.74.w2", "model.layers.47.block_sparse_moe.experts.75.w2", "model.layers.47.block_sparse_moe.experts.76.w2", "model.layers.47.block_sparse_moe.experts.77.w2", "model.layers.47.block_sparse_moe.experts.78.w2", "model.layers.47.block_sparse_moe.experts.79.w2", "model.layers.47.block_sparse_moe.experts.80.w2", "model.layers.47.block_sparse_moe.experts.81.w2", "model.layers.47.block_sparse_moe.experts.82.w2", "model.layers.47.block_sparse_moe.experts.83.w2", "model.layers.47.block_sparse_moe.experts.84.w2", "model.layers.47.block_sparse_moe.experts.85.w2", "model.layers.47.block_sparse_moe.experts.86.w2", "model.layers.47.block_sparse_moe.experts.87.w2", "model.layers.47.block_sparse_moe.experts.88.w2", "model.layers.47.block_sparse_moe.experts.89.w2", "model.layers.47.block_sparse_moe.experts.90.w2", "model.layers.47.block_sparse_moe.experts.91.w2", "model.layers.47.block_sparse_moe.experts.92.w2", "model.layers.47.block_sparse_moe.experts.93.w2", "model.layers.47.block_sparse_moe.experts.94.w2", "model.layers.47.block_sparse_moe.experts.95.w2", "model.layers.47.block_sparse_moe.experts.96.w2", "model.layers.47.block_sparse_moe.experts.97.w2", "model.layers.47.block_sparse_moe.experts.98.w2", "model.layers.47.block_sparse_moe.experts.99.w2", "model.layers.47.block_sparse_moe.experts.100.w2", "model.layers.47.block_sparse_moe.experts.101.w2", "model.layers.47.block_sparse_moe.experts.102.w2", "model.layers.47.block_sparse_moe.experts.103.w2", "model.layers.47.block_sparse_moe.experts.104.w2", "model.layers.47.block_sparse_moe.experts.105.w2", "model.layers.47.block_sparse_moe.experts.106.w2", "model.layers.47.block_sparse_moe.experts.107.w2", "model.layers.47.block_sparse_moe.experts.108.w2", "model.layers.47.block_sparse_moe.experts.109.w2", "model.layers.47.block_sparse_moe.experts.110.w2", "model.layers.47.block_sparse_moe.experts.111.w2", "model.layers.47.block_sparse_moe.experts.112.w2", "model.layers.47.block_sparse_moe.experts.113.w2", "model.layers.47.block_sparse_moe.experts.114.w2", "model.layers.47.block_sparse_moe.experts.115.w2", "model.layers.47.block_sparse_moe.experts.116.w2", "model.layers.47.block_sparse_moe.experts.117.w2", "model.layers.47.block_sparse_moe.experts.118.w2", "model.layers.47.block_sparse_moe.experts.119.w2", "model.layers.47.block_sparse_moe.experts.120.w2", "model.layers.47.block_sparse_moe.experts.121.w2", "model.layers.47.block_sparse_moe.experts.122.w2", "model.layers.47.block_sparse_moe.experts.123.w2", "model.layers.47.block_sparse_moe.experts.124.w2", "model.layers.47.block_sparse_moe.experts.125.w2", "model.layers.47.block_sparse_moe.experts.126.w2", "model.layers.47.block_sparse_moe.experts.127.w2", "model.layers.47.block_sparse_moe.experts.128.w2", "model.layers.47.block_sparse_moe.experts.129.w2", "model.layers.47.block_sparse_moe.experts.130.w2", "model.layers.47.block_sparse_moe.experts.131.w2", "model.layers.47.block_sparse_moe.experts.132.w2", "model.layers.47.block_sparse_moe.experts.133.w2", "model.layers.47.block_sparse_moe.experts.134.w2", "model.layers.47.block_sparse_moe.experts.135.w2", "model.layers.47.block_sparse_moe.experts.136.w2", "model.layers.47.block_sparse_moe.experts.137.w2", "model.layers.47.block_sparse_moe.experts.138.w2", "model.layers.47.block_sparse_moe.experts.139.w2", "model.layers.47.block_sparse_moe.experts.140.w2", "model.layers.47.block_sparse_moe.experts.141.w2", "model.layers.47.block_sparse_moe.experts.142.w2", "model.layers.47.block_sparse_moe.experts.143.w2", "model.layers.47.block_sparse_moe.experts.144.w2", "model.layers.47.block_sparse_moe.experts.145.w2", "model.layers.47.block_sparse_moe.experts.146.w2", "model.layers.47.block_sparse_moe.experts.147.w2", "model.layers.47.block_sparse_moe.experts.148.w2", "model.layers.47.block_sparse_moe.experts.149.w2", "model.layers.47.block_sparse_moe.experts.150.w2", "model.layers.47.block_sparse_moe.experts.151.w2", "model.layers.47.block_sparse_moe.experts.152.w2", "model.layers.47.block_sparse_moe.experts.153.w2", "model.layers.47.block_sparse_moe.experts.154.w2", "model.layers.47.block_sparse_moe.experts.155.w2", "model.layers.47.block_sparse_moe.experts.156.w2", "model.layers.47.block_sparse_moe.experts.157.w2", "model.layers.47.block_sparse_moe.experts.158.w2", "model.layers.47.block_sparse_moe.experts.159.w2", "model.layers.47.block_sparse_moe.experts.160.w2", "model.layers.47.block_sparse_moe.experts.161.w2", "model.layers.47.block_sparse_moe.experts.162.w2", "model.layers.47.block_sparse_moe.experts.163.w2", "model.layers.47.block_sparse_moe.experts.164.w2", "model.layers.47.block_sparse_moe.experts.165.w2", "model.layers.47.block_sparse_moe.experts.166.w2", "model.layers.47.block_sparse_moe.experts.167.w2", "model.layers.47.block_sparse_moe.experts.168.w2", "model.layers.47.block_sparse_moe.experts.169.w2", "model.layers.47.block_sparse_moe.experts.170.w2", "model.layers.47.block_sparse_moe.experts.171.w2", "model.layers.47.block_sparse_moe.experts.172.w2", "model.layers.47.block_sparse_moe.experts.173.w2", "model.layers.47.block_sparse_moe.experts.174.w2", "model.layers.47.block_sparse_moe.experts.175.w2", "model.layers.47.block_sparse_moe.experts.176.w2", "model.layers.47.block_sparse_moe.experts.177.w2", "model.layers.47.block_sparse_moe.experts.178.w2", "model.layers.47.block_sparse_moe.experts.179.w2", "model.layers.47.block_sparse_moe.experts.180.w2", "model.layers.47.block_sparse_moe.experts.181.w2", "model.layers.47.block_sparse_moe.experts.182.w2", "model.layers.47.block_sparse_moe.experts.183.w2", "model.layers.47.block_sparse_moe.experts.184.w2", "model.layers.47.block_sparse_moe.experts.185.w2", "model.layers.47.block_sparse_moe.experts.186.w2", "model.layers.47.block_sparse_moe.experts.187.w2", "model.layers.47.block_sparse_moe.experts.188.w2", "model.layers.47.block_sparse_moe.experts.189.w2", "model.layers.47.block_sparse_moe.experts.190.w2", "model.layers.47.block_sparse_moe.experts.191.w2", "model.layers.47.block_sparse_moe.experts.192.w2", "model.layers.47.block_sparse_moe.experts.193.w2", "model.layers.47.block_sparse_moe.experts.194.w2", "model.layers.47.block_sparse_moe.experts.195.w2", "model.layers.47.block_sparse_moe.experts.196.w2", "model.layers.47.block_sparse_moe.experts.197.w2", "model.layers.47.block_sparse_moe.experts.198.w2", "model.layers.47.block_sparse_moe.experts.199.w2", "model.layers.47.block_sparse_moe.experts.200.w2", "model.layers.47.block_sparse_moe.experts.201.w2", "model.layers.47.block_sparse_moe.experts.202.w2", "model.layers.47.block_sparse_moe.experts.203.w2", "model.layers.47.block_sparse_moe.experts.204.w2", "model.layers.47.block_sparse_moe.experts.205.w2", "model.layers.47.block_sparse_moe.experts.206.w2", "model.layers.47.block_sparse_moe.experts.207.w2", "model.layers.47.block_sparse_moe.experts.208.w2", "model.layers.47.block_sparse_moe.experts.209.w2", "model.layers.47.block_sparse_moe.experts.210.w2", "model.layers.47.block_sparse_moe.experts.211.w2", "model.layers.47.block_sparse_moe.experts.212.w2", "model.layers.47.block_sparse_moe.experts.213.w2", "model.layers.47.block_sparse_moe.experts.214.w2", "model.layers.47.block_sparse_moe.experts.215.w2", "model.layers.47.block_sparse_moe.experts.216.w2", "model.layers.47.block_sparse_moe.experts.217.w2", "model.layers.47.block_sparse_moe.experts.218.w2", "model.layers.47.block_sparse_moe.experts.219.w2", "model.layers.47.block_sparse_moe.experts.220.w2", "model.layers.47.block_sparse_moe.experts.221.w2", "model.layers.47.block_sparse_moe.experts.222.w2", "model.layers.47.block_sparse_moe.experts.223.w2", "model.layers.47.block_sparse_moe.experts.224.w2", "model.layers.47.block_sparse_moe.experts.225.w2", "model.layers.47.block_sparse_moe.experts.226.w2", "model.layers.47.block_sparse_moe.experts.227.w2", "model.layers.47.block_sparse_moe.experts.228.w2", "model.layers.47.block_sparse_moe.experts.229.w2", "model.layers.47.block_sparse_moe.experts.230.w2", "model.layers.47.block_sparse_moe.experts.231.w2", "model.layers.47.block_sparse_moe.experts.232.w2", "model.layers.47.block_sparse_moe.experts.233.w2", "model.layers.47.block_sparse_moe.experts.234.w2", "model.layers.47.block_sparse_moe.experts.235.w2", "model.layers.47.block_sparse_moe.experts.236.w2", "model.layers.47.block_sparse_moe.experts.237.w2", "model.layers.47.block_sparse_moe.experts.238.w2", "model.layers.47.block_sparse_moe.experts.239.w2", "model.layers.47.block_sparse_moe.experts.240.w2", "model.layers.47.block_sparse_moe.experts.241.w2", "model.layers.47.block_sparse_moe.experts.242.w2", "model.layers.47.block_sparse_moe.experts.243.w2", "model.layers.47.block_sparse_moe.experts.244.w2", "model.layers.47.block_sparse_moe.experts.245.w2", "model.layers.47.block_sparse_moe.experts.246.w2", "model.layers.47.block_sparse_moe.experts.247.w2", "model.layers.47.block_sparse_moe.experts.248.w2", "model.layers.47.block_sparse_moe.experts.249.w2", "model.layers.47.block_sparse_moe.experts.250.w2", "model.layers.47.block_sparse_moe.experts.251.w2", "model.layers.47.block_sparse_moe.experts.252.w2", "model.layers.47.block_sparse_moe.experts.253.w2", "model.layers.47.block_sparse_moe.experts.254.w2", "model.layers.47.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006339758634567039, "dbits": 1207959552 } ] }, { "idx": 240, "layers": [ "model.layers.48.self_attn.q_proj" ], "candidates": [ { "dkld": 9.589195251469285e-05, "dbits": 18874368 } ] }, { "idx": 241, "layers": [ "model.layers.48.self_attn.k_proj", "model.layers.48.self_attn.v_proj" ], "candidates": [ { "dkld": -0.009403353929519631, "dbits": 6291456 } ] }, { "idx": 242, "layers": [ "model.layers.48.self_attn.o_proj" ], "candidates": [ { "dkld": -0.001550421118736267, "dbits": 18874368 } ] }, { "idx": 243, "layers": [ "model.layers.48.block_sparse_moe.experts.0.w1", "model.layers.48.block_sparse_moe.experts.1.w1", "model.layers.48.block_sparse_moe.experts.2.w1", "model.layers.48.block_sparse_moe.experts.3.w1", "model.layers.48.block_sparse_moe.experts.4.w1", "model.layers.48.block_sparse_moe.experts.5.w1", "model.layers.48.block_sparse_moe.experts.6.w1", "model.layers.48.block_sparse_moe.experts.7.w1", "model.layers.48.block_sparse_moe.experts.8.w1", "model.layers.48.block_sparse_moe.experts.9.w1", "model.layers.48.block_sparse_moe.experts.10.w1", "model.layers.48.block_sparse_moe.experts.11.w1", "model.layers.48.block_sparse_moe.experts.12.w1", "model.layers.48.block_sparse_moe.experts.13.w1", "model.layers.48.block_sparse_moe.experts.14.w1", "model.layers.48.block_sparse_moe.experts.15.w1", "model.layers.48.block_sparse_moe.experts.16.w1", "model.layers.48.block_sparse_moe.experts.17.w1", "model.layers.48.block_sparse_moe.experts.18.w1", "model.layers.48.block_sparse_moe.experts.19.w1", "model.layers.48.block_sparse_moe.experts.20.w1", "model.layers.48.block_sparse_moe.experts.21.w1", "model.layers.48.block_sparse_moe.experts.22.w1", "model.layers.48.block_sparse_moe.experts.23.w1", "model.layers.48.block_sparse_moe.experts.24.w1", "model.layers.48.block_sparse_moe.experts.25.w1", "model.layers.48.block_sparse_moe.experts.26.w1", "model.layers.48.block_sparse_moe.experts.27.w1", "model.layers.48.block_sparse_moe.experts.28.w1", "model.layers.48.block_sparse_moe.experts.29.w1", "model.layers.48.block_sparse_moe.experts.30.w1", "model.layers.48.block_sparse_moe.experts.31.w1", "model.layers.48.block_sparse_moe.experts.32.w1", "model.layers.48.block_sparse_moe.experts.33.w1", "model.layers.48.block_sparse_moe.experts.34.w1", "model.layers.48.block_sparse_moe.experts.35.w1", "model.layers.48.block_sparse_moe.experts.36.w1", "model.layers.48.block_sparse_moe.experts.37.w1", "model.layers.48.block_sparse_moe.experts.38.w1", "model.layers.48.block_sparse_moe.experts.39.w1", "model.layers.48.block_sparse_moe.experts.40.w1", "model.layers.48.block_sparse_moe.experts.41.w1", "model.layers.48.block_sparse_moe.experts.42.w1", "model.layers.48.block_sparse_moe.experts.43.w1", "model.layers.48.block_sparse_moe.experts.44.w1", "model.layers.48.block_sparse_moe.experts.45.w1", "model.layers.48.block_sparse_moe.experts.46.w1", "model.layers.48.block_sparse_moe.experts.47.w1", "model.layers.48.block_sparse_moe.experts.48.w1", "model.layers.48.block_sparse_moe.experts.49.w1", "model.layers.48.block_sparse_moe.experts.50.w1", "model.layers.48.block_sparse_moe.experts.51.w1", "model.layers.48.block_sparse_moe.experts.52.w1", "model.layers.48.block_sparse_moe.experts.53.w1", "model.layers.48.block_sparse_moe.experts.54.w1", "model.layers.48.block_sparse_moe.experts.55.w1", "model.layers.48.block_sparse_moe.experts.56.w1", "model.layers.48.block_sparse_moe.experts.57.w1", "model.layers.48.block_sparse_moe.experts.58.w1", "model.layers.48.block_sparse_moe.experts.59.w1", "model.layers.48.block_sparse_moe.experts.60.w1", "model.layers.48.block_sparse_moe.experts.61.w1", "model.layers.48.block_sparse_moe.experts.62.w1", "model.layers.48.block_sparse_moe.experts.63.w1", "model.layers.48.block_sparse_moe.experts.64.w1", "model.layers.48.block_sparse_moe.experts.65.w1", "model.layers.48.block_sparse_moe.experts.66.w1", "model.layers.48.block_sparse_moe.experts.67.w1", "model.layers.48.block_sparse_moe.experts.68.w1", "model.layers.48.block_sparse_moe.experts.69.w1", "model.layers.48.block_sparse_moe.experts.70.w1", "model.layers.48.block_sparse_moe.experts.71.w1", "model.layers.48.block_sparse_moe.experts.72.w1", "model.layers.48.block_sparse_moe.experts.73.w1", "model.layers.48.block_sparse_moe.experts.74.w1", "model.layers.48.block_sparse_moe.experts.75.w1", "model.layers.48.block_sparse_moe.experts.76.w1", "model.layers.48.block_sparse_moe.experts.77.w1", "model.layers.48.block_sparse_moe.experts.78.w1", "model.layers.48.block_sparse_moe.experts.79.w1", "model.layers.48.block_sparse_moe.experts.80.w1", "model.layers.48.block_sparse_moe.experts.81.w1", "model.layers.48.block_sparse_moe.experts.82.w1", "model.layers.48.block_sparse_moe.experts.83.w1", "model.layers.48.block_sparse_moe.experts.84.w1", "model.layers.48.block_sparse_moe.experts.85.w1", "model.layers.48.block_sparse_moe.experts.86.w1", "model.layers.48.block_sparse_moe.experts.87.w1", "model.layers.48.block_sparse_moe.experts.88.w1", "model.layers.48.block_sparse_moe.experts.89.w1", "model.layers.48.block_sparse_moe.experts.90.w1", "model.layers.48.block_sparse_moe.experts.91.w1", "model.layers.48.block_sparse_moe.experts.92.w1", "model.layers.48.block_sparse_moe.experts.93.w1", "model.layers.48.block_sparse_moe.experts.94.w1", "model.layers.48.block_sparse_moe.experts.95.w1", "model.layers.48.block_sparse_moe.experts.96.w1", "model.layers.48.block_sparse_moe.experts.97.w1", "model.layers.48.block_sparse_moe.experts.98.w1", "model.layers.48.block_sparse_moe.experts.99.w1", "model.layers.48.block_sparse_moe.experts.100.w1", "model.layers.48.block_sparse_moe.experts.101.w1", "model.layers.48.block_sparse_moe.experts.102.w1", "model.layers.48.block_sparse_moe.experts.103.w1", "model.layers.48.block_sparse_moe.experts.104.w1", "model.layers.48.block_sparse_moe.experts.105.w1", "model.layers.48.block_sparse_moe.experts.106.w1", "model.layers.48.block_sparse_moe.experts.107.w1", "model.layers.48.block_sparse_moe.experts.108.w1", "model.layers.48.block_sparse_moe.experts.109.w1", "model.layers.48.block_sparse_moe.experts.110.w1", "model.layers.48.block_sparse_moe.experts.111.w1", "model.layers.48.block_sparse_moe.experts.112.w1", "model.layers.48.block_sparse_moe.experts.113.w1", "model.layers.48.block_sparse_moe.experts.114.w1", "model.layers.48.block_sparse_moe.experts.115.w1", "model.layers.48.block_sparse_moe.experts.116.w1", "model.layers.48.block_sparse_moe.experts.117.w1", "model.layers.48.block_sparse_moe.experts.118.w1", "model.layers.48.block_sparse_moe.experts.119.w1", "model.layers.48.block_sparse_moe.experts.120.w1", "model.layers.48.block_sparse_moe.experts.121.w1", "model.layers.48.block_sparse_moe.experts.122.w1", "model.layers.48.block_sparse_moe.experts.123.w1", "model.layers.48.block_sparse_moe.experts.124.w1", "model.layers.48.block_sparse_moe.experts.125.w1", "model.layers.48.block_sparse_moe.experts.126.w1", "model.layers.48.block_sparse_moe.experts.127.w1", "model.layers.48.block_sparse_moe.experts.128.w1", "model.layers.48.block_sparse_moe.experts.129.w1", "model.layers.48.block_sparse_moe.experts.130.w1", "model.layers.48.block_sparse_moe.experts.131.w1", "model.layers.48.block_sparse_moe.experts.132.w1", "model.layers.48.block_sparse_moe.experts.133.w1", "model.layers.48.block_sparse_moe.experts.134.w1", "model.layers.48.block_sparse_moe.experts.135.w1", "model.layers.48.block_sparse_moe.experts.136.w1", "model.layers.48.block_sparse_moe.experts.137.w1", "model.layers.48.block_sparse_moe.experts.138.w1", "model.layers.48.block_sparse_moe.experts.139.w1", "model.layers.48.block_sparse_moe.experts.140.w1", "model.layers.48.block_sparse_moe.experts.141.w1", "model.layers.48.block_sparse_moe.experts.142.w1", "model.layers.48.block_sparse_moe.experts.143.w1", "model.layers.48.block_sparse_moe.experts.144.w1", "model.layers.48.block_sparse_moe.experts.145.w1", "model.layers.48.block_sparse_moe.experts.146.w1", "model.layers.48.block_sparse_moe.experts.147.w1", "model.layers.48.block_sparse_moe.experts.148.w1", "model.layers.48.block_sparse_moe.experts.149.w1", "model.layers.48.block_sparse_moe.experts.150.w1", "model.layers.48.block_sparse_moe.experts.151.w1", "model.layers.48.block_sparse_moe.experts.152.w1", "model.layers.48.block_sparse_moe.experts.153.w1", "model.layers.48.block_sparse_moe.experts.154.w1", "model.layers.48.block_sparse_moe.experts.155.w1", "model.layers.48.block_sparse_moe.experts.156.w1", "model.layers.48.block_sparse_moe.experts.157.w1", "model.layers.48.block_sparse_moe.experts.158.w1", "model.layers.48.block_sparse_moe.experts.159.w1", "model.layers.48.block_sparse_moe.experts.160.w1", "model.layers.48.block_sparse_moe.experts.161.w1", "model.layers.48.block_sparse_moe.experts.162.w1", "model.layers.48.block_sparse_moe.experts.163.w1", "model.layers.48.block_sparse_moe.experts.164.w1", "model.layers.48.block_sparse_moe.experts.165.w1", "model.layers.48.block_sparse_moe.experts.166.w1", "model.layers.48.block_sparse_moe.experts.167.w1", "model.layers.48.block_sparse_moe.experts.168.w1", "model.layers.48.block_sparse_moe.experts.169.w1", "model.layers.48.block_sparse_moe.experts.170.w1", "model.layers.48.block_sparse_moe.experts.171.w1", "model.layers.48.block_sparse_moe.experts.172.w1", "model.layers.48.block_sparse_moe.experts.173.w1", "model.layers.48.block_sparse_moe.experts.174.w1", "model.layers.48.block_sparse_moe.experts.175.w1", "model.layers.48.block_sparse_moe.experts.176.w1", "model.layers.48.block_sparse_moe.experts.177.w1", "model.layers.48.block_sparse_moe.experts.178.w1", "model.layers.48.block_sparse_moe.experts.179.w1", "model.layers.48.block_sparse_moe.experts.180.w1", "model.layers.48.block_sparse_moe.experts.181.w1", "model.layers.48.block_sparse_moe.experts.182.w1", "model.layers.48.block_sparse_moe.experts.183.w1", "model.layers.48.block_sparse_moe.experts.184.w1", "model.layers.48.block_sparse_moe.experts.185.w1", "model.layers.48.block_sparse_moe.experts.186.w1", "model.layers.48.block_sparse_moe.experts.187.w1", "model.layers.48.block_sparse_moe.experts.188.w1", "model.layers.48.block_sparse_moe.experts.189.w1", "model.layers.48.block_sparse_moe.experts.190.w1", "model.layers.48.block_sparse_moe.experts.191.w1", "model.layers.48.block_sparse_moe.experts.192.w1", "model.layers.48.block_sparse_moe.experts.193.w1", "model.layers.48.block_sparse_moe.experts.194.w1", "model.layers.48.block_sparse_moe.experts.195.w1", "model.layers.48.block_sparse_moe.experts.196.w1", "model.layers.48.block_sparse_moe.experts.197.w1", "model.layers.48.block_sparse_moe.experts.198.w1", "model.layers.48.block_sparse_moe.experts.199.w1", "model.layers.48.block_sparse_moe.experts.200.w1", "model.layers.48.block_sparse_moe.experts.201.w1", "model.layers.48.block_sparse_moe.experts.202.w1", "model.layers.48.block_sparse_moe.experts.203.w1", "model.layers.48.block_sparse_moe.experts.204.w1", "model.layers.48.block_sparse_moe.experts.205.w1", "model.layers.48.block_sparse_moe.experts.206.w1", "model.layers.48.block_sparse_moe.experts.207.w1", "model.layers.48.block_sparse_moe.experts.208.w1", "model.layers.48.block_sparse_moe.experts.209.w1", "model.layers.48.block_sparse_moe.experts.210.w1", "model.layers.48.block_sparse_moe.experts.211.w1", "model.layers.48.block_sparse_moe.experts.212.w1", "model.layers.48.block_sparse_moe.experts.213.w1", "model.layers.48.block_sparse_moe.experts.214.w1", "model.layers.48.block_sparse_moe.experts.215.w1", "model.layers.48.block_sparse_moe.experts.216.w1", "model.layers.48.block_sparse_moe.experts.217.w1", "model.layers.48.block_sparse_moe.experts.218.w1", "model.layers.48.block_sparse_moe.experts.219.w1", "model.layers.48.block_sparse_moe.experts.220.w1", "model.layers.48.block_sparse_moe.experts.221.w1", "model.layers.48.block_sparse_moe.experts.222.w1", "model.layers.48.block_sparse_moe.experts.223.w1", "model.layers.48.block_sparse_moe.experts.224.w1", "model.layers.48.block_sparse_moe.experts.225.w1", "model.layers.48.block_sparse_moe.experts.226.w1", "model.layers.48.block_sparse_moe.experts.227.w1", "model.layers.48.block_sparse_moe.experts.228.w1", "model.layers.48.block_sparse_moe.experts.229.w1", "model.layers.48.block_sparse_moe.experts.230.w1", "model.layers.48.block_sparse_moe.experts.231.w1", "model.layers.48.block_sparse_moe.experts.232.w1", "model.layers.48.block_sparse_moe.experts.233.w1", "model.layers.48.block_sparse_moe.experts.234.w1", "model.layers.48.block_sparse_moe.experts.235.w1", "model.layers.48.block_sparse_moe.experts.236.w1", "model.layers.48.block_sparse_moe.experts.237.w1", "model.layers.48.block_sparse_moe.experts.238.w1", "model.layers.48.block_sparse_moe.experts.239.w1", "model.layers.48.block_sparse_moe.experts.240.w1", "model.layers.48.block_sparse_moe.experts.241.w1", "model.layers.48.block_sparse_moe.experts.242.w1", "model.layers.48.block_sparse_moe.experts.243.w1", "model.layers.48.block_sparse_moe.experts.244.w1", "model.layers.48.block_sparse_moe.experts.245.w1", "model.layers.48.block_sparse_moe.experts.246.w1", "model.layers.48.block_sparse_moe.experts.247.w1", "model.layers.48.block_sparse_moe.experts.248.w1", "model.layers.48.block_sparse_moe.experts.249.w1", "model.layers.48.block_sparse_moe.experts.250.w1", "model.layers.48.block_sparse_moe.experts.251.w1", "model.layers.48.block_sparse_moe.experts.252.w1", "model.layers.48.block_sparse_moe.experts.253.w1", "model.layers.48.block_sparse_moe.experts.254.w1", "model.layers.48.block_sparse_moe.experts.255.w1", "model.layers.48.block_sparse_moe.experts.0.w3", "model.layers.48.block_sparse_moe.experts.1.w3", "model.layers.48.block_sparse_moe.experts.2.w3", "model.layers.48.block_sparse_moe.experts.3.w3", "model.layers.48.block_sparse_moe.experts.4.w3", "model.layers.48.block_sparse_moe.experts.5.w3", "model.layers.48.block_sparse_moe.experts.6.w3", "model.layers.48.block_sparse_moe.experts.7.w3", "model.layers.48.block_sparse_moe.experts.8.w3", "model.layers.48.block_sparse_moe.experts.9.w3", "model.layers.48.block_sparse_moe.experts.10.w3", "model.layers.48.block_sparse_moe.experts.11.w3", "model.layers.48.block_sparse_moe.experts.12.w3", "model.layers.48.block_sparse_moe.experts.13.w3", "model.layers.48.block_sparse_moe.experts.14.w3", "model.layers.48.block_sparse_moe.experts.15.w3", "model.layers.48.block_sparse_moe.experts.16.w3", "model.layers.48.block_sparse_moe.experts.17.w3", "model.layers.48.block_sparse_moe.experts.18.w3", "model.layers.48.block_sparse_moe.experts.19.w3", "model.layers.48.block_sparse_moe.experts.20.w3", "model.layers.48.block_sparse_moe.experts.21.w3", "model.layers.48.block_sparse_moe.experts.22.w3", "model.layers.48.block_sparse_moe.experts.23.w3", "model.layers.48.block_sparse_moe.experts.24.w3", "model.layers.48.block_sparse_moe.experts.25.w3", "model.layers.48.block_sparse_moe.experts.26.w3", "model.layers.48.block_sparse_moe.experts.27.w3", "model.layers.48.block_sparse_moe.experts.28.w3", "model.layers.48.block_sparse_moe.experts.29.w3", "model.layers.48.block_sparse_moe.experts.30.w3", "model.layers.48.block_sparse_moe.experts.31.w3", "model.layers.48.block_sparse_moe.experts.32.w3", "model.layers.48.block_sparse_moe.experts.33.w3", "model.layers.48.block_sparse_moe.experts.34.w3", "model.layers.48.block_sparse_moe.experts.35.w3", "model.layers.48.block_sparse_moe.experts.36.w3", "model.layers.48.block_sparse_moe.experts.37.w3", "model.layers.48.block_sparse_moe.experts.38.w3", "model.layers.48.block_sparse_moe.experts.39.w3", "model.layers.48.block_sparse_moe.experts.40.w3", "model.layers.48.block_sparse_moe.experts.41.w3", "model.layers.48.block_sparse_moe.experts.42.w3", "model.layers.48.block_sparse_moe.experts.43.w3", "model.layers.48.block_sparse_moe.experts.44.w3", "model.layers.48.block_sparse_moe.experts.45.w3", "model.layers.48.block_sparse_moe.experts.46.w3", "model.layers.48.block_sparse_moe.experts.47.w3", "model.layers.48.block_sparse_moe.experts.48.w3", "model.layers.48.block_sparse_moe.experts.49.w3", "model.layers.48.block_sparse_moe.experts.50.w3", "model.layers.48.block_sparse_moe.experts.51.w3", "model.layers.48.block_sparse_moe.experts.52.w3", "model.layers.48.block_sparse_moe.experts.53.w3", "model.layers.48.block_sparse_moe.experts.54.w3", "model.layers.48.block_sparse_moe.experts.55.w3", "model.layers.48.block_sparse_moe.experts.56.w3", "model.layers.48.block_sparse_moe.experts.57.w3", "model.layers.48.block_sparse_moe.experts.58.w3", "model.layers.48.block_sparse_moe.experts.59.w3", "model.layers.48.block_sparse_moe.experts.60.w3", "model.layers.48.block_sparse_moe.experts.61.w3", "model.layers.48.block_sparse_moe.experts.62.w3", "model.layers.48.block_sparse_moe.experts.63.w3", "model.layers.48.block_sparse_moe.experts.64.w3", "model.layers.48.block_sparse_moe.experts.65.w3", "model.layers.48.block_sparse_moe.experts.66.w3", "model.layers.48.block_sparse_moe.experts.67.w3", "model.layers.48.block_sparse_moe.experts.68.w3", "model.layers.48.block_sparse_moe.experts.69.w3", "model.layers.48.block_sparse_moe.experts.70.w3", "model.layers.48.block_sparse_moe.experts.71.w3", "model.layers.48.block_sparse_moe.experts.72.w3", "model.layers.48.block_sparse_moe.experts.73.w3", "model.layers.48.block_sparse_moe.experts.74.w3", "model.layers.48.block_sparse_moe.experts.75.w3", "model.layers.48.block_sparse_moe.experts.76.w3", "model.layers.48.block_sparse_moe.experts.77.w3", "model.layers.48.block_sparse_moe.experts.78.w3", "model.layers.48.block_sparse_moe.experts.79.w3", "model.layers.48.block_sparse_moe.experts.80.w3", "model.layers.48.block_sparse_moe.experts.81.w3", "model.layers.48.block_sparse_moe.experts.82.w3", "model.layers.48.block_sparse_moe.experts.83.w3", "model.layers.48.block_sparse_moe.experts.84.w3", "model.layers.48.block_sparse_moe.experts.85.w3", "model.layers.48.block_sparse_moe.experts.86.w3", "model.layers.48.block_sparse_moe.experts.87.w3", "model.layers.48.block_sparse_moe.experts.88.w3", "model.layers.48.block_sparse_moe.experts.89.w3", "model.layers.48.block_sparse_moe.experts.90.w3", "model.layers.48.block_sparse_moe.experts.91.w3", "model.layers.48.block_sparse_moe.experts.92.w3", "model.layers.48.block_sparse_moe.experts.93.w3", "model.layers.48.block_sparse_moe.experts.94.w3", "model.layers.48.block_sparse_moe.experts.95.w3", "model.layers.48.block_sparse_moe.experts.96.w3", "model.layers.48.block_sparse_moe.experts.97.w3", "model.layers.48.block_sparse_moe.experts.98.w3", "model.layers.48.block_sparse_moe.experts.99.w3", "model.layers.48.block_sparse_moe.experts.100.w3", "model.layers.48.block_sparse_moe.experts.101.w3", "model.layers.48.block_sparse_moe.experts.102.w3", "model.layers.48.block_sparse_moe.experts.103.w3", "model.layers.48.block_sparse_moe.experts.104.w3", "model.layers.48.block_sparse_moe.experts.105.w3", "model.layers.48.block_sparse_moe.experts.106.w3", "model.layers.48.block_sparse_moe.experts.107.w3", "model.layers.48.block_sparse_moe.experts.108.w3", "model.layers.48.block_sparse_moe.experts.109.w3", "model.layers.48.block_sparse_moe.experts.110.w3", "model.layers.48.block_sparse_moe.experts.111.w3", "model.layers.48.block_sparse_moe.experts.112.w3", "model.layers.48.block_sparse_moe.experts.113.w3", "model.layers.48.block_sparse_moe.experts.114.w3", "model.layers.48.block_sparse_moe.experts.115.w3", "model.layers.48.block_sparse_moe.experts.116.w3", "model.layers.48.block_sparse_moe.experts.117.w3", "model.layers.48.block_sparse_moe.experts.118.w3", "model.layers.48.block_sparse_moe.experts.119.w3", "model.layers.48.block_sparse_moe.experts.120.w3", "model.layers.48.block_sparse_moe.experts.121.w3", "model.layers.48.block_sparse_moe.experts.122.w3", "model.layers.48.block_sparse_moe.experts.123.w3", "model.layers.48.block_sparse_moe.experts.124.w3", "model.layers.48.block_sparse_moe.experts.125.w3", "model.layers.48.block_sparse_moe.experts.126.w3", "model.layers.48.block_sparse_moe.experts.127.w3", "model.layers.48.block_sparse_moe.experts.128.w3", "model.layers.48.block_sparse_moe.experts.129.w3", "model.layers.48.block_sparse_moe.experts.130.w3", "model.layers.48.block_sparse_moe.experts.131.w3", "model.layers.48.block_sparse_moe.experts.132.w3", "model.layers.48.block_sparse_moe.experts.133.w3", "model.layers.48.block_sparse_moe.experts.134.w3", "model.layers.48.block_sparse_moe.experts.135.w3", "model.layers.48.block_sparse_moe.experts.136.w3", "model.layers.48.block_sparse_moe.experts.137.w3", "model.layers.48.block_sparse_moe.experts.138.w3", "model.layers.48.block_sparse_moe.experts.139.w3", "model.layers.48.block_sparse_moe.experts.140.w3", "model.layers.48.block_sparse_moe.experts.141.w3", "model.layers.48.block_sparse_moe.experts.142.w3", "model.layers.48.block_sparse_moe.experts.143.w3", "model.layers.48.block_sparse_moe.experts.144.w3", "model.layers.48.block_sparse_moe.experts.145.w3", "model.layers.48.block_sparse_moe.experts.146.w3", "model.layers.48.block_sparse_moe.experts.147.w3", "model.layers.48.block_sparse_moe.experts.148.w3", "model.layers.48.block_sparse_moe.experts.149.w3", "model.layers.48.block_sparse_moe.experts.150.w3", "model.layers.48.block_sparse_moe.experts.151.w3", "model.layers.48.block_sparse_moe.experts.152.w3", "model.layers.48.block_sparse_moe.experts.153.w3", "model.layers.48.block_sparse_moe.experts.154.w3", "model.layers.48.block_sparse_moe.experts.155.w3", "model.layers.48.block_sparse_moe.experts.156.w3", "model.layers.48.block_sparse_moe.experts.157.w3", "model.layers.48.block_sparse_moe.experts.158.w3", "model.layers.48.block_sparse_moe.experts.159.w3", "model.layers.48.block_sparse_moe.experts.160.w3", "model.layers.48.block_sparse_moe.experts.161.w3", "model.layers.48.block_sparse_moe.experts.162.w3", "model.layers.48.block_sparse_moe.experts.163.w3", "model.layers.48.block_sparse_moe.experts.164.w3", "model.layers.48.block_sparse_moe.experts.165.w3", "model.layers.48.block_sparse_moe.experts.166.w3", "model.layers.48.block_sparse_moe.experts.167.w3", "model.layers.48.block_sparse_moe.experts.168.w3", "model.layers.48.block_sparse_moe.experts.169.w3", "model.layers.48.block_sparse_moe.experts.170.w3", "model.layers.48.block_sparse_moe.experts.171.w3", "model.layers.48.block_sparse_moe.experts.172.w3", "model.layers.48.block_sparse_moe.experts.173.w3", "model.layers.48.block_sparse_moe.experts.174.w3", "model.layers.48.block_sparse_moe.experts.175.w3", "model.layers.48.block_sparse_moe.experts.176.w3", "model.layers.48.block_sparse_moe.experts.177.w3", "model.layers.48.block_sparse_moe.experts.178.w3", "model.layers.48.block_sparse_moe.experts.179.w3", "model.layers.48.block_sparse_moe.experts.180.w3", "model.layers.48.block_sparse_moe.experts.181.w3", "model.layers.48.block_sparse_moe.experts.182.w3", "model.layers.48.block_sparse_moe.experts.183.w3", "model.layers.48.block_sparse_moe.experts.184.w3", "model.layers.48.block_sparse_moe.experts.185.w3", "model.layers.48.block_sparse_moe.experts.186.w3", "model.layers.48.block_sparse_moe.experts.187.w3", "model.layers.48.block_sparse_moe.experts.188.w3", "model.layers.48.block_sparse_moe.experts.189.w3", "model.layers.48.block_sparse_moe.experts.190.w3", "model.layers.48.block_sparse_moe.experts.191.w3", "model.layers.48.block_sparse_moe.experts.192.w3", "model.layers.48.block_sparse_moe.experts.193.w3", "model.layers.48.block_sparse_moe.experts.194.w3", "model.layers.48.block_sparse_moe.experts.195.w3", "model.layers.48.block_sparse_moe.experts.196.w3", "model.layers.48.block_sparse_moe.experts.197.w3", "model.layers.48.block_sparse_moe.experts.198.w3", "model.layers.48.block_sparse_moe.experts.199.w3", "model.layers.48.block_sparse_moe.experts.200.w3", "model.layers.48.block_sparse_moe.experts.201.w3", "model.layers.48.block_sparse_moe.experts.202.w3", "model.layers.48.block_sparse_moe.experts.203.w3", "model.layers.48.block_sparse_moe.experts.204.w3", "model.layers.48.block_sparse_moe.experts.205.w3", "model.layers.48.block_sparse_moe.experts.206.w3", "model.layers.48.block_sparse_moe.experts.207.w3", "model.layers.48.block_sparse_moe.experts.208.w3", "model.layers.48.block_sparse_moe.experts.209.w3", "model.layers.48.block_sparse_moe.experts.210.w3", "model.layers.48.block_sparse_moe.experts.211.w3", "model.layers.48.block_sparse_moe.experts.212.w3", "model.layers.48.block_sparse_moe.experts.213.w3", "model.layers.48.block_sparse_moe.experts.214.w3", "model.layers.48.block_sparse_moe.experts.215.w3", "model.layers.48.block_sparse_moe.experts.216.w3", "model.layers.48.block_sparse_moe.experts.217.w3", "model.layers.48.block_sparse_moe.experts.218.w3", "model.layers.48.block_sparse_moe.experts.219.w3", "model.layers.48.block_sparse_moe.experts.220.w3", "model.layers.48.block_sparse_moe.experts.221.w3", "model.layers.48.block_sparse_moe.experts.222.w3", "model.layers.48.block_sparse_moe.experts.223.w3", "model.layers.48.block_sparse_moe.experts.224.w3", "model.layers.48.block_sparse_moe.experts.225.w3", "model.layers.48.block_sparse_moe.experts.226.w3", "model.layers.48.block_sparse_moe.experts.227.w3", "model.layers.48.block_sparse_moe.experts.228.w3", "model.layers.48.block_sparse_moe.experts.229.w3", "model.layers.48.block_sparse_moe.experts.230.w3", "model.layers.48.block_sparse_moe.experts.231.w3", "model.layers.48.block_sparse_moe.experts.232.w3", "model.layers.48.block_sparse_moe.experts.233.w3", "model.layers.48.block_sparse_moe.experts.234.w3", "model.layers.48.block_sparse_moe.experts.235.w3", "model.layers.48.block_sparse_moe.experts.236.w3", "model.layers.48.block_sparse_moe.experts.237.w3", "model.layers.48.block_sparse_moe.experts.238.w3", "model.layers.48.block_sparse_moe.experts.239.w3", "model.layers.48.block_sparse_moe.experts.240.w3", "model.layers.48.block_sparse_moe.experts.241.w3", "model.layers.48.block_sparse_moe.experts.242.w3", "model.layers.48.block_sparse_moe.experts.243.w3", "model.layers.48.block_sparse_moe.experts.244.w3", "model.layers.48.block_sparse_moe.experts.245.w3", "model.layers.48.block_sparse_moe.experts.246.w3", "model.layers.48.block_sparse_moe.experts.247.w3", "model.layers.48.block_sparse_moe.experts.248.w3", "model.layers.48.block_sparse_moe.experts.249.w3", "model.layers.48.block_sparse_moe.experts.250.w3", "model.layers.48.block_sparse_moe.experts.251.w3", "model.layers.48.block_sparse_moe.experts.252.w3", "model.layers.48.block_sparse_moe.experts.253.w3", "model.layers.48.block_sparse_moe.experts.254.w3", "model.layers.48.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0005129486322402732, "dbits": 2415919104 } ] }, { "idx": 244, "layers": [ "model.layers.48.block_sparse_moe.experts.0.w2", "model.layers.48.block_sparse_moe.experts.1.w2", "model.layers.48.block_sparse_moe.experts.2.w2", "model.layers.48.block_sparse_moe.experts.3.w2", "model.layers.48.block_sparse_moe.experts.4.w2", "model.layers.48.block_sparse_moe.experts.5.w2", "model.layers.48.block_sparse_moe.experts.6.w2", "model.layers.48.block_sparse_moe.experts.7.w2", "model.layers.48.block_sparse_moe.experts.8.w2", "model.layers.48.block_sparse_moe.experts.9.w2", "model.layers.48.block_sparse_moe.experts.10.w2", "model.layers.48.block_sparse_moe.experts.11.w2", "model.layers.48.block_sparse_moe.experts.12.w2", "model.layers.48.block_sparse_moe.experts.13.w2", "model.layers.48.block_sparse_moe.experts.14.w2", "model.layers.48.block_sparse_moe.experts.15.w2", "model.layers.48.block_sparse_moe.experts.16.w2", "model.layers.48.block_sparse_moe.experts.17.w2", "model.layers.48.block_sparse_moe.experts.18.w2", "model.layers.48.block_sparse_moe.experts.19.w2", "model.layers.48.block_sparse_moe.experts.20.w2", "model.layers.48.block_sparse_moe.experts.21.w2", "model.layers.48.block_sparse_moe.experts.22.w2", "model.layers.48.block_sparse_moe.experts.23.w2", "model.layers.48.block_sparse_moe.experts.24.w2", "model.layers.48.block_sparse_moe.experts.25.w2", "model.layers.48.block_sparse_moe.experts.26.w2", "model.layers.48.block_sparse_moe.experts.27.w2", "model.layers.48.block_sparse_moe.experts.28.w2", "model.layers.48.block_sparse_moe.experts.29.w2", "model.layers.48.block_sparse_moe.experts.30.w2", "model.layers.48.block_sparse_moe.experts.31.w2", "model.layers.48.block_sparse_moe.experts.32.w2", "model.layers.48.block_sparse_moe.experts.33.w2", "model.layers.48.block_sparse_moe.experts.34.w2", "model.layers.48.block_sparse_moe.experts.35.w2", "model.layers.48.block_sparse_moe.experts.36.w2", "model.layers.48.block_sparse_moe.experts.37.w2", "model.layers.48.block_sparse_moe.experts.38.w2", "model.layers.48.block_sparse_moe.experts.39.w2", "model.layers.48.block_sparse_moe.experts.40.w2", "model.layers.48.block_sparse_moe.experts.41.w2", "model.layers.48.block_sparse_moe.experts.42.w2", "model.layers.48.block_sparse_moe.experts.43.w2", "model.layers.48.block_sparse_moe.experts.44.w2", "model.layers.48.block_sparse_moe.experts.45.w2", "model.layers.48.block_sparse_moe.experts.46.w2", "model.layers.48.block_sparse_moe.experts.47.w2", "model.layers.48.block_sparse_moe.experts.48.w2", "model.layers.48.block_sparse_moe.experts.49.w2", "model.layers.48.block_sparse_moe.experts.50.w2", "model.layers.48.block_sparse_moe.experts.51.w2", "model.layers.48.block_sparse_moe.experts.52.w2", "model.layers.48.block_sparse_moe.experts.53.w2", "model.layers.48.block_sparse_moe.experts.54.w2", "model.layers.48.block_sparse_moe.experts.55.w2", "model.layers.48.block_sparse_moe.experts.56.w2", "model.layers.48.block_sparse_moe.experts.57.w2", "model.layers.48.block_sparse_moe.experts.58.w2", "model.layers.48.block_sparse_moe.experts.59.w2", "model.layers.48.block_sparse_moe.experts.60.w2", "model.layers.48.block_sparse_moe.experts.61.w2", "model.layers.48.block_sparse_moe.experts.62.w2", "model.layers.48.block_sparse_moe.experts.63.w2", "model.layers.48.block_sparse_moe.experts.64.w2", "model.layers.48.block_sparse_moe.experts.65.w2", "model.layers.48.block_sparse_moe.experts.66.w2", "model.layers.48.block_sparse_moe.experts.67.w2", "model.layers.48.block_sparse_moe.experts.68.w2", "model.layers.48.block_sparse_moe.experts.69.w2", "model.layers.48.block_sparse_moe.experts.70.w2", "model.layers.48.block_sparse_moe.experts.71.w2", "model.layers.48.block_sparse_moe.experts.72.w2", "model.layers.48.block_sparse_moe.experts.73.w2", "model.layers.48.block_sparse_moe.experts.74.w2", "model.layers.48.block_sparse_moe.experts.75.w2", "model.layers.48.block_sparse_moe.experts.76.w2", "model.layers.48.block_sparse_moe.experts.77.w2", "model.layers.48.block_sparse_moe.experts.78.w2", "model.layers.48.block_sparse_moe.experts.79.w2", "model.layers.48.block_sparse_moe.experts.80.w2", "model.layers.48.block_sparse_moe.experts.81.w2", "model.layers.48.block_sparse_moe.experts.82.w2", "model.layers.48.block_sparse_moe.experts.83.w2", "model.layers.48.block_sparse_moe.experts.84.w2", "model.layers.48.block_sparse_moe.experts.85.w2", "model.layers.48.block_sparse_moe.experts.86.w2", "model.layers.48.block_sparse_moe.experts.87.w2", "model.layers.48.block_sparse_moe.experts.88.w2", "model.layers.48.block_sparse_moe.experts.89.w2", "model.layers.48.block_sparse_moe.experts.90.w2", "model.layers.48.block_sparse_moe.experts.91.w2", "model.layers.48.block_sparse_moe.experts.92.w2", "model.layers.48.block_sparse_moe.experts.93.w2", "model.layers.48.block_sparse_moe.experts.94.w2", "model.layers.48.block_sparse_moe.experts.95.w2", "model.layers.48.block_sparse_moe.experts.96.w2", "model.layers.48.block_sparse_moe.experts.97.w2", "model.layers.48.block_sparse_moe.experts.98.w2", "model.layers.48.block_sparse_moe.experts.99.w2", "model.layers.48.block_sparse_moe.experts.100.w2", "model.layers.48.block_sparse_moe.experts.101.w2", "model.layers.48.block_sparse_moe.experts.102.w2", "model.layers.48.block_sparse_moe.experts.103.w2", "model.layers.48.block_sparse_moe.experts.104.w2", "model.layers.48.block_sparse_moe.experts.105.w2", "model.layers.48.block_sparse_moe.experts.106.w2", "model.layers.48.block_sparse_moe.experts.107.w2", "model.layers.48.block_sparse_moe.experts.108.w2", "model.layers.48.block_sparse_moe.experts.109.w2", "model.layers.48.block_sparse_moe.experts.110.w2", "model.layers.48.block_sparse_moe.experts.111.w2", "model.layers.48.block_sparse_moe.experts.112.w2", "model.layers.48.block_sparse_moe.experts.113.w2", "model.layers.48.block_sparse_moe.experts.114.w2", "model.layers.48.block_sparse_moe.experts.115.w2", "model.layers.48.block_sparse_moe.experts.116.w2", "model.layers.48.block_sparse_moe.experts.117.w2", "model.layers.48.block_sparse_moe.experts.118.w2", "model.layers.48.block_sparse_moe.experts.119.w2", "model.layers.48.block_sparse_moe.experts.120.w2", "model.layers.48.block_sparse_moe.experts.121.w2", "model.layers.48.block_sparse_moe.experts.122.w2", "model.layers.48.block_sparse_moe.experts.123.w2", "model.layers.48.block_sparse_moe.experts.124.w2", "model.layers.48.block_sparse_moe.experts.125.w2", "model.layers.48.block_sparse_moe.experts.126.w2", "model.layers.48.block_sparse_moe.experts.127.w2", "model.layers.48.block_sparse_moe.experts.128.w2", "model.layers.48.block_sparse_moe.experts.129.w2", "model.layers.48.block_sparse_moe.experts.130.w2", "model.layers.48.block_sparse_moe.experts.131.w2", "model.layers.48.block_sparse_moe.experts.132.w2", "model.layers.48.block_sparse_moe.experts.133.w2", "model.layers.48.block_sparse_moe.experts.134.w2", "model.layers.48.block_sparse_moe.experts.135.w2", "model.layers.48.block_sparse_moe.experts.136.w2", "model.layers.48.block_sparse_moe.experts.137.w2", "model.layers.48.block_sparse_moe.experts.138.w2", "model.layers.48.block_sparse_moe.experts.139.w2", "model.layers.48.block_sparse_moe.experts.140.w2", "model.layers.48.block_sparse_moe.experts.141.w2", "model.layers.48.block_sparse_moe.experts.142.w2", "model.layers.48.block_sparse_moe.experts.143.w2", "model.layers.48.block_sparse_moe.experts.144.w2", "model.layers.48.block_sparse_moe.experts.145.w2", "model.layers.48.block_sparse_moe.experts.146.w2", "model.layers.48.block_sparse_moe.experts.147.w2", "model.layers.48.block_sparse_moe.experts.148.w2", "model.layers.48.block_sparse_moe.experts.149.w2", "model.layers.48.block_sparse_moe.experts.150.w2", "model.layers.48.block_sparse_moe.experts.151.w2", "model.layers.48.block_sparse_moe.experts.152.w2", "model.layers.48.block_sparse_moe.experts.153.w2", "model.layers.48.block_sparse_moe.experts.154.w2", "model.layers.48.block_sparse_moe.experts.155.w2", "model.layers.48.block_sparse_moe.experts.156.w2", "model.layers.48.block_sparse_moe.experts.157.w2", "model.layers.48.block_sparse_moe.experts.158.w2", "model.layers.48.block_sparse_moe.experts.159.w2", "model.layers.48.block_sparse_moe.experts.160.w2", "model.layers.48.block_sparse_moe.experts.161.w2", "model.layers.48.block_sparse_moe.experts.162.w2", "model.layers.48.block_sparse_moe.experts.163.w2", "model.layers.48.block_sparse_moe.experts.164.w2", "model.layers.48.block_sparse_moe.experts.165.w2", "model.layers.48.block_sparse_moe.experts.166.w2", "model.layers.48.block_sparse_moe.experts.167.w2", "model.layers.48.block_sparse_moe.experts.168.w2", "model.layers.48.block_sparse_moe.experts.169.w2", "model.layers.48.block_sparse_moe.experts.170.w2", "model.layers.48.block_sparse_moe.experts.171.w2", "model.layers.48.block_sparse_moe.experts.172.w2", "model.layers.48.block_sparse_moe.experts.173.w2", "model.layers.48.block_sparse_moe.experts.174.w2", "model.layers.48.block_sparse_moe.experts.175.w2", "model.layers.48.block_sparse_moe.experts.176.w2", "model.layers.48.block_sparse_moe.experts.177.w2", "model.layers.48.block_sparse_moe.experts.178.w2", "model.layers.48.block_sparse_moe.experts.179.w2", "model.layers.48.block_sparse_moe.experts.180.w2", "model.layers.48.block_sparse_moe.experts.181.w2", "model.layers.48.block_sparse_moe.experts.182.w2", "model.layers.48.block_sparse_moe.experts.183.w2", "model.layers.48.block_sparse_moe.experts.184.w2", "model.layers.48.block_sparse_moe.experts.185.w2", "model.layers.48.block_sparse_moe.experts.186.w2", "model.layers.48.block_sparse_moe.experts.187.w2", "model.layers.48.block_sparse_moe.experts.188.w2", "model.layers.48.block_sparse_moe.experts.189.w2", "model.layers.48.block_sparse_moe.experts.190.w2", "model.layers.48.block_sparse_moe.experts.191.w2", "model.layers.48.block_sparse_moe.experts.192.w2", "model.layers.48.block_sparse_moe.experts.193.w2", "model.layers.48.block_sparse_moe.experts.194.w2", "model.layers.48.block_sparse_moe.experts.195.w2", "model.layers.48.block_sparse_moe.experts.196.w2", "model.layers.48.block_sparse_moe.experts.197.w2", "model.layers.48.block_sparse_moe.experts.198.w2", "model.layers.48.block_sparse_moe.experts.199.w2", "model.layers.48.block_sparse_moe.experts.200.w2", "model.layers.48.block_sparse_moe.experts.201.w2", "model.layers.48.block_sparse_moe.experts.202.w2", "model.layers.48.block_sparse_moe.experts.203.w2", "model.layers.48.block_sparse_moe.experts.204.w2", "model.layers.48.block_sparse_moe.experts.205.w2", "model.layers.48.block_sparse_moe.experts.206.w2", "model.layers.48.block_sparse_moe.experts.207.w2", "model.layers.48.block_sparse_moe.experts.208.w2", "model.layers.48.block_sparse_moe.experts.209.w2", "model.layers.48.block_sparse_moe.experts.210.w2", "model.layers.48.block_sparse_moe.experts.211.w2", "model.layers.48.block_sparse_moe.experts.212.w2", "model.layers.48.block_sparse_moe.experts.213.w2", "model.layers.48.block_sparse_moe.experts.214.w2", "model.layers.48.block_sparse_moe.experts.215.w2", "model.layers.48.block_sparse_moe.experts.216.w2", "model.layers.48.block_sparse_moe.experts.217.w2", "model.layers.48.block_sparse_moe.experts.218.w2", "model.layers.48.block_sparse_moe.experts.219.w2", "model.layers.48.block_sparse_moe.experts.220.w2", "model.layers.48.block_sparse_moe.experts.221.w2", "model.layers.48.block_sparse_moe.experts.222.w2", "model.layers.48.block_sparse_moe.experts.223.w2", "model.layers.48.block_sparse_moe.experts.224.w2", "model.layers.48.block_sparse_moe.experts.225.w2", "model.layers.48.block_sparse_moe.experts.226.w2", "model.layers.48.block_sparse_moe.experts.227.w2", "model.layers.48.block_sparse_moe.experts.228.w2", "model.layers.48.block_sparse_moe.experts.229.w2", "model.layers.48.block_sparse_moe.experts.230.w2", "model.layers.48.block_sparse_moe.experts.231.w2", "model.layers.48.block_sparse_moe.experts.232.w2", "model.layers.48.block_sparse_moe.experts.233.w2", "model.layers.48.block_sparse_moe.experts.234.w2", "model.layers.48.block_sparse_moe.experts.235.w2", "model.layers.48.block_sparse_moe.experts.236.w2", "model.layers.48.block_sparse_moe.experts.237.w2", "model.layers.48.block_sparse_moe.experts.238.w2", "model.layers.48.block_sparse_moe.experts.239.w2", "model.layers.48.block_sparse_moe.experts.240.w2", "model.layers.48.block_sparse_moe.experts.241.w2", "model.layers.48.block_sparse_moe.experts.242.w2", "model.layers.48.block_sparse_moe.experts.243.w2", "model.layers.48.block_sparse_moe.experts.244.w2", "model.layers.48.block_sparse_moe.experts.245.w2", "model.layers.48.block_sparse_moe.experts.246.w2", "model.layers.48.block_sparse_moe.experts.247.w2", "model.layers.48.block_sparse_moe.experts.248.w2", "model.layers.48.block_sparse_moe.experts.249.w2", "model.layers.48.block_sparse_moe.experts.250.w2", "model.layers.48.block_sparse_moe.experts.251.w2", "model.layers.48.block_sparse_moe.experts.252.w2", "model.layers.48.block_sparse_moe.experts.253.w2", "model.layers.48.block_sparse_moe.experts.254.w2", "model.layers.48.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00032397806644435256, "dbits": 1207959552 } ] }, { "idx": 245, "layers": [ "model.layers.49.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0029013186693190862, "dbits": 18874368 } ] }, { "idx": 246, "layers": [ "model.layers.49.self_attn.k_proj", "model.layers.49.self_attn.v_proj" ], "candidates": [ { "dkld": -0.001960015296936013, "dbits": 6291456 } ] }, { "idx": 247, "layers": [ "model.layers.49.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0008966028690338135, "dbits": 18874368 } ] }, { "idx": 248, "layers": [ "model.layers.49.block_sparse_moe.experts.0.w1", "model.layers.49.block_sparse_moe.experts.1.w1", "model.layers.49.block_sparse_moe.experts.2.w1", "model.layers.49.block_sparse_moe.experts.3.w1", "model.layers.49.block_sparse_moe.experts.4.w1", "model.layers.49.block_sparse_moe.experts.5.w1", "model.layers.49.block_sparse_moe.experts.6.w1", "model.layers.49.block_sparse_moe.experts.7.w1", "model.layers.49.block_sparse_moe.experts.8.w1", "model.layers.49.block_sparse_moe.experts.9.w1", "model.layers.49.block_sparse_moe.experts.10.w1", "model.layers.49.block_sparse_moe.experts.11.w1", "model.layers.49.block_sparse_moe.experts.12.w1", "model.layers.49.block_sparse_moe.experts.13.w1", "model.layers.49.block_sparse_moe.experts.14.w1", "model.layers.49.block_sparse_moe.experts.15.w1", "model.layers.49.block_sparse_moe.experts.16.w1", "model.layers.49.block_sparse_moe.experts.17.w1", "model.layers.49.block_sparse_moe.experts.18.w1", "model.layers.49.block_sparse_moe.experts.19.w1", "model.layers.49.block_sparse_moe.experts.20.w1", "model.layers.49.block_sparse_moe.experts.21.w1", "model.layers.49.block_sparse_moe.experts.22.w1", "model.layers.49.block_sparse_moe.experts.23.w1", "model.layers.49.block_sparse_moe.experts.24.w1", "model.layers.49.block_sparse_moe.experts.25.w1", "model.layers.49.block_sparse_moe.experts.26.w1", "model.layers.49.block_sparse_moe.experts.27.w1", "model.layers.49.block_sparse_moe.experts.28.w1", "model.layers.49.block_sparse_moe.experts.29.w1", "model.layers.49.block_sparse_moe.experts.30.w1", "model.layers.49.block_sparse_moe.experts.31.w1", "model.layers.49.block_sparse_moe.experts.32.w1", "model.layers.49.block_sparse_moe.experts.33.w1", "model.layers.49.block_sparse_moe.experts.34.w1", "model.layers.49.block_sparse_moe.experts.35.w1", "model.layers.49.block_sparse_moe.experts.36.w1", "model.layers.49.block_sparse_moe.experts.37.w1", "model.layers.49.block_sparse_moe.experts.38.w1", "model.layers.49.block_sparse_moe.experts.39.w1", "model.layers.49.block_sparse_moe.experts.40.w1", "model.layers.49.block_sparse_moe.experts.41.w1", "model.layers.49.block_sparse_moe.experts.42.w1", "model.layers.49.block_sparse_moe.experts.43.w1", "model.layers.49.block_sparse_moe.experts.44.w1", "model.layers.49.block_sparse_moe.experts.45.w1", "model.layers.49.block_sparse_moe.experts.46.w1", "model.layers.49.block_sparse_moe.experts.47.w1", "model.layers.49.block_sparse_moe.experts.48.w1", "model.layers.49.block_sparse_moe.experts.49.w1", "model.layers.49.block_sparse_moe.experts.50.w1", "model.layers.49.block_sparse_moe.experts.51.w1", "model.layers.49.block_sparse_moe.experts.52.w1", "model.layers.49.block_sparse_moe.experts.53.w1", "model.layers.49.block_sparse_moe.experts.54.w1", "model.layers.49.block_sparse_moe.experts.55.w1", "model.layers.49.block_sparse_moe.experts.56.w1", "model.layers.49.block_sparse_moe.experts.57.w1", "model.layers.49.block_sparse_moe.experts.58.w1", "model.layers.49.block_sparse_moe.experts.59.w1", "model.layers.49.block_sparse_moe.experts.60.w1", "model.layers.49.block_sparse_moe.experts.61.w1", "model.layers.49.block_sparse_moe.experts.62.w1", "model.layers.49.block_sparse_moe.experts.63.w1", "model.layers.49.block_sparse_moe.experts.64.w1", "model.layers.49.block_sparse_moe.experts.65.w1", "model.layers.49.block_sparse_moe.experts.66.w1", "model.layers.49.block_sparse_moe.experts.67.w1", "model.layers.49.block_sparse_moe.experts.68.w1", "model.layers.49.block_sparse_moe.experts.69.w1", "model.layers.49.block_sparse_moe.experts.70.w1", "model.layers.49.block_sparse_moe.experts.71.w1", "model.layers.49.block_sparse_moe.experts.72.w1", "model.layers.49.block_sparse_moe.experts.73.w1", "model.layers.49.block_sparse_moe.experts.74.w1", "model.layers.49.block_sparse_moe.experts.75.w1", "model.layers.49.block_sparse_moe.experts.76.w1", "model.layers.49.block_sparse_moe.experts.77.w1", "model.layers.49.block_sparse_moe.experts.78.w1", "model.layers.49.block_sparse_moe.experts.79.w1", "model.layers.49.block_sparse_moe.experts.80.w1", "model.layers.49.block_sparse_moe.experts.81.w1", "model.layers.49.block_sparse_moe.experts.82.w1", "model.layers.49.block_sparse_moe.experts.83.w1", "model.layers.49.block_sparse_moe.experts.84.w1", "model.layers.49.block_sparse_moe.experts.85.w1", "model.layers.49.block_sparse_moe.experts.86.w1", "model.layers.49.block_sparse_moe.experts.87.w1", "model.layers.49.block_sparse_moe.experts.88.w1", "model.layers.49.block_sparse_moe.experts.89.w1", "model.layers.49.block_sparse_moe.experts.90.w1", "model.layers.49.block_sparse_moe.experts.91.w1", "model.layers.49.block_sparse_moe.experts.92.w1", "model.layers.49.block_sparse_moe.experts.93.w1", "model.layers.49.block_sparse_moe.experts.94.w1", "model.layers.49.block_sparse_moe.experts.95.w1", "model.layers.49.block_sparse_moe.experts.96.w1", "model.layers.49.block_sparse_moe.experts.97.w1", "model.layers.49.block_sparse_moe.experts.98.w1", "model.layers.49.block_sparse_moe.experts.99.w1", "model.layers.49.block_sparse_moe.experts.100.w1", "model.layers.49.block_sparse_moe.experts.101.w1", "model.layers.49.block_sparse_moe.experts.102.w1", "model.layers.49.block_sparse_moe.experts.103.w1", "model.layers.49.block_sparse_moe.experts.104.w1", "model.layers.49.block_sparse_moe.experts.105.w1", "model.layers.49.block_sparse_moe.experts.106.w1", "model.layers.49.block_sparse_moe.experts.107.w1", "model.layers.49.block_sparse_moe.experts.108.w1", "model.layers.49.block_sparse_moe.experts.109.w1", "model.layers.49.block_sparse_moe.experts.110.w1", "model.layers.49.block_sparse_moe.experts.111.w1", "model.layers.49.block_sparse_moe.experts.112.w1", "model.layers.49.block_sparse_moe.experts.113.w1", "model.layers.49.block_sparse_moe.experts.114.w1", "model.layers.49.block_sparse_moe.experts.115.w1", "model.layers.49.block_sparse_moe.experts.116.w1", "model.layers.49.block_sparse_moe.experts.117.w1", "model.layers.49.block_sparse_moe.experts.118.w1", "model.layers.49.block_sparse_moe.experts.119.w1", "model.layers.49.block_sparse_moe.experts.120.w1", "model.layers.49.block_sparse_moe.experts.121.w1", "model.layers.49.block_sparse_moe.experts.122.w1", "model.layers.49.block_sparse_moe.experts.123.w1", "model.layers.49.block_sparse_moe.experts.124.w1", "model.layers.49.block_sparse_moe.experts.125.w1", "model.layers.49.block_sparse_moe.experts.126.w1", "model.layers.49.block_sparse_moe.experts.127.w1", "model.layers.49.block_sparse_moe.experts.128.w1", "model.layers.49.block_sparse_moe.experts.129.w1", "model.layers.49.block_sparse_moe.experts.130.w1", "model.layers.49.block_sparse_moe.experts.131.w1", "model.layers.49.block_sparse_moe.experts.132.w1", "model.layers.49.block_sparse_moe.experts.133.w1", "model.layers.49.block_sparse_moe.experts.134.w1", "model.layers.49.block_sparse_moe.experts.135.w1", "model.layers.49.block_sparse_moe.experts.136.w1", "model.layers.49.block_sparse_moe.experts.137.w1", "model.layers.49.block_sparse_moe.experts.138.w1", "model.layers.49.block_sparse_moe.experts.139.w1", "model.layers.49.block_sparse_moe.experts.140.w1", "model.layers.49.block_sparse_moe.experts.141.w1", "model.layers.49.block_sparse_moe.experts.142.w1", "model.layers.49.block_sparse_moe.experts.143.w1", "model.layers.49.block_sparse_moe.experts.144.w1", "model.layers.49.block_sparse_moe.experts.145.w1", "model.layers.49.block_sparse_moe.experts.146.w1", "model.layers.49.block_sparse_moe.experts.147.w1", "model.layers.49.block_sparse_moe.experts.148.w1", "model.layers.49.block_sparse_moe.experts.149.w1", "model.layers.49.block_sparse_moe.experts.150.w1", "model.layers.49.block_sparse_moe.experts.151.w1", "model.layers.49.block_sparse_moe.experts.152.w1", "model.layers.49.block_sparse_moe.experts.153.w1", "model.layers.49.block_sparse_moe.experts.154.w1", "model.layers.49.block_sparse_moe.experts.155.w1", "model.layers.49.block_sparse_moe.experts.156.w1", "model.layers.49.block_sparse_moe.experts.157.w1", "model.layers.49.block_sparse_moe.experts.158.w1", "model.layers.49.block_sparse_moe.experts.159.w1", "model.layers.49.block_sparse_moe.experts.160.w1", "model.layers.49.block_sparse_moe.experts.161.w1", "model.layers.49.block_sparse_moe.experts.162.w1", "model.layers.49.block_sparse_moe.experts.163.w1", "model.layers.49.block_sparse_moe.experts.164.w1", "model.layers.49.block_sparse_moe.experts.165.w1", "model.layers.49.block_sparse_moe.experts.166.w1", "model.layers.49.block_sparse_moe.experts.167.w1", "model.layers.49.block_sparse_moe.experts.168.w1", "model.layers.49.block_sparse_moe.experts.169.w1", "model.layers.49.block_sparse_moe.experts.170.w1", "model.layers.49.block_sparse_moe.experts.171.w1", "model.layers.49.block_sparse_moe.experts.172.w1", "model.layers.49.block_sparse_moe.experts.173.w1", "model.layers.49.block_sparse_moe.experts.174.w1", "model.layers.49.block_sparse_moe.experts.175.w1", "model.layers.49.block_sparse_moe.experts.176.w1", "model.layers.49.block_sparse_moe.experts.177.w1", "model.layers.49.block_sparse_moe.experts.178.w1", "model.layers.49.block_sparse_moe.experts.179.w1", "model.layers.49.block_sparse_moe.experts.180.w1", "model.layers.49.block_sparse_moe.experts.181.w1", "model.layers.49.block_sparse_moe.experts.182.w1", "model.layers.49.block_sparse_moe.experts.183.w1", "model.layers.49.block_sparse_moe.experts.184.w1", "model.layers.49.block_sparse_moe.experts.185.w1", "model.layers.49.block_sparse_moe.experts.186.w1", "model.layers.49.block_sparse_moe.experts.187.w1", "model.layers.49.block_sparse_moe.experts.188.w1", "model.layers.49.block_sparse_moe.experts.189.w1", "model.layers.49.block_sparse_moe.experts.190.w1", "model.layers.49.block_sparse_moe.experts.191.w1", "model.layers.49.block_sparse_moe.experts.192.w1", "model.layers.49.block_sparse_moe.experts.193.w1", "model.layers.49.block_sparse_moe.experts.194.w1", "model.layers.49.block_sparse_moe.experts.195.w1", "model.layers.49.block_sparse_moe.experts.196.w1", "model.layers.49.block_sparse_moe.experts.197.w1", "model.layers.49.block_sparse_moe.experts.198.w1", "model.layers.49.block_sparse_moe.experts.199.w1", "model.layers.49.block_sparse_moe.experts.200.w1", "model.layers.49.block_sparse_moe.experts.201.w1", "model.layers.49.block_sparse_moe.experts.202.w1", "model.layers.49.block_sparse_moe.experts.203.w1", "model.layers.49.block_sparse_moe.experts.204.w1", "model.layers.49.block_sparse_moe.experts.205.w1", "model.layers.49.block_sparse_moe.experts.206.w1", "model.layers.49.block_sparse_moe.experts.207.w1", "model.layers.49.block_sparse_moe.experts.208.w1", "model.layers.49.block_sparse_moe.experts.209.w1", "model.layers.49.block_sparse_moe.experts.210.w1", "model.layers.49.block_sparse_moe.experts.211.w1", "model.layers.49.block_sparse_moe.experts.212.w1", "model.layers.49.block_sparse_moe.experts.213.w1", "model.layers.49.block_sparse_moe.experts.214.w1", "model.layers.49.block_sparse_moe.experts.215.w1", "model.layers.49.block_sparse_moe.experts.216.w1", "model.layers.49.block_sparse_moe.experts.217.w1", "model.layers.49.block_sparse_moe.experts.218.w1", "model.layers.49.block_sparse_moe.experts.219.w1", "model.layers.49.block_sparse_moe.experts.220.w1", "model.layers.49.block_sparse_moe.experts.221.w1", "model.layers.49.block_sparse_moe.experts.222.w1", "model.layers.49.block_sparse_moe.experts.223.w1", "model.layers.49.block_sparse_moe.experts.224.w1", "model.layers.49.block_sparse_moe.experts.225.w1", "model.layers.49.block_sparse_moe.experts.226.w1", "model.layers.49.block_sparse_moe.experts.227.w1", "model.layers.49.block_sparse_moe.experts.228.w1", "model.layers.49.block_sparse_moe.experts.229.w1", "model.layers.49.block_sparse_moe.experts.230.w1", "model.layers.49.block_sparse_moe.experts.231.w1", "model.layers.49.block_sparse_moe.experts.232.w1", "model.layers.49.block_sparse_moe.experts.233.w1", "model.layers.49.block_sparse_moe.experts.234.w1", "model.layers.49.block_sparse_moe.experts.235.w1", "model.layers.49.block_sparse_moe.experts.236.w1", "model.layers.49.block_sparse_moe.experts.237.w1", "model.layers.49.block_sparse_moe.experts.238.w1", "model.layers.49.block_sparse_moe.experts.239.w1", "model.layers.49.block_sparse_moe.experts.240.w1", "model.layers.49.block_sparse_moe.experts.241.w1", "model.layers.49.block_sparse_moe.experts.242.w1", "model.layers.49.block_sparse_moe.experts.243.w1", "model.layers.49.block_sparse_moe.experts.244.w1", "model.layers.49.block_sparse_moe.experts.245.w1", "model.layers.49.block_sparse_moe.experts.246.w1", "model.layers.49.block_sparse_moe.experts.247.w1", "model.layers.49.block_sparse_moe.experts.248.w1", "model.layers.49.block_sparse_moe.experts.249.w1", "model.layers.49.block_sparse_moe.experts.250.w1", "model.layers.49.block_sparse_moe.experts.251.w1", "model.layers.49.block_sparse_moe.experts.252.w1", "model.layers.49.block_sparse_moe.experts.253.w1", "model.layers.49.block_sparse_moe.experts.254.w1", "model.layers.49.block_sparse_moe.experts.255.w1", "model.layers.49.block_sparse_moe.experts.0.w3", "model.layers.49.block_sparse_moe.experts.1.w3", "model.layers.49.block_sparse_moe.experts.2.w3", "model.layers.49.block_sparse_moe.experts.3.w3", "model.layers.49.block_sparse_moe.experts.4.w3", "model.layers.49.block_sparse_moe.experts.5.w3", "model.layers.49.block_sparse_moe.experts.6.w3", "model.layers.49.block_sparse_moe.experts.7.w3", "model.layers.49.block_sparse_moe.experts.8.w3", "model.layers.49.block_sparse_moe.experts.9.w3", "model.layers.49.block_sparse_moe.experts.10.w3", "model.layers.49.block_sparse_moe.experts.11.w3", "model.layers.49.block_sparse_moe.experts.12.w3", "model.layers.49.block_sparse_moe.experts.13.w3", "model.layers.49.block_sparse_moe.experts.14.w3", "model.layers.49.block_sparse_moe.experts.15.w3", "model.layers.49.block_sparse_moe.experts.16.w3", "model.layers.49.block_sparse_moe.experts.17.w3", "model.layers.49.block_sparse_moe.experts.18.w3", "model.layers.49.block_sparse_moe.experts.19.w3", "model.layers.49.block_sparse_moe.experts.20.w3", "model.layers.49.block_sparse_moe.experts.21.w3", "model.layers.49.block_sparse_moe.experts.22.w3", "model.layers.49.block_sparse_moe.experts.23.w3", "model.layers.49.block_sparse_moe.experts.24.w3", "model.layers.49.block_sparse_moe.experts.25.w3", "model.layers.49.block_sparse_moe.experts.26.w3", "model.layers.49.block_sparse_moe.experts.27.w3", "model.layers.49.block_sparse_moe.experts.28.w3", "model.layers.49.block_sparse_moe.experts.29.w3", "model.layers.49.block_sparse_moe.experts.30.w3", "model.layers.49.block_sparse_moe.experts.31.w3", "model.layers.49.block_sparse_moe.experts.32.w3", "model.layers.49.block_sparse_moe.experts.33.w3", "model.layers.49.block_sparse_moe.experts.34.w3", "model.layers.49.block_sparse_moe.experts.35.w3", "model.layers.49.block_sparse_moe.experts.36.w3", "model.layers.49.block_sparse_moe.experts.37.w3", "model.layers.49.block_sparse_moe.experts.38.w3", "model.layers.49.block_sparse_moe.experts.39.w3", "model.layers.49.block_sparse_moe.experts.40.w3", "model.layers.49.block_sparse_moe.experts.41.w3", "model.layers.49.block_sparse_moe.experts.42.w3", "model.layers.49.block_sparse_moe.experts.43.w3", "model.layers.49.block_sparse_moe.experts.44.w3", "model.layers.49.block_sparse_moe.experts.45.w3", "model.layers.49.block_sparse_moe.experts.46.w3", "model.layers.49.block_sparse_moe.experts.47.w3", "model.layers.49.block_sparse_moe.experts.48.w3", "model.layers.49.block_sparse_moe.experts.49.w3", "model.layers.49.block_sparse_moe.experts.50.w3", "model.layers.49.block_sparse_moe.experts.51.w3", "model.layers.49.block_sparse_moe.experts.52.w3", "model.layers.49.block_sparse_moe.experts.53.w3", "model.layers.49.block_sparse_moe.experts.54.w3", "model.layers.49.block_sparse_moe.experts.55.w3", "model.layers.49.block_sparse_moe.experts.56.w3", "model.layers.49.block_sparse_moe.experts.57.w3", "model.layers.49.block_sparse_moe.experts.58.w3", "model.layers.49.block_sparse_moe.experts.59.w3", "model.layers.49.block_sparse_moe.experts.60.w3", "model.layers.49.block_sparse_moe.experts.61.w3", "model.layers.49.block_sparse_moe.experts.62.w3", "model.layers.49.block_sparse_moe.experts.63.w3", "model.layers.49.block_sparse_moe.experts.64.w3", "model.layers.49.block_sparse_moe.experts.65.w3", "model.layers.49.block_sparse_moe.experts.66.w3", "model.layers.49.block_sparse_moe.experts.67.w3", "model.layers.49.block_sparse_moe.experts.68.w3", "model.layers.49.block_sparse_moe.experts.69.w3", "model.layers.49.block_sparse_moe.experts.70.w3", "model.layers.49.block_sparse_moe.experts.71.w3", "model.layers.49.block_sparse_moe.experts.72.w3", "model.layers.49.block_sparse_moe.experts.73.w3", "model.layers.49.block_sparse_moe.experts.74.w3", "model.layers.49.block_sparse_moe.experts.75.w3", "model.layers.49.block_sparse_moe.experts.76.w3", "model.layers.49.block_sparse_moe.experts.77.w3", "model.layers.49.block_sparse_moe.experts.78.w3", "model.layers.49.block_sparse_moe.experts.79.w3", "model.layers.49.block_sparse_moe.experts.80.w3", "model.layers.49.block_sparse_moe.experts.81.w3", "model.layers.49.block_sparse_moe.experts.82.w3", "model.layers.49.block_sparse_moe.experts.83.w3", "model.layers.49.block_sparse_moe.experts.84.w3", "model.layers.49.block_sparse_moe.experts.85.w3", "model.layers.49.block_sparse_moe.experts.86.w3", "model.layers.49.block_sparse_moe.experts.87.w3", "model.layers.49.block_sparse_moe.experts.88.w3", "model.layers.49.block_sparse_moe.experts.89.w3", "model.layers.49.block_sparse_moe.experts.90.w3", "model.layers.49.block_sparse_moe.experts.91.w3", "model.layers.49.block_sparse_moe.experts.92.w3", "model.layers.49.block_sparse_moe.experts.93.w3", "model.layers.49.block_sparse_moe.experts.94.w3", "model.layers.49.block_sparse_moe.experts.95.w3", "model.layers.49.block_sparse_moe.experts.96.w3", "model.layers.49.block_sparse_moe.experts.97.w3", "model.layers.49.block_sparse_moe.experts.98.w3", "model.layers.49.block_sparse_moe.experts.99.w3", "model.layers.49.block_sparse_moe.experts.100.w3", "model.layers.49.block_sparse_moe.experts.101.w3", "model.layers.49.block_sparse_moe.experts.102.w3", "model.layers.49.block_sparse_moe.experts.103.w3", "model.layers.49.block_sparse_moe.experts.104.w3", "model.layers.49.block_sparse_moe.experts.105.w3", "model.layers.49.block_sparse_moe.experts.106.w3", "model.layers.49.block_sparse_moe.experts.107.w3", "model.layers.49.block_sparse_moe.experts.108.w3", "model.layers.49.block_sparse_moe.experts.109.w3", "model.layers.49.block_sparse_moe.experts.110.w3", "model.layers.49.block_sparse_moe.experts.111.w3", "model.layers.49.block_sparse_moe.experts.112.w3", "model.layers.49.block_sparse_moe.experts.113.w3", "model.layers.49.block_sparse_moe.experts.114.w3", "model.layers.49.block_sparse_moe.experts.115.w3", "model.layers.49.block_sparse_moe.experts.116.w3", "model.layers.49.block_sparse_moe.experts.117.w3", "model.layers.49.block_sparse_moe.experts.118.w3", "model.layers.49.block_sparse_moe.experts.119.w3", "model.layers.49.block_sparse_moe.experts.120.w3", "model.layers.49.block_sparse_moe.experts.121.w3", "model.layers.49.block_sparse_moe.experts.122.w3", "model.layers.49.block_sparse_moe.experts.123.w3", "model.layers.49.block_sparse_moe.experts.124.w3", "model.layers.49.block_sparse_moe.experts.125.w3", "model.layers.49.block_sparse_moe.experts.126.w3", "model.layers.49.block_sparse_moe.experts.127.w3", "model.layers.49.block_sparse_moe.experts.128.w3", "model.layers.49.block_sparse_moe.experts.129.w3", "model.layers.49.block_sparse_moe.experts.130.w3", "model.layers.49.block_sparse_moe.experts.131.w3", "model.layers.49.block_sparse_moe.experts.132.w3", "model.layers.49.block_sparse_moe.experts.133.w3", "model.layers.49.block_sparse_moe.experts.134.w3", "model.layers.49.block_sparse_moe.experts.135.w3", "model.layers.49.block_sparse_moe.experts.136.w3", "model.layers.49.block_sparse_moe.experts.137.w3", "model.layers.49.block_sparse_moe.experts.138.w3", "model.layers.49.block_sparse_moe.experts.139.w3", "model.layers.49.block_sparse_moe.experts.140.w3", "model.layers.49.block_sparse_moe.experts.141.w3", "model.layers.49.block_sparse_moe.experts.142.w3", "model.layers.49.block_sparse_moe.experts.143.w3", "model.layers.49.block_sparse_moe.experts.144.w3", "model.layers.49.block_sparse_moe.experts.145.w3", "model.layers.49.block_sparse_moe.experts.146.w3", "model.layers.49.block_sparse_moe.experts.147.w3", "model.layers.49.block_sparse_moe.experts.148.w3", "model.layers.49.block_sparse_moe.experts.149.w3", "model.layers.49.block_sparse_moe.experts.150.w3", "model.layers.49.block_sparse_moe.experts.151.w3", "model.layers.49.block_sparse_moe.experts.152.w3", "model.layers.49.block_sparse_moe.experts.153.w3", "model.layers.49.block_sparse_moe.experts.154.w3", "model.layers.49.block_sparse_moe.experts.155.w3", "model.layers.49.block_sparse_moe.experts.156.w3", "model.layers.49.block_sparse_moe.experts.157.w3", "model.layers.49.block_sparse_moe.experts.158.w3", "model.layers.49.block_sparse_moe.experts.159.w3", "model.layers.49.block_sparse_moe.experts.160.w3", "model.layers.49.block_sparse_moe.experts.161.w3", "model.layers.49.block_sparse_moe.experts.162.w3", "model.layers.49.block_sparse_moe.experts.163.w3", "model.layers.49.block_sparse_moe.experts.164.w3", "model.layers.49.block_sparse_moe.experts.165.w3", "model.layers.49.block_sparse_moe.experts.166.w3", "model.layers.49.block_sparse_moe.experts.167.w3", "model.layers.49.block_sparse_moe.experts.168.w3", "model.layers.49.block_sparse_moe.experts.169.w3", "model.layers.49.block_sparse_moe.experts.170.w3", "model.layers.49.block_sparse_moe.experts.171.w3", "model.layers.49.block_sparse_moe.experts.172.w3", "model.layers.49.block_sparse_moe.experts.173.w3", "model.layers.49.block_sparse_moe.experts.174.w3", "model.layers.49.block_sparse_moe.experts.175.w3", "model.layers.49.block_sparse_moe.experts.176.w3", "model.layers.49.block_sparse_moe.experts.177.w3", "model.layers.49.block_sparse_moe.experts.178.w3", "model.layers.49.block_sparse_moe.experts.179.w3", "model.layers.49.block_sparse_moe.experts.180.w3", "model.layers.49.block_sparse_moe.experts.181.w3", "model.layers.49.block_sparse_moe.experts.182.w3", "model.layers.49.block_sparse_moe.experts.183.w3", "model.layers.49.block_sparse_moe.experts.184.w3", "model.layers.49.block_sparse_moe.experts.185.w3", "model.layers.49.block_sparse_moe.experts.186.w3", "model.layers.49.block_sparse_moe.experts.187.w3", "model.layers.49.block_sparse_moe.experts.188.w3", "model.layers.49.block_sparse_moe.experts.189.w3", "model.layers.49.block_sparse_moe.experts.190.w3", "model.layers.49.block_sparse_moe.experts.191.w3", "model.layers.49.block_sparse_moe.experts.192.w3", "model.layers.49.block_sparse_moe.experts.193.w3", "model.layers.49.block_sparse_moe.experts.194.w3", "model.layers.49.block_sparse_moe.experts.195.w3", "model.layers.49.block_sparse_moe.experts.196.w3", "model.layers.49.block_sparse_moe.experts.197.w3", "model.layers.49.block_sparse_moe.experts.198.w3", "model.layers.49.block_sparse_moe.experts.199.w3", "model.layers.49.block_sparse_moe.experts.200.w3", "model.layers.49.block_sparse_moe.experts.201.w3", "model.layers.49.block_sparse_moe.experts.202.w3", "model.layers.49.block_sparse_moe.experts.203.w3", "model.layers.49.block_sparse_moe.experts.204.w3", "model.layers.49.block_sparse_moe.experts.205.w3", "model.layers.49.block_sparse_moe.experts.206.w3", "model.layers.49.block_sparse_moe.experts.207.w3", "model.layers.49.block_sparse_moe.experts.208.w3", "model.layers.49.block_sparse_moe.experts.209.w3", "model.layers.49.block_sparse_moe.experts.210.w3", "model.layers.49.block_sparse_moe.experts.211.w3", "model.layers.49.block_sparse_moe.experts.212.w3", "model.layers.49.block_sparse_moe.experts.213.w3", "model.layers.49.block_sparse_moe.experts.214.w3", "model.layers.49.block_sparse_moe.experts.215.w3", "model.layers.49.block_sparse_moe.experts.216.w3", "model.layers.49.block_sparse_moe.experts.217.w3", "model.layers.49.block_sparse_moe.experts.218.w3", "model.layers.49.block_sparse_moe.experts.219.w3", "model.layers.49.block_sparse_moe.experts.220.w3", "model.layers.49.block_sparse_moe.experts.221.w3", "model.layers.49.block_sparse_moe.experts.222.w3", "model.layers.49.block_sparse_moe.experts.223.w3", "model.layers.49.block_sparse_moe.experts.224.w3", "model.layers.49.block_sparse_moe.experts.225.w3", "model.layers.49.block_sparse_moe.experts.226.w3", "model.layers.49.block_sparse_moe.experts.227.w3", "model.layers.49.block_sparse_moe.experts.228.w3", "model.layers.49.block_sparse_moe.experts.229.w3", "model.layers.49.block_sparse_moe.experts.230.w3", "model.layers.49.block_sparse_moe.experts.231.w3", "model.layers.49.block_sparse_moe.experts.232.w3", "model.layers.49.block_sparse_moe.experts.233.w3", "model.layers.49.block_sparse_moe.experts.234.w3", "model.layers.49.block_sparse_moe.experts.235.w3", "model.layers.49.block_sparse_moe.experts.236.w3", "model.layers.49.block_sparse_moe.experts.237.w3", "model.layers.49.block_sparse_moe.experts.238.w3", "model.layers.49.block_sparse_moe.experts.239.w3", "model.layers.49.block_sparse_moe.experts.240.w3", "model.layers.49.block_sparse_moe.experts.241.w3", "model.layers.49.block_sparse_moe.experts.242.w3", "model.layers.49.block_sparse_moe.experts.243.w3", "model.layers.49.block_sparse_moe.experts.244.w3", "model.layers.49.block_sparse_moe.experts.245.w3", "model.layers.49.block_sparse_moe.experts.246.w3", "model.layers.49.block_sparse_moe.experts.247.w3", "model.layers.49.block_sparse_moe.experts.248.w3", "model.layers.49.block_sparse_moe.experts.249.w3", "model.layers.49.block_sparse_moe.experts.250.w3", "model.layers.49.block_sparse_moe.experts.251.w3", "model.layers.49.block_sparse_moe.experts.252.w3", "model.layers.49.block_sparse_moe.experts.253.w3", "model.layers.49.block_sparse_moe.experts.254.w3", "model.layers.49.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0005776345729827659, "dbits": 2415919104 } ] }, { "idx": 249, "layers": [ "model.layers.49.block_sparse_moe.experts.0.w2", "model.layers.49.block_sparse_moe.experts.1.w2", "model.layers.49.block_sparse_moe.experts.2.w2", "model.layers.49.block_sparse_moe.experts.3.w2", "model.layers.49.block_sparse_moe.experts.4.w2", "model.layers.49.block_sparse_moe.experts.5.w2", "model.layers.49.block_sparse_moe.experts.6.w2", "model.layers.49.block_sparse_moe.experts.7.w2", "model.layers.49.block_sparse_moe.experts.8.w2", "model.layers.49.block_sparse_moe.experts.9.w2", "model.layers.49.block_sparse_moe.experts.10.w2", "model.layers.49.block_sparse_moe.experts.11.w2", "model.layers.49.block_sparse_moe.experts.12.w2", "model.layers.49.block_sparse_moe.experts.13.w2", "model.layers.49.block_sparse_moe.experts.14.w2", "model.layers.49.block_sparse_moe.experts.15.w2", "model.layers.49.block_sparse_moe.experts.16.w2", "model.layers.49.block_sparse_moe.experts.17.w2", "model.layers.49.block_sparse_moe.experts.18.w2", "model.layers.49.block_sparse_moe.experts.19.w2", "model.layers.49.block_sparse_moe.experts.20.w2", "model.layers.49.block_sparse_moe.experts.21.w2", "model.layers.49.block_sparse_moe.experts.22.w2", "model.layers.49.block_sparse_moe.experts.23.w2", "model.layers.49.block_sparse_moe.experts.24.w2", "model.layers.49.block_sparse_moe.experts.25.w2", "model.layers.49.block_sparse_moe.experts.26.w2", "model.layers.49.block_sparse_moe.experts.27.w2", "model.layers.49.block_sparse_moe.experts.28.w2", "model.layers.49.block_sparse_moe.experts.29.w2", "model.layers.49.block_sparse_moe.experts.30.w2", "model.layers.49.block_sparse_moe.experts.31.w2", "model.layers.49.block_sparse_moe.experts.32.w2", "model.layers.49.block_sparse_moe.experts.33.w2", "model.layers.49.block_sparse_moe.experts.34.w2", "model.layers.49.block_sparse_moe.experts.35.w2", "model.layers.49.block_sparse_moe.experts.36.w2", "model.layers.49.block_sparse_moe.experts.37.w2", "model.layers.49.block_sparse_moe.experts.38.w2", "model.layers.49.block_sparse_moe.experts.39.w2", "model.layers.49.block_sparse_moe.experts.40.w2", "model.layers.49.block_sparse_moe.experts.41.w2", "model.layers.49.block_sparse_moe.experts.42.w2", "model.layers.49.block_sparse_moe.experts.43.w2", "model.layers.49.block_sparse_moe.experts.44.w2", "model.layers.49.block_sparse_moe.experts.45.w2", "model.layers.49.block_sparse_moe.experts.46.w2", "model.layers.49.block_sparse_moe.experts.47.w2", "model.layers.49.block_sparse_moe.experts.48.w2", "model.layers.49.block_sparse_moe.experts.49.w2", "model.layers.49.block_sparse_moe.experts.50.w2", "model.layers.49.block_sparse_moe.experts.51.w2", "model.layers.49.block_sparse_moe.experts.52.w2", "model.layers.49.block_sparse_moe.experts.53.w2", "model.layers.49.block_sparse_moe.experts.54.w2", "model.layers.49.block_sparse_moe.experts.55.w2", "model.layers.49.block_sparse_moe.experts.56.w2", "model.layers.49.block_sparse_moe.experts.57.w2", "model.layers.49.block_sparse_moe.experts.58.w2", "model.layers.49.block_sparse_moe.experts.59.w2", "model.layers.49.block_sparse_moe.experts.60.w2", "model.layers.49.block_sparse_moe.experts.61.w2", "model.layers.49.block_sparse_moe.experts.62.w2", "model.layers.49.block_sparse_moe.experts.63.w2", "model.layers.49.block_sparse_moe.experts.64.w2", "model.layers.49.block_sparse_moe.experts.65.w2", "model.layers.49.block_sparse_moe.experts.66.w2", "model.layers.49.block_sparse_moe.experts.67.w2", "model.layers.49.block_sparse_moe.experts.68.w2", "model.layers.49.block_sparse_moe.experts.69.w2", "model.layers.49.block_sparse_moe.experts.70.w2", "model.layers.49.block_sparse_moe.experts.71.w2", "model.layers.49.block_sparse_moe.experts.72.w2", "model.layers.49.block_sparse_moe.experts.73.w2", "model.layers.49.block_sparse_moe.experts.74.w2", "model.layers.49.block_sparse_moe.experts.75.w2", "model.layers.49.block_sparse_moe.experts.76.w2", "model.layers.49.block_sparse_moe.experts.77.w2", "model.layers.49.block_sparse_moe.experts.78.w2", "model.layers.49.block_sparse_moe.experts.79.w2", "model.layers.49.block_sparse_moe.experts.80.w2", "model.layers.49.block_sparse_moe.experts.81.w2", "model.layers.49.block_sparse_moe.experts.82.w2", "model.layers.49.block_sparse_moe.experts.83.w2", "model.layers.49.block_sparse_moe.experts.84.w2", "model.layers.49.block_sparse_moe.experts.85.w2", "model.layers.49.block_sparse_moe.experts.86.w2", "model.layers.49.block_sparse_moe.experts.87.w2", "model.layers.49.block_sparse_moe.experts.88.w2", "model.layers.49.block_sparse_moe.experts.89.w2", "model.layers.49.block_sparse_moe.experts.90.w2", "model.layers.49.block_sparse_moe.experts.91.w2", "model.layers.49.block_sparse_moe.experts.92.w2", "model.layers.49.block_sparse_moe.experts.93.w2", "model.layers.49.block_sparse_moe.experts.94.w2", "model.layers.49.block_sparse_moe.experts.95.w2", "model.layers.49.block_sparse_moe.experts.96.w2", "model.layers.49.block_sparse_moe.experts.97.w2", "model.layers.49.block_sparse_moe.experts.98.w2", "model.layers.49.block_sparse_moe.experts.99.w2", "model.layers.49.block_sparse_moe.experts.100.w2", "model.layers.49.block_sparse_moe.experts.101.w2", "model.layers.49.block_sparse_moe.experts.102.w2", "model.layers.49.block_sparse_moe.experts.103.w2", "model.layers.49.block_sparse_moe.experts.104.w2", "model.layers.49.block_sparse_moe.experts.105.w2", "model.layers.49.block_sparse_moe.experts.106.w2", "model.layers.49.block_sparse_moe.experts.107.w2", "model.layers.49.block_sparse_moe.experts.108.w2", "model.layers.49.block_sparse_moe.experts.109.w2", "model.layers.49.block_sparse_moe.experts.110.w2", "model.layers.49.block_sparse_moe.experts.111.w2", "model.layers.49.block_sparse_moe.experts.112.w2", "model.layers.49.block_sparse_moe.experts.113.w2", "model.layers.49.block_sparse_moe.experts.114.w2", "model.layers.49.block_sparse_moe.experts.115.w2", "model.layers.49.block_sparse_moe.experts.116.w2", "model.layers.49.block_sparse_moe.experts.117.w2", "model.layers.49.block_sparse_moe.experts.118.w2", "model.layers.49.block_sparse_moe.experts.119.w2", "model.layers.49.block_sparse_moe.experts.120.w2", "model.layers.49.block_sparse_moe.experts.121.w2", "model.layers.49.block_sparse_moe.experts.122.w2", "model.layers.49.block_sparse_moe.experts.123.w2", "model.layers.49.block_sparse_moe.experts.124.w2", "model.layers.49.block_sparse_moe.experts.125.w2", "model.layers.49.block_sparse_moe.experts.126.w2", "model.layers.49.block_sparse_moe.experts.127.w2", "model.layers.49.block_sparse_moe.experts.128.w2", "model.layers.49.block_sparse_moe.experts.129.w2", "model.layers.49.block_sparse_moe.experts.130.w2", "model.layers.49.block_sparse_moe.experts.131.w2", "model.layers.49.block_sparse_moe.experts.132.w2", "model.layers.49.block_sparse_moe.experts.133.w2", "model.layers.49.block_sparse_moe.experts.134.w2", "model.layers.49.block_sparse_moe.experts.135.w2", "model.layers.49.block_sparse_moe.experts.136.w2", "model.layers.49.block_sparse_moe.experts.137.w2", "model.layers.49.block_sparse_moe.experts.138.w2", "model.layers.49.block_sparse_moe.experts.139.w2", "model.layers.49.block_sparse_moe.experts.140.w2", "model.layers.49.block_sparse_moe.experts.141.w2", "model.layers.49.block_sparse_moe.experts.142.w2", "model.layers.49.block_sparse_moe.experts.143.w2", "model.layers.49.block_sparse_moe.experts.144.w2", "model.layers.49.block_sparse_moe.experts.145.w2", "model.layers.49.block_sparse_moe.experts.146.w2", "model.layers.49.block_sparse_moe.experts.147.w2", "model.layers.49.block_sparse_moe.experts.148.w2", "model.layers.49.block_sparse_moe.experts.149.w2", "model.layers.49.block_sparse_moe.experts.150.w2", "model.layers.49.block_sparse_moe.experts.151.w2", "model.layers.49.block_sparse_moe.experts.152.w2", "model.layers.49.block_sparse_moe.experts.153.w2", "model.layers.49.block_sparse_moe.experts.154.w2", "model.layers.49.block_sparse_moe.experts.155.w2", "model.layers.49.block_sparse_moe.experts.156.w2", "model.layers.49.block_sparse_moe.experts.157.w2", "model.layers.49.block_sparse_moe.experts.158.w2", "model.layers.49.block_sparse_moe.experts.159.w2", "model.layers.49.block_sparse_moe.experts.160.w2", "model.layers.49.block_sparse_moe.experts.161.w2", "model.layers.49.block_sparse_moe.experts.162.w2", "model.layers.49.block_sparse_moe.experts.163.w2", "model.layers.49.block_sparse_moe.experts.164.w2", "model.layers.49.block_sparse_moe.experts.165.w2", "model.layers.49.block_sparse_moe.experts.166.w2", "model.layers.49.block_sparse_moe.experts.167.w2", "model.layers.49.block_sparse_moe.experts.168.w2", "model.layers.49.block_sparse_moe.experts.169.w2", "model.layers.49.block_sparse_moe.experts.170.w2", "model.layers.49.block_sparse_moe.experts.171.w2", "model.layers.49.block_sparse_moe.experts.172.w2", "model.layers.49.block_sparse_moe.experts.173.w2", "model.layers.49.block_sparse_moe.experts.174.w2", "model.layers.49.block_sparse_moe.experts.175.w2", "model.layers.49.block_sparse_moe.experts.176.w2", "model.layers.49.block_sparse_moe.experts.177.w2", "model.layers.49.block_sparse_moe.experts.178.w2", "model.layers.49.block_sparse_moe.experts.179.w2", "model.layers.49.block_sparse_moe.experts.180.w2", "model.layers.49.block_sparse_moe.experts.181.w2", "model.layers.49.block_sparse_moe.experts.182.w2", "model.layers.49.block_sparse_moe.experts.183.w2", "model.layers.49.block_sparse_moe.experts.184.w2", "model.layers.49.block_sparse_moe.experts.185.w2", "model.layers.49.block_sparse_moe.experts.186.w2", "model.layers.49.block_sparse_moe.experts.187.w2", "model.layers.49.block_sparse_moe.experts.188.w2", "model.layers.49.block_sparse_moe.experts.189.w2", "model.layers.49.block_sparse_moe.experts.190.w2", "model.layers.49.block_sparse_moe.experts.191.w2", "model.layers.49.block_sparse_moe.experts.192.w2", "model.layers.49.block_sparse_moe.experts.193.w2", "model.layers.49.block_sparse_moe.experts.194.w2", "model.layers.49.block_sparse_moe.experts.195.w2", "model.layers.49.block_sparse_moe.experts.196.w2", "model.layers.49.block_sparse_moe.experts.197.w2", "model.layers.49.block_sparse_moe.experts.198.w2", "model.layers.49.block_sparse_moe.experts.199.w2", "model.layers.49.block_sparse_moe.experts.200.w2", "model.layers.49.block_sparse_moe.experts.201.w2", "model.layers.49.block_sparse_moe.experts.202.w2", "model.layers.49.block_sparse_moe.experts.203.w2", "model.layers.49.block_sparse_moe.experts.204.w2", "model.layers.49.block_sparse_moe.experts.205.w2", "model.layers.49.block_sparse_moe.experts.206.w2", "model.layers.49.block_sparse_moe.experts.207.w2", "model.layers.49.block_sparse_moe.experts.208.w2", "model.layers.49.block_sparse_moe.experts.209.w2", "model.layers.49.block_sparse_moe.experts.210.w2", "model.layers.49.block_sparse_moe.experts.211.w2", "model.layers.49.block_sparse_moe.experts.212.w2", "model.layers.49.block_sparse_moe.experts.213.w2", "model.layers.49.block_sparse_moe.experts.214.w2", "model.layers.49.block_sparse_moe.experts.215.w2", "model.layers.49.block_sparse_moe.experts.216.w2", "model.layers.49.block_sparse_moe.experts.217.w2", "model.layers.49.block_sparse_moe.experts.218.w2", "model.layers.49.block_sparse_moe.experts.219.w2", "model.layers.49.block_sparse_moe.experts.220.w2", "model.layers.49.block_sparse_moe.experts.221.w2", "model.layers.49.block_sparse_moe.experts.222.w2", "model.layers.49.block_sparse_moe.experts.223.w2", "model.layers.49.block_sparse_moe.experts.224.w2", "model.layers.49.block_sparse_moe.experts.225.w2", "model.layers.49.block_sparse_moe.experts.226.w2", "model.layers.49.block_sparse_moe.experts.227.w2", "model.layers.49.block_sparse_moe.experts.228.w2", "model.layers.49.block_sparse_moe.experts.229.w2", "model.layers.49.block_sparse_moe.experts.230.w2", "model.layers.49.block_sparse_moe.experts.231.w2", "model.layers.49.block_sparse_moe.experts.232.w2", "model.layers.49.block_sparse_moe.experts.233.w2", "model.layers.49.block_sparse_moe.experts.234.w2", "model.layers.49.block_sparse_moe.experts.235.w2", "model.layers.49.block_sparse_moe.experts.236.w2", "model.layers.49.block_sparse_moe.experts.237.w2", "model.layers.49.block_sparse_moe.experts.238.w2", "model.layers.49.block_sparse_moe.experts.239.w2", "model.layers.49.block_sparse_moe.experts.240.w2", "model.layers.49.block_sparse_moe.experts.241.w2", "model.layers.49.block_sparse_moe.experts.242.w2", "model.layers.49.block_sparse_moe.experts.243.w2", "model.layers.49.block_sparse_moe.experts.244.w2", "model.layers.49.block_sparse_moe.experts.245.w2", "model.layers.49.block_sparse_moe.experts.246.w2", "model.layers.49.block_sparse_moe.experts.247.w2", "model.layers.49.block_sparse_moe.experts.248.w2", "model.layers.49.block_sparse_moe.experts.249.w2", "model.layers.49.block_sparse_moe.experts.250.w2", "model.layers.49.block_sparse_moe.experts.251.w2", "model.layers.49.block_sparse_moe.experts.252.w2", "model.layers.49.block_sparse_moe.experts.253.w2", "model.layers.49.block_sparse_moe.experts.254.w2", "model.layers.49.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00027286112308499977, "dbits": 1207959552 } ] }, { "idx": 250, "layers": [ "model.layers.50.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0008288711309433205, "dbits": 18874368 } ] }, { "idx": 251, "layers": [ "model.layers.50.self_attn.k_proj", "model.layers.50.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0015529841184616089, "dbits": 6291456 } ] }, { "idx": 252, "layers": [ "model.layers.50.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0022691473364829795, "dbits": 18874368 } ] }, { "idx": 253, "layers": [ "model.layers.50.block_sparse_moe.experts.0.w1", "model.layers.50.block_sparse_moe.experts.1.w1", "model.layers.50.block_sparse_moe.experts.2.w1", "model.layers.50.block_sparse_moe.experts.3.w1", "model.layers.50.block_sparse_moe.experts.4.w1", "model.layers.50.block_sparse_moe.experts.5.w1", "model.layers.50.block_sparse_moe.experts.6.w1", "model.layers.50.block_sparse_moe.experts.7.w1", "model.layers.50.block_sparse_moe.experts.8.w1", "model.layers.50.block_sparse_moe.experts.9.w1", "model.layers.50.block_sparse_moe.experts.10.w1", "model.layers.50.block_sparse_moe.experts.11.w1", "model.layers.50.block_sparse_moe.experts.12.w1", "model.layers.50.block_sparse_moe.experts.13.w1", "model.layers.50.block_sparse_moe.experts.14.w1", "model.layers.50.block_sparse_moe.experts.15.w1", "model.layers.50.block_sparse_moe.experts.16.w1", "model.layers.50.block_sparse_moe.experts.17.w1", "model.layers.50.block_sparse_moe.experts.18.w1", "model.layers.50.block_sparse_moe.experts.19.w1", "model.layers.50.block_sparse_moe.experts.20.w1", "model.layers.50.block_sparse_moe.experts.21.w1", "model.layers.50.block_sparse_moe.experts.22.w1", "model.layers.50.block_sparse_moe.experts.23.w1", "model.layers.50.block_sparse_moe.experts.24.w1", "model.layers.50.block_sparse_moe.experts.25.w1", "model.layers.50.block_sparse_moe.experts.26.w1", "model.layers.50.block_sparse_moe.experts.27.w1", "model.layers.50.block_sparse_moe.experts.28.w1", "model.layers.50.block_sparse_moe.experts.29.w1", "model.layers.50.block_sparse_moe.experts.30.w1", "model.layers.50.block_sparse_moe.experts.31.w1", "model.layers.50.block_sparse_moe.experts.32.w1", "model.layers.50.block_sparse_moe.experts.33.w1", "model.layers.50.block_sparse_moe.experts.34.w1", "model.layers.50.block_sparse_moe.experts.35.w1", "model.layers.50.block_sparse_moe.experts.36.w1", "model.layers.50.block_sparse_moe.experts.37.w1", "model.layers.50.block_sparse_moe.experts.38.w1", "model.layers.50.block_sparse_moe.experts.39.w1", "model.layers.50.block_sparse_moe.experts.40.w1", "model.layers.50.block_sparse_moe.experts.41.w1", "model.layers.50.block_sparse_moe.experts.42.w1", "model.layers.50.block_sparse_moe.experts.43.w1", "model.layers.50.block_sparse_moe.experts.44.w1", "model.layers.50.block_sparse_moe.experts.45.w1", "model.layers.50.block_sparse_moe.experts.46.w1", "model.layers.50.block_sparse_moe.experts.47.w1", "model.layers.50.block_sparse_moe.experts.48.w1", "model.layers.50.block_sparse_moe.experts.49.w1", "model.layers.50.block_sparse_moe.experts.50.w1", "model.layers.50.block_sparse_moe.experts.51.w1", "model.layers.50.block_sparse_moe.experts.52.w1", "model.layers.50.block_sparse_moe.experts.53.w1", "model.layers.50.block_sparse_moe.experts.54.w1", "model.layers.50.block_sparse_moe.experts.55.w1", "model.layers.50.block_sparse_moe.experts.56.w1", "model.layers.50.block_sparse_moe.experts.57.w1", "model.layers.50.block_sparse_moe.experts.58.w1", "model.layers.50.block_sparse_moe.experts.59.w1", "model.layers.50.block_sparse_moe.experts.60.w1", "model.layers.50.block_sparse_moe.experts.61.w1", "model.layers.50.block_sparse_moe.experts.62.w1", "model.layers.50.block_sparse_moe.experts.63.w1", "model.layers.50.block_sparse_moe.experts.64.w1", "model.layers.50.block_sparse_moe.experts.65.w1", "model.layers.50.block_sparse_moe.experts.66.w1", "model.layers.50.block_sparse_moe.experts.67.w1", "model.layers.50.block_sparse_moe.experts.68.w1", "model.layers.50.block_sparse_moe.experts.69.w1", "model.layers.50.block_sparse_moe.experts.70.w1", "model.layers.50.block_sparse_moe.experts.71.w1", "model.layers.50.block_sparse_moe.experts.72.w1", "model.layers.50.block_sparse_moe.experts.73.w1", "model.layers.50.block_sparse_moe.experts.74.w1", "model.layers.50.block_sparse_moe.experts.75.w1", "model.layers.50.block_sparse_moe.experts.76.w1", "model.layers.50.block_sparse_moe.experts.77.w1", "model.layers.50.block_sparse_moe.experts.78.w1", "model.layers.50.block_sparse_moe.experts.79.w1", "model.layers.50.block_sparse_moe.experts.80.w1", "model.layers.50.block_sparse_moe.experts.81.w1", "model.layers.50.block_sparse_moe.experts.82.w1", "model.layers.50.block_sparse_moe.experts.83.w1", "model.layers.50.block_sparse_moe.experts.84.w1", "model.layers.50.block_sparse_moe.experts.85.w1", "model.layers.50.block_sparse_moe.experts.86.w1", "model.layers.50.block_sparse_moe.experts.87.w1", "model.layers.50.block_sparse_moe.experts.88.w1", "model.layers.50.block_sparse_moe.experts.89.w1", "model.layers.50.block_sparse_moe.experts.90.w1", "model.layers.50.block_sparse_moe.experts.91.w1", "model.layers.50.block_sparse_moe.experts.92.w1", "model.layers.50.block_sparse_moe.experts.93.w1", "model.layers.50.block_sparse_moe.experts.94.w1", "model.layers.50.block_sparse_moe.experts.95.w1", "model.layers.50.block_sparse_moe.experts.96.w1", "model.layers.50.block_sparse_moe.experts.97.w1", "model.layers.50.block_sparse_moe.experts.98.w1", "model.layers.50.block_sparse_moe.experts.99.w1", "model.layers.50.block_sparse_moe.experts.100.w1", "model.layers.50.block_sparse_moe.experts.101.w1", "model.layers.50.block_sparse_moe.experts.102.w1", "model.layers.50.block_sparse_moe.experts.103.w1", "model.layers.50.block_sparse_moe.experts.104.w1", "model.layers.50.block_sparse_moe.experts.105.w1", "model.layers.50.block_sparse_moe.experts.106.w1", "model.layers.50.block_sparse_moe.experts.107.w1", "model.layers.50.block_sparse_moe.experts.108.w1", "model.layers.50.block_sparse_moe.experts.109.w1", "model.layers.50.block_sparse_moe.experts.110.w1", "model.layers.50.block_sparse_moe.experts.111.w1", "model.layers.50.block_sparse_moe.experts.112.w1", "model.layers.50.block_sparse_moe.experts.113.w1", "model.layers.50.block_sparse_moe.experts.114.w1", "model.layers.50.block_sparse_moe.experts.115.w1", "model.layers.50.block_sparse_moe.experts.116.w1", "model.layers.50.block_sparse_moe.experts.117.w1", "model.layers.50.block_sparse_moe.experts.118.w1", "model.layers.50.block_sparse_moe.experts.119.w1", "model.layers.50.block_sparse_moe.experts.120.w1", "model.layers.50.block_sparse_moe.experts.121.w1", "model.layers.50.block_sparse_moe.experts.122.w1", "model.layers.50.block_sparse_moe.experts.123.w1", "model.layers.50.block_sparse_moe.experts.124.w1", "model.layers.50.block_sparse_moe.experts.125.w1", "model.layers.50.block_sparse_moe.experts.126.w1", "model.layers.50.block_sparse_moe.experts.127.w1", "model.layers.50.block_sparse_moe.experts.128.w1", "model.layers.50.block_sparse_moe.experts.129.w1", "model.layers.50.block_sparse_moe.experts.130.w1", "model.layers.50.block_sparse_moe.experts.131.w1", "model.layers.50.block_sparse_moe.experts.132.w1", "model.layers.50.block_sparse_moe.experts.133.w1", "model.layers.50.block_sparse_moe.experts.134.w1", "model.layers.50.block_sparse_moe.experts.135.w1", "model.layers.50.block_sparse_moe.experts.136.w1", "model.layers.50.block_sparse_moe.experts.137.w1", "model.layers.50.block_sparse_moe.experts.138.w1", "model.layers.50.block_sparse_moe.experts.139.w1", "model.layers.50.block_sparse_moe.experts.140.w1", "model.layers.50.block_sparse_moe.experts.141.w1", "model.layers.50.block_sparse_moe.experts.142.w1", "model.layers.50.block_sparse_moe.experts.143.w1", "model.layers.50.block_sparse_moe.experts.144.w1", "model.layers.50.block_sparse_moe.experts.145.w1", "model.layers.50.block_sparse_moe.experts.146.w1", "model.layers.50.block_sparse_moe.experts.147.w1", "model.layers.50.block_sparse_moe.experts.148.w1", "model.layers.50.block_sparse_moe.experts.149.w1", "model.layers.50.block_sparse_moe.experts.150.w1", "model.layers.50.block_sparse_moe.experts.151.w1", "model.layers.50.block_sparse_moe.experts.152.w1", "model.layers.50.block_sparse_moe.experts.153.w1", "model.layers.50.block_sparse_moe.experts.154.w1", "model.layers.50.block_sparse_moe.experts.155.w1", "model.layers.50.block_sparse_moe.experts.156.w1", "model.layers.50.block_sparse_moe.experts.157.w1", "model.layers.50.block_sparse_moe.experts.158.w1", "model.layers.50.block_sparse_moe.experts.159.w1", "model.layers.50.block_sparse_moe.experts.160.w1", "model.layers.50.block_sparse_moe.experts.161.w1", "model.layers.50.block_sparse_moe.experts.162.w1", "model.layers.50.block_sparse_moe.experts.163.w1", "model.layers.50.block_sparse_moe.experts.164.w1", "model.layers.50.block_sparse_moe.experts.165.w1", "model.layers.50.block_sparse_moe.experts.166.w1", "model.layers.50.block_sparse_moe.experts.167.w1", "model.layers.50.block_sparse_moe.experts.168.w1", "model.layers.50.block_sparse_moe.experts.169.w1", "model.layers.50.block_sparse_moe.experts.170.w1", "model.layers.50.block_sparse_moe.experts.171.w1", "model.layers.50.block_sparse_moe.experts.172.w1", "model.layers.50.block_sparse_moe.experts.173.w1", "model.layers.50.block_sparse_moe.experts.174.w1", "model.layers.50.block_sparse_moe.experts.175.w1", "model.layers.50.block_sparse_moe.experts.176.w1", "model.layers.50.block_sparse_moe.experts.177.w1", "model.layers.50.block_sparse_moe.experts.178.w1", "model.layers.50.block_sparse_moe.experts.179.w1", "model.layers.50.block_sparse_moe.experts.180.w1", "model.layers.50.block_sparse_moe.experts.181.w1", "model.layers.50.block_sparse_moe.experts.182.w1", "model.layers.50.block_sparse_moe.experts.183.w1", "model.layers.50.block_sparse_moe.experts.184.w1", "model.layers.50.block_sparse_moe.experts.185.w1", "model.layers.50.block_sparse_moe.experts.186.w1", "model.layers.50.block_sparse_moe.experts.187.w1", "model.layers.50.block_sparse_moe.experts.188.w1", "model.layers.50.block_sparse_moe.experts.189.w1", "model.layers.50.block_sparse_moe.experts.190.w1", "model.layers.50.block_sparse_moe.experts.191.w1", "model.layers.50.block_sparse_moe.experts.192.w1", "model.layers.50.block_sparse_moe.experts.193.w1", "model.layers.50.block_sparse_moe.experts.194.w1", "model.layers.50.block_sparse_moe.experts.195.w1", "model.layers.50.block_sparse_moe.experts.196.w1", "model.layers.50.block_sparse_moe.experts.197.w1", "model.layers.50.block_sparse_moe.experts.198.w1", "model.layers.50.block_sparse_moe.experts.199.w1", "model.layers.50.block_sparse_moe.experts.200.w1", "model.layers.50.block_sparse_moe.experts.201.w1", "model.layers.50.block_sparse_moe.experts.202.w1", "model.layers.50.block_sparse_moe.experts.203.w1", "model.layers.50.block_sparse_moe.experts.204.w1", "model.layers.50.block_sparse_moe.experts.205.w1", "model.layers.50.block_sparse_moe.experts.206.w1", "model.layers.50.block_sparse_moe.experts.207.w1", "model.layers.50.block_sparse_moe.experts.208.w1", "model.layers.50.block_sparse_moe.experts.209.w1", "model.layers.50.block_sparse_moe.experts.210.w1", "model.layers.50.block_sparse_moe.experts.211.w1", "model.layers.50.block_sparse_moe.experts.212.w1", "model.layers.50.block_sparse_moe.experts.213.w1", "model.layers.50.block_sparse_moe.experts.214.w1", "model.layers.50.block_sparse_moe.experts.215.w1", "model.layers.50.block_sparse_moe.experts.216.w1", "model.layers.50.block_sparse_moe.experts.217.w1", "model.layers.50.block_sparse_moe.experts.218.w1", "model.layers.50.block_sparse_moe.experts.219.w1", "model.layers.50.block_sparse_moe.experts.220.w1", "model.layers.50.block_sparse_moe.experts.221.w1", "model.layers.50.block_sparse_moe.experts.222.w1", "model.layers.50.block_sparse_moe.experts.223.w1", "model.layers.50.block_sparse_moe.experts.224.w1", "model.layers.50.block_sparse_moe.experts.225.w1", "model.layers.50.block_sparse_moe.experts.226.w1", "model.layers.50.block_sparse_moe.experts.227.w1", "model.layers.50.block_sparse_moe.experts.228.w1", "model.layers.50.block_sparse_moe.experts.229.w1", "model.layers.50.block_sparse_moe.experts.230.w1", "model.layers.50.block_sparse_moe.experts.231.w1", "model.layers.50.block_sparse_moe.experts.232.w1", "model.layers.50.block_sparse_moe.experts.233.w1", "model.layers.50.block_sparse_moe.experts.234.w1", "model.layers.50.block_sparse_moe.experts.235.w1", "model.layers.50.block_sparse_moe.experts.236.w1", "model.layers.50.block_sparse_moe.experts.237.w1", "model.layers.50.block_sparse_moe.experts.238.w1", "model.layers.50.block_sparse_moe.experts.239.w1", "model.layers.50.block_sparse_moe.experts.240.w1", "model.layers.50.block_sparse_moe.experts.241.w1", "model.layers.50.block_sparse_moe.experts.242.w1", "model.layers.50.block_sparse_moe.experts.243.w1", "model.layers.50.block_sparse_moe.experts.244.w1", "model.layers.50.block_sparse_moe.experts.245.w1", "model.layers.50.block_sparse_moe.experts.246.w1", "model.layers.50.block_sparse_moe.experts.247.w1", "model.layers.50.block_sparse_moe.experts.248.w1", "model.layers.50.block_sparse_moe.experts.249.w1", "model.layers.50.block_sparse_moe.experts.250.w1", "model.layers.50.block_sparse_moe.experts.251.w1", "model.layers.50.block_sparse_moe.experts.252.w1", "model.layers.50.block_sparse_moe.experts.253.w1", "model.layers.50.block_sparse_moe.experts.254.w1", "model.layers.50.block_sparse_moe.experts.255.w1", "model.layers.50.block_sparse_moe.experts.0.w3", "model.layers.50.block_sparse_moe.experts.1.w3", "model.layers.50.block_sparse_moe.experts.2.w3", "model.layers.50.block_sparse_moe.experts.3.w3", "model.layers.50.block_sparse_moe.experts.4.w3", "model.layers.50.block_sparse_moe.experts.5.w3", "model.layers.50.block_sparse_moe.experts.6.w3", "model.layers.50.block_sparse_moe.experts.7.w3", "model.layers.50.block_sparse_moe.experts.8.w3", "model.layers.50.block_sparse_moe.experts.9.w3", "model.layers.50.block_sparse_moe.experts.10.w3", "model.layers.50.block_sparse_moe.experts.11.w3", "model.layers.50.block_sparse_moe.experts.12.w3", "model.layers.50.block_sparse_moe.experts.13.w3", "model.layers.50.block_sparse_moe.experts.14.w3", "model.layers.50.block_sparse_moe.experts.15.w3", "model.layers.50.block_sparse_moe.experts.16.w3", "model.layers.50.block_sparse_moe.experts.17.w3", "model.layers.50.block_sparse_moe.experts.18.w3", "model.layers.50.block_sparse_moe.experts.19.w3", "model.layers.50.block_sparse_moe.experts.20.w3", "model.layers.50.block_sparse_moe.experts.21.w3", "model.layers.50.block_sparse_moe.experts.22.w3", "model.layers.50.block_sparse_moe.experts.23.w3", "model.layers.50.block_sparse_moe.experts.24.w3", "model.layers.50.block_sparse_moe.experts.25.w3", "model.layers.50.block_sparse_moe.experts.26.w3", "model.layers.50.block_sparse_moe.experts.27.w3", "model.layers.50.block_sparse_moe.experts.28.w3", "model.layers.50.block_sparse_moe.experts.29.w3", "model.layers.50.block_sparse_moe.experts.30.w3", "model.layers.50.block_sparse_moe.experts.31.w3", "model.layers.50.block_sparse_moe.experts.32.w3", "model.layers.50.block_sparse_moe.experts.33.w3", "model.layers.50.block_sparse_moe.experts.34.w3", "model.layers.50.block_sparse_moe.experts.35.w3", "model.layers.50.block_sparse_moe.experts.36.w3", "model.layers.50.block_sparse_moe.experts.37.w3", "model.layers.50.block_sparse_moe.experts.38.w3", "model.layers.50.block_sparse_moe.experts.39.w3", "model.layers.50.block_sparse_moe.experts.40.w3", "model.layers.50.block_sparse_moe.experts.41.w3", "model.layers.50.block_sparse_moe.experts.42.w3", "model.layers.50.block_sparse_moe.experts.43.w3", "model.layers.50.block_sparse_moe.experts.44.w3", "model.layers.50.block_sparse_moe.experts.45.w3", "model.layers.50.block_sparse_moe.experts.46.w3", "model.layers.50.block_sparse_moe.experts.47.w3", "model.layers.50.block_sparse_moe.experts.48.w3", "model.layers.50.block_sparse_moe.experts.49.w3", "model.layers.50.block_sparse_moe.experts.50.w3", "model.layers.50.block_sparse_moe.experts.51.w3", "model.layers.50.block_sparse_moe.experts.52.w3", "model.layers.50.block_sparse_moe.experts.53.w3", "model.layers.50.block_sparse_moe.experts.54.w3", "model.layers.50.block_sparse_moe.experts.55.w3", "model.layers.50.block_sparse_moe.experts.56.w3", "model.layers.50.block_sparse_moe.experts.57.w3", "model.layers.50.block_sparse_moe.experts.58.w3", "model.layers.50.block_sparse_moe.experts.59.w3", "model.layers.50.block_sparse_moe.experts.60.w3", "model.layers.50.block_sparse_moe.experts.61.w3", "model.layers.50.block_sparse_moe.experts.62.w3", "model.layers.50.block_sparse_moe.experts.63.w3", "model.layers.50.block_sparse_moe.experts.64.w3", "model.layers.50.block_sparse_moe.experts.65.w3", "model.layers.50.block_sparse_moe.experts.66.w3", "model.layers.50.block_sparse_moe.experts.67.w3", "model.layers.50.block_sparse_moe.experts.68.w3", "model.layers.50.block_sparse_moe.experts.69.w3", "model.layers.50.block_sparse_moe.experts.70.w3", "model.layers.50.block_sparse_moe.experts.71.w3", "model.layers.50.block_sparse_moe.experts.72.w3", "model.layers.50.block_sparse_moe.experts.73.w3", "model.layers.50.block_sparse_moe.experts.74.w3", "model.layers.50.block_sparse_moe.experts.75.w3", "model.layers.50.block_sparse_moe.experts.76.w3", "model.layers.50.block_sparse_moe.experts.77.w3", "model.layers.50.block_sparse_moe.experts.78.w3", "model.layers.50.block_sparse_moe.experts.79.w3", "model.layers.50.block_sparse_moe.experts.80.w3", "model.layers.50.block_sparse_moe.experts.81.w3", "model.layers.50.block_sparse_moe.experts.82.w3", "model.layers.50.block_sparse_moe.experts.83.w3", "model.layers.50.block_sparse_moe.experts.84.w3", "model.layers.50.block_sparse_moe.experts.85.w3", "model.layers.50.block_sparse_moe.experts.86.w3", "model.layers.50.block_sparse_moe.experts.87.w3", "model.layers.50.block_sparse_moe.experts.88.w3", "model.layers.50.block_sparse_moe.experts.89.w3", "model.layers.50.block_sparse_moe.experts.90.w3", "model.layers.50.block_sparse_moe.experts.91.w3", "model.layers.50.block_sparse_moe.experts.92.w3", "model.layers.50.block_sparse_moe.experts.93.w3", "model.layers.50.block_sparse_moe.experts.94.w3", "model.layers.50.block_sparse_moe.experts.95.w3", "model.layers.50.block_sparse_moe.experts.96.w3", "model.layers.50.block_sparse_moe.experts.97.w3", "model.layers.50.block_sparse_moe.experts.98.w3", "model.layers.50.block_sparse_moe.experts.99.w3", "model.layers.50.block_sparse_moe.experts.100.w3", "model.layers.50.block_sparse_moe.experts.101.w3", "model.layers.50.block_sparse_moe.experts.102.w3", "model.layers.50.block_sparse_moe.experts.103.w3", "model.layers.50.block_sparse_moe.experts.104.w3", "model.layers.50.block_sparse_moe.experts.105.w3", "model.layers.50.block_sparse_moe.experts.106.w3", "model.layers.50.block_sparse_moe.experts.107.w3", "model.layers.50.block_sparse_moe.experts.108.w3", "model.layers.50.block_sparse_moe.experts.109.w3", "model.layers.50.block_sparse_moe.experts.110.w3", "model.layers.50.block_sparse_moe.experts.111.w3", "model.layers.50.block_sparse_moe.experts.112.w3", "model.layers.50.block_sparse_moe.experts.113.w3", "model.layers.50.block_sparse_moe.experts.114.w3", "model.layers.50.block_sparse_moe.experts.115.w3", "model.layers.50.block_sparse_moe.experts.116.w3", "model.layers.50.block_sparse_moe.experts.117.w3", "model.layers.50.block_sparse_moe.experts.118.w3", "model.layers.50.block_sparse_moe.experts.119.w3", "model.layers.50.block_sparse_moe.experts.120.w3", "model.layers.50.block_sparse_moe.experts.121.w3", "model.layers.50.block_sparse_moe.experts.122.w3", "model.layers.50.block_sparse_moe.experts.123.w3", "model.layers.50.block_sparse_moe.experts.124.w3", "model.layers.50.block_sparse_moe.experts.125.w3", "model.layers.50.block_sparse_moe.experts.126.w3", "model.layers.50.block_sparse_moe.experts.127.w3", "model.layers.50.block_sparse_moe.experts.128.w3", "model.layers.50.block_sparse_moe.experts.129.w3", "model.layers.50.block_sparse_moe.experts.130.w3", "model.layers.50.block_sparse_moe.experts.131.w3", "model.layers.50.block_sparse_moe.experts.132.w3", "model.layers.50.block_sparse_moe.experts.133.w3", "model.layers.50.block_sparse_moe.experts.134.w3", "model.layers.50.block_sparse_moe.experts.135.w3", "model.layers.50.block_sparse_moe.experts.136.w3", "model.layers.50.block_sparse_moe.experts.137.w3", "model.layers.50.block_sparse_moe.experts.138.w3", "model.layers.50.block_sparse_moe.experts.139.w3", "model.layers.50.block_sparse_moe.experts.140.w3", "model.layers.50.block_sparse_moe.experts.141.w3", "model.layers.50.block_sparse_moe.experts.142.w3", "model.layers.50.block_sparse_moe.experts.143.w3", "model.layers.50.block_sparse_moe.experts.144.w3", "model.layers.50.block_sparse_moe.experts.145.w3", "model.layers.50.block_sparse_moe.experts.146.w3", "model.layers.50.block_sparse_moe.experts.147.w3", "model.layers.50.block_sparse_moe.experts.148.w3", "model.layers.50.block_sparse_moe.experts.149.w3", "model.layers.50.block_sparse_moe.experts.150.w3", "model.layers.50.block_sparse_moe.experts.151.w3", "model.layers.50.block_sparse_moe.experts.152.w3", "model.layers.50.block_sparse_moe.experts.153.w3", "model.layers.50.block_sparse_moe.experts.154.w3", "model.layers.50.block_sparse_moe.experts.155.w3", "model.layers.50.block_sparse_moe.experts.156.w3", "model.layers.50.block_sparse_moe.experts.157.w3", "model.layers.50.block_sparse_moe.experts.158.w3", "model.layers.50.block_sparse_moe.experts.159.w3", "model.layers.50.block_sparse_moe.experts.160.w3", "model.layers.50.block_sparse_moe.experts.161.w3", "model.layers.50.block_sparse_moe.experts.162.w3", "model.layers.50.block_sparse_moe.experts.163.w3", "model.layers.50.block_sparse_moe.experts.164.w3", "model.layers.50.block_sparse_moe.experts.165.w3", "model.layers.50.block_sparse_moe.experts.166.w3", "model.layers.50.block_sparse_moe.experts.167.w3", "model.layers.50.block_sparse_moe.experts.168.w3", "model.layers.50.block_sparse_moe.experts.169.w3", "model.layers.50.block_sparse_moe.experts.170.w3", "model.layers.50.block_sparse_moe.experts.171.w3", "model.layers.50.block_sparse_moe.experts.172.w3", "model.layers.50.block_sparse_moe.experts.173.w3", "model.layers.50.block_sparse_moe.experts.174.w3", "model.layers.50.block_sparse_moe.experts.175.w3", "model.layers.50.block_sparse_moe.experts.176.w3", "model.layers.50.block_sparse_moe.experts.177.w3", "model.layers.50.block_sparse_moe.experts.178.w3", "model.layers.50.block_sparse_moe.experts.179.w3", "model.layers.50.block_sparse_moe.experts.180.w3", "model.layers.50.block_sparse_moe.experts.181.w3", "model.layers.50.block_sparse_moe.experts.182.w3", "model.layers.50.block_sparse_moe.experts.183.w3", "model.layers.50.block_sparse_moe.experts.184.w3", "model.layers.50.block_sparse_moe.experts.185.w3", "model.layers.50.block_sparse_moe.experts.186.w3", "model.layers.50.block_sparse_moe.experts.187.w3", "model.layers.50.block_sparse_moe.experts.188.w3", "model.layers.50.block_sparse_moe.experts.189.w3", "model.layers.50.block_sparse_moe.experts.190.w3", "model.layers.50.block_sparse_moe.experts.191.w3", "model.layers.50.block_sparse_moe.experts.192.w3", "model.layers.50.block_sparse_moe.experts.193.w3", "model.layers.50.block_sparse_moe.experts.194.w3", "model.layers.50.block_sparse_moe.experts.195.w3", "model.layers.50.block_sparse_moe.experts.196.w3", "model.layers.50.block_sparse_moe.experts.197.w3", "model.layers.50.block_sparse_moe.experts.198.w3", "model.layers.50.block_sparse_moe.experts.199.w3", "model.layers.50.block_sparse_moe.experts.200.w3", "model.layers.50.block_sparse_moe.experts.201.w3", "model.layers.50.block_sparse_moe.experts.202.w3", "model.layers.50.block_sparse_moe.experts.203.w3", "model.layers.50.block_sparse_moe.experts.204.w3", "model.layers.50.block_sparse_moe.experts.205.w3", "model.layers.50.block_sparse_moe.experts.206.w3", "model.layers.50.block_sparse_moe.experts.207.w3", "model.layers.50.block_sparse_moe.experts.208.w3", "model.layers.50.block_sparse_moe.experts.209.w3", "model.layers.50.block_sparse_moe.experts.210.w3", "model.layers.50.block_sparse_moe.experts.211.w3", "model.layers.50.block_sparse_moe.experts.212.w3", "model.layers.50.block_sparse_moe.experts.213.w3", "model.layers.50.block_sparse_moe.experts.214.w3", "model.layers.50.block_sparse_moe.experts.215.w3", "model.layers.50.block_sparse_moe.experts.216.w3", "model.layers.50.block_sparse_moe.experts.217.w3", "model.layers.50.block_sparse_moe.experts.218.w3", "model.layers.50.block_sparse_moe.experts.219.w3", "model.layers.50.block_sparse_moe.experts.220.w3", "model.layers.50.block_sparse_moe.experts.221.w3", "model.layers.50.block_sparse_moe.experts.222.w3", "model.layers.50.block_sparse_moe.experts.223.w3", "model.layers.50.block_sparse_moe.experts.224.w3", "model.layers.50.block_sparse_moe.experts.225.w3", "model.layers.50.block_sparse_moe.experts.226.w3", "model.layers.50.block_sparse_moe.experts.227.w3", "model.layers.50.block_sparse_moe.experts.228.w3", "model.layers.50.block_sparse_moe.experts.229.w3", "model.layers.50.block_sparse_moe.experts.230.w3", "model.layers.50.block_sparse_moe.experts.231.w3", "model.layers.50.block_sparse_moe.experts.232.w3", "model.layers.50.block_sparse_moe.experts.233.w3", "model.layers.50.block_sparse_moe.experts.234.w3", "model.layers.50.block_sparse_moe.experts.235.w3", "model.layers.50.block_sparse_moe.experts.236.w3", "model.layers.50.block_sparse_moe.experts.237.w3", "model.layers.50.block_sparse_moe.experts.238.w3", "model.layers.50.block_sparse_moe.experts.239.w3", "model.layers.50.block_sparse_moe.experts.240.w3", "model.layers.50.block_sparse_moe.experts.241.w3", "model.layers.50.block_sparse_moe.experts.242.w3", "model.layers.50.block_sparse_moe.experts.243.w3", "model.layers.50.block_sparse_moe.experts.244.w3", "model.layers.50.block_sparse_moe.experts.245.w3", "model.layers.50.block_sparse_moe.experts.246.w3", "model.layers.50.block_sparse_moe.experts.247.w3", "model.layers.50.block_sparse_moe.experts.248.w3", "model.layers.50.block_sparse_moe.experts.249.w3", "model.layers.50.block_sparse_moe.experts.250.w3", "model.layers.50.block_sparse_moe.experts.251.w3", "model.layers.50.block_sparse_moe.experts.252.w3", "model.layers.50.block_sparse_moe.experts.253.w3", "model.layers.50.block_sparse_moe.experts.254.w3", "model.layers.50.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0002222329378128718, "dbits": 2415919104 } ] }, { "idx": 254, "layers": [ "model.layers.50.block_sparse_moe.experts.0.w2", "model.layers.50.block_sparse_moe.experts.1.w2", "model.layers.50.block_sparse_moe.experts.2.w2", "model.layers.50.block_sparse_moe.experts.3.w2", "model.layers.50.block_sparse_moe.experts.4.w2", "model.layers.50.block_sparse_moe.experts.5.w2", "model.layers.50.block_sparse_moe.experts.6.w2", "model.layers.50.block_sparse_moe.experts.7.w2", "model.layers.50.block_sparse_moe.experts.8.w2", "model.layers.50.block_sparse_moe.experts.9.w2", "model.layers.50.block_sparse_moe.experts.10.w2", "model.layers.50.block_sparse_moe.experts.11.w2", "model.layers.50.block_sparse_moe.experts.12.w2", "model.layers.50.block_sparse_moe.experts.13.w2", "model.layers.50.block_sparse_moe.experts.14.w2", "model.layers.50.block_sparse_moe.experts.15.w2", "model.layers.50.block_sparse_moe.experts.16.w2", "model.layers.50.block_sparse_moe.experts.17.w2", "model.layers.50.block_sparse_moe.experts.18.w2", "model.layers.50.block_sparse_moe.experts.19.w2", "model.layers.50.block_sparse_moe.experts.20.w2", "model.layers.50.block_sparse_moe.experts.21.w2", "model.layers.50.block_sparse_moe.experts.22.w2", "model.layers.50.block_sparse_moe.experts.23.w2", "model.layers.50.block_sparse_moe.experts.24.w2", "model.layers.50.block_sparse_moe.experts.25.w2", "model.layers.50.block_sparse_moe.experts.26.w2", "model.layers.50.block_sparse_moe.experts.27.w2", "model.layers.50.block_sparse_moe.experts.28.w2", "model.layers.50.block_sparse_moe.experts.29.w2", "model.layers.50.block_sparse_moe.experts.30.w2", "model.layers.50.block_sparse_moe.experts.31.w2", "model.layers.50.block_sparse_moe.experts.32.w2", "model.layers.50.block_sparse_moe.experts.33.w2", "model.layers.50.block_sparse_moe.experts.34.w2", "model.layers.50.block_sparse_moe.experts.35.w2", "model.layers.50.block_sparse_moe.experts.36.w2", "model.layers.50.block_sparse_moe.experts.37.w2", "model.layers.50.block_sparse_moe.experts.38.w2", "model.layers.50.block_sparse_moe.experts.39.w2", "model.layers.50.block_sparse_moe.experts.40.w2", "model.layers.50.block_sparse_moe.experts.41.w2", "model.layers.50.block_sparse_moe.experts.42.w2", "model.layers.50.block_sparse_moe.experts.43.w2", "model.layers.50.block_sparse_moe.experts.44.w2", "model.layers.50.block_sparse_moe.experts.45.w2", "model.layers.50.block_sparse_moe.experts.46.w2", "model.layers.50.block_sparse_moe.experts.47.w2", "model.layers.50.block_sparse_moe.experts.48.w2", "model.layers.50.block_sparse_moe.experts.49.w2", "model.layers.50.block_sparse_moe.experts.50.w2", "model.layers.50.block_sparse_moe.experts.51.w2", "model.layers.50.block_sparse_moe.experts.52.w2", "model.layers.50.block_sparse_moe.experts.53.w2", "model.layers.50.block_sparse_moe.experts.54.w2", "model.layers.50.block_sparse_moe.experts.55.w2", "model.layers.50.block_sparse_moe.experts.56.w2", "model.layers.50.block_sparse_moe.experts.57.w2", "model.layers.50.block_sparse_moe.experts.58.w2", "model.layers.50.block_sparse_moe.experts.59.w2", "model.layers.50.block_sparse_moe.experts.60.w2", "model.layers.50.block_sparse_moe.experts.61.w2", "model.layers.50.block_sparse_moe.experts.62.w2", "model.layers.50.block_sparse_moe.experts.63.w2", "model.layers.50.block_sparse_moe.experts.64.w2", "model.layers.50.block_sparse_moe.experts.65.w2", "model.layers.50.block_sparse_moe.experts.66.w2", "model.layers.50.block_sparse_moe.experts.67.w2", "model.layers.50.block_sparse_moe.experts.68.w2", "model.layers.50.block_sparse_moe.experts.69.w2", "model.layers.50.block_sparse_moe.experts.70.w2", "model.layers.50.block_sparse_moe.experts.71.w2", "model.layers.50.block_sparse_moe.experts.72.w2", "model.layers.50.block_sparse_moe.experts.73.w2", "model.layers.50.block_sparse_moe.experts.74.w2", "model.layers.50.block_sparse_moe.experts.75.w2", "model.layers.50.block_sparse_moe.experts.76.w2", "model.layers.50.block_sparse_moe.experts.77.w2", "model.layers.50.block_sparse_moe.experts.78.w2", "model.layers.50.block_sparse_moe.experts.79.w2", "model.layers.50.block_sparse_moe.experts.80.w2", "model.layers.50.block_sparse_moe.experts.81.w2", "model.layers.50.block_sparse_moe.experts.82.w2", "model.layers.50.block_sparse_moe.experts.83.w2", "model.layers.50.block_sparse_moe.experts.84.w2", "model.layers.50.block_sparse_moe.experts.85.w2", "model.layers.50.block_sparse_moe.experts.86.w2", "model.layers.50.block_sparse_moe.experts.87.w2", "model.layers.50.block_sparse_moe.experts.88.w2", "model.layers.50.block_sparse_moe.experts.89.w2", "model.layers.50.block_sparse_moe.experts.90.w2", "model.layers.50.block_sparse_moe.experts.91.w2", "model.layers.50.block_sparse_moe.experts.92.w2", "model.layers.50.block_sparse_moe.experts.93.w2", "model.layers.50.block_sparse_moe.experts.94.w2", "model.layers.50.block_sparse_moe.experts.95.w2", "model.layers.50.block_sparse_moe.experts.96.w2", "model.layers.50.block_sparse_moe.experts.97.w2", "model.layers.50.block_sparse_moe.experts.98.w2", "model.layers.50.block_sparse_moe.experts.99.w2", "model.layers.50.block_sparse_moe.experts.100.w2", "model.layers.50.block_sparse_moe.experts.101.w2", "model.layers.50.block_sparse_moe.experts.102.w2", "model.layers.50.block_sparse_moe.experts.103.w2", "model.layers.50.block_sparse_moe.experts.104.w2", "model.layers.50.block_sparse_moe.experts.105.w2", "model.layers.50.block_sparse_moe.experts.106.w2", "model.layers.50.block_sparse_moe.experts.107.w2", "model.layers.50.block_sparse_moe.experts.108.w2", "model.layers.50.block_sparse_moe.experts.109.w2", "model.layers.50.block_sparse_moe.experts.110.w2", "model.layers.50.block_sparse_moe.experts.111.w2", "model.layers.50.block_sparse_moe.experts.112.w2", "model.layers.50.block_sparse_moe.experts.113.w2", "model.layers.50.block_sparse_moe.experts.114.w2", "model.layers.50.block_sparse_moe.experts.115.w2", "model.layers.50.block_sparse_moe.experts.116.w2", "model.layers.50.block_sparse_moe.experts.117.w2", "model.layers.50.block_sparse_moe.experts.118.w2", "model.layers.50.block_sparse_moe.experts.119.w2", "model.layers.50.block_sparse_moe.experts.120.w2", "model.layers.50.block_sparse_moe.experts.121.w2", "model.layers.50.block_sparse_moe.experts.122.w2", "model.layers.50.block_sparse_moe.experts.123.w2", "model.layers.50.block_sparse_moe.experts.124.w2", "model.layers.50.block_sparse_moe.experts.125.w2", "model.layers.50.block_sparse_moe.experts.126.w2", "model.layers.50.block_sparse_moe.experts.127.w2", "model.layers.50.block_sparse_moe.experts.128.w2", "model.layers.50.block_sparse_moe.experts.129.w2", "model.layers.50.block_sparse_moe.experts.130.w2", "model.layers.50.block_sparse_moe.experts.131.w2", "model.layers.50.block_sparse_moe.experts.132.w2", "model.layers.50.block_sparse_moe.experts.133.w2", "model.layers.50.block_sparse_moe.experts.134.w2", "model.layers.50.block_sparse_moe.experts.135.w2", "model.layers.50.block_sparse_moe.experts.136.w2", "model.layers.50.block_sparse_moe.experts.137.w2", "model.layers.50.block_sparse_moe.experts.138.w2", "model.layers.50.block_sparse_moe.experts.139.w2", "model.layers.50.block_sparse_moe.experts.140.w2", "model.layers.50.block_sparse_moe.experts.141.w2", "model.layers.50.block_sparse_moe.experts.142.w2", "model.layers.50.block_sparse_moe.experts.143.w2", "model.layers.50.block_sparse_moe.experts.144.w2", "model.layers.50.block_sparse_moe.experts.145.w2", "model.layers.50.block_sparse_moe.experts.146.w2", "model.layers.50.block_sparse_moe.experts.147.w2", "model.layers.50.block_sparse_moe.experts.148.w2", "model.layers.50.block_sparse_moe.experts.149.w2", "model.layers.50.block_sparse_moe.experts.150.w2", "model.layers.50.block_sparse_moe.experts.151.w2", "model.layers.50.block_sparse_moe.experts.152.w2", "model.layers.50.block_sparse_moe.experts.153.w2", "model.layers.50.block_sparse_moe.experts.154.w2", "model.layers.50.block_sparse_moe.experts.155.w2", "model.layers.50.block_sparse_moe.experts.156.w2", "model.layers.50.block_sparse_moe.experts.157.w2", "model.layers.50.block_sparse_moe.experts.158.w2", "model.layers.50.block_sparse_moe.experts.159.w2", "model.layers.50.block_sparse_moe.experts.160.w2", "model.layers.50.block_sparse_moe.experts.161.w2", "model.layers.50.block_sparse_moe.experts.162.w2", "model.layers.50.block_sparse_moe.experts.163.w2", "model.layers.50.block_sparse_moe.experts.164.w2", "model.layers.50.block_sparse_moe.experts.165.w2", "model.layers.50.block_sparse_moe.experts.166.w2", "model.layers.50.block_sparse_moe.experts.167.w2", "model.layers.50.block_sparse_moe.experts.168.w2", "model.layers.50.block_sparse_moe.experts.169.w2", "model.layers.50.block_sparse_moe.experts.170.w2", "model.layers.50.block_sparse_moe.experts.171.w2", "model.layers.50.block_sparse_moe.experts.172.w2", "model.layers.50.block_sparse_moe.experts.173.w2", "model.layers.50.block_sparse_moe.experts.174.w2", "model.layers.50.block_sparse_moe.experts.175.w2", "model.layers.50.block_sparse_moe.experts.176.w2", "model.layers.50.block_sparse_moe.experts.177.w2", "model.layers.50.block_sparse_moe.experts.178.w2", "model.layers.50.block_sparse_moe.experts.179.w2", "model.layers.50.block_sparse_moe.experts.180.w2", "model.layers.50.block_sparse_moe.experts.181.w2", "model.layers.50.block_sparse_moe.experts.182.w2", "model.layers.50.block_sparse_moe.experts.183.w2", "model.layers.50.block_sparse_moe.experts.184.w2", "model.layers.50.block_sparse_moe.experts.185.w2", "model.layers.50.block_sparse_moe.experts.186.w2", "model.layers.50.block_sparse_moe.experts.187.w2", "model.layers.50.block_sparse_moe.experts.188.w2", "model.layers.50.block_sparse_moe.experts.189.w2", "model.layers.50.block_sparse_moe.experts.190.w2", "model.layers.50.block_sparse_moe.experts.191.w2", "model.layers.50.block_sparse_moe.experts.192.w2", "model.layers.50.block_sparse_moe.experts.193.w2", "model.layers.50.block_sparse_moe.experts.194.w2", "model.layers.50.block_sparse_moe.experts.195.w2", "model.layers.50.block_sparse_moe.experts.196.w2", "model.layers.50.block_sparse_moe.experts.197.w2", "model.layers.50.block_sparse_moe.experts.198.w2", "model.layers.50.block_sparse_moe.experts.199.w2", "model.layers.50.block_sparse_moe.experts.200.w2", "model.layers.50.block_sparse_moe.experts.201.w2", "model.layers.50.block_sparse_moe.experts.202.w2", "model.layers.50.block_sparse_moe.experts.203.w2", "model.layers.50.block_sparse_moe.experts.204.w2", "model.layers.50.block_sparse_moe.experts.205.w2", "model.layers.50.block_sparse_moe.experts.206.w2", "model.layers.50.block_sparse_moe.experts.207.w2", "model.layers.50.block_sparse_moe.experts.208.w2", "model.layers.50.block_sparse_moe.experts.209.w2", "model.layers.50.block_sparse_moe.experts.210.w2", "model.layers.50.block_sparse_moe.experts.211.w2", "model.layers.50.block_sparse_moe.experts.212.w2", "model.layers.50.block_sparse_moe.experts.213.w2", "model.layers.50.block_sparse_moe.experts.214.w2", "model.layers.50.block_sparse_moe.experts.215.w2", "model.layers.50.block_sparse_moe.experts.216.w2", "model.layers.50.block_sparse_moe.experts.217.w2", "model.layers.50.block_sparse_moe.experts.218.w2", "model.layers.50.block_sparse_moe.experts.219.w2", "model.layers.50.block_sparse_moe.experts.220.w2", "model.layers.50.block_sparse_moe.experts.221.w2", "model.layers.50.block_sparse_moe.experts.222.w2", "model.layers.50.block_sparse_moe.experts.223.w2", "model.layers.50.block_sparse_moe.experts.224.w2", "model.layers.50.block_sparse_moe.experts.225.w2", "model.layers.50.block_sparse_moe.experts.226.w2", "model.layers.50.block_sparse_moe.experts.227.w2", "model.layers.50.block_sparse_moe.experts.228.w2", "model.layers.50.block_sparse_moe.experts.229.w2", "model.layers.50.block_sparse_moe.experts.230.w2", "model.layers.50.block_sparse_moe.experts.231.w2", "model.layers.50.block_sparse_moe.experts.232.w2", "model.layers.50.block_sparse_moe.experts.233.w2", "model.layers.50.block_sparse_moe.experts.234.w2", "model.layers.50.block_sparse_moe.experts.235.w2", "model.layers.50.block_sparse_moe.experts.236.w2", "model.layers.50.block_sparse_moe.experts.237.w2", "model.layers.50.block_sparse_moe.experts.238.w2", "model.layers.50.block_sparse_moe.experts.239.w2", "model.layers.50.block_sparse_moe.experts.240.w2", "model.layers.50.block_sparse_moe.experts.241.w2", "model.layers.50.block_sparse_moe.experts.242.w2", "model.layers.50.block_sparse_moe.experts.243.w2", "model.layers.50.block_sparse_moe.experts.244.w2", "model.layers.50.block_sparse_moe.experts.245.w2", "model.layers.50.block_sparse_moe.experts.246.w2", "model.layers.50.block_sparse_moe.experts.247.w2", "model.layers.50.block_sparse_moe.experts.248.w2", "model.layers.50.block_sparse_moe.experts.249.w2", "model.layers.50.block_sparse_moe.experts.250.w2", "model.layers.50.block_sparse_moe.experts.251.w2", "model.layers.50.block_sparse_moe.experts.252.w2", "model.layers.50.block_sparse_moe.experts.253.w2", "model.layers.50.block_sparse_moe.experts.254.w2", "model.layers.50.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0003754675388336626, "dbits": 1207959552 } ] }, { "idx": 255, "layers": [ "model.layers.51.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0018681168556212713, "dbits": 18874368 } ] }, { "idx": 256, "layers": [ "model.layers.51.self_attn.k_proj", "model.layers.51.self_attn.v_proj" ], "candidates": [ { "dkld": 0.012725257873535178, "dbits": 6291456 } ] }, { "idx": 257, "layers": [ "model.layers.51.self_attn.o_proj" ], "candidates": [ { "dkld": -0.008850058913230874, "dbits": 18874368 } ] }, { "idx": 258, "layers": [ "model.layers.51.block_sparse_moe.experts.0.w1", "model.layers.51.block_sparse_moe.experts.1.w1", "model.layers.51.block_sparse_moe.experts.2.w1", "model.layers.51.block_sparse_moe.experts.3.w1", "model.layers.51.block_sparse_moe.experts.4.w1", "model.layers.51.block_sparse_moe.experts.5.w1", "model.layers.51.block_sparse_moe.experts.6.w1", "model.layers.51.block_sparse_moe.experts.7.w1", "model.layers.51.block_sparse_moe.experts.8.w1", "model.layers.51.block_sparse_moe.experts.9.w1", "model.layers.51.block_sparse_moe.experts.10.w1", "model.layers.51.block_sparse_moe.experts.11.w1", "model.layers.51.block_sparse_moe.experts.12.w1", "model.layers.51.block_sparse_moe.experts.13.w1", "model.layers.51.block_sparse_moe.experts.14.w1", "model.layers.51.block_sparse_moe.experts.15.w1", "model.layers.51.block_sparse_moe.experts.16.w1", "model.layers.51.block_sparse_moe.experts.17.w1", "model.layers.51.block_sparse_moe.experts.18.w1", "model.layers.51.block_sparse_moe.experts.19.w1", "model.layers.51.block_sparse_moe.experts.20.w1", "model.layers.51.block_sparse_moe.experts.21.w1", "model.layers.51.block_sparse_moe.experts.22.w1", "model.layers.51.block_sparse_moe.experts.23.w1", "model.layers.51.block_sparse_moe.experts.24.w1", "model.layers.51.block_sparse_moe.experts.25.w1", "model.layers.51.block_sparse_moe.experts.26.w1", "model.layers.51.block_sparse_moe.experts.27.w1", "model.layers.51.block_sparse_moe.experts.28.w1", "model.layers.51.block_sparse_moe.experts.29.w1", "model.layers.51.block_sparse_moe.experts.30.w1", "model.layers.51.block_sparse_moe.experts.31.w1", "model.layers.51.block_sparse_moe.experts.32.w1", "model.layers.51.block_sparse_moe.experts.33.w1", "model.layers.51.block_sparse_moe.experts.34.w1", "model.layers.51.block_sparse_moe.experts.35.w1", "model.layers.51.block_sparse_moe.experts.36.w1", "model.layers.51.block_sparse_moe.experts.37.w1", "model.layers.51.block_sparse_moe.experts.38.w1", "model.layers.51.block_sparse_moe.experts.39.w1", "model.layers.51.block_sparse_moe.experts.40.w1", "model.layers.51.block_sparse_moe.experts.41.w1", "model.layers.51.block_sparse_moe.experts.42.w1", "model.layers.51.block_sparse_moe.experts.43.w1", "model.layers.51.block_sparse_moe.experts.44.w1", "model.layers.51.block_sparse_moe.experts.45.w1", "model.layers.51.block_sparse_moe.experts.46.w1", "model.layers.51.block_sparse_moe.experts.47.w1", "model.layers.51.block_sparse_moe.experts.48.w1", "model.layers.51.block_sparse_moe.experts.49.w1", "model.layers.51.block_sparse_moe.experts.50.w1", "model.layers.51.block_sparse_moe.experts.51.w1", "model.layers.51.block_sparse_moe.experts.52.w1", "model.layers.51.block_sparse_moe.experts.53.w1", "model.layers.51.block_sparse_moe.experts.54.w1", "model.layers.51.block_sparse_moe.experts.55.w1", "model.layers.51.block_sparse_moe.experts.56.w1", "model.layers.51.block_sparse_moe.experts.57.w1", "model.layers.51.block_sparse_moe.experts.58.w1", "model.layers.51.block_sparse_moe.experts.59.w1", "model.layers.51.block_sparse_moe.experts.60.w1", "model.layers.51.block_sparse_moe.experts.61.w1", "model.layers.51.block_sparse_moe.experts.62.w1", "model.layers.51.block_sparse_moe.experts.63.w1", "model.layers.51.block_sparse_moe.experts.64.w1", "model.layers.51.block_sparse_moe.experts.65.w1", "model.layers.51.block_sparse_moe.experts.66.w1", "model.layers.51.block_sparse_moe.experts.67.w1", "model.layers.51.block_sparse_moe.experts.68.w1", "model.layers.51.block_sparse_moe.experts.69.w1", "model.layers.51.block_sparse_moe.experts.70.w1", "model.layers.51.block_sparse_moe.experts.71.w1", "model.layers.51.block_sparse_moe.experts.72.w1", "model.layers.51.block_sparse_moe.experts.73.w1", "model.layers.51.block_sparse_moe.experts.74.w1", "model.layers.51.block_sparse_moe.experts.75.w1", "model.layers.51.block_sparse_moe.experts.76.w1", "model.layers.51.block_sparse_moe.experts.77.w1", "model.layers.51.block_sparse_moe.experts.78.w1", "model.layers.51.block_sparse_moe.experts.79.w1", "model.layers.51.block_sparse_moe.experts.80.w1", "model.layers.51.block_sparse_moe.experts.81.w1", "model.layers.51.block_sparse_moe.experts.82.w1", "model.layers.51.block_sparse_moe.experts.83.w1", "model.layers.51.block_sparse_moe.experts.84.w1", "model.layers.51.block_sparse_moe.experts.85.w1", "model.layers.51.block_sparse_moe.experts.86.w1", "model.layers.51.block_sparse_moe.experts.87.w1", "model.layers.51.block_sparse_moe.experts.88.w1", "model.layers.51.block_sparse_moe.experts.89.w1", "model.layers.51.block_sparse_moe.experts.90.w1", "model.layers.51.block_sparse_moe.experts.91.w1", "model.layers.51.block_sparse_moe.experts.92.w1", "model.layers.51.block_sparse_moe.experts.93.w1", "model.layers.51.block_sparse_moe.experts.94.w1", "model.layers.51.block_sparse_moe.experts.95.w1", "model.layers.51.block_sparse_moe.experts.96.w1", "model.layers.51.block_sparse_moe.experts.97.w1", "model.layers.51.block_sparse_moe.experts.98.w1", "model.layers.51.block_sparse_moe.experts.99.w1", "model.layers.51.block_sparse_moe.experts.100.w1", "model.layers.51.block_sparse_moe.experts.101.w1", "model.layers.51.block_sparse_moe.experts.102.w1", "model.layers.51.block_sparse_moe.experts.103.w1", "model.layers.51.block_sparse_moe.experts.104.w1", "model.layers.51.block_sparse_moe.experts.105.w1", "model.layers.51.block_sparse_moe.experts.106.w1", "model.layers.51.block_sparse_moe.experts.107.w1", "model.layers.51.block_sparse_moe.experts.108.w1", "model.layers.51.block_sparse_moe.experts.109.w1", "model.layers.51.block_sparse_moe.experts.110.w1", "model.layers.51.block_sparse_moe.experts.111.w1", "model.layers.51.block_sparse_moe.experts.112.w1", "model.layers.51.block_sparse_moe.experts.113.w1", "model.layers.51.block_sparse_moe.experts.114.w1", "model.layers.51.block_sparse_moe.experts.115.w1", "model.layers.51.block_sparse_moe.experts.116.w1", "model.layers.51.block_sparse_moe.experts.117.w1", "model.layers.51.block_sparse_moe.experts.118.w1", "model.layers.51.block_sparse_moe.experts.119.w1", "model.layers.51.block_sparse_moe.experts.120.w1", "model.layers.51.block_sparse_moe.experts.121.w1", "model.layers.51.block_sparse_moe.experts.122.w1", "model.layers.51.block_sparse_moe.experts.123.w1", "model.layers.51.block_sparse_moe.experts.124.w1", "model.layers.51.block_sparse_moe.experts.125.w1", "model.layers.51.block_sparse_moe.experts.126.w1", "model.layers.51.block_sparse_moe.experts.127.w1", "model.layers.51.block_sparse_moe.experts.128.w1", "model.layers.51.block_sparse_moe.experts.129.w1", "model.layers.51.block_sparse_moe.experts.130.w1", "model.layers.51.block_sparse_moe.experts.131.w1", "model.layers.51.block_sparse_moe.experts.132.w1", "model.layers.51.block_sparse_moe.experts.133.w1", "model.layers.51.block_sparse_moe.experts.134.w1", "model.layers.51.block_sparse_moe.experts.135.w1", "model.layers.51.block_sparse_moe.experts.136.w1", "model.layers.51.block_sparse_moe.experts.137.w1", "model.layers.51.block_sparse_moe.experts.138.w1", "model.layers.51.block_sparse_moe.experts.139.w1", "model.layers.51.block_sparse_moe.experts.140.w1", "model.layers.51.block_sparse_moe.experts.141.w1", "model.layers.51.block_sparse_moe.experts.142.w1", "model.layers.51.block_sparse_moe.experts.143.w1", "model.layers.51.block_sparse_moe.experts.144.w1", "model.layers.51.block_sparse_moe.experts.145.w1", "model.layers.51.block_sparse_moe.experts.146.w1", "model.layers.51.block_sparse_moe.experts.147.w1", "model.layers.51.block_sparse_moe.experts.148.w1", "model.layers.51.block_sparse_moe.experts.149.w1", "model.layers.51.block_sparse_moe.experts.150.w1", "model.layers.51.block_sparse_moe.experts.151.w1", "model.layers.51.block_sparse_moe.experts.152.w1", "model.layers.51.block_sparse_moe.experts.153.w1", "model.layers.51.block_sparse_moe.experts.154.w1", "model.layers.51.block_sparse_moe.experts.155.w1", "model.layers.51.block_sparse_moe.experts.156.w1", "model.layers.51.block_sparse_moe.experts.157.w1", "model.layers.51.block_sparse_moe.experts.158.w1", "model.layers.51.block_sparse_moe.experts.159.w1", "model.layers.51.block_sparse_moe.experts.160.w1", "model.layers.51.block_sparse_moe.experts.161.w1", "model.layers.51.block_sparse_moe.experts.162.w1", "model.layers.51.block_sparse_moe.experts.163.w1", "model.layers.51.block_sparse_moe.experts.164.w1", "model.layers.51.block_sparse_moe.experts.165.w1", "model.layers.51.block_sparse_moe.experts.166.w1", "model.layers.51.block_sparse_moe.experts.167.w1", "model.layers.51.block_sparse_moe.experts.168.w1", "model.layers.51.block_sparse_moe.experts.169.w1", "model.layers.51.block_sparse_moe.experts.170.w1", "model.layers.51.block_sparse_moe.experts.171.w1", "model.layers.51.block_sparse_moe.experts.172.w1", "model.layers.51.block_sparse_moe.experts.173.w1", "model.layers.51.block_sparse_moe.experts.174.w1", "model.layers.51.block_sparse_moe.experts.175.w1", "model.layers.51.block_sparse_moe.experts.176.w1", "model.layers.51.block_sparse_moe.experts.177.w1", "model.layers.51.block_sparse_moe.experts.178.w1", "model.layers.51.block_sparse_moe.experts.179.w1", "model.layers.51.block_sparse_moe.experts.180.w1", "model.layers.51.block_sparse_moe.experts.181.w1", "model.layers.51.block_sparse_moe.experts.182.w1", "model.layers.51.block_sparse_moe.experts.183.w1", "model.layers.51.block_sparse_moe.experts.184.w1", "model.layers.51.block_sparse_moe.experts.185.w1", "model.layers.51.block_sparse_moe.experts.186.w1", "model.layers.51.block_sparse_moe.experts.187.w1", "model.layers.51.block_sparse_moe.experts.188.w1", "model.layers.51.block_sparse_moe.experts.189.w1", "model.layers.51.block_sparse_moe.experts.190.w1", "model.layers.51.block_sparse_moe.experts.191.w1", "model.layers.51.block_sparse_moe.experts.192.w1", "model.layers.51.block_sparse_moe.experts.193.w1", "model.layers.51.block_sparse_moe.experts.194.w1", "model.layers.51.block_sparse_moe.experts.195.w1", "model.layers.51.block_sparse_moe.experts.196.w1", "model.layers.51.block_sparse_moe.experts.197.w1", "model.layers.51.block_sparse_moe.experts.198.w1", "model.layers.51.block_sparse_moe.experts.199.w1", "model.layers.51.block_sparse_moe.experts.200.w1", "model.layers.51.block_sparse_moe.experts.201.w1", "model.layers.51.block_sparse_moe.experts.202.w1", "model.layers.51.block_sparse_moe.experts.203.w1", "model.layers.51.block_sparse_moe.experts.204.w1", "model.layers.51.block_sparse_moe.experts.205.w1", "model.layers.51.block_sparse_moe.experts.206.w1", "model.layers.51.block_sparse_moe.experts.207.w1", "model.layers.51.block_sparse_moe.experts.208.w1", "model.layers.51.block_sparse_moe.experts.209.w1", "model.layers.51.block_sparse_moe.experts.210.w1", "model.layers.51.block_sparse_moe.experts.211.w1", "model.layers.51.block_sparse_moe.experts.212.w1", "model.layers.51.block_sparse_moe.experts.213.w1", "model.layers.51.block_sparse_moe.experts.214.w1", "model.layers.51.block_sparse_moe.experts.215.w1", "model.layers.51.block_sparse_moe.experts.216.w1", "model.layers.51.block_sparse_moe.experts.217.w1", "model.layers.51.block_sparse_moe.experts.218.w1", "model.layers.51.block_sparse_moe.experts.219.w1", "model.layers.51.block_sparse_moe.experts.220.w1", "model.layers.51.block_sparse_moe.experts.221.w1", "model.layers.51.block_sparse_moe.experts.222.w1", "model.layers.51.block_sparse_moe.experts.223.w1", "model.layers.51.block_sparse_moe.experts.224.w1", "model.layers.51.block_sparse_moe.experts.225.w1", "model.layers.51.block_sparse_moe.experts.226.w1", "model.layers.51.block_sparse_moe.experts.227.w1", "model.layers.51.block_sparse_moe.experts.228.w1", "model.layers.51.block_sparse_moe.experts.229.w1", "model.layers.51.block_sparse_moe.experts.230.w1", "model.layers.51.block_sparse_moe.experts.231.w1", "model.layers.51.block_sparse_moe.experts.232.w1", "model.layers.51.block_sparse_moe.experts.233.w1", "model.layers.51.block_sparse_moe.experts.234.w1", "model.layers.51.block_sparse_moe.experts.235.w1", "model.layers.51.block_sparse_moe.experts.236.w1", "model.layers.51.block_sparse_moe.experts.237.w1", "model.layers.51.block_sparse_moe.experts.238.w1", "model.layers.51.block_sparse_moe.experts.239.w1", "model.layers.51.block_sparse_moe.experts.240.w1", "model.layers.51.block_sparse_moe.experts.241.w1", "model.layers.51.block_sparse_moe.experts.242.w1", "model.layers.51.block_sparse_moe.experts.243.w1", "model.layers.51.block_sparse_moe.experts.244.w1", "model.layers.51.block_sparse_moe.experts.245.w1", "model.layers.51.block_sparse_moe.experts.246.w1", "model.layers.51.block_sparse_moe.experts.247.w1", "model.layers.51.block_sparse_moe.experts.248.w1", "model.layers.51.block_sparse_moe.experts.249.w1", "model.layers.51.block_sparse_moe.experts.250.w1", "model.layers.51.block_sparse_moe.experts.251.w1", "model.layers.51.block_sparse_moe.experts.252.w1", "model.layers.51.block_sparse_moe.experts.253.w1", "model.layers.51.block_sparse_moe.experts.254.w1", "model.layers.51.block_sparse_moe.experts.255.w1", "model.layers.51.block_sparse_moe.experts.0.w3", "model.layers.51.block_sparse_moe.experts.1.w3", "model.layers.51.block_sparse_moe.experts.2.w3", "model.layers.51.block_sparse_moe.experts.3.w3", "model.layers.51.block_sparse_moe.experts.4.w3", "model.layers.51.block_sparse_moe.experts.5.w3", "model.layers.51.block_sparse_moe.experts.6.w3", "model.layers.51.block_sparse_moe.experts.7.w3", "model.layers.51.block_sparse_moe.experts.8.w3", "model.layers.51.block_sparse_moe.experts.9.w3", "model.layers.51.block_sparse_moe.experts.10.w3", "model.layers.51.block_sparse_moe.experts.11.w3", "model.layers.51.block_sparse_moe.experts.12.w3", "model.layers.51.block_sparse_moe.experts.13.w3", "model.layers.51.block_sparse_moe.experts.14.w3", "model.layers.51.block_sparse_moe.experts.15.w3", "model.layers.51.block_sparse_moe.experts.16.w3", "model.layers.51.block_sparse_moe.experts.17.w3", "model.layers.51.block_sparse_moe.experts.18.w3", "model.layers.51.block_sparse_moe.experts.19.w3", "model.layers.51.block_sparse_moe.experts.20.w3", "model.layers.51.block_sparse_moe.experts.21.w3", "model.layers.51.block_sparse_moe.experts.22.w3", "model.layers.51.block_sparse_moe.experts.23.w3", "model.layers.51.block_sparse_moe.experts.24.w3", "model.layers.51.block_sparse_moe.experts.25.w3", "model.layers.51.block_sparse_moe.experts.26.w3", "model.layers.51.block_sparse_moe.experts.27.w3", "model.layers.51.block_sparse_moe.experts.28.w3", "model.layers.51.block_sparse_moe.experts.29.w3", "model.layers.51.block_sparse_moe.experts.30.w3", "model.layers.51.block_sparse_moe.experts.31.w3", "model.layers.51.block_sparse_moe.experts.32.w3", "model.layers.51.block_sparse_moe.experts.33.w3", "model.layers.51.block_sparse_moe.experts.34.w3", "model.layers.51.block_sparse_moe.experts.35.w3", "model.layers.51.block_sparse_moe.experts.36.w3", "model.layers.51.block_sparse_moe.experts.37.w3", "model.layers.51.block_sparse_moe.experts.38.w3", "model.layers.51.block_sparse_moe.experts.39.w3", "model.layers.51.block_sparse_moe.experts.40.w3", "model.layers.51.block_sparse_moe.experts.41.w3", "model.layers.51.block_sparse_moe.experts.42.w3", "model.layers.51.block_sparse_moe.experts.43.w3", "model.layers.51.block_sparse_moe.experts.44.w3", "model.layers.51.block_sparse_moe.experts.45.w3", "model.layers.51.block_sparse_moe.experts.46.w3", "model.layers.51.block_sparse_moe.experts.47.w3", "model.layers.51.block_sparse_moe.experts.48.w3", "model.layers.51.block_sparse_moe.experts.49.w3", "model.layers.51.block_sparse_moe.experts.50.w3", "model.layers.51.block_sparse_moe.experts.51.w3", "model.layers.51.block_sparse_moe.experts.52.w3", "model.layers.51.block_sparse_moe.experts.53.w3", "model.layers.51.block_sparse_moe.experts.54.w3", "model.layers.51.block_sparse_moe.experts.55.w3", "model.layers.51.block_sparse_moe.experts.56.w3", "model.layers.51.block_sparse_moe.experts.57.w3", "model.layers.51.block_sparse_moe.experts.58.w3", "model.layers.51.block_sparse_moe.experts.59.w3", "model.layers.51.block_sparse_moe.experts.60.w3", "model.layers.51.block_sparse_moe.experts.61.w3", "model.layers.51.block_sparse_moe.experts.62.w3", "model.layers.51.block_sparse_moe.experts.63.w3", "model.layers.51.block_sparse_moe.experts.64.w3", "model.layers.51.block_sparse_moe.experts.65.w3", "model.layers.51.block_sparse_moe.experts.66.w3", "model.layers.51.block_sparse_moe.experts.67.w3", "model.layers.51.block_sparse_moe.experts.68.w3", "model.layers.51.block_sparse_moe.experts.69.w3", "model.layers.51.block_sparse_moe.experts.70.w3", "model.layers.51.block_sparse_moe.experts.71.w3", "model.layers.51.block_sparse_moe.experts.72.w3", "model.layers.51.block_sparse_moe.experts.73.w3", "model.layers.51.block_sparse_moe.experts.74.w3", "model.layers.51.block_sparse_moe.experts.75.w3", "model.layers.51.block_sparse_moe.experts.76.w3", "model.layers.51.block_sparse_moe.experts.77.w3", "model.layers.51.block_sparse_moe.experts.78.w3", "model.layers.51.block_sparse_moe.experts.79.w3", "model.layers.51.block_sparse_moe.experts.80.w3", "model.layers.51.block_sparse_moe.experts.81.w3", "model.layers.51.block_sparse_moe.experts.82.w3", "model.layers.51.block_sparse_moe.experts.83.w3", "model.layers.51.block_sparse_moe.experts.84.w3", "model.layers.51.block_sparse_moe.experts.85.w3", "model.layers.51.block_sparse_moe.experts.86.w3", "model.layers.51.block_sparse_moe.experts.87.w3", "model.layers.51.block_sparse_moe.experts.88.w3", "model.layers.51.block_sparse_moe.experts.89.w3", "model.layers.51.block_sparse_moe.experts.90.w3", "model.layers.51.block_sparse_moe.experts.91.w3", "model.layers.51.block_sparse_moe.experts.92.w3", "model.layers.51.block_sparse_moe.experts.93.w3", "model.layers.51.block_sparse_moe.experts.94.w3", "model.layers.51.block_sparse_moe.experts.95.w3", "model.layers.51.block_sparse_moe.experts.96.w3", "model.layers.51.block_sparse_moe.experts.97.w3", "model.layers.51.block_sparse_moe.experts.98.w3", "model.layers.51.block_sparse_moe.experts.99.w3", "model.layers.51.block_sparse_moe.experts.100.w3", "model.layers.51.block_sparse_moe.experts.101.w3", "model.layers.51.block_sparse_moe.experts.102.w3", "model.layers.51.block_sparse_moe.experts.103.w3", "model.layers.51.block_sparse_moe.experts.104.w3", "model.layers.51.block_sparse_moe.experts.105.w3", "model.layers.51.block_sparse_moe.experts.106.w3", "model.layers.51.block_sparse_moe.experts.107.w3", "model.layers.51.block_sparse_moe.experts.108.w3", "model.layers.51.block_sparse_moe.experts.109.w3", "model.layers.51.block_sparse_moe.experts.110.w3", "model.layers.51.block_sparse_moe.experts.111.w3", "model.layers.51.block_sparse_moe.experts.112.w3", "model.layers.51.block_sparse_moe.experts.113.w3", "model.layers.51.block_sparse_moe.experts.114.w3", "model.layers.51.block_sparse_moe.experts.115.w3", "model.layers.51.block_sparse_moe.experts.116.w3", "model.layers.51.block_sparse_moe.experts.117.w3", "model.layers.51.block_sparse_moe.experts.118.w3", "model.layers.51.block_sparse_moe.experts.119.w3", "model.layers.51.block_sparse_moe.experts.120.w3", "model.layers.51.block_sparse_moe.experts.121.w3", "model.layers.51.block_sparse_moe.experts.122.w3", "model.layers.51.block_sparse_moe.experts.123.w3", "model.layers.51.block_sparse_moe.experts.124.w3", "model.layers.51.block_sparse_moe.experts.125.w3", "model.layers.51.block_sparse_moe.experts.126.w3", "model.layers.51.block_sparse_moe.experts.127.w3", "model.layers.51.block_sparse_moe.experts.128.w3", "model.layers.51.block_sparse_moe.experts.129.w3", "model.layers.51.block_sparse_moe.experts.130.w3", "model.layers.51.block_sparse_moe.experts.131.w3", "model.layers.51.block_sparse_moe.experts.132.w3", "model.layers.51.block_sparse_moe.experts.133.w3", "model.layers.51.block_sparse_moe.experts.134.w3", "model.layers.51.block_sparse_moe.experts.135.w3", "model.layers.51.block_sparse_moe.experts.136.w3", "model.layers.51.block_sparse_moe.experts.137.w3", "model.layers.51.block_sparse_moe.experts.138.w3", "model.layers.51.block_sparse_moe.experts.139.w3", "model.layers.51.block_sparse_moe.experts.140.w3", "model.layers.51.block_sparse_moe.experts.141.w3", "model.layers.51.block_sparse_moe.experts.142.w3", "model.layers.51.block_sparse_moe.experts.143.w3", "model.layers.51.block_sparse_moe.experts.144.w3", "model.layers.51.block_sparse_moe.experts.145.w3", "model.layers.51.block_sparse_moe.experts.146.w3", "model.layers.51.block_sparse_moe.experts.147.w3", "model.layers.51.block_sparse_moe.experts.148.w3", "model.layers.51.block_sparse_moe.experts.149.w3", "model.layers.51.block_sparse_moe.experts.150.w3", "model.layers.51.block_sparse_moe.experts.151.w3", "model.layers.51.block_sparse_moe.experts.152.w3", "model.layers.51.block_sparse_moe.experts.153.w3", "model.layers.51.block_sparse_moe.experts.154.w3", "model.layers.51.block_sparse_moe.experts.155.w3", "model.layers.51.block_sparse_moe.experts.156.w3", "model.layers.51.block_sparse_moe.experts.157.w3", "model.layers.51.block_sparse_moe.experts.158.w3", "model.layers.51.block_sparse_moe.experts.159.w3", "model.layers.51.block_sparse_moe.experts.160.w3", "model.layers.51.block_sparse_moe.experts.161.w3", "model.layers.51.block_sparse_moe.experts.162.w3", "model.layers.51.block_sparse_moe.experts.163.w3", "model.layers.51.block_sparse_moe.experts.164.w3", "model.layers.51.block_sparse_moe.experts.165.w3", "model.layers.51.block_sparse_moe.experts.166.w3", "model.layers.51.block_sparse_moe.experts.167.w3", "model.layers.51.block_sparse_moe.experts.168.w3", "model.layers.51.block_sparse_moe.experts.169.w3", "model.layers.51.block_sparse_moe.experts.170.w3", "model.layers.51.block_sparse_moe.experts.171.w3", "model.layers.51.block_sparse_moe.experts.172.w3", "model.layers.51.block_sparse_moe.experts.173.w3", "model.layers.51.block_sparse_moe.experts.174.w3", "model.layers.51.block_sparse_moe.experts.175.w3", "model.layers.51.block_sparse_moe.experts.176.w3", "model.layers.51.block_sparse_moe.experts.177.w3", "model.layers.51.block_sparse_moe.experts.178.w3", "model.layers.51.block_sparse_moe.experts.179.w3", "model.layers.51.block_sparse_moe.experts.180.w3", "model.layers.51.block_sparse_moe.experts.181.w3", "model.layers.51.block_sparse_moe.experts.182.w3", "model.layers.51.block_sparse_moe.experts.183.w3", "model.layers.51.block_sparse_moe.experts.184.w3", "model.layers.51.block_sparse_moe.experts.185.w3", "model.layers.51.block_sparse_moe.experts.186.w3", "model.layers.51.block_sparse_moe.experts.187.w3", "model.layers.51.block_sparse_moe.experts.188.w3", "model.layers.51.block_sparse_moe.experts.189.w3", "model.layers.51.block_sparse_moe.experts.190.w3", "model.layers.51.block_sparse_moe.experts.191.w3", "model.layers.51.block_sparse_moe.experts.192.w3", "model.layers.51.block_sparse_moe.experts.193.w3", "model.layers.51.block_sparse_moe.experts.194.w3", "model.layers.51.block_sparse_moe.experts.195.w3", "model.layers.51.block_sparse_moe.experts.196.w3", "model.layers.51.block_sparse_moe.experts.197.w3", "model.layers.51.block_sparse_moe.experts.198.w3", "model.layers.51.block_sparse_moe.experts.199.w3", "model.layers.51.block_sparse_moe.experts.200.w3", "model.layers.51.block_sparse_moe.experts.201.w3", "model.layers.51.block_sparse_moe.experts.202.w3", "model.layers.51.block_sparse_moe.experts.203.w3", "model.layers.51.block_sparse_moe.experts.204.w3", "model.layers.51.block_sparse_moe.experts.205.w3", "model.layers.51.block_sparse_moe.experts.206.w3", "model.layers.51.block_sparse_moe.experts.207.w3", "model.layers.51.block_sparse_moe.experts.208.w3", "model.layers.51.block_sparse_moe.experts.209.w3", "model.layers.51.block_sparse_moe.experts.210.w3", "model.layers.51.block_sparse_moe.experts.211.w3", "model.layers.51.block_sparse_moe.experts.212.w3", "model.layers.51.block_sparse_moe.experts.213.w3", "model.layers.51.block_sparse_moe.experts.214.w3", "model.layers.51.block_sparse_moe.experts.215.w3", "model.layers.51.block_sparse_moe.experts.216.w3", "model.layers.51.block_sparse_moe.experts.217.w3", "model.layers.51.block_sparse_moe.experts.218.w3", "model.layers.51.block_sparse_moe.experts.219.w3", "model.layers.51.block_sparse_moe.experts.220.w3", "model.layers.51.block_sparse_moe.experts.221.w3", "model.layers.51.block_sparse_moe.experts.222.w3", "model.layers.51.block_sparse_moe.experts.223.w3", "model.layers.51.block_sparse_moe.experts.224.w3", "model.layers.51.block_sparse_moe.experts.225.w3", "model.layers.51.block_sparse_moe.experts.226.w3", "model.layers.51.block_sparse_moe.experts.227.w3", "model.layers.51.block_sparse_moe.experts.228.w3", "model.layers.51.block_sparse_moe.experts.229.w3", "model.layers.51.block_sparse_moe.experts.230.w3", "model.layers.51.block_sparse_moe.experts.231.w3", "model.layers.51.block_sparse_moe.experts.232.w3", "model.layers.51.block_sparse_moe.experts.233.w3", "model.layers.51.block_sparse_moe.experts.234.w3", "model.layers.51.block_sparse_moe.experts.235.w3", "model.layers.51.block_sparse_moe.experts.236.w3", "model.layers.51.block_sparse_moe.experts.237.w3", "model.layers.51.block_sparse_moe.experts.238.w3", "model.layers.51.block_sparse_moe.experts.239.w3", "model.layers.51.block_sparse_moe.experts.240.w3", "model.layers.51.block_sparse_moe.experts.241.w3", "model.layers.51.block_sparse_moe.experts.242.w3", "model.layers.51.block_sparse_moe.experts.243.w3", "model.layers.51.block_sparse_moe.experts.244.w3", "model.layers.51.block_sparse_moe.experts.245.w3", "model.layers.51.block_sparse_moe.experts.246.w3", "model.layers.51.block_sparse_moe.experts.247.w3", "model.layers.51.block_sparse_moe.experts.248.w3", "model.layers.51.block_sparse_moe.experts.249.w3", "model.layers.51.block_sparse_moe.experts.250.w3", "model.layers.51.block_sparse_moe.experts.251.w3", "model.layers.51.block_sparse_moe.experts.252.w3", "model.layers.51.block_sparse_moe.experts.253.w3", "model.layers.51.block_sparse_moe.experts.254.w3", "model.layers.51.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 2.5054812431424267e-05, "dbits": 2415919104 } ] }, { "idx": 259, "layers": [ "model.layers.51.block_sparse_moe.experts.0.w2", "model.layers.51.block_sparse_moe.experts.1.w2", "model.layers.51.block_sparse_moe.experts.2.w2", "model.layers.51.block_sparse_moe.experts.3.w2", "model.layers.51.block_sparse_moe.experts.4.w2", "model.layers.51.block_sparse_moe.experts.5.w2", "model.layers.51.block_sparse_moe.experts.6.w2", "model.layers.51.block_sparse_moe.experts.7.w2", "model.layers.51.block_sparse_moe.experts.8.w2", "model.layers.51.block_sparse_moe.experts.9.w2", "model.layers.51.block_sparse_moe.experts.10.w2", "model.layers.51.block_sparse_moe.experts.11.w2", "model.layers.51.block_sparse_moe.experts.12.w2", "model.layers.51.block_sparse_moe.experts.13.w2", "model.layers.51.block_sparse_moe.experts.14.w2", "model.layers.51.block_sparse_moe.experts.15.w2", "model.layers.51.block_sparse_moe.experts.16.w2", "model.layers.51.block_sparse_moe.experts.17.w2", "model.layers.51.block_sparse_moe.experts.18.w2", "model.layers.51.block_sparse_moe.experts.19.w2", "model.layers.51.block_sparse_moe.experts.20.w2", "model.layers.51.block_sparse_moe.experts.21.w2", "model.layers.51.block_sparse_moe.experts.22.w2", "model.layers.51.block_sparse_moe.experts.23.w2", "model.layers.51.block_sparse_moe.experts.24.w2", "model.layers.51.block_sparse_moe.experts.25.w2", "model.layers.51.block_sparse_moe.experts.26.w2", "model.layers.51.block_sparse_moe.experts.27.w2", "model.layers.51.block_sparse_moe.experts.28.w2", "model.layers.51.block_sparse_moe.experts.29.w2", "model.layers.51.block_sparse_moe.experts.30.w2", "model.layers.51.block_sparse_moe.experts.31.w2", "model.layers.51.block_sparse_moe.experts.32.w2", "model.layers.51.block_sparse_moe.experts.33.w2", "model.layers.51.block_sparse_moe.experts.34.w2", "model.layers.51.block_sparse_moe.experts.35.w2", "model.layers.51.block_sparse_moe.experts.36.w2", "model.layers.51.block_sparse_moe.experts.37.w2", "model.layers.51.block_sparse_moe.experts.38.w2", "model.layers.51.block_sparse_moe.experts.39.w2", "model.layers.51.block_sparse_moe.experts.40.w2", "model.layers.51.block_sparse_moe.experts.41.w2", "model.layers.51.block_sparse_moe.experts.42.w2", "model.layers.51.block_sparse_moe.experts.43.w2", "model.layers.51.block_sparse_moe.experts.44.w2", "model.layers.51.block_sparse_moe.experts.45.w2", "model.layers.51.block_sparse_moe.experts.46.w2", "model.layers.51.block_sparse_moe.experts.47.w2", "model.layers.51.block_sparse_moe.experts.48.w2", "model.layers.51.block_sparse_moe.experts.49.w2", "model.layers.51.block_sparse_moe.experts.50.w2", "model.layers.51.block_sparse_moe.experts.51.w2", "model.layers.51.block_sparse_moe.experts.52.w2", "model.layers.51.block_sparse_moe.experts.53.w2", "model.layers.51.block_sparse_moe.experts.54.w2", "model.layers.51.block_sparse_moe.experts.55.w2", "model.layers.51.block_sparse_moe.experts.56.w2", "model.layers.51.block_sparse_moe.experts.57.w2", "model.layers.51.block_sparse_moe.experts.58.w2", "model.layers.51.block_sparse_moe.experts.59.w2", "model.layers.51.block_sparse_moe.experts.60.w2", "model.layers.51.block_sparse_moe.experts.61.w2", "model.layers.51.block_sparse_moe.experts.62.w2", "model.layers.51.block_sparse_moe.experts.63.w2", "model.layers.51.block_sparse_moe.experts.64.w2", "model.layers.51.block_sparse_moe.experts.65.w2", "model.layers.51.block_sparse_moe.experts.66.w2", "model.layers.51.block_sparse_moe.experts.67.w2", "model.layers.51.block_sparse_moe.experts.68.w2", "model.layers.51.block_sparse_moe.experts.69.w2", "model.layers.51.block_sparse_moe.experts.70.w2", "model.layers.51.block_sparse_moe.experts.71.w2", "model.layers.51.block_sparse_moe.experts.72.w2", "model.layers.51.block_sparse_moe.experts.73.w2", "model.layers.51.block_sparse_moe.experts.74.w2", "model.layers.51.block_sparse_moe.experts.75.w2", "model.layers.51.block_sparse_moe.experts.76.w2", "model.layers.51.block_sparse_moe.experts.77.w2", "model.layers.51.block_sparse_moe.experts.78.w2", "model.layers.51.block_sparse_moe.experts.79.w2", "model.layers.51.block_sparse_moe.experts.80.w2", "model.layers.51.block_sparse_moe.experts.81.w2", "model.layers.51.block_sparse_moe.experts.82.w2", "model.layers.51.block_sparse_moe.experts.83.w2", "model.layers.51.block_sparse_moe.experts.84.w2", "model.layers.51.block_sparse_moe.experts.85.w2", "model.layers.51.block_sparse_moe.experts.86.w2", "model.layers.51.block_sparse_moe.experts.87.w2", "model.layers.51.block_sparse_moe.experts.88.w2", "model.layers.51.block_sparse_moe.experts.89.w2", "model.layers.51.block_sparse_moe.experts.90.w2", "model.layers.51.block_sparse_moe.experts.91.w2", "model.layers.51.block_sparse_moe.experts.92.w2", "model.layers.51.block_sparse_moe.experts.93.w2", "model.layers.51.block_sparse_moe.experts.94.w2", "model.layers.51.block_sparse_moe.experts.95.w2", "model.layers.51.block_sparse_moe.experts.96.w2", "model.layers.51.block_sparse_moe.experts.97.w2", "model.layers.51.block_sparse_moe.experts.98.w2", "model.layers.51.block_sparse_moe.experts.99.w2", "model.layers.51.block_sparse_moe.experts.100.w2", "model.layers.51.block_sparse_moe.experts.101.w2", "model.layers.51.block_sparse_moe.experts.102.w2", "model.layers.51.block_sparse_moe.experts.103.w2", "model.layers.51.block_sparse_moe.experts.104.w2", "model.layers.51.block_sparse_moe.experts.105.w2", "model.layers.51.block_sparse_moe.experts.106.w2", "model.layers.51.block_sparse_moe.experts.107.w2", "model.layers.51.block_sparse_moe.experts.108.w2", "model.layers.51.block_sparse_moe.experts.109.w2", "model.layers.51.block_sparse_moe.experts.110.w2", "model.layers.51.block_sparse_moe.experts.111.w2", "model.layers.51.block_sparse_moe.experts.112.w2", "model.layers.51.block_sparse_moe.experts.113.w2", "model.layers.51.block_sparse_moe.experts.114.w2", "model.layers.51.block_sparse_moe.experts.115.w2", "model.layers.51.block_sparse_moe.experts.116.w2", "model.layers.51.block_sparse_moe.experts.117.w2", "model.layers.51.block_sparse_moe.experts.118.w2", "model.layers.51.block_sparse_moe.experts.119.w2", "model.layers.51.block_sparse_moe.experts.120.w2", "model.layers.51.block_sparse_moe.experts.121.w2", "model.layers.51.block_sparse_moe.experts.122.w2", "model.layers.51.block_sparse_moe.experts.123.w2", "model.layers.51.block_sparse_moe.experts.124.w2", "model.layers.51.block_sparse_moe.experts.125.w2", "model.layers.51.block_sparse_moe.experts.126.w2", "model.layers.51.block_sparse_moe.experts.127.w2", "model.layers.51.block_sparse_moe.experts.128.w2", "model.layers.51.block_sparse_moe.experts.129.w2", "model.layers.51.block_sparse_moe.experts.130.w2", "model.layers.51.block_sparse_moe.experts.131.w2", "model.layers.51.block_sparse_moe.experts.132.w2", "model.layers.51.block_sparse_moe.experts.133.w2", "model.layers.51.block_sparse_moe.experts.134.w2", "model.layers.51.block_sparse_moe.experts.135.w2", "model.layers.51.block_sparse_moe.experts.136.w2", "model.layers.51.block_sparse_moe.experts.137.w2", "model.layers.51.block_sparse_moe.experts.138.w2", "model.layers.51.block_sparse_moe.experts.139.w2", "model.layers.51.block_sparse_moe.experts.140.w2", "model.layers.51.block_sparse_moe.experts.141.w2", "model.layers.51.block_sparse_moe.experts.142.w2", "model.layers.51.block_sparse_moe.experts.143.w2", "model.layers.51.block_sparse_moe.experts.144.w2", "model.layers.51.block_sparse_moe.experts.145.w2", "model.layers.51.block_sparse_moe.experts.146.w2", "model.layers.51.block_sparse_moe.experts.147.w2", "model.layers.51.block_sparse_moe.experts.148.w2", "model.layers.51.block_sparse_moe.experts.149.w2", "model.layers.51.block_sparse_moe.experts.150.w2", "model.layers.51.block_sparse_moe.experts.151.w2", "model.layers.51.block_sparse_moe.experts.152.w2", "model.layers.51.block_sparse_moe.experts.153.w2", "model.layers.51.block_sparse_moe.experts.154.w2", "model.layers.51.block_sparse_moe.experts.155.w2", "model.layers.51.block_sparse_moe.experts.156.w2", "model.layers.51.block_sparse_moe.experts.157.w2", "model.layers.51.block_sparse_moe.experts.158.w2", "model.layers.51.block_sparse_moe.experts.159.w2", "model.layers.51.block_sparse_moe.experts.160.w2", "model.layers.51.block_sparse_moe.experts.161.w2", "model.layers.51.block_sparse_moe.experts.162.w2", "model.layers.51.block_sparse_moe.experts.163.w2", "model.layers.51.block_sparse_moe.experts.164.w2", "model.layers.51.block_sparse_moe.experts.165.w2", "model.layers.51.block_sparse_moe.experts.166.w2", "model.layers.51.block_sparse_moe.experts.167.w2", "model.layers.51.block_sparse_moe.experts.168.w2", "model.layers.51.block_sparse_moe.experts.169.w2", "model.layers.51.block_sparse_moe.experts.170.w2", "model.layers.51.block_sparse_moe.experts.171.w2", "model.layers.51.block_sparse_moe.experts.172.w2", "model.layers.51.block_sparse_moe.experts.173.w2", "model.layers.51.block_sparse_moe.experts.174.w2", "model.layers.51.block_sparse_moe.experts.175.w2", "model.layers.51.block_sparse_moe.experts.176.w2", "model.layers.51.block_sparse_moe.experts.177.w2", "model.layers.51.block_sparse_moe.experts.178.w2", "model.layers.51.block_sparse_moe.experts.179.w2", "model.layers.51.block_sparse_moe.experts.180.w2", "model.layers.51.block_sparse_moe.experts.181.w2", "model.layers.51.block_sparse_moe.experts.182.w2", "model.layers.51.block_sparse_moe.experts.183.w2", "model.layers.51.block_sparse_moe.experts.184.w2", "model.layers.51.block_sparse_moe.experts.185.w2", "model.layers.51.block_sparse_moe.experts.186.w2", "model.layers.51.block_sparse_moe.experts.187.w2", "model.layers.51.block_sparse_moe.experts.188.w2", "model.layers.51.block_sparse_moe.experts.189.w2", "model.layers.51.block_sparse_moe.experts.190.w2", "model.layers.51.block_sparse_moe.experts.191.w2", "model.layers.51.block_sparse_moe.experts.192.w2", "model.layers.51.block_sparse_moe.experts.193.w2", "model.layers.51.block_sparse_moe.experts.194.w2", "model.layers.51.block_sparse_moe.experts.195.w2", "model.layers.51.block_sparse_moe.experts.196.w2", "model.layers.51.block_sparse_moe.experts.197.w2", "model.layers.51.block_sparse_moe.experts.198.w2", "model.layers.51.block_sparse_moe.experts.199.w2", "model.layers.51.block_sparse_moe.experts.200.w2", "model.layers.51.block_sparse_moe.experts.201.w2", "model.layers.51.block_sparse_moe.experts.202.w2", "model.layers.51.block_sparse_moe.experts.203.w2", "model.layers.51.block_sparse_moe.experts.204.w2", "model.layers.51.block_sparse_moe.experts.205.w2", "model.layers.51.block_sparse_moe.experts.206.w2", "model.layers.51.block_sparse_moe.experts.207.w2", "model.layers.51.block_sparse_moe.experts.208.w2", "model.layers.51.block_sparse_moe.experts.209.w2", "model.layers.51.block_sparse_moe.experts.210.w2", "model.layers.51.block_sparse_moe.experts.211.w2", "model.layers.51.block_sparse_moe.experts.212.w2", "model.layers.51.block_sparse_moe.experts.213.w2", "model.layers.51.block_sparse_moe.experts.214.w2", "model.layers.51.block_sparse_moe.experts.215.w2", "model.layers.51.block_sparse_moe.experts.216.w2", "model.layers.51.block_sparse_moe.experts.217.w2", "model.layers.51.block_sparse_moe.experts.218.w2", "model.layers.51.block_sparse_moe.experts.219.w2", "model.layers.51.block_sparse_moe.experts.220.w2", "model.layers.51.block_sparse_moe.experts.221.w2", "model.layers.51.block_sparse_moe.experts.222.w2", "model.layers.51.block_sparse_moe.experts.223.w2", "model.layers.51.block_sparse_moe.experts.224.w2", "model.layers.51.block_sparse_moe.experts.225.w2", "model.layers.51.block_sparse_moe.experts.226.w2", "model.layers.51.block_sparse_moe.experts.227.w2", "model.layers.51.block_sparse_moe.experts.228.w2", "model.layers.51.block_sparse_moe.experts.229.w2", "model.layers.51.block_sparse_moe.experts.230.w2", "model.layers.51.block_sparse_moe.experts.231.w2", "model.layers.51.block_sparse_moe.experts.232.w2", "model.layers.51.block_sparse_moe.experts.233.w2", "model.layers.51.block_sparse_moe.experts.234.w2", "model.layers.51.block_sparse_moe.experts.235.w2", "model.layers.51.block_sparse_moe.experts.236.w2", "model.layers.51.block_sparse_moe.experts.237.w2", "model.layers.51.block_sparse_moe.experts.238.w2", "model.layers.51.block_sparse_moe.experts.239.w2", "model.layers.51.block_sparse_moe.experts.240.w2", "model.layers.51.block_sparse_moe.experts.241.w2", "model.layers.51.block_sparse_moe.experts.242.w2", "model.layers.51.block_sparse_moe.experts.243.w2", "model.layers.51.block_sparse_moe.experts.244.w2", "model.layers.51.block_sparse_moe.experts.245.w2", "model.layers.51.block_sparse_moe.experts.246.w2", "model.layers.51.block_sparse_moe.experts.247.w2", "model.layers.51.block_sparse_moe.experts.248.w2", "model.layers.51.block_sparse_moe.experts.249.w2", "model.layers.51.block_sparse_moe.experts.250.w2", "model.layers.51.block_sparse_moe.experts.251.w2", "model.layers.51.block_sparse_moe.experts.252.w2", "model.layers.51.block_sparse_moe.experts.253.w2", "model.layers.51.block_sparse_moe.experts.254.w2", "model.layers.51.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -8.616447448728248e-05, "dbits": 1207959552 } ] }, { "idx": 260, "layers": [ "model.layers.52.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0020251899957657304, "dbits": 18874368 } ] }, { "idx": 261, "layers": [ "model.layers.52.self_attn.k_proj", "model.layers.52.self_attn.v_proj" ], "candidates": [ { "dkld": 0.005654072761535733, "dbits": 6291456 } ] }, { "idx": 262, "layers": [ "model.layers.52.self_attn.o_proj" ], "candidates": [ { "dkld": -0.007596290111541726, "dbits": 18874368 } ] }, { "idx": 263, "layers": [ "model.layers.52.block_sparse_moe.experts.0.w1", "model.layers.52.block_sparse_moe.experts.1.w1", "model.layers.52.block_sparse_moe.experts.2.w1", "model.layers.52.block_sparse_moe.experts.3.w1", "model.layers.52.block_sparse_moe.experts.4.w1", "model.layers.52.block_sparse_moe.experts.5.w1", "model.layers.52.block_sparse_moe.experts.6.w1", "model.layers.52.block_sparse_moe.experts.7.w1", "model.layers.52.block_sparse_moe.experts.8.w1", "model.layers.52.block_sparse_moe.experts.9.w1", "model.layers.52.block_sparse_moe.experts.10.w1", "model.layers.52.block_sparse_moe.experts.11.w1", "model.layers.52.block_sparse_moe.experts.12.w1", "model.layers.52.block_sparse_moe.experts.13.w1", "model.layers.52.block_sparse_moe.experts.14.w1", "model.layers.52.block_sparse_moe.experts.15.w1", "model.layers.52.block_sparse_moe.experts.16.w1", "model.layers.52.block_sparse_moe.experts.17.w1", "model.layers.52.block_sparse_moe.experts.18.w1", "model.layers.52.block_sparse_moe.experts.19.w1", "model.layers.52.block_sparse_moe.experts.20.w1", "model.layers.52.block_sparse_moe.experts.21.w1", "model.layers.52.block_sparse_moe.experts.22.w1", "model.layers.52.block_sparse_moe.experts.23.w1", "model.layers.52.block_sparse_moe.experts.24.w1", "model.layers.52.block_sparse_moe.experts.25.w1", "model.layers.52.block_sparse_moe.experts.26.w1", "model.layers.52.block_sparse_moe.experts.27.w1", "model.layers.52.block_sparse_moe.experts.28.w1", "model.layers.52.block_sparse_moe.experts.29.w1", "model.layers.52.block_sparse_moe.experts.30.w1", "model.layers.52.block_sparse_moe.experts.31.w1", "model.layers.52.block_sparse_moe.experts.32.w1", "model.layers.52.block_sparse_moe.experts.33.w1", "model.layers.52.block_sparse_moe.experts.34.w1", "model.layers.52.block_sparse_moe.experts.35.w1", "model.layers.52.block_sparse_moe.experts.36.w1", "model.layers.52.block_sparse_moe.experts.37.w1", "model.layers.52.block_sparse_moe.experts.38.w1", "model.layers.52.block_sparse_moe.experts.39.w1", "model.layers.52.block_sparse_moe.experts.40.w1", "model.layers.52.block_sparse_moe.experts.41.w1", "model.layers.52.block_sparse_moe.experts.42.w1", "model.layers.52.block_sparse_moe.experts.43.w1", "model.layers.52.block_sparse_moe.experts.44.w1", "model.layers.52.block_sparse_moe.experts.45.w1", "model.layers.52.block_sparse_moe.experts.46.w1", "model.layers.52.block_sparse_moe.experts.47.w1", "model.layers.52.block_sparse_moe.experts.48.w1", "model.layers.52.block_sparse_moe.experts.49.w1", "model.layers.52.block_sparse_moe.experts.50.w1", "model.layers.52.block_sparse_moe.experts.51.w1", "model.layers.52.block_sparse_moe.experts.52.w1", "model.layers.52.block_sparse_moe.experts.53.w1", "model.layers.52.block_sparse_moe.experts.54.w1", "model.layers.52.block_sparse_moe.experts.55.w1", "model.layers.52.block_sparse_moe.experts.56.w1", "model.layers.52.block_sparse_moe.experts.57.w1", "model.layers.52.block_sparse_moe.experts.58.w1", "model.layers.52.block_sparse_moe.experts.59.w1", "model.layers.52.block_sparse_moe.experts.60.w1", "model.layers.52.block_sparse_moe.experts.61.w1", "model.layers.52.block_sparse_moe.experts.62.w1", "model.layers.52.block_sparse_moe.experts.63.w1", "model.layers.52.block_sparse_moe.experts.64.w1", "model.layers.52.block_sparse_moe.experts.65.w1", "model.layers.52.block_sparse_moe.experts.66.w1", "model.layers.52.block_sparse_moe.experts.67.w1", "model.layers.52.block_sparse_moe.experts.68.w1", "model.layers.52.block_sparse_moe.experts.69.w1", "model.layers.52.block_sparse_moe.experts.70.w1", "model.layers.52.block_sparse_moe.experts.71.w1", "model.layers.52.block_sparse_moe.experts.72.w1", "model.layers.52.block_sparse_moe.experts.73.w1", "model.layers.52.block_sparse_moe.experts.74.w1", "model.layers.52.block_sparse_moe.experts.75.w1", "model.layers.52.block_sparse_moe.experts.76.w1", "model.layers.52.block_sparse_moe.experts.77.w1", "model.layers.52.block_sparse_moe.experts.78.w1", "model.layers.52.block_sparse_moe.experts.79.w1", "model.layers.52.block_sparse_moe.experts.80.w1", "model.layers.52.block_sparse_moe.experts.81.w1", "model.layers.52.block_sparse_moe.experts.82.w1", "model.layers.52.block_sparse_moe.experts.83.w1", "model.layers.52.block_sparse_moe.experts.84.w1", "model.layers.52.block_sparse_moe.experts.85.w1", "model.layers.52.block_sparse_moe.experts.86.w1", "model.layers.52.block_sparse_moe.experts.87.w1", "model.layers.52.block_sparse_moe.experts.88.w1", "model.layers.52.block_sparse_moe.experts.89.w1", "model.layers.52.block_sparse_moe.experts.90.w1", "model.layers.52.block_sparse_moe.experts.91.w1", "model.layers.52.block_sparse_moe.experts.92.w1", "model.layers.52.block_sparse_moe.experts.93.w1", "model.layers.52.block_sparse_moe.experts.94.w1", "model.layers.52.block_sparse_moe.experts.95.w1", "model.layers.52.block_sparse_moe.experts.96.w1", "model.layers.52.block_sparse_moe.experts.97.w1", "model.layers.52.block_sparse_moe.experts.98.w1", "model.layers.52.block_sparse_moe.experts.99.w1", "model.layers.52.block_sparse_moe.experts.100.w1", "model.layers.52.block_sparse_moe.experts.101.w1", "model.layers.52.block_sparse_moe.experts.102.w1", "model.layers.52.block_sparse_moe.experts.103.w1", "model.layers.52.block_sparse_moe.experts.104.w1", "model.layers.52.block_sparse_moe.experts.105.w1", "model.layers.52.block_sparse_moe.experts.106.w1", "model.layers.52.block_sparse_moe.experts.107.w1", "model.layers.52.block_sparse_moe.experts.108.w1", "model.layers.52.block_sparse_moe.experts.109.w1", "model.layers.52.block_sparse_moe.experts.110.w1", "model.layers.52.block_sparse_moe.experts.111.w1", "model.layers.52.block_sparse_moe.experts.112.w1", "model.layers.52.block_sparse_moe.experts.113.w1", "model.layers.52.block_sparse_moe.experts.114.w1", "model.layers.52.block_sparse_moe.experts.115.w1", "model.layers.52.block_sparse_moe.experts.116.w1", "model.layers.52.block_sparse_moe.experts.117.w1", "model.layers.52.block_sparse_moe.experts.118.w1", "model.layers.52.block_sparse_moe.experts.119.w1", "model.layers.52.block_sparse_moe.experts.120.w1", "model.layers.52.block_sparse_moe.experts.121.w1", "model.layers.52.block_sparse_moe.experts.122.w1", "model.layers.52.block_sparse_moe.experts.123.w1", "model.layers.52.block_sparse_moe.experts.124.w1", "model.layers.52.block_sparse_moe.experts.125.w1", "model.layers.52.block_sparse_moe.experts.126.w1", "model.layers.52.block_sparse_moe.experts.127.w1", "model.layers.52.block_sparse_moe.experts.128.w1", "model.layers.52.block_sparse_moe.experts.129.w1", "model.layers.52.block_sparse_moe.experts.130.w1", "model.layers.52.block_sparse_moe.experts.131.w1", "model.layers.52.block_sparse_moe.experts.132.w1", "model.layers.52.block_sparse_moe.experts.133.w1", "model.layers.52.block_sparse_moe.experts.134.w1", "model.layers.52.block_sparse_moe.experts.135.w1", "model.layers.52.block_sparse_moe.experts.136.w1", "model.layers.52.block_sparse_moe.experts.137.w1", "model.layers.52.block_sparse_moe.experts.138.w1", "model.layers.52.block_sparse_moe.experts.139.w1", "model.layers.52.block_sparse_moe.experts.140.w1", "model.layers.52.block_sparse_moe.experts.141.w1", "model.layers.52.block_sparse_moe.experts.142.w1", "model.layers.52.block_sparse_moe.experts.143.w1", "model.layers.52.block_sparse_moe.experts.144.w1", "model.layers.52.block_sparse_moe.experts.145.w1", "model.layers.52.block_sparse_moe.experts.146.w1", "model.layers.52.block_sparse_moe.experts.147.w1", "model.layers.52.block_sparse_moe.experts.148.w1", "model.layers.52.block_sparse_moe.experts.149.w1", "model.layers.52.block_sparse_moe.experts.150.w1", "model.layers.52.block_sparse_moe.experts.151.w1", "model.layers.52.block_sparse_moe.experts.152.w1", "model.layers.52.block_sparse_moe.experts.153.w1", "model.layers.52.block_sparse_moe.experts.154.w1", "model.layers.52.block_sparse_moe.experts.155.w1", "model.layers.52.block_sparse_moe.experts.156.w1", "model.layers.52.block_sparse_moe.experts.157.w1", "model.layers.52.block_sparse_moe.experts.158.w1", "model.layers.52.block_sparse_moe.experts.159.w1", "model.layers.52.block_sparse_moe.experts.160.w1", "model.layers.52.block_sparse_moe.experts.161.w1", "model.layers.52.block_sparse_moe.experts.162.w1", "model.layers.52.block_sparse_moe.experts.163.w1", "model.layers.52.block_sparse_moe.experts.164.w1", "model.layers.52.block_sparse_moe.experts.165.w1", "model.layers.52.block_sparse_moe.experts.166.w1", "model.layers.52.block_sparse_moe.experts.167.w1", "model.layers.52.block_sparse_moe.experts.168.w1", "model.layers.52.block_sparse_moe.experts.169.w1", "model.layers.52.block_sparse_moe.experts.170.w1", "model.layers.52.block_sparse_moe.experts.171.w1", "model.layers.52.block_sparse_moe.experts.172.w1", "model.layers.52.block_sparse_moe.experts.173.w1", "model.layers.52.block_sparse_moe.experts.174.w1", "model.layers.52.block_sparse_moe.experts.175.w1", "model.layers.52.block_sparse_moe.experts.176.w1", "model.layers.52.block_sparse_moe.experts.177.w1", "model.layers.52.block_sparse_moe.experts.178.w1", "model.layers.52.block_sparse_moe.experts.179.w1", "model.layers.52.block_sparse_moe.experts.180.w1", "model.layers.52.block_sparse_moe.experts.181.w1", "model.layers.52.block_sparse_moe.experts.182.w1", "model.layers.52.block_sparse_moe.experts.183.w1", "model.layers.52.block_sparse_moe.experts.184.w1", "model.layers.52.block_sparse_moe.experts.185.w1", "model.layers.52.block_sparse_moe.experts.186.w1", "model.layers.52.block_sparse_moe.experts.187.w1", "model.layers.52.block_sparse_moe.experts.188.w1", "model.layers.52.block_sparse_moe.experts.189.w1", "model.layers.52.block_sparse_moe.experts.190.w1", "model.layers.52.block_sparse_moe.experts.191.w1", "model.layers.52.block_sparse_moe.experts.192.w1", "model.layers.52.block_sparse_moe.experts.193.w1", "model.layers.52.block_sparse_moe.experts.194.w1", "model.layers.52.block_sparse_moe.experts.195.w1", "model.layers.52.block_sparse_moe.experts.196.w1", "model.layers.52.block_sparse_moe.experts.197.w1", "model.layers.52.block_sparse_moe.experts.198.w1", "model.layers.52.block_sparse_moe.experts.199.w1", "model.layers.52.block_sparse_moe.experts.200.w1", "model.layers.52.block_sparse_moe.experts.201.w1", "model.layers.52.block_sparse_moe.experts.202.w1", "model.layers.52.block_sparse_moe.experts.203.w1", "model.layers.52.block_sparse_moe.experts.204.w1", "model.layers.52.block_sparse_moe.experts.205.w1", "model.layers.52.block_sparse_moe.experts.206.w1", "model.layers.52.block_sparse_moe.experts.207.w1", "model.layers.52.block_sparse_moe.experts.208.w1", "model.layers.52.block_sparse_moe.experts.209.w1", "model.layers.52.block_sparse_moe.experts.210.w1", "model.layers.52.block_sparse_moe.experts.211.w1", "model.layers.52.block_sparse_moe.experts.212.w1", "model.layers.52.block_sparse_moe.experts.213.w1", "model.layers.52.block_sparse_moe.experts.214.w1", "model.layers.52.block_sparse_moe.experts.215.w1", "model.layers.52.block_sparse_moe.experts.216.w1", "model.layers.52.block_sparse_moe.experts.217.w1", "model.layers.52.block_sparse_moe.experts.218.w1", "model.layers.52.block_sparse_moe.experts.219.w1", "model.layers.52.block_sparse_moe.experts.220.w1", "model.layers.52.block_sparse_moe.experts.221.w1", "model.layers.52.block_sparse_moe.experts.222.w1", "model.layers.52.block_sparse_moe.experts.223.w1", "model.layers.52.block_sparse_moe.experts.224.w1", "model.layers.52.block_sparse_moe.experts.225.w1", "model.layers.52.block_sparse_moe.experts.226.w1", "model.layers.52.block_sparse_moe.experts.227.w1", "model.layers.52.block_sparse_moe.experts.228.w1", "model.layers.52.block_sparse_moe.experts.229.w1", "model.layers.52.block_sparse_moe.experts.230.w1", "model.layers.52.block_sparse_moe.experts.231.w1", "model.layers.52.block_sparse_moe.experts.232.w1", "model.layers.52.block_sparse_moe.experts.233.w1", "model.layers.52.block_sparse_moe.experts.234.w1", "model.layers.52.block_sparse_moe.experts.235.w1", "model.layers.52.block_sparse_moe.experts.236.w1", "model.layers.52.block_sparse_moe.experts.237.w1", "model.layers.52.block_sparse_moe.experts.238.w1", "model.layers.52.block_sparse_moe.experts.239.w1", "model.layers.52.block_sparse_moe.experts.240.w1", "model.layers.52.block_sparse_moe.experts.241.w1", "model.layers.52.block_sparse_moe.experts.242.w1", "model.layers.52.block_sparse_moe.experts.243.w1", "model.layers.52.block_sparse_moe.experts.244.w1", "model.layers.52.block_sparse_moe.experts.245.w1", "model.layers.52.block_sparse_moe.experts.246.w1", "model.layers.52.block_sparse_moe.experts.247.w1", "model.layers.52.block_sparse_moe.experts.248.w1", "model.layers.52.block_sparse_moe.experts.249.w1", "model.layers.52.block_sparse_moe.experts.250.w1", "model.layers.52.block_sparse_moe.experts.251.w1", "model.layers.52.block_sparse_moe.experts.252.w1", "model.layers.52.block_sparse_moe.experts.253.w1", "model.layers.52.block_sparse_moe.experts.254.w1", "model.layers.52.block_sparse_moe.experts.255.w1", "model.layers.52.block_sparse_moe.experts.0.w3", "model.layers.52.block_sparse_moe.experts.1.w3", "model.layers.52.block_sparse_moe.experts.2.w3", "model.layers.52.block_sparse_moe.experts.3.w3", "model.layers.52.block_sparse_moe.experts.4.w3", "model.layers.52.block_sparse_moe.experts.5.w3", "model.layers.52.block_sparse_moe.experts.6.w3", "model.layers.52.block_sparse_moe.experts.7.w3", "model.layers.52.block_sparse_moe.experts.8.w3", "model.layers.52.block_sparse_moe.experts.9.w3", "model.layers.52.block_sparse_moe.experts.10.w3", "model.layers.52.block_sparse_moe.experts.11.w3", "model.layers.52.block_sparse_moe.experts.12.w3", "model.layers.52.block_sparse_moe.experts.13.w3", "model.layers.52.block_sparse_moe.experts.14.w3", "model.layers.52.block_sparse_moe.experts.15.w3", "model.layers.52.block_sparse_moe.experts.16.w3", "model.layers.52.block_sparse_moe.experts.17.w3", "model.layers.52.block_sparse_moe.experts.18.w3", "model.layers.52.block_sparse_moe.experts.19.w3", "model.layers.52.block_sparse_moe.experts.20.w3", "model.layers.52.block_sparse_moe.experts.21.w3", "model.layers.52.block_sparse_moe.experts.22.w3", "model.layers.52.block_sparse_moe.experts.23.w3", "model.layers.52.block_sparse_moe.experts.24.w3", "model.layers.52.block_sparse_moe.experts.25.w3", "model.layers.52.block_sparse_moe.experts.26.w3", "model.layers.52.block_sparse_moe.experts.27.w3", "model.layers.52.block_sparse_moe.experts.28.w3", "model.layers.52.block_sparse_moe.experts.29.w3", "model.layers.52.block_sparse_moe.experts.30.w3", "model.layers.52.block_sparse_moe.experts.31.w3", "model.layers.52.block_sparse_moe.experts.32.w3", "model.layers.52.block_sparse_moe.experts.33.w3", "model.layers.52.block_sparse_moe.experts.34.w3", "model.layers.52.block_sparse_moe.experts.35.w3", "model.layers.52.block_sparse_moe.experts.36.w3", "model.layers.52.block_sparse_moe.experts.37.w3", "model.layers.52.block_sparse_moe.experts.38.w3", "model.layers.52.block_sparse_moe.experts.39.w3", "model.layers.52.block_sparse_moe.experts.40.w3", "model.layers.52.block_sparse_moe.experts.41.w3", "model.layers.52.block_sparse_moe.experts.42.w3", "model.layers.52.block_sparse_moe.experts.43.w3", "model.layers.52.block_sparse_moe.experts.44.w3", "model.layers.52.block_sparse_moe.experts.45.w3", "model.layers.52.block_sparse_moe.experts.46.w3", "model.layers.52.block_sparse_moe.experts.47.w3", "model.layers.52.block_sparse_moe.experts.48.w3", "model.layers.52.block_sparse_moe.experts.49.w3", "model.layers.52.block_sparse_moe.experts.50.w3", "model.layers.52.block_sparse_moe.experts.51.w3", "model.layers.52.block_sparse_moe.experts.52.w3", "model.layers.52.block_sparse_moe.experts.53.w3", "model.layers.52.block_sparse_moe.experts.54.w3", "model.layers.52.block_sparse_moe.experts.55.w3", "model.layers.52.block_sparse_moe.experts.56.w3", "model.layers.52.block_sparse_moe.experts.57.w3", "model.layers.52.block_sparse_moe.experts.58.w3", "model.layers.52.block_sparse_moe.experts.59.w3", "model.layers.52.block_sparse_moe.experts.60.w3", "model.layers.52.block_sparse_moe.experts.61.w3", "model.layers.52.block_sparse_moe.experts.62.w3", "model.layers.52.block_sparse_moe.experts.63.w3", "model.layers.52.block_sparse_moe.experts.64.w3", "model.layers.52.block_sparse_moe.experts.65.w3", "model.layers.52.block_sparse_moe.experts.66.w3", "model.layers.52.block_sparse_moe.experts.67.w3", "model.layers.52.block_sparse_moe.experts.68.w3", "model.layers.52.block_sparse_moe.experts.69.w3", "model.layers.52.block_sparse_moe.experts.70.w3", "model.layers.52.block_sparse_moe.experts.71.w3", "model.layers.52.block_sparse_moe.experts.72.w3", "model.layers.52.block_sparse_moe.experts.73.w3", "model.layers.52.block_sparse_moe.experts.74.w3", "model.layers.52.block_sparse_moe.experts.75.w3", "model.layers.52.block_sparse_moe.experts.76.w3", "model.layers.52.block_sparse_moe.experts.77.w3", "model.layers.52.block_sparse_moe.experts.78.w3", "model.layers.52.block_sparse_moe.experts.79.w3", "model.layers.52.block_sparse_moe.experts.80.w3", "model.layers.52.block_sparse_moe.experts.81.w3", "model.layers.52.block_sparse_moe.experts.82.w3", "model.layers.52.block_sparse_moe.experts.83.w3", "model.layers.52.block_sparse_moe.experts.84.w3", "model.layers.52.block_sparse_moe.experts.85.w3", "model.layers.52.block_sparse_moe.experts.86.w3", "model.layers.52.block_sparse_moe.experts.87.w3", "model.layers.52.block_sparse_moe.experts.88.w3", "model.layers.52.block_sparse_moe.experts.89.w3", "model.layers.52.block_sparse_moe.experts.90.w3", "model.layers.52.block_sparse_moe.experts.91.w3", "model.layers.52.block_sparse_moe.experts.92.w3", "model.layers.52.block_sparse_moe.experts.93.w3", "model.layers.52.block_sparse_moe.experts.94.w3", "model.layers.52.block_sparse_moe.experts.95.w3", "model.layers.52.block_sparse_moe.experts.96.w3", "model.layers.52.block_sparse_moe.experts.97.w3", "model.layers.52.block_sparse_moe.experts.98.w3", "model.layers.52.block_sparse_moe.experts.99.w3", "model.layers.52.block_sparse_moe.experts.100.w3", "model.layers.52.block_sparse_moe.experts.101.w3", "model.layers.52.block_sparse_moe.experts.102.w3", "model.layers.52.block_sparse_moe.experts.103.w3", "model.layers.52.block_sparse_moe.experts.104.w3", "model.layers.52.block_sparse_moe.experts.105.w3", "model.layers.52.block_sparse_moe.experts.106.w3", "model.layers.52.block_sparse_moe.experts.107.w3", "model.layers.52.block_sparse_moe.experts.108.w3", "model.layers.52.block_sparse_moe.experts.109.w3", "model.layers.52.block_sparse_moe.experts.110.w3", "model.layers.52.block_sparse_moe.experts.111.w3", "model.layers.52.block_sparse_moe.experts.112.w3", "model.layers.52.block_sparse_moe.experts.113.w3", "model.layers.52.block_sparse_moe.experts.114.w3", "model.layers.52.block_sparse_moe.experts.115.w3", "model.layers.52.block_sparse_moe.experts.116.w3", "model.layers.52.block_sparse_moe.experts.117.w3", "model.layers.52.block_sparse_moe.experts.118.w3", "model.layers.52.block_sparse_moe.experts.119.w3", "model.layers.52.block_sparse_moe.experts.120.w3", "model.layers.52.block_sparse_moe.experts.121.w3", "model.layers.52.block_sparse_moe.experts.122.w3", "model.layers.52.block_sparse_moe.experts.123.w3", "model.layers.52.block_sparse_moe.experts.124.w3", "model.layers.52.block_sparse_moe.experts.125.w3", "model.layers.52.block_sparse_moe.experts.126.w3", "model.layers.52.block_sparse_moe.experts.127.w3", "model.layers.52.block_sparse_moe.experts.128.w3", "model.layers.52.block_sparse_moe.experts.129.w3", "model.layers.52.block_sparse_moe.experts.130.w3", "model.layers.52.block_sparse_moe.experts.131.w3", "model.layers.52.block_sparse_moe.experts.132.w3", "model.layers.52.block_sparse_moe.experts.133.w3", "model.layers.52.block_sparse_moe.experts.134.w3", "model.layers.52.block_sparse_moe.experts.135.w3", "model.layers.52.block_sparse_moe.experts.136.w3", "model.layers.52.block_sparse_moe.experts.137.w3", "model.layers.52.block_sparse_moe.experts.138.w3", "model.layers.52.block_sparse_moe.experts.139.w3", "model.layers.52.block_sparse_moe.experts.140.w3", "model.layers.52.block_sparse_moe.experts.141.w3", "model.layers.52.block_sparse_moe.experts.142.w3", "model.layers.52.block_sparse_moe.experts.143.w3", "model.layers.52.block_sparse_moe.experts.144.w3", "model.layers.52.block_sparse_moe.experts.145.w3", "model.layers.52.block_sparse_moe.experts.146.w3", "model.layers.52.block_sparse_moe.experts.147.w3", "model.layers.52.block_sparse_moe.experts.148.w3", "model.layers.52.block_sparse_moe.experts.149.w3", "model.layers.52.block_sparse_moe.experts.150.w3", "model.layers.52.block_sparse_moe.experts.151.w3", "model.layers.52.block_sparse_moe.experts.152.w3", "model.layers.52.block_sparse_moe.experts.153.w3", "model.layers.52.block_sparse_moe.experts.154.w3", "model.layers.52.block_sparse_moe.experts.155.w3", "model.layers.52.block_sparse_moe.experts.156.w3", "model.layers.52.block_sparse_moe.experts.157.w3", "model.layers.52.block_sparse_moe.experts.158.w3", "model.layers.52.block_sparse_moe.experts.159.w3", "model.layers.52.block_sparse_moe.experts.160.w3", "model.layers.52.block_sparse_moe.experts.161.w3", "model.layers.52.block_sparse_moe.experts.162.w3", "model.layers.52.block_sparse_moe.experts.163.w3", "model.layers.52.block_sparse_moe.experts.164.w3", "model.layers.52.block_sparse_moe.experts.165.w3", "model.layers.52.block_sparse_moe.experts.166.w3", "model.layers.52.block_sparse_moe.experts.167.w3", "model.layers.52.block_sparse_moe.experts.168.w3", "model.layers.52.block_sparse_moe.experts.169.w3", "model.layers.52.block_sparse_moe.experts.170.w3", "model.layers.52.block_sparse_moe.experts.171.w3", "model.layers.52.block_sparse_moe.experts.172.w3", "model.layers.52.block_sparse_moe.experts.173.w3", "model.layers.52.block_sparse_moe.experts.174.w3", "model.layers.52.block_sparse_moe.experts.175.w3", "model.layers.52.block_sparse_moe.experts.176.w3", "model.layers.52.block_sparse_moe.experts.177.w3", "model.layers.52.block_sparse_moe.experts.178.w3", "model.layers.52.block_sparse_moe.experts.179.w3", "model.layers.52.block_sparse_moe.experts.180.w3", "model.layers.52.block_sparse_moe.experts.181.w3", "model.layers.52.block_sparse_moe.experts.182.w3", "model.layers.52.block_sparse_moe.experts.183.w3", "model.layers.52.block_sparse_moe.experts.184.w3", "model.layers.52.block_sparse_moe.experts.185.w3", "model.layers.52.block_sparse_moe.experts.186.w3", "model.layers.52.block_sparse_moe.experts.187.w3", "model.layers.52.block_sparse_moe.experts.188.w3", "model.layers.52.block_sparse_moe.experts.189.w3", "model.layers.52.block_sparse_moe.experts.190.w3", "model.layers.52.block_sparse_moe.experts.191.w3", "model.layers.52.block_sparse_moe.experts.192.w3", "model.layers.52.block_sparse_moe.experts.193.w3", "model.layers.52.block_sparse_moe.experts.194.w3", "model.layers.52.block_sparse_moe.experts.195.w3", "model.layers.52.block_sparse_moe.experts.196.w3", "model.layers.52.block_sparse_moe.experts.197.w3", "model.layers.52.block_sparse_moe.experts.198.w3", "model.layers.52.block_sparse_moe.experts.199.w3", "model.layers.52.block_sparse_moe.experts.200.w3", "model.layers.52.block_sparse_moe.experts.201.w3", "model.layers.52.block_sparse_moe.experts.202.w3", "model.layers.52.block_sparse_moe.experts.203.w3", "model.layers.52.block_sparse_moe.experts.204.w3", "model.layers.52.block_sparse_moe.experts.205.w3", "model.layers.52.block_sparse_moe.experts.206.w3", "model.layers.52.block_sparse_moe.experts.207.w3", "model.layers.52.block_sparse_moe.experts.208.w3", "model.layers.52.block_sparse_moe.experts.209.w3", "model.layers.52.block_sparse_moe.experts.210.w3", "model.layers.52.block_sparse_moe.experts.211.w3", "model.layers.52.block_sparse_moe.experts.212.w3", "model.layers.52.block_sparse_moe.experts.213.w3", "model.layers.52.block_sparse_moe.experts.214.w3", "model.layers.52.block_sparse_moe.experts.215.w3", "model.layers.52.block_sparse_moe.experts.216.w3", "model.layers.52.block_sparse_moe.experts.217.w3", "model.layers.52.block_sparse_moe.experts.218.w3", "model.layers.52.block_sparse_moe.experts.219.w3", "model.layers.52.block_sparse_moe.experts.220.w3", "model.layers.52.block_sparse_moe.experts.221.w3", "model.layers.52.block_sparse_moe.experts.222.w3", "model.layers.52.block_sparse_moe.experts.223.w3", "model.layers.52.block_sparse_moe.experts.224.w3", "model.layers.52.block_sparse_moe.experts.225.w3", "model.layers.52.block_sparse_moe.experts.226.w3", "model.layers.52.block_sparse_moe.experts.227.w3", "model.layers.52.block_sparse_moe.experts.228.w3", "model.layers.52.block_sparse_moe.experts.229.w3", "model.layers.52.block_sparse_moe.experts.230.w3", "model.layers.52.block_sparse_moe.experts.231.w3", "model.layers.52.block_sparse_moe.experts.232.w3", "model.layers.52.block_sparse_moe.experts.233.w3", "model.layers.52.block_sparse_moe.experts.234.w3", "model.layers.52.block_sparse_moe.experts.235.w3", "model.layers.52.block_sparse_moe.experts.236.w3", "model.layers.52.block_sparse_moe.experts.237.w3", "model.layers.52.block_sparse_moe.experts.238.w3", "model.layers.52.block_sparse_moe.experts.239.w3", "model.layers.52.block_sparse_moe.experts.240.w3", "model.layers.52.block_sparse_moe.experts.241.w3", "model.layers.52.block_sparse_moe.experts.242.w3", "model.layers.52.block_sparse_moe.experts.243.w3", "model.layers.52.block_sparse_moe.experts.244.w3", "model.layers.52.block_sparse_moe.experts.245.w3", "model.layers.52.block_sparse_moe.experts.246.w3", "model.layers.52.block_sparse_moe.experts.247.w3", "model.layers.52.block_sparse_moe.experts.248.w3", "model.layers.52.block_sparse_moe.experts.249.w3", "model.layers.52.block_sparse_moe.experts.250.w3", "model.layers.52.block_sparse_moe.experts.251.w3", "model.layers.52.block_sparse_moe.experts.252.w3", "model.layers.52.block_sparse_moe.experts.253.w3", "model.layers.52.block_sparse_moe.experts.254.w3", "model.layers.52.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0015025943517684714, "dbits": 2415919104 } ] }, { "idx": 264, "layers": [ "model.layers.52.block_sparse_moe.experts.0.w2", "model.layers.52.block_sparse_moe.experts.1.w2", "model.layers.52.block_sparse_moe.experts.2.w2", "model.layers.52.block_sparse_moe.experts.3.w2", "model.layers.52.block_sparse_moe.experts.4.w2", "model.layers.52.block_sparse_moe.experts.5.w2", "model.layers.52.block_sparse_moe.experts.6.w2", "model.layers.52.block_sparse_moe.experts.7.w2", "model.layers.52.block_sparse_moe.experts.8.w2", "model.layers.52.block_sparse_moe.experts.9.w2", "model.layers.52.block_sparse_moe.experts.10.w2", "model.layers.52.block_sparse_moe.experts.11.w2", "model.layers.52.block_sparse_moe.experts.12.w2", "model.layers.52.block_sparse_moe.experts.13.w2", "model.layers.52.block_sparse_moe.experts.14.w2", "model.layers.52.block_sparse_moe.experts.15.w2", "model.layers.52.block_sparse_moe.experts.16.w2", "model.layers.52.block_sparse_moe.experts.17.w2", "model.layers.52.block_sparse_moe.experts.18.w2", "model.layers.52.block_sparse_moe.experts.19.w2", "model.layers.52.block_sparse_moe.experts.20.w2", "model.layers.52.block_sparse_moe.experts.21.w2", "model.layers.52.block_sparse_moe.experts.22.w2", "model.layers.52.block_sparse_moe.experts.23.w2", "model.layers.52.block_sparse_moe.experts.24.w2", "model.layers.52.block_sparse_moe.experts.25.w2", "model.layers.52.block_sparse_moe.experts.26.w2", "model.layers.52.block_sparse_moe.experts.27.w2", "model.layers.52.block_sparse_moe.experts.28.w2", "model.layers.52.block_sparse_moe.experts.29.w2", "model.layers.52.block_sparse_moe.experts.30.w2", "model.layers.52.block_sparse_moe.experts.31.w2", "model.layers.52.block_sparse_moe.experts.32.w2", "model.layers.52.block_sparse_moe.experts.33.w2", "model.layers.52.block_sparse_moe.experts.34.w2", "model.layers.52.block_sparse_moe.experts.35.w2", "model.layers.52.block_sparse_moe.experts.36.w2", "model.layers.52.block_sparse_moe.experts.37.w2", "model.layers.52.block_sparse_moe.experts.38.w2", "model.layers.52.block_sparse_moe.experts.39.w2", "model.layers.52.block_sparse_moe.experts.40.w2", "model.layers.52.block_sparse_moe.experts.41.w2", "model.layers.52.block_sparse_moe.experts.42.w2", "model.layers.52.block_sparse_moe.experts.43.w2", "model.layers.52.block_sparse_moe.experts.44.w2", "model.layers.52.block_sparse_moe.experts.45.w2", "model.layers.52.block_sparse_moe.experts.46.w2", "model.layers.52.block_sparse_moe.experts.47.w2", "model.layers.52.block_sparse_moe.experts.48.w2", "model.layers.52.block_sparse_moe.experts.49.w2", "model.layers.52.block_sparse_moe.experts.50.w2", "model.layers.52.block_sparse_moe.experts.51.w2", "model.layers.52.block_sparse_moe.experts.52.w2", "model.layers.52.block_sparse_moe.experts.53.w2", "model.layers.52.block_sparse_moe.experts.54.w2", "model.layers.52.block_sparse_moe.experts.55.w2", "model.layers.52.block_sparse_moe.experts.56.w2", "model.layers.52.block_sparse_moe.experts.57.w2", "model.layers.52.block_sparse_moe.experts.58.w2", "model.layers.52.block_sparse_moe.experts.59.w2", "model.layers.52.block_sparse_moe.experts.60.w2", "model.layers.52.block_sparse_moe.experts.61.w2", "model.layers.52.block_sparse_moe.experts.62.w2", "model.layers.52.block_sparse_moe.experts.63.w2", "model.layers.52.block_sparse_moe.experts.64.w2", "model.layers.52.block_sparse_moe.experts.65.w2", "model.layers.52.block_sparse_moe.experts.66.w2", "model.layers.52.block_sparse_moe.experts.67.w2", "model.layers.52.block_sparse_moe.experts.68.w2", "model.layers.52.block_sparse_moe.experts.69.w2", "model.layers.52.block_sparse_moe.experts.70.w2", "model.layers.52.block_sparse_moe.experts.71.w2", "model.layers.52.block_sparse_moe.experts.72.w2", "model.layers.52.block_sparse_moe.experts.73.w2", "model.layers.52.block_sparse_moe.experts.74.w2", "model.layers.52.block_sparse_moe.experts.75.w2", "model.layers.52.block_sparse_moe.experts.76.w2", "model.layers.52.block_sparse_moe.experts.77.w2", "model.layers.52.block_sparse_moe.experts.78.w2", "model.layers.52.block_sparse_moe.experts.79.w2", "model.layers.52.block_sparse_moe.experts.80.w2", "model.layers.52.block_sparse_moe.experts.81.w2", "model.layers.52.block_sparse_moe.experts.82.w2", "model.layers.52.block_sparse_moe.experts.83.w2", "model.layers.52.block_sparse_moe.experts.84.w2", "model.layers.52.block_sparse_moe.experts.85.w2", "model.layers.52.block_sparse_moe.experts.86.w2", "model.layers.52.block_sparse_moe.experts.87.w2", "model.layers.52.block_sparse_moe.experts.88.w2", "model.layers.52.block_sparse_moe.experts.89.w2", "model.layers.52.block_sparse_moe.experts.90.w2", "model.layers.52.block_sparse_moe.experts.91.w2", "model.layers.52.block_sparse_moe.experts.92.w2", "model.layers.52.block_sparse_moe.experts.93.w2", "model.layers.52.block_sparse_moe.experts.94.w2", "model.layers.52.block_sparse_moe.experts.95.w2", "model.layers.52.block_sparse_moe.experts.96.w2", "model.layers.52.block_sparse_moe.experts.97.w2", "model.layers.52.block_sparse_moe.experts.98.w2", "model.layers.52.block_sparse_moe.experts.99.w2", "model.layers.52.block_sparse_moe.experts.100.w2", "model.layers.52.block_sparse_moe.experts.101.w2", "model.layers.52.block_sparse_moe.experts.102.w2", "model.layers.52.block_sparse_moe.experts.103.w2", "model.layers.52.block_sparse_moe.experts.104.w2", "model.layers.52.block_sparse_moe.experts.105.w2", "model.layers.52.block_sparse_moe.experts.106.w2", "model.layers.52.block_sparse_moe.experts.107.w2", "model.layers.52.block_sparse_moe.experts.108.w2", "model.layers.52.block_sparse_moe.experts.109.w2", "model.layers.52.block_sparse_moe.experts.110.w2", "model.layers.52.block_sparse_moe.experts.111.w2", "model.layers.52.block_sparse_moe.experts.112.w2", "model.layers.52.block_sparse_moe.experts.113.w2", "model.layers.52.block_sparse_moe.experts.114.w2", "model.layers.52.block_sparse_moe.experts.115.w2", "model.layers.52.block_sparse_moe.experts.116.w2", "model.layers.52.block_sparse_moe.experts.117.w2", "model.layers.52.block_sparse_moe.experts.118.w2", "model.layers.52.block_sparse_moe.experts.119.w2", "model.layers.52.block_sparse_moe.experts.120.w2", "model.layers.52.block_sparse_moe.experts.121.w2", "model.layers.52.block_sparse_moe.experts.122.w2", "model.layers.52.block_sparse_moe.experts.123.w2", "model.layers.52.block_sparse_moe.experts.124.w2", "model.layers.52.block_sparse_moe.experts.125.w2", "model.layers.52.block_sparse_moe.experts.126.w2", "model.layers.52.block_sparse_moe.experts.127.w2", "model.layers.52.block_sparse_moe.experts.128.w2", "model.layers.52.block_sparse_moe.experts.129.w2", "model.layers.52.block_sparse_moe.experts.130.w2", "model.layers.52.block_sparse_moe.experts.131.w2", "model.layers.52.block_sparse_moe.experts.132.w2", "model.layers.52.block_sparse_moe.experts.133.w2", "model.layers.52.block_sparse_moe.experts.134.w2", "model.layers.52.block_sparse_moe.experts.135.w2", "model.layers.52.block_sparse_moe.experts.136.w2", "model.layers.52.block_sparse_moe.experts.137.w2", "model.layers.52.block_sparse_moe.experts.138.w2", "model.layers.52.block_sparse_moe.experts.139.w2", "model.layers.52.block_sparse_moe.experts.140.w2", "model.layers.52.block_sparse_moe.experts.141.w2", "model.layers.52.block_sparse_moe.experts.142.w2", "model.layers.52.block_sparse_moe.experts.143.w2", "model.layers.52.block_sparse_moe.experts.144.w2", "model.layers.52.block_sparse_moe.experts.145.w2", "model.layers.52.block_sparse_moe.experts.146.w2", "model.layers.52.block_sparse_moe.experts.147.w2", "model.layers.52.block_sparse_moe.experts.148.w2", "model.layers.52.block_sparse_moe.experts.149.w2", "model.layers.52.block_sparse_moe.experts.150.w2", "model.layers.52.block_sparse_moe.experts.151.w2", "model.layers.52.block_sparse_moe.experts.152.w2", "model.layers.52.block_sparse_moe.experts.153.w2", "model.layers.52.block_sparse_moe.experts.154.w2", "model.layers.52.block_sparse_moe.experts.155.w2", "model.layers.52.block_sparse_moe.experts.156.w2", "model.layers.52.block_sparse_moe.experts.157.w2", "model.layers.52.block_sparse_moe.experts.158.w2", "model.layers.52.block_sparse_moe.experts.159.w2", "model.layers.52.block_sparse_moe.experts.160.w2", "model.layers.52.block_sparse_moe.experts.161.w2", "model.layers.52.block_sparse_moe.experts.162.w2", "model.layers.52.block_sparse_moe.experts.163.w2", "model.layers.52.block_sparse_moe.experts.164.w2", "model.layers.52.block_sparse_moe.experts.165.w2", "model.layers.52.block_sparse_moe.experts.166.w2", "model.layers.52.block_sparse_moe.experts.167.w2", "model.layers.52.block_sparse_moe.experts.168.w2", "model.layers.52.block_sparse_moe.experts.169.w2", "model.layers.52.block_sparse_moe.experts.170.w2", "model.layers.52.block_sparse_moe.experts.171.w2", "model.layers.52.block_sparse_moe.experts.172.w2", "model.layers.52.block_sparse_moe.experts.173.w2", "model.layers.52.block_sparse_moe.experts.174.w2", "model.layers.52.block_sparse_moe.experts.175.w2", "model.layers.52.block_sparse_moe.experts.176.w2", "model.layers.52.block_sparse_moe.experts.177.w2", "model.layers.52.block_sparse_moe.experts.178.w2", "model.layers.52.block_sparse_moe.experts.179.w2", "model.layers.52.block_sparse_moe.experts.180.w2", "model.layers.52.block_sparse_moe.experts.181.w2", "model.layers.52.block_sparse_moe.experts.182.w2", "model.layers.52.block_sparse_moe.experts.183.w2", "model.layers.52.block_sparse_moe.experts.184.w2", "model.layers.52.block_sparse_moe.experts.185.w2", "model.layers.52.block_sparse_moe.experts.186.w2", "model.layers.52.block_sparse_moe.experts.187.w2", "model.layers.52.block_sparse_moe.experts.188.w2", "model.layers.52.block_sparse_moe.experts.189.w2", "model.layers.52.block_sparse_moe.experts.190.w2", "model.layers.52.block_sparse_moe.experts.191.w2", "model.layers.52.block_sparse_moe.experts.192.w2", "model.layers.52.block_sparse_moe.experts.193.w2", "model.layers.52.block_sparse_moe.experts.194.w2", "model.layers.52.block_sparse_moe.experts.195.w2", "model.layers.52.block_sparse_moe.experts.196.w2", "model.layers.52.block_sparse_moe.experts.197.w2", "model.layers.52.block_sparse_moe.experts.198.w2", "model.layers.52.block_sparse_moe.experts.199.w2", "model.layers.52.block_sparse_moe.experts.200.w2", "model.layers.52.block_sparse_moe.experts.201.w2", "model.layers.52.block_sparse_moe.experts.202.w2", "model.layers.52.block_sparse_moe.experts.203.w2", "model.layers.52.block_sparse_moe.experts.204.w2", "model.layers.52.block_sparse_moe.experts.205.w2", "model.layers.52.block_sparse_moe.experts.206.w2", "model.layers.52.block_sparse_moe.experts.207.w2", "model.layers.52.block_sparse_moe.experts.208.w2", "model.layers.52.block_sparse_moe.experts.209.w2", "model.layers.52.block_sparse_moe.experts.210.w2", "model.layers.52.block_sparse_moe.experts.211.w2", "model.layers.52.block_sparse_moe.experts.212.w2", "model.layers.52.block_sparse_moe.experts.213.w2", "model.layers.52.block_sparse_moe.experts.214.w2", "model.layers.52.block_sparse_moe.experts.215.w2", "model.layers.52.block_sparse_moe.experts.216.w2", "model.layers.52.block_sparse_moe.experts.217.w2", "model.layers.52.block_sparse_moe.experts.218.w2", "model.layers.52.block_sparse_moe.experts.219.w2", "model.layers.52.block_sparse_moe.experts.220.w2", "model.layers.52.block_sparse_moe.experts.221.w2", "model.layers.52.block_sparse_moe.experts.222.w2", "model.layers.52.block_sparse_moe.experts.223.w2", "model.layers.52.block_sparse_moe.experts.224.w2", "model.layers.52.block_sparse_moe.experts.225.w2", "model.layers.52.block_sparse_moe.experts.226.w2", "model.layers.52.block_sparse_moe.experts.227.w2", "model.layers.52.block_sparse_moe.experts.228.w2", "model.layers.52.block_sparse_moe.experts.229.w2", "model.layers.52.block_sparse_moe.experts.230.w2", "model.layers.52.block_sparse_moe.experts.231.w2", "model.layers.52.block_sparse_moe.experts.232.w2", "model.layers.52.block_sparse_moe.experts.233.w2", "model.layers.52.block_sparse_moe.experts.234.w2", "model.layers.52.block_sparse_moe.experts.235.w2", "model.layers.52.block_sparse_moe.experts.236.w2", "model.layers.52.block_sparse_moe.experts.237.w2", "model.layers.52.block_sparse_moe.experts.238.w2", "model.layers.52.block_sparse_moe.experts.239.w2", "model.layers.52.block_sparse_moe.experts.240.w2", "model.layers.52.block_sparse_moe.experts.241.w2", "model.layers.52.block_sparse_moe.experts.242.w2", "model.layers.52.block_sparse_moe.experts.243.w2", "model.layers.52.block_sparse_moe.experts.244.w2", "model.layers.52.block_sparse_moe.experts.245.w2", "model.layers.52.block_sparse_moe.experts.246.w2", "model.layers.52.block_sparse_moe.experts.247.w2", "model.layers.52.block_sparse_moe.experts.248.w2", "model.layers.52.block_sparse_moe.experts.249.w2", "model.layers.52.block_sparse_moe.experts.250.w2", "model.layers.52.block_sparse_moe.experts.251.w2", "model.layers.52.block_sparse_moe.experts.252.w2", "model.layers.52.block_sparse_moe.experts.253.w2", "model.layers.52.block_sparse_moe.experts.254.w2", "model.layers.52.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0005568951368331021, "dbits": 1207959552 } ] }, { "idx": 265, "layers": [ "model.layers.53.self_attn.q_proj" ], "candidates": [ { "dkld": -0.004669699072837763, "dbits": 18874368 } ] }, { "idx": 266, "layers": [ "model.layers.53.self_attn.k_proj", "model.layers.53.self_attn.v_proj" ], "candidates": [ { "dkld": -0.028607535362243586, "dbits": 6291456 } ] }, { "idx": 267, "layers": [ "model.layers.53.self_attn.o_proj" ], "candidates": [ { "dkld": -0.004444086551666171, "dbits": 18874368 } ] }, { "idx": 268, "layers": [ "model.layers.53.block_sparse_moe.experts.0.w1", "model.layers.53.block_sparse_moe.experts.1.w1", "model.layers.53.block_sparse_moe.experts.2.w1", "model.layers.53.block_sparse_moe.experts.3.w1", "model.layers.53.block_sparse_moe.experts.4.w1", "model.layers.53.block_sparse_moe.experts.5.w1", "model.layers.53.block_sparse_moe.experts.6.w1", "model.layers.53.block_sparse_moe.experts.7.w1", "model.layers.53.block_sparse_moe.experts.8.w1", "model.layers.53.block_sparse_moe.experts.9.w1", "model.layers.53.block_sparse_moe.experts.10.w1", "model.layers.53.block_sparse_moe.experts.11.w1", "model.layers.53.block_sparse_moe.experts.12.w1", "model.layers.53.block_sparse_moe.experts.13.w1", "model.layers.53.block_sparse_moe.experts.14.w1", "model.layers.53.block_sparse_moe.experts.15.w1", "model.layers.53.block_sparse_moe.experts.16.w1", "model.layers.53.block_sparse_moe.experts.17.w1", "model.layers.53.block_sparse_moe.experts.18.w1", "model.layers.53.block_sparse_moe.experts.19.w1", "model.layers.53.block_sparse_moe.experts.20.w1", "model.layers.53.block_sparse_moe.experts.21.w1", "model.layers.53.block_sparse_moe.experts.22.w1", "model.layers.53.block_sparse_moe.experts.23.w1", "model.layers.53.block_sparse_moe.experts.24.w1", "model.layers.53.block_sparse_moe.experts.25.w1", "model.layers.53.block_sparse_moe.experts.26.w1", "model.layers.53.block_sparse_moe.experts.27.w1", "model.layers.53.block_sparse_moe.experts.28.w1", "model.layers.53.block_sparse_moe.experts.29.w1", "model.layers.53.block_sparse_moe.experts.30.w1", "model.layers.53.block_sparse_moe.experts.31.w1", "model.layers.53.block_sparse_moe.experts.32.w1", "model.layers.53.block_sparse_moe.experts.33.w1", "model.layers.53.block_sparse_moe.experts.34.w1", "model.layers.53.block_sparse_moe.experts.35.w1", "model.layers.53.block_sparse_moe.experts.36.w1", "model.layers.53.block_sparse_moe.experts.37.w1", "model.layers.53.block_sparse_moe.experts.38.w1", "model.layers.53.block_sparse_moe.experts.39.w1", "model.layers.53.block_sparse_moe.experts.40.w1", "model.layers.53.block_sparse_moe.experts.41.w1", "model.layers.53.block_sparse_moe.experts.42.w1", "model.layers.53.block_sparse_moe.experts.43.w1", "model.layers.53.block_sparse_moe.experts.44.w1", "model.layers.53.block_sparse_moe.experts.45.w1", "model.layers.53.block_sparse_moe.experts.46.w1", "model.layers.53.block_sparse_moe.experts.47.w1", "model.layers.53.block_sparse_moe.experts.48.w1", "model.layers.53.block_sparse_moe.experts.49.w1", "model.layers.53.block_sparse_moe.experts.50.w1", "model.layers.53.block_sparse_moe.experts.51.w1", "model.layers.53.block_sparse_moe.experts.52.w1", "model.layers.53.block_sparse_moe.experts.53.w1", "model.layers.53.block_sparse_moe.experts.54.w1", "model.layers.53.block_sparse_moe.experts.55.w1", "model.layers.53.block_sparse_moe.experts.56.w1", "model.layers.53.block_sparse_moe.experts.57.w1", "model.layers.53.block_sparse_moe.experts.58.w1", "model.layers.53.block_sparse_moe.experts.59.w1", "model.layers.53.block_sparse_moe.experts.60.w1", "model.layers.53.block_sparse_moe.experts.61.w1", "model.layers.53.block_sparse_moe.experts.62.w1", "model.layers.53.block_sparse_moe.experts.63.w1", "model.layers.53.block_sparse_moe.experts.64.w1", "model.layers.53.block_sparse_moe.experts.65.w1", "model.layers.53.block_sparse_moe.experts.66.w1", "model.layers.53.block_sparse_moe.experts.67.w1", "model.layers.53.block_sparse_moe.experts.68.w1", "model.layers.53.block_sparse_moe.experts.69.w1", "model.layers.53.block_sparse_moe.experts.70.w1", "model.layers.53.block_sparse_moe.experts.71.w1", "model.layers.53.block_sparse_moe.experts.72.w1", "model.layers.53.block_sparse_moe.experts.73.w1", "model.layers.53.block_sparse_moe.experts.74.w1", "model.layers.53.block_sparse_moe.experts.75.w1", "model.layers.53.block_sparse_moe.experts.76.w1", "model.layers.53.block_sparse_moe.experts.77.w1", "model.layers.53.block_sparse_moe.experts.78.w1", "model.layers.53.block_sparse_moe.experts.79.w1", "model.layers.53.block_sparse_moe.experts.80.w1", "model.layers.53.block_sparse_moe.experts.81.w1", "model.layers.53.block_sparse_moe.experts.82.w1", "model.layers.53.block_sparse_moe.experts.83.w1", "model.layers.53.block_sparse_moe.experts.84.w1", "model.layers.53.block_sparse_moe.experts.85.w1", "model.layers.53.block_sparse_moe.experts.86.w1", "model.layers.53.block_sparse_moe.experts.87.w1", "model.layers.53.block_sparse_moe.experts.88.w1", "model.layers.53.block_sparse_moe.experts.89.w1", "model.layers.53.block_sparse_moe.experts.90.w1", "model.layers.53.block_sparse_moe.experts.91.w1", "model.layers.53.block_sparse_moe.experts.92.w1", "model.layers.53.block_sparse_moe.experts.93.w1", "model.layers.53.block_sparse_moe.experts.94.w1", "model.layers.53.block_sparse_moe.experts.95.w1", "model.layers.53.block_sparse_moe.experts.96.w1", "model.layers.53.block_sparse_moe.experts.97.w1", "model.layers.53.block_sparse_moe.experts.98.w1", "model.layers.53.block_sparse_moe.experts.99.w1", "model.layers.53.block_sparse_moe.experts.100.w1", "model.layers.53.block_sparse_moe.experts.101.w1", "model.layers.53.block_sparse_moe.experts.102.w1", "model.layers.53.block_sparse_moe.experts.103.w1", "model.layers.53.block_sparse_moe.experts.104.w1", "model.layers.53.block_sparse_moe.experts.105.w1", "model.layers.53.block_sparse_moe.experts.106.w1", "model.layers.53.block_sparse_moe.experts.107.w1", "model.layers.53.block_sparse_moe.experts.108.w1", "model.layers.53.block_sparse_moe.experts.109.w1", "model.layers.53.block_sparse_moe.experts.110.w1", "model.layers.53.block_sparse_moe.experts.111.w1", "model.layers.53.block_sparse_moe.experts.112.w1", "model.layers.53.block_sparse_moe.experts.113.w1", "model.layers.53.block_sparse_moe.experts.114.w1", "model.layers.53.block_sparse_moe.experts.115.w1", "model.layers.53.block_sparse_moe.experts.116.w1", "model.layers.53.block_sparse_moe.experts.117.w1", "model.layers.53.block_sparse_moe.experts.118.w1", "model.layers.53.block_sparse_moe.experts.119.w1", "model.layers.53.block_sparse_moe.experts.120.w1", "model.layers.53.block_sparse_moe.experts.121.w1", "model.layers.53.block_sparse_moe.experts.122.w1", "model.layers.53.block_sparse_moe.experts.123.w1", "model.layers.53.block_sparse_moe.experts.124.w1", "model.layers.53.block_sparse_moe.experts.125.w1", "model.layers.53.block_sparse_moe.experts.126.w1", "model.layers.53.block_sparse_moe.experts.127.w1", "model.layers.53.block_sparse_moe.experts.128.w1", "model.layers.53.block_sparse_moe.experts.129.w1", "model.layers.53.block_sparse_moe.experts.130.w1", "model.layers.53.block_sparse_moe.experts.131.w1", "model.layers.53.block_sparse_moe.experts.132.w1", "model.layers.53.block_sparse_moe.experts.133.w1", "model.layers.53.block_sparse_moe.experts.134.w1", "model.layers.53.block_sparse_moe.experts.135.w1", "model.layers.53.block_sparse_moe.experts.136.w1", "model.layers.53.block_sparse_moe.experts.137.w1", "model.layers.53.block_sparse_moe.experts.138.w1", "model.layers.53.block_sparse_moe.experts.139.w1", "model.layers.53.block_sparse_moe.experts.140.w1", "model.layers.53.block_sparse_moe.experts.141.w1", "model.layers.53.block_sparse_moe.experts.142.w1", "model.layers.53.block_sparse_moe.experts.143.w1", "model.layers.53.block_sparse_moe.experts.144.w1", "model.layers.53.block_sparse_moe.experts.145.w1", "model.layers.53.block_sparse_moe.experts.146.w1", "model.layers.53.block_sparse_moe.experts.147.w1", "model.layers.53.block_sparse_moe.experts.148.w1", "model.layers.53.block_sparse_moe.experts.149.w1", "model.layers.53.block_sparse_moe.experts.150.w1", "model.layers.53.block_sparse_moe.experts.151.w1", "model.layers.53.block_sparse_moe.experts.152.w1", "model.layers.53.block_sparse_moe.experts.153.w1", "model.layers.53.block_sparse_moe.experts.154.w1", "model.layers.53.block_sparse_moe.experts.155.w1", "model.layers.53.block_sparse_moe.experts.156.w1", "model.layers.53.block_sparse_moe.experts.157.w1", "model.layers.53.block_sparse_moe.experts.158.w1", "model.layers.53.block_sparse_moe.experts.159.w1", "model.layers.53.block_sparse_moe.experts.160.w1", "model.layers.53.block_sparse_moe.experts.161.w1", "model.layers.53.block_sparse_moe.experts.162.w1", "model.layers.53.block_sparse_moe.experts.163.w1", "model.layers.53.block_sparse_moe.experts.164.w1", "model.layers.53.block_sparse_moe.experts.165.w1", "model.layers.53.block_sparse_moe.experts.166.w1", "model.layers.53.block_sparse_moe.experts.167.w1", "model.layers.53.block_sparse_moe.experts.168.w1", "model.layers.53.block_sparse_moe.experts.169.w1", "model.layers.53.block_sparse_moe.experts.170.w1", "model.layers.53.block_sparse_moe.experts.171.w1", "model.layers.53.block_sparse_moe.experts.172.w1", "model.layers.53.block_sparse_moe.experts.173.w1", "model.layers.53.block_sparse_moe.experts.174.w1", "model.layers.53.block_sparse_moe.experts.175.w1", "model.layers.53.block_sparse_moe.experts.176.w1", "model.layers.53.block_sparse_moe.experts.177.w1", "model.layers.53.block_sparse_moe.experts.178.w1", "model.layers.53.block_sparse_moe.experts.179.w1", "model.layers.53.block_sparse_moe.experts.180.w1", "model.layers.53.block_sparse_moe.experts.181.w1", "model.layers.53.block_sparse_moe.experts.182.w1", "model.layers.53.block_sparse_moe.experts.183.w1", "model.layers.53.block_sparse_moe.experts.184.w1", "model.layers.53.block_sparse_moe.experts.185.w1", "model.layers.53.block_sparse_moe.experts.186.w1", "model.layers.53.block_sparse_moe.experts.187.w1", "model.layers.53.block_sparse_moe.experts.188.w1", "model.layers.53.block_sparse_moe.experts.189.w1", "model.layers.53.block_sparse_moe.experts.190.w1", "model.layers.53.block_sparse_moe.experts.191.w1", "model.layers.53.block_sparse_moe.experts.192.w1", "model.layers.53.block_sparse_moe.experts.193.w1", "model.layers.53.block_sparse_moe.experts.194.w1", "model.layers.53.block_sparse_moe.experts.195.w1", "model.layers.53.block_sparse_moe.experts.196.w1", "model.layers.53.block_sparse_moe.experts.197.w1", "model.layers.53.block_sparse_moe.experts.198.w1", "model.layers.53.block_sparse_moe.experts.199.w1", "model.layers.53.block_sparse_moe.experts.200.w1", "model.layers.53.block_sparse_moe.experts.201.w1", "model.layers.53.block_sparse_moe.experts.202.w1", "model.layers.53.block_sparse_moe.experts.203.w1", "model.layers.53.block_sparse_moe.experts.204.w1", "model.layers.53.block_sparse_moe.experts.205.w1", "model.layers.53.block_sparse_moe.experts.206.w1", "model.layers.53.block_sparse_moe.experts.207.w1", "model.layers.53.block_sparse_moe.experts.208.w1", "model.layers.53.block_sparse_moe.experts.209.w1", "model.layers.53.block_sparse_moe.experts.210.w1", "model.layers.53.block_sparse_moe.experts.211.w1", "model.layers.53.block_sparse_moe.experts.212.w1", "model.layers.53.block_sparse_moe.experts.213.w1", "model.layers.53.block_sparse_moe.experts.214.w1", "model.layers.53.block_sparse_moe.experts.215.w1", "model.layers.53.block_sparse_moe.experts.216.w1", "model.layers.53.block_sparse_moe.experts.217.w1", "model.layers.53.block_sparse_moe.experts.218.w1", "model.layers.53.block_sparse_moe.experts.219.w1", "model.layers.53.block_sparse_moe.experts.220.w1", "model.layers.53.block_sparse_moe.experts.221.w1", "model.layers.53.block_sparse_moe.experts.222.w1", "model.layers.53.block_sparse_moe.experts.223.w1", "model.layers.53.block_sparse_moe.experts.224.w1", "model.layers.53.block_sparse_moe.experts.225.w1", "model.layers.53.block_sparse_moe.experts.226.w1", "model.layers.53.block_sparse_moe.experts.227.w1", "model.layers.53.block_sparse_moe.experts.228.w1", "model.layers.53.block_sparse_moe.experts.229.w1", "model.layers.53.block_sparse_moe.experts.230.w1", "model.layers.53.block_sparse_moe.experts.231.w1", "model.layers.53.block_sparse_moe.experts.232.w1", "model.layers.53.block_sparse_moe.experts.233.w1", "model.layers.53.block_sparse_moe.experts.234.w1", "model.layers.53.block_sparse_moe.experts.235.w1", "model.layers.53.block_sparse_moe.experts.236.w1", "model.layers.53.block_sparse_moe.experts.237.w1", "model.layers.53.block_sparse_moe.experts.238.w1", "model.layers.53.block_sparse_moe.experts.239.w1", "model.layers.53.block_sparse_moe.experts.240.w1", "model.layers.53.block_sparse_moe.experts.241.w1", "model.layers.53.block_sparse_moe.experts.242.w1", "model.layers.53.block_sparse_moe.experts.243.w1", "model.layers.53.block_sparse_moe.experts.244.w1", "model.layers.53.block_sparse_moe.experts.245.w1", "model.layers.53.block_sparse_moe.experts.246.w1", "model.layers.53.block_sparse_moe.experts.247.w1", "model.layers.53.block_sparse_moe.experts.248.w1", "model.layers.53.block_sparse_moe.experts.249.w1", "model.layers.53.block_sparse_moe.experts.250.w1", "model.layers.53.block_sparse_moe.experts.251.w1", "model.layers.53.block_sparse_moe.experts.252.w1", "model.layers.53.block_sparse_moe.experts.253.w1", "model.layers.53.block_sparse_moe.experts.254.w1", "model.layers.53.block_sparse_moe.experts.255.w1", "model.layers.53.block_sparse_moe.experts.0.w3", "model.layers.53.block_sparse_moe.experts.1.w3", "model.layers.53.block_sparse_moe.experts.2.w3", "model.layers.53.block_sparse_moe.experts.3.w3", "model.layers.53.block_sparse_moe.experts.4.w3", "model.layers.53.block_sparse_moe.experts.5.w3", "model.layers.53.block_sparse_moe.experts.6.w3", "model.layers.53.block_sparse_moe.experts.7.w3", "model.layers.53.block_sparse_moe.experts.8.w3", "model.layers.53.block_sparse_moe.experts.9.w3", "model.layers.53.block_sparse_moe.experts.10.w3", "model.layers.53.block_sparse_moe.experts.11.w3", "model.layers.53.block_sparse_moe.experts.12.w3", "model.layers.53.block_sparse_moe.experts.13.w3", "model.layers.53.block_sparse_moe.experts.14.w3", "model.layers.53.block_sparse_moe.experts.15.w3", "model.layers.53.block_sparse_moe.experts.16.w3", "model.layers.53.block_sparse_moe.experts.17.w3", "model.layers.53.block_sparse_moe.experts.18.w3", "model.layers.53.block_sparse_moe.experts.19.w3", "model.layers.53.block_sparse_moe.experts.20.w3", "model.layers.53.block_sparse_moe.experts.21.w3", "model.layers.53.block_sparse_moe.experts.22.w3", "model.layers.53.block_sparse_moe.experts.23.w3", "model.layers.53.block_sparse_moe.experts.24.w3", "model.layers.53.block_sparse_moe.experts.25.w3", "model.layers.53.block_sparse_moe.experts.26.w3", "model.layers.53.block_sparse_moe.experts.27.w3", "model.layers.53.block_sparse_moe.experts.28.w3", "model.layers.53.block_sparse_moe.experts.29.w3", "model.layers.53.block_sparse_moe.experts.30.w3", "model.layers.53.block_sparse_moe.experts.31.w3", "model.layers.53.block_sparse_moe.experts.32.w3", "model.layers.53.block_sparse_moe.experts.33.w3", "model.layers.53.block_sparse_moe.experts.34.w3", "model.layers.53.block_sparse_moe.experts.35.w3", "model.layers.53.block_sparse_moe.experts.36.w3", "model.layers.53.block_sparse_moe.experts.37.w3", "model.layers.53.block_sparse_moe.experts.38.w3", "model.layers.53.block_sparse_moe.experts.39.w3", "model.layers.53.block_sparse_moe.experts.40.w3", "model.layers.53.block_sparse_moe.experts.41.w3", "model.layers.53.block_sparse_moe.experts.42.w3", "model.layers.53.block_sparse_moe.experts.43.w3", "model.layers.53.block_sparse_moe.experts.44.w3", "model.layers.53.block_sparse_moe.experts.45.w3", "model.layers.53.block_sparse_moe.experts.46.w3", "model.layers.53.block_sparse_moe.experts.47.w3", "model.layers.53.block_sparse_moe.experts.48.w3", "model.layers.53.block_sparse_moe.experts.49.w3", "model.layers.53.block_sparse_moe.experts.50.w3", "model.layers.53.block_sparse_moe.experts.51.w3", "model.layers.53.block_sparse_moe.experts.52.w3", "model.layers.53.block_sparse_moe.experts.53.w3", "model.layers.53.block_sparse_moe.experts.54.w3", "model.layers.53.block_sparse_moe.experts.55.w3", "model.layers.53.block_sparse_moe.experts.56.w3", "model.layers.53.block_sparse_moe.experts.57.w3", "model.layers.53.block_sparse_moe.experts.58.w3", "model.layers.53.block_sparse_moe.experts.59.w3", "model.layers.53.block_sparse_moe.experts.60.w3", "model.layers.53.block_sparse_moe.experts.61.w3", "model.layers.53.block_sparse_moe.experts.62.w3", "model.layers.53.block_sparse_moe.experts.63.w3", "model.layers.53.block_sparse_moe.experts.64.w3", "model.layers.53.block_sparse_moe.experts.65.w3", "model.layers.53.block_sparse_moe.experts.66.w3", "model.layers.53.block_sparse_moe.experts.67.w3", "model.layers.53.block_sparse_moe.experts.68.w3", "model.layers.53.block_sparse_moe.experts.69.w3", "model.layers.53.block_sparse_moe.experts.70.w3", "model.layers.53.block_sparse_moe.experts.71.w3", "model.layers.53.block_sparse_moe.experts.72.w3", "model.layers.53.block_sparse_moe.experts.73.w3", "model.layers.53.block_sparse_moe.experts.74.w3", "model.layers.53.block_sparse_moe.experts.75.w3", "model.layers.53.block_sparse_moe.experts.76.w3", "model.layers.53.block_sparse_moe.experts.77.w3", "model.layers.53.block_sparse_moe.experts.78.w3", "model.layers.53.block_sparse_moe.experts.79.w3", "model.layers.53.block_sparse_moe.experts.80.w3", "model.layers.53.block_sparse_moe.experts.81.w3", "model.layers.53.block_sparse_moe.experts.82.w3", "model.layers.53.block_sparse_moe.experts.83.w3", "model.layers.53.block_sparse_moe.experts.84.w3", "model.layers.53.block_sparse_moe.experts.85.w3", "model.layers.53.block_sparse_moe.experts.86.w3", "model.layers.53.block_sparse_moe.experts.87.w3", "model.layers.53.block_sparse_moe.experts.88.w3", "model.layers.53.block_sparse_moe.experts.89.w3", "model.layers.53.block_sparse_moe.experts.90.w3", "model.layers.53.block_sparse_moe.experts.91.w3", "model.layers.53.block_sparse_moe.experts.92.w3", "model.layers.53.block_sparse_moe.experts.93.w3", "model.layers.53.block_sparse_moe.experts.94.w3", "model.layers.53.block_sparse_moe.experts.95.w3", "model.layers.53.block_sparse_moe.experts.96.w3", "model.layers.53.block_sparse_moe.experts.97.w3", "model.layers.53.block_sparse_moe.experts.98.w3", "model.layers.53.block_sparse_moe.experts.99.w3", "model.layers.53.block_sparse_moe.experts.100.w3", "model.layers.53.block_sparse_moe.experts.101.w3", "model.layers.53.block_sparse_moe.experts.102.w3", "model.layers.53.block_sparse_moe.experts.103.w3", "model.layers.53.block_sparse_moe.experts.104.w3", "model.layers.53.block_sparse_moe.experts.105.w3", "model.layers.53.block_sparse_moe.experts.106.w3", "model.layers.53.block_sparse_moe.experts.107.w3", "model.layers.53.block_sparse_moe.experts.108.w3", "model.layers.53.block_sparse_moe.experts.109.w3", "model.layers.53.block_sparse_moe.experts.110.w3", "model.layers.53.block_sparse_moe.experts.111.w3", "model.layers.53.block_sparse_moe.experts.112.w3", "model.layers.53.block_sparse_moe.experts.113.w3", "model.layers.53.block_sparse_moe.experts.114.w3", "model.layers.53.block_sparse_moe.experts.115.w3", "model.layers.53.block_sparse_moe.experts.116.w3", "model.layers.53.block_sparse_moe.experts.117.w3", "model.layers.53.block_sparse_moe.experts.118.w3", "model.layers.53.block_sparse_moe.experts.119.w3", "model.layers.53.block_sparse_moe.experts.120.w3", "model.layers.53.block_sparse_moe.experts.121.w3", "model.layers.53.block_sparse_moe.experts.122.w3", "model.layers.53.block_sparse_moe.experts.123.w3", "model.layers.53.block_sparse_moe.experts.124.w3", "model.layers.53.block_sparse_moe.experts.125.w3", "model.layers.53.block_sparse_moe.experts.126.w3", "model.layers.53.block_sparse_moe.experts.127.w3", "model.layers.53.block_sparse_moe.experts.128.w3", "model.layers.53.block_sparse_moe.experts.129.w3", "model.layers.53.block_sparse_moe.experts.130.w3", "model.layers.53.block_sparse_moe.experts.131.w3", "model.layers.53.block_sparse_moe.experts.132.w3", "model.layers.53.block_sparse_moe.experts.133.w3", "model.layers.53.block_sparse_moe.experts.134.w3", "model.layers.53.block_sparse_moe.experts.135.w3", "model.layers.53.block_sparse_moe.experts.136.w3", "model.layers.53.block_sparse_moe.experts.137.w3", "model.layers.53.block_sparse_moe.experts.138.w3", "model.layers.53.block_sparse_moe.experts.139.w3", "model.layers.53.block_sparse_moe.experts.140.w3", "model.layers.53.block_sparse_moe.experts.141.w3", "model.layers.53.block_sparse_moe.experts.142.w3", "model.layers.53.block_sparse_moe.experts.143.w3", "model.layers.53.block_sparse_moe.experts.144.w3", "model.layers.53.block_sparse_moe.experts.145.w3", "model.layers.53.block_sparse_moe.experts.146.w3", "model.layers.53.block_sparse_moe.experts.147.w3", "model.layers.53.block_sparse_moe.experts.148.w3", "model.layers.53.block_sparse_moe.experts.149.w3", "model.layers.53.block_sparse_moe.experts.150.w3", "model.layers.53.block_sparse_moe.experts.151.w3", "model.layers.53.block_sparse_moe.experts.152.w3", "model.layers.53.block_sparse_moe.experts.153.w3", "model.layers.53.block_sparse_moe.experts.154.w3", "model.layers.53.block_sparse_moe.experts.155.w3", "model.layers.53.block_sparse_moe.experts.156.w3", "model.layers.53.block_sparse_moe.experts.157.w3", "model.layers.53.block_sparse_moe.experts.158.w3", "model.layers.53.block_sparse_moe.experts.159.w3", "model.layers.53.block_sparse_moe.experts.160.w3", "model.layers.53.block_sparse_moe.experts.161.w3", "model.layers.53.block_sparse_moe.experts.162.w3", "model.layers.53.block_sparse_moe.experts.163.w3", "model.layers.53.block_sparse_moe.experts.164.w3", "model.layers.53.block_sparse_moe.experts.165.w3", "model.layers.53.block_sparse_moe.experts.166.w3", "model.layers.53.block_sparse_moe.experts.167.w3", "model.layers.53.block_sparse_moe.experts.168.w3", "model.layers.53.block_sparse_moe.experts.169.w3", "model.layers.53.block_sparse_moe.experts.170.w3", "model.layers.53.block_sparse_moe.experts.171.w3", "model.layers.53.block_sparse_moe.experts.172.w3", "model.layers.53.block_sparse_moe.experts.173.w3", "model.layers.53.block_sparse_moe.experts.174.w3", "model.layers.53.block_sparse_moe.experts.175.w3", "model.layers.53.block_sparse_moe.experts.176.w3", "model.layers.53.block_sparse_moe.experts.177.w3", "model.layers.53.block_sparse_moe.experts.178.w3", "model.layers.53.block_sparse_moe.experts.179.w3", "model.layers.53.block_sparse_moe.experts.180.w3", "model.layers.53.block_sparse_moe.experts.181.w3", "model.layers.53.block_sparse_moe.experts.182.w3", "model.layers.53.block_sparse_moe.experts.183.w3", "model.layers.53.block_sparse_moe.experts.184.w3", "model.layers.53.block_sparse_moe.experts.185.w3", "model.layers.53.block_sparse_moe.experts.186.w3", "model.layers.53.block_sparse_moe.experts.187.w3", "model.layers.53.block_sparse_moe.experts.188.w3", "model.layers.53.block_sparse_moe.experts.189.w3", "model.layers.53.block_sparse_moe.experts.190.w3", "model.layers.53.block_sparse_moe.experts.191.w3", "model.layers.53.block_sparse_moe.experts.192.w3", "model.layers.53.block_sparse_moe.experts.193.w3", "model.layers.53.block_sparse_moe.experts.194.w3", "model.layers.53.block_sparse_moe.experts.195.w3", "model.layers.53.block_sparse_moe.experts.196.w3", "model.layers.53.block_sparse_moe.experts.197.w3", "model.layers.53.block_sparse_moe.experts.198.w3", "model.layers.53.block_sparse_moe.experts.199.w3", "model.layers.53.block_sparse_moe.experts.200.w3", "model.layers.53.block_sparse_moe.experts.201.w3", "model.layers.53.block_sparse_moe.experts.202.w3", "model.layers.53.block_sparse_moe.experts.203.w3", "model.layers.53.block_sparse_moe.experts.204.w3", "model.layers.53.block_sparse_moe.experts.205.w3", "model.layers.53.block_sparse_moe.experts.206.w3", "model.layers.53.block_sparse_moe.experts.207.w3", "model.layers.53.block_sparse_moe.experts.208.w3", "model.layers.53.block_sparse_moe.experts.209.w3", "model.layers.53.block_sparse_moe.experts.210.w3", "model.layers.53.block_sparse_moe.experts.211.w3", "model.layers.53.block_sparse_moe.experts.212.w3", "model.layers.53.block_sparse_moe.experts.213.w3", "model.layers.53.block_sparse_moe.experts.214.w3", "model.layers.53.block_sparse_moe.experts.215.w3", "model.layers.53.block_sparse_moe.experts.216.w3", "model.layers.53.block_sparse_moe.experts.217.w3", "model.layers.53.block_sparse_moe.experts.218.w3", "model.layers.53.block_sparse_moe.experts.219.w3", "model.layers.53.block_sparse_moe.experts.220.w3", "model.layers.53.block_sparse_moe.experts.221.w3", "model.layers.53.block_sparse_moe.experts.222.w3", "model.layers.53.block_sparse_moe.experts.223.w3", "model.layers.53.block_sparse_moe.experts.224.w3", "model.layers.53.block_sparse_moe.experts.225.w3", "model.layers.53.block_sparse_moe.experts.226.w3", "model.layers.53.block_sparse_moe.experts.227.w3", "model.layers.53.block_sparse_moe.experts.228.w3", "model.layers.53.block_sparse_moe.experts.229.w3", "model.layers.53.block_sparse_moe.experts.230.w3", "model.layers.53.block_sparse_moe.experts.231.w3", "model.layers.53.block_sparse_moe.experts.232.w3", "model.layers.53.block_sparse_moe.experts.233.w3", "model.layers.53.block_sparse_moe.experts.234.w3", "model.layers.53.block_sparse_moe.experts.235.w3", "model.layers.53.block_sparse_moe.experts.236.w3", "model.layers.53.block_sparse_moe.experts.237.w3", "model.layers.53.block_sparse_moe.experts.238.w3", "model.layers.53.block_sparse_moe.experts.239.w3", "model.layers.53.block_sparse_moe.experts.240.w3", "model.layers.53.block_sparse_moe.experts.241.w3", "model.layers.53.block_sparse_moe.experts.242.w3", "model.layers.53.block_sparse_moe.experts.243.w3", "model.layers.53.block_sparse_moe.experts.244.w3", "model.layers.53.block_sparse_moe.experts.245.w3", "model.layers.53.block_sparse_moe.experts.246.w3", "model.layers.53.block_sparse_moe.experts.247.w3", "model.layers.53.block_sparse_moe.experts.248.w3", "model.layers.53.block_sparse_moe.experts.249.w3", "model.layers.53.block_sparse_moe.experts.250.w3", "model.layers.53.block_sparse_moe.experts.251.w3", "model.layers.53.block_sparse_moe.experts.252.w3", "model.layers.53.block_sparse_moe.experts.253.w3", "model.layers.53.block_sparse_moe.experts.254.w3", "model.layers.53.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0010054945945738858, "dbits": 2415919104 } ] }, { "idx": 269, "layers": [ "model.layers.53.block_sparse_moe.experts.0.w2", "model.layers.53.block_sparse_moe.experts.1.w2", "model.layers.53.block_sparse_moe.experts.2.w2", "model.layers.53.block_sparse_moe.experts.3.w2", "model.layers.53.block_sparse_moe.experts.4.w2", "model.layers.53.block_sparse_moe.experts.5.w2", "model.layers.53.block_sparse_moe.experts.6.w2", "model.layers.53.block_sparse_moe.experts.7.w2", "model.layers.53.block_sparse_moe.experts.8.w2", "model.layers.53.block_sparse_moe.experts.9.w2", "model.layers.53.block_sparse_moe.experts.10.w2", "model.layers.53.block_sparse_moe.experts.11.w2", "model.layers.53.block_sparse_moe.experts.12.w2", "model.layers.53.block_sparse_moe.experts.13.w2", "model.layers.53.block_sparse_moe.experts.14.w2", "model.layers.53.block_sparse_moe.experts.15.w2", "model.layers.53.block_sparse_moe.experts.16.w2", "model.layers.53.block_sparse_moe.experts.17.w2", "model.layers.53.block_sparse_moe.experts.18.w2", "model.layers.53.block_sparse_moe.experts.19.w2", "model.layers.53.block_sparse_moe.experts.20.w2", "model.layers.53.block_sparse_moe.experts.21.w2", "model.layers.53.block_sparse_moe.experts.22.w2", "model.layers.53.block_sparse_moe.experts.23.w2", "model.layers.53.block_sparse_moe.experts.24.w2", "model.layers.53.block_sparse_moe.experts.25.w2", "model.layers.53.block_sparse_moe.experts.26.w2", "model.layers.53.block_sparse_moe.experts.27.w2", "model.layers.53.block_sparse_moe.experts.28.w2", "model.layers.53.block_sparse_moe.experts.29.w2", "model.layers.53.block_sparse_moe.experts.30.w2", "model.layers.53.block_sparse_moe.experts.31.w2", "model.layers.53.block_sparse_moe.experts.32.w2", "model.layers.53.block_sparse_moe.experts.33.w2", "model.layers.53.block_sparse_moe.experts.34.w2", "model.layers.53.block_sparse_moe.experts.35.w2", "model.layers.53.block_sparse_moe.experts.36.w2", "model.layers.53.block_sparse_moe.experts.37.w2", "model.layers.53.block_sparse_moe.experts.38.w2", "model.layers.53.block_sparse_moe.experts.39.w2", "model.layers.53.block_sparse_moe.experts.40.w2", "model.layers.53.block_sparse_moe.experts.41.w2", "model.layers.53.block_sparse_moe.experts.42.w2", "model.layers.53.block_sparse_moe.experts.43.w2", "model.layers.53.block_sparse_moe.experts.44.w2", "model.layers.53.block_sparse_moe.experts.45.w2", "model.layers.53.block_sparse_moe.experts.46.w2", "model.layers.53.block_sparse_moe.experts.47.w2", "model.layers.53.block_sparse_moe.experts.48.w2", "model.layers.53.block_sparse_moe.experts.49.w2", "model.layers.53.block_sparse_moe.experts.50.w2", "model.layers.53.block_sparse_moe.experts.51.w2", "model.layers.53.block_sparse_moe.experts.52.w2", "model.layers.53.block_sparse_moe.experts.53.w2", "model.layers.53.block_sparse_moe.experts.54.w2", "model.layers.53.block_sparse_moe.experts.55.w2", "model.layers.53.block_sparse_moe.experts.56.w2", "model.layers.53.block_sparse_moe.experts.57.w2", "model.layers.53.block_sparse_moe.experts.58.w2", "model.layers.53.block_sparse_moe.experts.59.w2", "model.layers.53.block_sparse_moe.experts.60.w2", "model.layers.53.block_sparse_moe.experts.61.w2", "model.layers.53.block_sparse_moe.experts.62.w2", "model.layers.53.block_sparse_moe.experts.63.w2", "model.layers.53.block_sparse_moe.experts.64.w2", "model.layers.53.block_sparse_moe.experts.65.w2", "model.layers.53.block_sparse_moe.experts.66.w2", "model.layers.53.block_sparse_moe.experts.67.w2", "model.layers.53.block_sparse_moe.experts.68.w2", "model.layers.53.block_sparse_moe.experts.69.w2", "model.layers.53.block_sparse_moe.experts.70.w2", "model.layers.53.block_sparse_moe.experts.71.w2", "model.layers.53.block_sparse_moe.experts.72.w2", "model.layers.53.block_sparse_moe.experts.73.w2", "model.layers.53.block_sparse_moe.experts.74.w2", "model.layers.53.block_sparse_moe.experts.75.w2", "model.layers.53.block_sparse_moe.experts.76.w2", "model.layers.53.block_sparse_moe.experts.77.w2", "model.layers.53.block_sparse_moe.experts.78.w2", "model.layers.53.block_sparse_moe.experts.79.w2", "model.layers.53.block_sparse_moe.experts.80.w2", "model.layers.53.block_sparse_moe.experts.81.w2", "model.layers.53.block_sparse_moe.experts.82.w2", "model.layers.53.block_sparse_moe.experts.83.w2", "model.layers.53.block_sparse_moe.experts.84.w2", "model.layers.53.block_sparse_moe.experts.85.w2", "model.layers.53.block_sparse_moe.experts.86.w2", "model.layers.53.block_sparse_moe.experts.87.w2", "model.layers.53.block_sparse_moe.experts.88.w2", "model.layers.53.block_sparse_moe.experts.89.w2", "model.layers.53.block_sparse_moe.experts.90.w2", "model.layers.53.block_sparse_moe.experts.91.w2", "model.layers.53.block_sparse_moe.experts.92.w2", "model.layers.53.block_sparse_moe.experts.93.w2", "model.layers.53.block_sparse_moe.experts.94.w2", "model.layers.53.block_sparse_moe.experts.95.w2", "model.layers.53.block_sparse_moe.experts.96.w2", "model.layers.53.block_sparse_moe.experts.97.w2", "model.layers.53.block_sparse_moe.experts.98.w2", "model.layers.53.block_sparse_moe.experts.99.w2", "model.layers.53.block_sparse_moe.experts.100.w2", "model.layers.53.block_sparse_moe.experts.101.w2", "model.layers.53.block_sparse_moe.experts.102.w2", "model.layers.53.block_sparse_moe.experts.103.w2", "model.layers.53.block_sparse_moe.experts.104.w2", "model.layers.53.block_sparse_moe.experts.105.w2", "model.layers.53.block_sparse_moe.experts.106.w2", "model.layers.53.block_sparse_moe.experts.107.w2", "model.layers.53.block_sparse_moe.experts.108.w2", "model.layers.53.block_sparse_moe.experts.109.w2", "model.layers.53.block_sparse_moe.experts.110.w2", "model.layers.53.block_sparse_moe.experts.111.w2", "model.layers.53.block_sparse_moe.experts.112.w2", "model.layers.53.block_sparse_moe.experts.113.w2", "model.layers.53.block_sparse_moe.experts.114.w2", "model.layers.53.block_sparse_moe.experts.115.w2", "model.layers.53.block_sparse_moe.experts.116.w2", "model.layers.53.block_sparse_moe.experts.117.w2", "model.layers.53.block_sparse_moe.experts.118.w2", "model.layers.53.block_sparse_moe.experts.119.w2", "model.layers.53.block_sparse_moe.experts.120.w2", "model.layers.53.block_sparse_moe.experts.121.w2", "model.layers.53.block_sparse_moe.experts.122.w2", "model.layers.53.block_sparse_moe.experts.123.w2", "model.layers.53.block_sparse_moe.experts.124.w2", "model.layers.53.block_sparse_moe.experts.125.w2", "model.layers.53.block_sparse_moe.experts.126.w2", "model.layers.53.block_sparse_moe.experts.127.w2", "model.layers.53.block_sparse_moe.experts.128.w2", "model.layers.53.block_sparse_moe.experts.129.w2", "model.layers.53.block_sparse_moe.experts.130.w2", "model.layers.53.block_sparse_moe.experts.131.w2", "model.layers.53.block_sparse_moe.experts.132.w2", "model.layers.53.block_sparse_moe.experts.133.w2", "model.layers.53.block_sparse_moe.experts.134.w2", "model.layers.53.block_sparse_moe.experts.135.w2", "model.layers.53.block_sparse_moe.experts.136.w2", "model.layers.53.block_sparse_moe.experts.137.w2", "model.layers.53.block_sparse_moe.experts.138.w2", "model.layers.53.block_sparse_moe.experts.139.w2", "model.layers.53.block_sparse_moe.experts.140.w2", "model.layers.53.block_sparse_moe.experts.141.w2", "model.layers.53.block_sparse_moe.experts.142.w2", "model.layers.53.block_sparse_moe.experts.143.w2", "model.layers.53.block_sparse_moe.experts.144.w2", "model.layers.53.block_sparse_moe.experts.145.w2", "model.layers.53.block_sparse_moe.experts.146.w2", "model.layers.53.block_sparse_moe.experts.147.w2", "model.layers.53.block_sparse_moe.experts.148.w2", "model.layers.53.block_sparse_moe.experts.149.w2", "model.layers.53.block_sparse_moe.experts.150.w2", "model.layers.53.block_sparse_moe.experts.151.w2", "model.layers.53.block_sparse_moe.experts.152.w2", "model.layers.53.block_sparse_moe.experts.153.w2", "model.layers.53.block_sparse_moe.experts.154.w2", "model.layers.53.block_sparse_moe.experts.155.w2", "model.layers.53.block_sparse_moe.experts.156.w2", "model.layers.53.block_sparse_moe.experts.157.w2", "model.layers.53.block_sparse_moe.experts.158.w2", "model.layers.53.block_sparse_moe.experts.159.w2", "model.layers.53.block_sparse_moe.experts.160.w2", "model.layers.53.block_sparse_moe.experts.161.w2", "model.layers.53.block_sparse_moe.experts.162.w2", "model.layers.53.block_sparse_moe.experts.163.w2", "model.layers.53.block_sparse_moe.experts.164.w2", "model.layers.53.block_sparse_moe.experts.165.w2", "model.layers.53.block_sparse_moe.experts.166.w2", "model.layers.53.block_sparse_moe.experts.167.w2", "model.layers.53.block_sparse_moe.experts.168.w2", "model.layers.53.block_sparse_moe.experts.169.w2", "model.layers.53.block_sparse_moe.experts.170.w2", "model.layers.53.block_sparse_moe.experts.171.w2", "model.layers.53.block_sparse_moe.experts.172.w2", "model.layers.53.block_sparse_moe.experts.173.w2", "model.layers.53.block_sparse_moe.experts.174.w2", "model.layers.53.block_sparse_moe.experts.175.w2", "model.layers.53.block_sparse_moe.experts.176.w2", "model.layers.53.block_sparse_moe.experts.177.w2", "model.layers.53.block_sparse_moe.experts.178.w2", "model.layers.53.block_sparse_moe.experts.179.w2", "model.layers.53.block_sparse_moe.experts.180.w2", "model.layers.53.block_sparse_moe.experts.181.w2", "model.layers.53.block_sparse_moe.experts.182.w2", "model.layers.53.block_sparse_moe.experts.183.w2", "model.layers.53.block_sparse_moe.experts.184.w2", "model.layers.53.block_sparse_moe.experts.185.w2", "model.layers.53.block_sparse_moe.experts.186.w2", "model.layers.53.block_sparse_moe.experts.187.w2", "model.layers.53.block_sparse_moe.experts.188.w2", "model.layers.53.block_sparse_moe.experts.189.w2", "model.layers.53.block_sparse_moe.experts.190.w2", "model.layers.53.block_sparse_moe.experts.191.w2", "model.layers.53.block_sparse_moe.experts.192.w2", "model.layers.53.block_sparse_moe.experts.193.w2", "model.layers.53.block_sparse_moe.experts.194.w2", "model.layers.53.block_sparse_moe.experts.195.w2", "model.layers.53.block_sparse_moe.experts.196.w2", "model.layers.53.block_sparse_moe.experts.197.w2", "model.layers.53.block_sparse_moe.experts.198.w2", "model.layers.53.block_sparse_moe.experts.199.w2", "model.layers.53.block_sparse_moe.experts.200.w2", "model.layers.53.block_sparse_moe.experts.201.w2", "model.layers.53.block_sparse_moe.experts.202.w2", "model.layers.53.block_sparse_moe.experts.203.w2", "model.layers.53.block_sparse_moe.experts.204.w2", "model.layers.53.block_sparse_moe.experts.205.w2", "model.layers.53.block_sparse_moe.experts.206.w2", "model.layers.53.block_sparse_moe.experts.207.w2", "model.layers.53.block_sparse_moe.experts.208.w2", "model.layers.53.block_sparse_moe.experts.209.w2", "model.layers.53.block_sparse_moe.experts.210.w2", "model.layers.53.block_sparse_moe.experts.211.w2", "model.layers.53.block_sparse_moe.experts.212.w2", "model.layers.53.block_sparse_moe.experts.213.w2", "model.layers.53.block_sparse_moe.experts.214.w2", "model.layers.53.block_sparse_moe.experts.215.w2", "model.layers.53.block_sparse_moe.experts.216.w2", "model.layers.53.block_sparse_moe.experts.217.w2", "model.layers.53.block_sparse_moe.experts.218.w2", "model.layers.53.block_sparse_moe.experts.219.w2", "model.layers.53.block_sparse_moe.experts.220.w2", "model.layers.53.block_sparse_moe.experts.221.w2", "model.layers.53.block_sparse_moe.experts.222.w2", "model.layers.53.block_sparse_moe.experts.223.w2", "model.layers.53.block_sparse_moe.experts.224.w2", "model.layers.53.block_sparse_moe.experts.225.w2", "model.layers.53.block_sparse_moe.experts.226.w2", "model.layers.53.block_sparse_moe.experts.227.w2", "model.layers.53.block_sparse_moe.experts.228.w2", "model.layers.53.block_sparse_moe.experts.229.w2", "model.layers.53.block_sparse_moe.experts.230.w2", "model.layers.53.block_sparse_moe.experts.231.w2", "model.layers.53.block_sparse_moe.experts.232.w2", "model.layers.53.block_sparse_moe.experts.233.w2", "model.layers.53.block_sparse_moe.experts.234.w2", "model.layers.53.block_sparse_moe.experts.235.w2", "model.layers.53.block_sparse_moe.experts.236.w2", "model.layers.53.block_sparse_moe.experts.237.w2", "model.layers.53.block_sparse_moe.experts.238.w2", "model.layers.53.block_sparse_moe.experts.239.w2", "model.layers.53.block_sparse_moe.experts.240.w2", "model.layers.53.block_sparse_moe.experts.241.w2", "model.layers.53.block_sparse_moe.experts.242.w2", "model.layers.53.block_sparse_moe.experts.243.w2", "model.layers.53.block_sparse_moe.experts.244.w2", "model.layers.53.block_sparse_moe.experts.245.w2", "model.layers.53.block_sparse_moe.experts.246.w2", "model.layers.53.block_sparse_moe.experts.247.w2", "model.layers.53.block_sparse_moe.experts.248.w2", "model.layers.53.block_sparse_moe.experts.249.w2", "model.layers.53.block_sparse_moe.experts.250.w2", "model.layers.53.block_sparse_moe.experts.251.w2", "model.layers.53.block_sparse_moe.experts.252.w2", "model.layers.53.block_sparse_moe.experts.253.w2", "model.layers.53.block_sparse_moe.experts.254.w2", "model.layers.53.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0008233845233918125, "dbits": 1207959552 } ] }, { "idx": 270, "layers": [ "model.layers.54.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0005081146955490112, "dbits": 18874368 } ] }, { "idx": 271, "layers": [ "model.layers.54.self_attn.k_proj", "model.layers.54.self_attn.v_proj" ], "candidates": [ { "dkld": -0.004490509629249573, "dbits": 6291456 } ] }, { "idx": 272, "layers": [ "model.layers.54.self_attn.o_proj" ], "candidates": [ { "dkld": -0.01857744753360746, "dbits": 18874368 } ] }, { "idx": 273, "layers": [ "model.layers.54.block_sparse_moe.experts.0.w1", "model.layers.54.block_sparse_moe.experts.1.w1", "model.layers.54.block_sparse_moe.experts.2.w1", "model.layers.54.block_sparse_moe.experts.3.w1", "model.layers.54.block_sparse_moe.experts.4.w1", "model.layers.54.block_sparse_moe.experts.5.w1", "model.layers.54.block_sparse_moe.experts.6.w1", "model.layers.54.block_sparse_moe.experts.7.w1", "model.layers.54.block_sparse_moe.experts.8.w1", "model.layers.54.block_sparse_moe.experts.9.w1", "model.layers.54.block_sparse_moe.experts.10.w1", "model.layers.54.block_sparse_moe.experts.11.w1", "model.layers.54.block_sparse_moe.experts.12.w1", "model.layers.54.block_sparse_moe.experts.13.w1", "model.layers.54.block_sparse_moe.experts.14.w1", "model.layers.54.block_sparse_moe.experts.15.w1", "model.layers.54.block_sparse_moe.experts.16.w1", "model.layers.54.block_sparse_moe.experts.17.w1", "model.layers.54.block_sparse_moe.experts.18.w1", "model.layers.54.block_sparse_moe.experts.19.w1", "model.layers.54.block_sparse_moe.experts.20.w1", "model.layers.54.block_sparse_moe.experts.21.w1", "model.layers.54.block_sparse_moe.experts.22.w1", "model.layers.54.block_sparse_moe.experts.23.w1", "model.layers.54.block_sparse_moe.experts.24.w1", "model.layers.54.block_sparse_moe.experts.25.w1", "model.layers.54.block_sparse_moe.experts.26.w1", "model.layers.54.block_sparse_moe.experts.27.w1", "model.layers.54.block_sparse_moe.experts.28.w1", "model.layers.54.block_sparse_moe.experts.29.w1", "model.layers.54.block_sparse_moe.experts.30.w1", "model.layers.54.block_sparse_moe.experts.31.w1", "model.layers.54.block_sparse_moe.experts.32.w1", "model.layers.54.block_sparse_moe.experts.33.w1", "model.layers.54.block_sparse_moe.experts.34.w1", "model.layers.54.block_sparse_moe.experts.35.w1", "model.layers.54.block_sparse_moe.experts.36.w1", "model.layers.54.block_sparse_moe.experts.37.w1", "model.layers.54.block_sparse_moe.experts.38.w1", "model.layers.54.block_sparse_moe.experts.39.w1", "model.layers.54.block_sparse_moe.experts.40.w1", "model.layers.54.block_sparse_moe.experts.41.w1", "model.layers.54.block_sparse_moe.experts.42.w1", "model.layers.54.block_sparse_moe.experts.43.w1", "model.layers.54.block_sparse_moe.experts.44.w1", "model.layers.54.block_sparse_moe.experts.45.w1", "model.layers.54.block_sparse_moe.experts.46.w1", "model.layers.54.block_sparse_moe.experts.47.w1", "model.layers.54.block_sparse_moe.experts.48.w1", "model.layers.54.block_sparse_moe.experts.49.w1", "model.layers.54.block_sparse_moe.experts.50.w1", "model.layers.54.block_sparse_moe.experts.51.w1", "model.layers.54.block_sparse_moe.experts.52.w1", "model.layers.54.block_sparse_moe.experts.53.w1", "model.layers.54.block_sparse_moe.experts.54.w1", "model.layers.54.block_sparse_moe.experts.55.w1", "model.layers.54.block_sparse_moe.experts.56.w1", "model.layers.54.block_sparse_moe.experts.57.w1", "model.layers.54.block_sparse_moe.experts.58.w1", "model.layers.54.block_sparse_moe.experts.59.w1", "model.layers.54.block_sparse_moe.experts.60.w1", "model.layers.54.block_sparse_moe.experts.61.w1", "model.layers.54.block_sparse_moe.experts.62.w1", "model.layers.54.block_sparse_moe.experts.63.w1", "model.layers.54.block_sparse_moe.experts.64.w1", "model.layers.54.block_sparse_moe.experts.65.w1", "model.layers.54.block_sparse_moe.experts.66.w1", "model.layers.54.block_sparse_moe.experts.67.w1", "model.layers.54.block_sparse_moe.experts.68.w1", "model.layers.54.block_sparse_moe.experts.69.w1", "model.layers.54.block_sparse_moe.experts.70.w1", "model.layers.54.block_sparse_moe.experts.71.w1", "model.layers.54.block_sparse_moe.experts.72.w1", "model.layers.54.block_sparse_moe.experts.73.w1", "model.layers.54.block_sparse_moe.experts.74.w1", "model.layers.54.block_sparse_moe.experts.75.w1", "model.layers.54.block_sparse_moe.experts.76.w1", "model.layers.54.block_sparse_moe.experts.77.w1", "model.layers.54.block_sparse_moe.experts.78.w1", "model.layers.54.block_sparse_moe.experts.79.w1", "model.layers.54.block_sparse_moe.experts.80.w1", "model.layers.54.block_sparse_moe.experts.81.w1", "model.layers.54.block_sparse_moe.experts.82.w1", "model.layers.54.block_sparse_moe.experts.83.w1", "model.layers.54.block_sparse_moe.experts.84.w1", "model.layers.54.block_sparse_moe.experts.85.w1", "model.layers.54.block_sparse_moe.experts.86.w1", "model.layers.54.block_sparse_moe.experts.87.w1", "model.layers.54.block_sparse_moe.experts.88.w1", "model.layers.54.block_sparse_moe.experts.89.w1", "model.layers.54.block_sparse_moe.experts.90.w1", "model.layers.54.block_sparse_moe.experts.91.w1", "model.layers.54.block_sparse_moe.experts.92.w1", "model.layers.54.block_sparse_moe.experts.93.w1", "model.layers.54.block_sparse_moe.experts.94.w1", "model.layers.54.block_sparse_moe.experts.95.w1", "model.layers.54.block_sparse_moe.experts.96.w1", "model.layers.54.block_sparse_moe.experts.97.w1", "model.layers.54.block_sparse_moe.experts.98.w1", "model.layers.54.block_sparse_moe.experts.99.w1", "model.layers.54.block_sparse_moe.experts.100.w1", "model.layers.54.block_sparse_moe.experts.101.w1", "model.layers.54.block_sparse_moe.experts.102.w1", "model.layers.54.block_sparse_moe.experts.103.w1", "model.layers.54.block_sparse_moe.experts.104.w1", "model.layers.54.block_sparse_moe.experts.105.w1", "model.layers.54.block_sparse_moe.experts.106.w1", "model.layers.54.block_sparse_moe.experts.107.w1", "model.layers.54.block_sparse_moe.experts.108.w1", "model.layers.54.block_sparse_moe.experts.109.w1", "model.layers.54.block_sparse_moe.experts.110.w1", "model.layers.54.block_sparse_moe.experts.111.w1", "model.layers.54.block_sparse_moe.experts.112.w1", "model.layers.54.block_sparse_moe.experts.113.w1", "model.layers.54.block_sparse_moe.experts.114.w1", "model.layers.54.block_sparse_moe.experts.115.w1", "model.layers.54.block_sparse_moe.experts.116.w1", "model.layers.54.block_sparse_moe.experts.117.w1", "model.layers.54.block_sparse_moe.experts.118.w1", "model.layers.54.block_sparse_moe.experts.119.w1", "model.layers.54.block_sparse_moe.experts.120.w1", "model.layers.54.block_sparse_moe.experts.121.w1", "model.layers.54.block_sparse_moe.experts.122.w1", "model.layers.54.block_sparse_moe.experts.123.w1", "model.layers.54.block_sparse_moe.experts.124.w1", "model.layers.54.block_sparse_moe.experts.125.w1", "model.layers.54.block_sparse_moe.experts.126.w1", "model.layers.54.block_sparse_moe.experts.127.w1", "model.layers.54.block_sparse_moe.experts.128.w1", "model.layers.54.block_sparse_moe.experts.129.w1", "model.layers.54.block_sparse_moe.experts.130.w1", "model.layers.54.block_sparse_moe.experts.131.w1", "model.layers.54.block_sparse_moe.experts.132.w1", "model.layers.54.block_sparse_moe.experts.133.w1", "model.layers.54.block_sparse_moe.experts.134.w1", "model.layers.54.block_sparse_moe.experts.135.w1", "model.layers.54.block_sparse_moe.experts.136.w1", "model.layers.54.block_sparse_moe.experts.137.w1", "model.layers.54.block_sparse_moe.experts.138.w1", "model.layers.54.block_sparse_moe.experts.139.w1", "model.layers.54.block_sparse_moe.experts.140.w1", "model.layers.54.block_sparse_moe.experts.141.w1", "model.layers.54.block_sparse_moe.experts.142.w1", "model.layers.54.block_sparse_moe.experts.143.w1", "model.layers.54.block_sparse_moe.experts.144.w1", "model.layers.54.block_sparse_moe.experts.145.w1", "model.layers.54.block_sparse_moe.experts.146.w1", "model.layers.54.block_sparse_moe.experts.147.w1", "model.layers.54.block_sparse_moe.experts.148.w1", "model.layers.54.block_sparse_moe.experts.149.w1", "model.layers.54.block_sparse_moe.experts.150.w1", "model.layers.54.block_sparse_moe.experts.151.w1", "model.layers.54.block_sparse_moe.experts.152.w1", "model.layers.54.block_sparse_moe.experts.153.w1", "model.layers.54.block_sparse_moe.experts.154.w1", "model.layers.54.block_sparse_moe.experts.155.w1", "model.layers.54.block_sparse_moe.experts.156.w1", "model.layers.54.block_sparse_moe.experts.157.w1", "model.layers.54.block_sparse_moe.experts.158.w1", "model.layers.54.block_sparse_moe.experts.159.w1", "model.layers.54.block_sparse_moe.experts.160.w1", "model.layers.54.block_sparse_moe.experts.161.w1", "model.layers.54.block_sparse_moe.experts.162.w1", "model.layers.54.block_sparse_moe.experts.163.w1", "model.layers.54.block_sparse_moe.experts.164.w1", "model.layers.54.block_sparse_moe.experts.165.w1", "model.layers.54.block_sparse_moe.experts.166.w1", "model.layers.54.block_sparse_moe.experts.167.w1", "model.layers.54.block_sparse_moe.experts.168.w1", "model.layers.54.block_sparse_moe.experts.169.w1", "model.layers.54.block_sparse_moe.experts.170.w1", "model.layers.54.block_sparse_moe.experts.171.w1", "model.layers.54.block_sparse_moe.experts.172.w1", "model.layers.54.block_sparse_moe.experts.173.w1", "model.layers.54.block_sparse_moe.experts.174.w1", "model.layers.54.block_sparse_moe.experts.175.w1", "model.layers.54.block_sparse_moe.experts.176.w1", "model.layers.54.block_sparse_moe.experts.177.w1", "model.layers.54.block_sparse_moe.experts.178.w1", "model.layers.54.block_sparse_moe.experts.179.w1", "model.layers.54.block_sparse_moe.experts.180.w1", "model.layers.54.block_sparse_moe.experts.181.w1", "model.layers.54.block_sparse_moe.experts.182.w1", "model.layers.54.block_sparse_moe.experts.183.w1", "model.layers.54.block_sparse_moe.experts.184.w1", "model.layers.54.block_sparse_moe.experts.185.w1", "model.layers.54.block_sparse_moe.experts.186.w1", "model.layers.54.block_sparse_moe.experts.187.w1", "model.layers.54.block_sparse_moe.experts.188.w1", "model.layers.54.block_sparse_moe.experts.189.w1", "model.layers.54.block_sparse_moe.experts.190.w1", "model.layers.54.block_sparse_moe.experts.191.w1", "model.layers.54.block_sparse_moe.experts.192.w1", "model.layers.54.block_sparse_moe.experts.193.w1", "model.layers.54.block_sparse_moe.experts.194.w1", "model.layers.54.block_sparse_moe.experts.195.w1", "model.layers.54.block_sparse_moe.experts.196.w1", "model.layers.54.block_sparse_moe.experts.197.w1", "model.layers.54.block_sparse_moe.experts.198.w1", "model.layers.54.block_sparse_moe.experts.199.w1", "model.layers.54.block_sparse_moe.experts.200.w1", "model.layers.54.block_sparse_moe.experts.201.w1", "model.layers.54.block_sparse_moe.experts.202.w1", "model.layers.54.block_sparse_moe.experts.203.w1", "model.layers.54.block_sparse_moe.experts.204.w1", "model.layers.54.block_sparse_moe.experts.205.w1", "model.layers.54.block_sparse_moe.experts.206.w1", "model.layers.54.block_sparse_moe.experts.207.w1", "model.layers.54.block_sparse_moe.experts.208.w1", "model.layers.54.block_sparse_moe.experts.209.w1", "model.layers.54.block_sparse_moe.experts.210.w1", "model.layers.54.block_sparse_moe.experts.211.w1", "model.layers.54.block_sparse_moe.experts.212.w1", "model.layers.54.block_sparse_moe.experts.213.w1", "model.layers.54.block_sparse_moe.experts.214.w1", "model.layers.54.block_sparse_moe.experts.215.w1", "model.layers.54.block_sparse_moe.experts.216.w1", "model.layers.54.block_sparse_moe.experts.217.w1", "model.layers.54.block_sparse_moe.experts.218.w1", "model.layers.54.block_sparse_moe.experts.219.w1", "model.layers.54.block_sparse_moe.experts.220.w1", "model.layers.54.block_sparse_moe.experts.221.w1", "model.layers.54.block_sparse_moe.experts.222.w1", "model.layers.54.block_sparse_moe.experts.223.w1", "model.layers.54.block_sparse_moe.experts.224.w1", "model.layers.54.block_sparse_moe.experts.225.w1", "model.layers.54.block_sparse_moe.experts.226.w1", "model.layers.54.block_sparse_moe.experts.227.w1", "model.layers.54.block_sparse_moe.experts.228.w1", "model.layers.54.block_sparse_moe.experts.229.w1", "model.layers.54.block_sparse_moe.experts.230.w1", "model.layers.54.block_sparse_moe.experts.231.w1", "model.layers.54.block_sparse_moe.experts.232.w1", "model.layers.54.block_sparse_moe.experts.233.w1", "model.layers.54.block_sparse_moe.experts.234.w1", "model.layers.54.block_sparse_moe.experts.235.w1", "model.layers.54.block_sparse_moe.experts.236.w1", "model.layers.54.block_sparse_moe.experts.237.w1", "model.layers.54.block_sparse_moe.experts.238.w1", "model.layers.54.block_sparse_moe.experts.239.w1", "model.layers.54.block_sparse_moe.experts.240.w1", "model.layers.54.block_sparse_moe.experts.241.w1", "model.layers.54.block_sparse_moe.experts.242.w1", "model.layers.54.block_sparse_moe.experts.243.w1", "model.layers.54.block_sparse_moe.experts.244.w1", "model.layers.54.block_sparse_moe.experts.245.w1", "model.layers.54.block_sparse_moe.experts.246.w1", "model.layers.54.block_sparse_moe.experts.247.w1", "model.layers.54.block_sparse_moe.experts.248.w1", "model.layers.54.block_sparse_moe.experts.249.w1", "model.layers.54.block_sparse_moe.experts.250.w1", "model.layers.54.block_sparse_moe.experts.251.w1", "model.layers.54.block_sparse_moe.experts.252.w1", "model.layers.54.block_sparse_moe.experts.253.w1", "model.layers.54.block_sparse_moe.experts.254.w1", "model.layers.54.block_sparse_moe.experts.255.w1", "model.layers.54.block_sparse_moe.experts.0.w3", "model.layers.54.block_sparse_moe.experts.1.w3", "model.layers.54.block_sparse_moe.experts.2.w3", "model.layers.54.block_sparse_moe.experts.3.w3", "model.layers.54.block_sparse_moe.experts.4.w3", "model.layers.54.block_sparse_moe.experts.5.w3", "model.layers.54.block_sparse_moe.experts.6.w3", "model.layers.54.block_sparse_moe.experts.7.w3", "model.layers.54.block_sparse_moe.experts.8.w3", "model.layers.54.block_sparse_moe.experts.9.w3", "model.layers.54.block_sparse_moe.experts.10.w3", "model.layers.54.block_sparse_moe.experts.11.w3", "model.layers.54.block_sparse_moe.experts.12.w3", "model.layers.54.block_sparse_moe.experts.13.w3", "model.layers.54.block_sparse_moe.experts.14.w3", "model.layers.54.block_sparse_moe.experts.15.w3", "model.layers.54.block_sparse_moe.experts.16.w3", "model.layers.54.block_sparse_moe.experts.17.w3", "model.layers.54.block_sparse_moe.experts.18.w3", "model.layers.54.block_sparse_moe.experts.19.w3", "model.layers.54.block_sparse_moe.experts.20.w3", "model.layers.54.block_sparse_moe.experts.21.w3", "model.layers.54.block_sparse_moe.experts.22.w3", "model.layers.54.block_sparse_moe.experts.23.w3", "model.layers.54.block_sparse_moe.experts.24.w3", "model.layers.54.block_sparse_moe.experts.25.w3", "model.layers.54.block_sparse_moe.experts.26.w3", "model.layers.54.block_sparse_moe.experts.27.w3", "model.layers.54.block_sparse_moe.experts.28.w3", "model.layers.54.block_sparse_moe.experts.29.w3", "model.layers.54.block_sparse_moe.experts.30.w3", "model.layers.54.block_sparse_moe.experts.31.w3", "model.layers.54.block_sparse_moe.experts.32.w3", "model.layers.54.block_sparse_moe.experts.33.w3", "model.layers.54.block_sparse_moe.experts.34.w3", "model.layers.54.block_sparse_moe.experts.35.w3", "model.layers.54.block_sparse_moe.experts.36.w3", "model.layers.54.block_sparse_moe.experts.37.w3", "model.layers.54.block_sparse_moe.experts.38.w3", "model.layers.54.block_sparse_moe.experts.39.w3", "model.layers.54.block_sparse_moe.experts.40.w3", "model.layers.54.block_sparse_moe.experts.41.w3", "model.layers.54.block_sparse_moe.experts.42.w3", "model.layers.54.block_sparse_moe.experts.43.w3", "model.layers.54.block_sparse_moe.experts.44.w3", "model.layers.54.block_sparse_moe.experts.45.w3", "model.layers.54.block_sparse_moe.experts.46.w3", "model.layers.54.block_sparse_moe.experts.47.w3", "model.layers.54.block_sparse_moe.experts.48.w3", "model.layers.54.block_sparse_moe.experts.49.w3", "model.layers.54.block_sparse_moe.experts.50.w3", "model.layers.54.block_sparse_moe.experts.51.w3", "model.layers.54.block_sparse_moe.experts.52.w3", "model.layers.54.block_sparse_moe.experts.53.w3", "model.layers.54.block_sparse_moe.experts.54.w3", "model.layers.54.block_sparse_moe.experts.55.w3", "model.layers.54.block_sparse_moe.experts.56.w3", "model.layers.54.block_sparse_moe.experts.57.w3", "model.layers.54.block_sparse_moe.experts.58.w3", "model.layers.54.block_sparse_moe.experts.59.w3", "model.layers.54.block_sparse_moe.experts.60.w3", "model.layers.54.block_sparse_moe.experts.61.w3", "model.layers.54.block_sparse_moe.experts.62.w3", "model.layers.54.block_sparse_moe.experts.63.w3", "model.layers.54.block_sparse_moe.experts.64.w3", "model.layers.54.block_sparse_moe.experts.65.w3", "model.layers.54.block_sparse_moe.experts.66.w3", "model.layers.54.block_sparse_moe.experts.67.w3", "model.layers.54.block_sparse_moe.experts.68.w3", "model.layers.54.block_sparse_moe.experts.69.w3", "model.layers.54.block_sparse_moe.experts.70.w3", "model.layers.54.block_sparse_moe.experts.71.w3", "model.layers.54.block_sparse_moe.experts.72.w3", "model.layers.54.block_sparse_moe.experts.73.w3", "model.layers.54.block_sparse_moe.experts.74.w3", "model.layers.54.block_sparse_moe.experts.75.w3", "model.layers.54.block_sparse_moe.experts.76.w3", "model.layers.54.block_sparse_moe.experts.77.w3", "model.layers.54.block_sparse_moe.experts.78.w3", "model.layers.54.block_sparse_moe.experts.79.w3", "model.layers.54.block_sparse_moe.experts.80.w3", "model.layers.54.block_sparse_moe.experts.81.w3", "model.layers.54.block_sparse_moe.experts.82.w3", "model.layers.54.block_sparse_moe.experts.83.w3", "model.layers.54.block_sparse_moe.experts.84.w3", "model.layers.54.block_sparse_moe.experts.85.w3", "model.layers.54.block_sparse_moe.experts.86.w3", "model.layers.54.block_sparse_moe.experts.87.w3", "model.layers.54.block_sparse_moe.experts.88.w3", "model.layers.54.block_sparse_moe.experts.89.w3", "model.layers.54.block_sparse_moe.experts.90.w3", "model.layers.54.block_sparse_moe.experts.91.w3", "model.layers.54.block_sparse_moe.experts.92.w3", "model.layers.54.block_sparse_moe.experts.93.w3", "model.layers.54.block_sparse_moe.experts.94.w3", "model.layers.54.block_sparse_moe.experts.95.w3", "model.layers.54.block_sparse_moe.experts.96.w3", "model.layers.54.block_sparse_moe.experts.97.w3", "model.layers.54.block_sparse_moe.experts.98.w3", "model.layers.54.block_sparse_moe.experts.99.w3", "model.layers.54.block_sparse_moe.experts.100.w3", "model.layers.54.block_sparse_moe.experts.101.w3", "model.layers.54.block_sparse_moe.experts.102.w3", "model.layers.54.block_sparse_moe.experts.103.w3", "model.layers.54.block_sparse_moe.experts.104.w3", "model.layers.54.block_sparse_moe.experts.105.w3", "model.layers.54.block_sparse_moe.experts.106.w3", "model.layers.54.block_sparse_moe.experts.107.w3", "model.layers.54.block_sparse_moe.experts.108.w3", "model.layers.54.block_sparse_moe.experts.109.w3", "model.layers.54.block_sparse_moe.experts.110.w3", "model.layers.54.block_sparse_moe.experts.111.w3", "model.layers.54.block_sparse_moe.experts.112.w3", "model.layers.54.block_sparse_moe.experts.113.w3", "model.layers.54.block_sparse_moe.experts.114.w3", "model.layers.54.block_sparse_moe.experts.115.w3", "model.layers.54.block_sparse_moe.experts.116.w3", "model.layers.54.block_sparse_moe.experts.117.w3", "model.layers.54.block_sparse_moe.experts.118.w3", "model.layers.54.block_sparse_moe.experts.119.w3", "model.layers.54.block_sparse_moe.experts.120.w3", "model.layers.54.block_sparse_moe.experts.121.w3", "model.layers.54.block_sparse_moe.experts.122.w3", "model.layers.54.block_sparse_moe.experts.123.w3", "model.layers.54.block_sparse_moe.experts.124.w3", "model.layers.54.block_sparse_moe.experts.125.w3", "model.layers.54.block_sparse_moe.experts.126.w3", "model.layers.54.block_sparse_moe.experts.127.w3", "model.layers.54.block_sparse_moe.experts.128.w3", "model.layers.54.block_sparse_moe.experts.129.w3", "model.layers.54.block_sparse_moe.experts.130.w3", "model.layers.54.block_sparse_moe.experts.131.w3", "model.layers.54.block_sparse_moe.experts.132.w3", "model.layers.54.block_sparse_moe.experts.133.w3", "model.layers.54.block_sparse_moe.experts.134.w3", "model.layers.54.block_sparse_moe.experts.135.w3", "model.layers.54.block_sparse_moe.experts.136.w3", "model.layers.54.block_sparse_moe.experts.137.w3", "model.layers.54.block_sparse_moe.experts.138.w3", "model.layers.54.block_sparse_moe.experts.139.w3", "model.layers.54.block_sparse_moe.experts.140.w3", "model.layers.54.block_sparse_moe.experts.141.w3", "model.layers.54.block_sparse_moe.experts.142.w3", "model.layers.54.block_sparse_moe.experts.143.w3", "model.layers.54.block_sparse_moe.experts.144.w3", "model.layers.54.block_sparse_moe.experts.145.w3", "model.layers.54.block_sparse_moe.experts.146.w3", "model.layers.54.block_sparse_moe.experts.147.w3", "model.layers.54.block_sparse_moe.experts.148.w3", "model.layers.54.block_sparse_moe.experts.149.w3", "model.layers.54.block_sparse_moe.experts.150.w3", "model.layers.54.block_sparse_moe.experts.151.w3", "model.layers.54.block_sparse_moe.experts.152.w3", "model.layers.54.block_sparse_moe.experts.153.w3", "model.layers.54.block_sparse_moe.experts.154.w3", "model.layers.54.block_sparse_moe.experts.155.w3", "model.layers.54.block_sparse_moe.experts.156.w3", "model.layers.54.block_sparse_moe.experts.157.w3", "model.layers.54.block_sparse_moe.experts.158.w3", "model.layers.54.block_sparse_moe.experts.159.w3", "model.layers.54.block_sparse_moe.experts.160.w3", "model.layers.54.block_sparse_moe.experts.161.w3", "model.layers.54.block_sparse_moe.experts.162.w3", "model.layers.54.block_sparse_moe.experts.163.w3", "model.layers.54.block_sparse_moe.experts.164.w3", "model.layers.54.block_sparse_moe.experts.165.w3", "model.layers.54.block_sparse_moe.experts.166.w3", "model.layers.54.block_sparse_moe.experts.167.w3", "model.layers.54.block_sparse_moe.experts.168.w3", "model.layers.54.block_sparse_moe.experts.169.w3", "model.layers.54.block_sparse_moe.experts.170.w3", "model.layers.54.block_sparse_moe.experts.171.w3", "model.layers.54.block_sparse_moe.experts.172.w3", "model.layers.54.block_sparse_moe.experts.173.w3", "model.layers.54.block_sparse_moe.experts.174.w3", "model.layers.54.block_sparse_moe.experts.175.w3", "model.layers.54.block_sparse_moe.experts.176.w3", "model.layers.54.block_sparse_moe.experts.177.w3", "model.layers.54.block_sparse_moe.experts.178.w3", "model.layers.54.block_sparse_moe.experts.179.w3", "model.layers.54.block_sparse_moe.experts.180.w3", "model.layers.54.block_sparse_moe.experts.181.w3", "model.layers.54.block_sparse_moe.experts.182.w3", "model.layers.54.block_sparse_moe.experts.183.w3", "model.layers.54.block_sparse_moe.experts.184.w3", "model.layers.54.block_sparse_moe.experts.185.w3", "model.layers.54.block_sparse_moe.experts.186.w3", "model.layers.54.block_sparse_moe.experts.187.w3", "model.layers.54.block_sparse_moe.experts.188.w3", "model.layers.54.block_sparse_moe.experts.189.w3", "model.layers.54.block_sparse_moe.experts.190.w3", "model.layers.54.block_sparse_moe.experts.191.w3", "model.layers.54.block_sparse_moe.experts.192.w3", "model.layers.54.block_sparse_moe.experts.193.w3", "model.layers.54.block_sparse_moe.experts.194.w3", "model.layers.54.block_sparse_moe.experts.195.w3", "model.layers.54.block_sparse_moe.experts.196.w3", "model.layers.54.block_sparse_moe.experts.197.w3", "model.layers.54.block_sparse_moe.experts.198.w3", "model.layers.54.block_sparse_moe.experts.199.w3", "model.layers.54.block_sparse_moe.experts.200.w3", "model.layers.54.block_sparse_moe.experts.201.w3", "model.layers.54.block_sparse_moe.experts.202.w3", "model.layers.54.block_sparse_moe.experts.203.w3", "model.layers.54.block_sparse_moe.experts.204.w3", "model.layers.54.block_sparse_moe.experts.205.w3", "model.layers.54.block_sparse_moe.experts.206.w3", "model.layers.54.block_sparse_moe.experts.207.w3", "model.layers.54.block_sparse_moe.experts.208.w3", "model.layers.54.block_sparse_moe.experts.209.w3", "model.layers.54.block_sparse_moe.experts.210.w3", "model.layers.54.block_sparse_moe.experts.211.w3", "model.layers.54.block_sparse_moe.experts.212.w3", "model.layers.54.block_sparse_moe.experts.213.w3", "model.layers.54.block_sparse_moe.experts.214.w3", "model.layers.54.block_sparse_moe.experts.215.w3", "model.layers.54.block_sparse_moe.experts.216.w3", "model.layers.54.block_sparse_moe.experts.217.w3", "model.layers.54.block_sparse_moe.experts.218.w3", "model.layers.54.block_sparse_moe.experts.219.w3", "model.layers.54.block_sparse_moe.experts.220.w3", "model.layers.54.block_sparse_moe.experts.221.w3", "model.layers.54.block_sparse_moe.experts.222.w3", "model.layers.54.block_sparse_moe.experts.223.w3", "model.layers.54.block_sparse_moe.experts.224.w3", "model.layers.54.block_sparse_moe.experts.225.w3", "model.layers.54.block_sparse_moe.experts.226.w3", "model.layers.54.block_sparse_moe.experts.227.w3", "model.layers.54.block_sparse_moe.experts.228.w3", "model.layers.54.block_sparse_moe.experts.229.w3", "model.layers.54.block_sparse_moe.experts.230.w3", "model.layers.54.block_sparse_moe.experts.231.w3", "model.layers.54.block_sparse_moe.experts.232.w3", "model.layers.54.block_sparse_moe.experts.233.w3", "model.layers.54.block_sparse_moe.experts.234.w3", "model.layers.54.block_sparse_moe.experts.235.w3", "model.layers.54.block_sparse_moe.experts.236.w3", "model.layers.54.block_sparse_moe.experts.237.w3", "model.layers.54.block_sparse_moe.experts.238.w3", "model.layers.54.block_sparse_moe.experts.239.w3", "model.layers.54.block_sparse_moe.experts.240.w3", "model.layers.54.block_sparse_moe.experts.241.w3", "model.layers.54.block_sparse_moe.experts.242.w3", "model.layers.54.block_sparse_moe.experts.243.w3", "model.layers.54.block_sparse_moe.experts.244.w3", "model.layers.54.block_sparse_moe.experts.245.w3", "model.layers.54.block_sparse_moe.experts.246.w3", "model.layers.54.block_sparse_moe.experts.247.w3", "model.layers.54.block_sparse_moe.experts.248.w3", "model.layers.54.block_sparse_moe.experts.249.w3", "model.layers.54.block_sparse_moe.experts.250.w3", "model.layers.54.block_sparse_moe.experts.251.w3", "model.layers.54.block_sparse_moe.experts.252.w3", "model.layers.54.block_sparse_moe.experts.253.w3", "model.layers.54.block_sparse_moe.experts.254.w3", "model.layers.54.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0001247704029083696, "dbits": 2415919104 } ] }, { "idx": 274, "layers": [ "model.layers.54.block_sparse_moe.experts.0.w2", "model.layers.54.block_sparse_moe.experts.1.w2", "model.layers.54.block_sparse_moe.experts.2.w2", "model.layers.54.block_sparse_moe.experts.3.w2", "model.layers.54.block_sparse_moe.experts.4.w2", "model.layers.54.block_sparse_moe.experts.5.w2", "model.layers.54.block_sparse_moe.experts.6.w2", "model.layers.54.block_sparse_moe.experts.7.w2", "model.layers.54.block_sparse_moe.experts.8.w2", "model.layers.54.block_sparse_moe.experts.9.w2", "model.layers.54.block_sparse_moe.experts.10.w2", "model.layers.54.block_sparse_moe.experts.11.w2", "model.layers.54.block_sparse_moe.experts.12.w2", "model.layers.54.block_sparse_moe.experts.13.w2", "model.layers.54.block_sparse_moe.experts.14.w2", "model.layers.54.block_sparse_moe.experts.15.w2", "model.layers.54.block_sparse_moe.experts.16.w2", "model.layers.54.block_sparse_moe.experts.17.w2", "model.layers.54.block_sparse_moe.experts.18.w2", "model.layers.54.block_sparse_moe.experts.19.w2", "model.layers.54.block_sparse_moe.experts.20.w2", "model.layers.54.block_sparse_moe.experts.21.w2", "model.layers.54.block_sparse_moe.experts.22.w2", "model.layers.54.block_sparse_moe.experts.23.w2", "model.layers.54.block_sparse_moe.experts.24.w2", "model.layers.54.block_sparse_moe.experts.25.w2", "model.layers.54.block_sparse_moe.experts.26.w2", "model.layers.54.block_sparse_moe.experts.27.w2", "model.layers.54.block_sparse_moe.experts.28.w2", "model.layers.54.block_sparse_moe.experts.29.w2", "model.layers.54.block_sparse_moe.experts.30.w2", "model.layers.54.block_sparse_moe.experts.31.w2", "model.layers.54.block_sparse_moe.experts.32.w2", "model.layers.54.block_sparse_moe.experts.33.w2", "model.layers.54.block_sparse_moe.experts.34.w2", "model.layers.54.block_sparse_moe.experts.35.w2", "model.layers.54.block_sparse_moe.experts.36.w2", "model.layers.54.block_sparse_moe.experts.37.w2", "model.layers.54.block_sparse_moe.experts.38.w2", "model.layers.54.block_sparse_moe.experts.39.w2", "model.layers.54.block_sparse_moe.experts.40.w2", "model.layers.54.block_sparse_moe.experts.41.w2", "model.layers.54.block_sparse_moe.experts.42.w2", "model.layers.54.block_sparse_moe.experts.43.w2", "model.layers.54.block_sparse_moe.experts.44.w2", "model.layers.54.block_sparse_moe.experts.45.w2", "model.layers.54.block_sparse_moe.experts.46.w2", "model.layers.54.block_sparse_moe.experts.47.w2", "model.layers.54.block_sparse_moe.experts.48.w2", "model.layers.54.block_sparse_moe.experts.49.w2", "model.layers.54.block_sparse_moe.experts.50.w2", "model.layers.54.block_sparse_moe.experts.51.w2", "model.layers.54.block_sparse_moe.experts.52.w2", "model.layers.54.block_sparse_moe.experts.53.w2", "model.layers.54.block_sparse_moe.experts.54.w2", "model.layers.54.block_sparse_moe.experts.55.w2", "model.layers.54.block_sparse_moe.experts.56.w2", "model.layers.54.block_sparse_moe.experts.57.w2", "model.layers.54.block_sparse_moe.experts.58.w2", "model.layers.54.block_sparse_moe.experts.59.w2", "model.layers.54.block_sparse_moe.experts.60.w2", "model.layers.54.block_sparse_moe.experts.61.w2", "model.layers.54.block_sparse_moe.experts.62.w2", "model.layers.54.block_sparse_moe.experts.63.w2", "model.layers.54.block_sparse_moe.experts.64.w2", "model.layers.54.block_sparse_moe.experts.65.w2", "model.layers.54.block_sparse_moe.experts.66.w2", "model.layers.54.block_sparse_moe.experts.67.w2", "model.layers.54.block_sparse_moe.experts.68.w2", "model.layers.54.block_sparse_moe.experts.69.w2", "model.layers.54.block_sparse_moe.experts.70.w2", "model.layers.54.block_sparse_moe.experts.71.w2", "model.layers.54.block_sparse_moe.experts.72.w2", "model.layers.54.block_sparse_moe.experts.73.w2", "model.layers.54.block_sparse_moe.experts.74.w2", "model.layers.54.block_sparse_moe.experts.75.w2", "model.layers.54.block_sparse_moe.experts.76.w2", "model.layers.54.block_sparse_moe.experts.77.w2", "model.layers.54.block_sparse_moe.experts.78.w2", "model.layers.54.block_sparse_moe.experts.79.w2", "model.layers.54.block_sparse_moe.experts.80.w2", "model.layers.54.block_sparse_moe.experts.81.w2", "model.layers.54.block_sparse_moe.experts.82.w2", "model.layers.54.block_sparse_moe.experts.83.w2", "model.layers.54.block_sparse_moe.experts.84.w2", "model.layers.54.block_sparse_moe.experts.85.w2", "model.layers.54.block_sparse_moe.experts.86.w2", "model.layers.54.block_sparse_moe.experts.87.w2", "model.layers.54.block_sparse_moe.experts.88.w2", "model.layers.54.block_sparse_moe.experts.89.w2", "model.layers.54.block_sparse_moe.experts.90.w2", "model.layers.54.block_sparse_moe.experts.91.w2", "model.layers.54.block_sparse_moe.experts.92.w2", "model.layers.54.block_sparse_moe.experts.93.w2", "model.layers.54.block_sparse_moe.experts.94.w2", "model.layers.54.block_sparse_moe.experts.95.w2", "model.layers.54.block_sparse_moe.experts.96.w2", "model.layers.54.block_sparse_moe.experts.97.w2", "model.layers.54.block_sparse_moe.experts.98.w2", "model.layers.54.block_sparse_moe.experts.99.w2", "model.layers.54.block_sparse_moe.experts.100.w2", "model.layers.54.block_sparse_moe.experts.101.w2", "model.layers.54.block_sparse_moe.experts.102.w2", "model.layers.54.block_sparse_moe.experts.103.w2", "model.layers.54.block_sparse_moe.experts.104.w2", "model.layers.54.block_sparse_moe.experts.105.w2", "model.layers.54.block_sparse_moe.experts.106.w2", "model.layers.54.block_sparse_moe.experts.107.w2", "model.layers.54.block_sparse_moe.experts.108.w2", "model.layers.54.block_sparse_moe.experts.109.w2", "model.layers.54.block_sparse_moe.experts.110.w2", "model.layers.54.block_sparse_moe.experts.111.w2", "model.layers.54.block_sparse_moe.experts.112.w2", "model.layers.54.block_sparse_moe.experts.113.w2", "model.layers.54.block_sparse_moe.experts.114.w2", "model.layers.54.block_sparse_moe.experts.115.w2", "model.layers.54.block_sparse_moe.experts.116.w2", "model.layers.54.block_sparse_moe.experts.117.w2", "model.layers.54.block_sparse_moe.experts.118.w2", "model.layers.54.block_sparse_moe.experts.119.w2", "model.layers.54.block_sparse_moe.experts.120.w2", "model.layers.54.block_sparse_moe.experts.121.w2", "model.layers.54.block_sparse_moe.experts.122.w2", "model.layers.54.block_sparse_moe.experts.123.w2", "model.layers.54.block_sparse_moe.experts.124.w2", "model.layers.54.block_sparse_moe.experts.125.w2", "model.layers.54.block_sparse_moe.experts.126.w2", "model.layers.54.block_sparse_moe.experts.127.w2", "model.layers.54.block_sparse_moe.experts.128.w2", "model.layers.54.block_sparse_moe.experts.129.w2", "model.layers.54.block_sparse_moe.experts.130.w2", "model.layers.54.block_sparse_moe.experts.131.w2", "model.layers.54.block_sparse_moe.experts.132.w2", "model.layers.54.block_sparse_moe.experts.133.w2", "model.layers.54.block_sparse_moe.experts.134.w2", "model.layers.54.block_sparse_moe.experts.135.w2", "model.layers.54.block_sparse_moe.experts.136.w2", "model.layers.54.block_sparse_moe.experts.137.w2", "model.layers.54.block_sparse_moe.experts.138.w2", "model.layers.54.block_sparse_moe.experts.139.w2", "model.layers.54.block_sparse_moe.experts.140.w2", "model.layers.54.block_sparse_moe.experts.141.w2", "model.layers.54.block_sparse_moe.experts.142.w2", "model.layers.54.block_sparse_moe.experts.143.w2", "model.layers.54.block_sparse_moe.experts.144.w2", "model.layers.54.block_sparse_moe.experts.145.w2", "model.layers.54.block_sparse_moe.experts.146.w2", "model.layers.54.block_sparse_moe.experts.147.w2", "model.layers.54.block_sparse_moe.experts.148.w2", "model.layers.54.block_sparse_moe.experts.149.w2", "model.layers.54.block_sparse_moe.experts.150.w2", "model.layers.54.block_sparse_moe.experts.151.w2", "model.layers.54.block_sparse_moe.experts.152.w2", "model.layers.54.block_sparse_moe.experts.153.w2", "model.layers.54.block_sparse_moe.experts.154.w2", "model.layers.54.block_sparse_moe.experts.155.w2", "model.layers.54.block_sparse_moe.experts.156.w2", "model.layers.54.block_sparse_moe.experts.157.w2", "model.layers.54.block_sparse_moe.experts.158.w2", "model.layers.54.block_sparse_moe.experts.159.w2", "model.layers.54.block_sparse_moe.experts.160.w2", "model.layers.54.block_sparse_moe.experts.161.w2", "model.layers.54.block_sparse_moe.experts.162.w2", "model.layers.54.block_sparse_moe.experts.163.w2", "model.layers.54.block_sparse_moe.experts.164.w2", "model.layers.54.block_sparse_moe.experts.165.w2", "model.layers.54.block_sparse_moe.experts.166.w2", "model.layers.54.block_sparse_moe.experts.167.w2", "model.layers.54.block_sparse_moe.experts.168.w2", "model.layers.54.block_sparse_moe.experts.169.w2", "model.layers.54.block_sparse_moe.experts.170.w2", "model.layers.54.block_sparse_moe.experts.171.w2", "model.layers.54.block_sparse_moe.experts.172.w2", "model.layers.54.block_sparse_moe.experts.173.w2", "model.layers.54.block_sparse_moe.experts.174.w2", "model.layers.54.block_sparse_moe.experts.175.w2", "model.layers.54.block_sparse_moe.experts.176.w2", "model.layers.54.block_sparse_moe.experts.177.w2", "model.layers.54.block_sparse_moe.experts.178.w2", "model.layers.54.block_sparse_moe.experts.179.w2", "model.layers.54.block_sparse_moe.experts.180.w2", "model.layers.54.block_sparse_moe.experts.181.w2", "model.layers.54.block_sparse_moe.experts.182.w2", "model.layers.54.block_sparse_moe.experts.183.w2", "model.layers.54.block_sparse_moe.experts.184.w2", "model.layers.54.block_sparse_moe.experts.185.w2", "model.layers.54.block_sparse_moe.experts.186.w2", "model.layers.54.block_sparse_moe.experts.187.w2", "model.layers.54.block_sparse_moe.experts.188.w2", "model.layers.54.block_sparse_moe.experts.189.w2", "model.layers.54.block_sparse_moe.experts.190.w2", "model.layers.54.block_sparse_moe.experts.191.w2", "model.layers.54.block_sparse_moe.experts.192.w2", "model.layers.54.block_sparse_moe.experts.193.w2", "model.layers.54.block_sparse_moe.experts.194.w2", "model.layers.54.block_sparse_moe.experts.195.w2", "model.layers.54.block_sparse_moe.experts.196.w2", "model.layers.54.block_sparse_moe.experts.197.w2", "model.layers.54.block_sparse_moe.experts.198.w2", "model.layers.54.block_sparse_moe.experts.199.w2", "model.layers.54.block_sparse_moe.experts.200.w2", "model.layers.54.block_sparse_moe.experts.201.w2", "model.layers.54.block_sparse_moe.experts.202.w2", "model.layers.54.block_sparse_moe.experts.203.w2", "model.layers.54.block_sparse_moe.experts.204.w2", "model.layers.54.block_sparse_moe.experts.205.w2", "model.layers.54.block_sparse_moe.experts.206.w2", "model.layers.54.block_sparse_moe.experts.207.w2", "model.layers.54.block_sparse_moe.experts.208.w2", "model.layers.54.block_sparse_moe.experts.209.w2", "model.layers.54.block_sparse_moe.experts.210.w2", "model.layers.54.block_sparse_moe.experts.211.w2", "model.layers.54.block_sparse_moe.experts.212.w2", "model.layers.54.block_sparse_moe.experts.213.w2", "model.layers.54.block_sparse_moe.experts.214.w2", "model.layers.54.block_sparse_moe.experts.215.w2", "model.layers.54.block_sparse_moe.experts.216.w2", "model.layers.54.block_sparse_moe.experts.217.w2", "model.layers.54.block_sparse_moe.experts.218.w2", "model.layers.54.block_sparse_moe.experts.219.w2", "model.layers.54.block_sparse_moe.experts.220.w2", "model.layers.54.block_sparse_moe.experts.221.w2", "model.layers.54.block_sparse_moe.experts.222.w2", "model.layers.54.block_sparse_moe.experts.223.w2", "model.layers.54.block_sparse_moe.experts.224.w2", "model.layers.54.block_sparse_moe.experts.225.w2", "model.layers.54.block_sparse_moe.experts.226.w2", "model.layers.54.block_sparse_moe.experts.227.w2", "model.layers.54.block_sparse_moe.experts.228.w2", "model.layers.54.block_sparse_moe.experts.229.w2", "model.layers.54.block_sparse_moe.experts.230.w2", "model.layers.54.block_sparse_moe.experts.231.w2", "model.layers.54.block_sparse_moe.experts.232.w2", "model.layers.54.block_sparse_moe.experts.233.w2", "model.layers.54.block_sparse_moe.experts.234.w2", "model.layers.54.block_sparse_moe.experts.235.w2", "model.layers.54.block_sparse_moe.experts.236.w2", "model.layers.54.block_sparse_moe.experts.237.w2", "model.layers.54.block_sparse_moe.experts.238.w2", "model.layers.54.block_sparse_moe.experts.239.w2", "model.layers.54.block_sparse_moe.experts.240.w2", "model.layers.54.block_sparse_moe.experts.241.w2", "model.layers.54.block_sparse_moe.experts.242.w2", "model.layers.54.block_sparse_moe.experts.243.w2", "model.layers.54.block_sparse_moe.experts.244.w2", "model.layers.54.block_sparse_moe.experts.245.w2", "model.layers.54.block_sparse_moe.experts.246.w2", "model.layers.54.block_sparse_moe.experts.247.w2", "model.layers.54.block_sparse_moe.experts.248.w2", "model.layers.54.block_sparse_moe.experts.249.w2", "model.layers.54.block_sparse_moe.experts.250.w2", "model.layers.54.block_sparse_moe.experts.251.w2", "model.layers.54.block_sparse_moe.experts.252.w2", "model.layers.54.block_sparse_moe.experts.253.w2", "model.layers.54.block_sparse_moe.experts.254.w2", "model.layers.54.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0005021303892135398, "dbits": 1207959552 } ] }, { "idx": 275, "layers": [ "model.layers.55.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0011014401912690097, "dbits": 18874368 } ] }, { "idx": 276, "layers": [ "model.layers.55.self_attn.k_proj", "model.layers.55.self_attn.v_proj" ], "candidates": [ { "dkld": -0.010760667920112543, "dbits": 6291456 } ] }, { "idx": 277, "layers": [ "model.layers.55.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0032337605953216553, "dbits": 18874368 } ] }, { "idx": 278, "layers": [ "model.layers.55.block_sparse_moe.experts.0.w1", "model.layers.55.block_sparse_moe.experts.1.w1", "model.layers.55.block_sparse_moe.experts.2.w1", "model.layers.55.block_sparse_moe.experts.3.w1", "model.layers.55.block_sparse_moe.experts.4.w1", "model.layers.55.block_sparse_moe.experts.5.w1", "model.layers.55.block_sparse_moe.experts.6.w1", "model.layers.55.block_sparse_moe.experts.7.w1", "model.layers.55.block_sparse_moe.experts.8.w1", "model.layers.55.block_sparse_moe.experts.9.w1", "model.layers.55.block_sparse_moe.experts.10.w1", "model.layers.55.block_sparse_moe.experts.11.w1", "model.layers.55.block_sparse_moe.experts.12.w1", "model.layers.55.block_sparse_moe.experts.13.w1", "model.layers.55.block_sparse_moe.experts.14.w1", "model.layers.55.block_sparse_moe.experts.15.w1", "model.layers.55.block_sparse_moe.experts.16.w1", "model.layers.55.block_sparse_moe.experts.17.w1", "model.layers.55.block_sparse_moe.experts.18.w1", "model.layers.55.block_sparse_moe.experts.19.w1", "model.layers.55.block_sparse_moe.experts.20.w1", "model.layers.55.block_sparse_moe.experts.21.w1", "model.layers.55.block_sparse_moe.experts.22.w1", "model.layers.55.block_sparse_moe.experts.23.w1", "model.layers.55.block_sparse_moe.experts.24.w1", "model.layers.55.block_sparse_moe.experts.25.w1", "model.layers.55.block_sparse_moe.experts.26.w1", "model.layers.55.block_sparse_moe.experts.27.w1", "model.layers.55.block_sparse_moe.experts.28.w1", "model.layers.55.block_sparse_moe.experts.29.w1", "model.layers.55.block_sparse_moe.experts.30.w1", "model.layers.55.block_sparse_moe.experts.31.w1", "model.layers.55.block_sparse_moe.experts.32.w1", "model.layers.55.block_sparse_moe.experts.33.w1", "model.layers.55.block_sparse_moe.experts.34.w1", "model.layers.55.block_sparse_moe.experts.35.w1", "model.layers.55.block_sparse_moe.experts.36.w1", "model.layers.55.block_sparse_moe.experts.37.w1", "model.layers.55.block_sparse_moe.experts.38.w1", "model.layers.55.block_sparse_moe.experts.39.w1", "model.layers.55.block_sparse_moe.experts.40.w1", "model.layers.55.block_sparse_moe.experts.41.w1", "model.layers.55.block_sparse_moe.experts.42.w1", "model.layers.55.block_sparse_moe.experts.43.w1", "model.layers.55.block_sparse_moe.experts.44.w1", "model.layers.55.block_sparse_moe.experts.45.w1", "model.layers.55.block_sparse_moe.experts.46.w1", "model.layers.55.block_sparse_moe.experts.47.w1", "model.layers.55.block_sparse_moe.experts.48.w1", "model.layers.55.block_sparse_moe.experts.49.w1", "model.layers.55.block_sparse_moe.experts.50.w1", "model.layers.55.block_sparse_moe.experts.51.w1", "model.layers.55.block_sparse_moe.experts.52.w1", "model.layers.55.block_sparse_moe.experts.53.w1", "model.layers.55.block_sparse_moe.experts.54.w1", "model.layers.55.block_sparse_moe.experts.55.w1", "model.layers.55.block_sparse_moe.experts.56.w1", "model.layers.55.block_sparse_moe.experts.57.w1", "model.layers.55.block_sparse_moe.experts.58.w1", "model.layers.55.block_sparse_moe.experts.59.w1", "model.layers.55.block_sparse_moe.experts.60.w1", "model.layers.55.block_sparse_moe.experts.61.w1", "model.layers.55.block_sparse_moe.experts.62.w1", "model.layers.55.block_sparse_moe.experts.63.w1", "model.layers.55.block_sparse_moe.experts.64.w1", "model.layers.55.block_sparse_moe.experts.65.w1", "model.layers.55.block_sparse_moe.experts.66.w1", "model.layers.55.block_sparse_moe.experts.67.w1", "model.layers.55.block_sparse_moe.experts.68.w1", "model.layers.55.block_sparse_moe.experts.69.w1", "model.layers.55.block_sparse_moe.experts.70.w1", "model.layers.55.block_sparse_moe.experts.71.w1", "model.layers.55.block_sparse_moe.experts.72.w1", "model.layers.55.block_sparse_moe.experts.73.w1", "model.layers.55.block_sparse_moe.experts.74.w1", "model.layers.55.block_sparse_moe.experts.75.w1", "model.layers.55.block_sparse_moe.experts.76.w1", "model.layers.55.block_sparse_moe.experts.77.w1", "model.layers.55.block_sparse_moe.experts.78.w1", "model.layers.55.block_sparse_moe.experts.79.w1", "model.layers.55.block_sparse_moe.experts.80.w1", "model.layers.55.block_sparse_moe.experts.81.w1", "model.layers.55.block_sparse_moe.experts.82.w1", "model.layers.55.block_sparse_moe.experts.83.w1", "model.layers.55.block_sparse_moe.experts.84.w1", "model.layers.55.block_sparse_moe.experts.85.w1", "model.layers.55.block_sparse_moe.experts.86.w1", "model.layers.55.block_sparse_moe.experts.87.w1", "model.layers.55.block_sparse_moe.experts.88.w1", "model.layers.55.block_sparse_moe.experts.89.w1", "model.layers.55.block_sparse_moe.experts.90.w1", "model.layers.55.block_sparse_moe.experts.91.w1", "model.layers.55.block_sparse_moe.experts.92.w1", "model.layers.55.block_sparse_moe.experts.93.w1", "model.layers.55.block_sparse_moe.experts.94.w1", "model.layers.55.block_sparse_moe.experts.95.w1", "model.layers.55.block_sparse_moe.experts.96.w1", "model.layers.55.block_sparse_moe.experts.97.w1", "model.layers.55.block_sparse_moe.experts.98.w1", "model.layers.55.block_sparse_moe.experts.99.w1", "model.layers.55.block_sparse_moe.experts.100.w1", "model.layers.55.block_sparse_moe.experts.101.w1", "model.layers.55.block_sparse_moe.experts.102.w1", "model.layers.55.block_sparse_moe.experts.103.w1", "model.layers.55.block_sparse_moe.experts.104.w1", "model.layers.55.block_sparse_moe.experts.105.w1", "model.layers.55.block_sparse_moe.experts.106.w1", "model.layers.55.block_sparse_moe.experts.107.w1", "model.layers.55.block_sparse_moe.experts.108.w1", "model.layers.55.block_sparse_moe.experts.109.w1", "model.layers.55.block_sparse_moe.experts.110.w1", "model.layers.55.block_sparse_moe.experts.111.w1", "model.layers.55.block_sparse_moe.experts.112.w1", "model.layers.55.block_sparse_moe.experts.113.w1", "model.layers.55.block_sparse_moe.experts.114.w1", "model.layers.55.block_sparse_moe.experts.115.w1", "model.layers.55.block_sparse_moe.experts.116.w1", "model.layers.55.block_sparse_moe.experts.117.w1", "model.layers.55.block_sparse_moe.experts.118.w1", "model.layers.55.block_sparse_moe.experts.119.w1", "model.layers.55.block_sparse_moe.experts.120.w1", "model.layers.55.block_sparse_moe.experts.121.w1", "model.layers.55.block_sparse_moe.experts.122.w1", "model.layers.55.block_sparse_moe.experts.123.w1", "model.layers.55.block_sparse_moe.experts.124.w1", "model.layers.55.block_sparse_moe.experts.125.w1", "model.layers.55.block_sparse_moe.experts.126.w1", "model.layers.55.block_sparse_moe.experts.127.w1", "model.layers.55.block_sparse_moe.experts.128.w1", "model.layers.55.block_sparse_moe.experts.129.w1", "model.layers.55.block_sparse_moe.experts.130.w1", "model.layers.55.block_sparse_moe.experts.131.w1", "model.layers.55.block_sparse_moe.experts.132.w1", "model.layers.55.block_sparse_moe.experts.133.w1", "model.layers.55.block_sparse_moe.experts.134.w1", "model.layers.55.block_sparse_moe.experts.135.w1", "model.layers.55.block_sparse_moe.experts.136.w1", "model.layers.55.block_sparse_moe.experts.137.w1", "model.layers.55.block_sparse_moe.experts.138.w1", "model.layers.55.block_sparse_moe.experts.139.w1", "model.layers.55.block_sparse_moe.experts.140.w1", "model.layers.55.block_sparse_moe.experts.141.w1", "model.layers.55.block_sparse_moe.experts.142.w1", "model.layers.55.block_sparse_moe.experts.143.w1", "model.layers.55.block_sparse_moe.experts.144.w1", "model.layers.55.block_sparse_moe.experts.145.w1", "model.layers.55.block_sparse_moe.experts.146.w1", "model.layers.55.block_sparse_moe.experts.147.w1", "model.layers.55.block_sparse_moe.experts.148.w1", "model.layers.55.block_sparse_moe.experts.149.w1", "model.layers.55.block_sparse_moe.experts.150.w1", "model.layers.55.block_sparse_moe.experts.151.w1", "model.layers.55.block_sparse_moe.experts.152.w1", "model.layers.55.block_sparse_moe.experts.153.w1", "model.layers.55.block_sparse_moe.experts.154.w1", "model.layers.55.block_sparse_moe.experts.155.w1", "model.layers.55.block_sparse_moe.experts.156.w1", "model.layers.55.block_sparse_moe.experts.157.w1", "model.layers.55.block_sparse_moe.experts.158.w1", "model.layers.55.block_sparse_moe.experts.159.w1", "model.layers.55.block_sparse_moe.experts.160.w1", "model.layers.55.block_sparse_moe.experts.161.w1", "model.layers.55.block_sparse_moe.experts.162.w1", "model.layers.55.block_sparse_moe.experts.163.w1", "model.layers.55.block_sparse_moe.experts.164.w1", "model.layers.55.block_sparse_moe.experts.165.w1", "model.layers.55.block_sparse_moe.experts.166.w1", "model.layers.55.block_sparse_moe.experts.167.w1", "model.layers.55.block_sparse_moe.experts.168.w1", "model.layers.55.block_sparse_moe.experts.169.w1", "model.layers.55.block_sparse_moe.experts.170.w1", "model.layers.55.block_sparse_moe.experts.171.w1", "model.layers.55.block_sparse_moe.experts.172.w1", "model.layers.55.block_sparse_moe.experts.173.w1", "model.layers.55.block_sparse_moe.experts.174.w1", "model.layers.55.block_sparse_moe.experts.175.w1", "model.layers.55.block_sparse_moe.experts.176.w1", "model.layers.55.block_sparse_moe.experts.177.w1", "model.layers.55.block_sparse_moe.experts.178.w1", "model.layers.55.block_sparse_moe.experts.179.w1", "model.layers.55.block_sparse_moe.experts.180.w1", "model.layers.55.block_sparse_moe.experts.181.w1", "model.layers.55.block_sparse_moe.experts.182.w1", "model.layers.55.block_sparse_moe.experts.183.w1", "model.layers.55.block_sparse_moe.experts.184.w1", "model.layers.55.block_sparse_moe.experts.185.w1", "model.layers.55.block_sparse_moe.experts.186.w1", "model.layers.55.block_sparse_moe.experts.187.w1", "model.layers.55.block_sparse_moe.experts.188.w1", "model.layers.55.block_sparse_moe.experts.189.w1", "model.layers.55.block_sparse_moe.experts.190.w1", "model.layers.55.block_sparse_moe.experts.191.w1", "model.layers.55.block_sparse_moe.experts.192.w1", "model.layers.55.block_sparse_moe.experts.193.w1", "model.layers.55.block_sparse_moe.experts.194.w1", "model.layers.55.block_sparse_moe.experts.195.w1", "model.layers.55.block_sparse_moe.experts.196.w1", "model.layers.55.block_sparse_moe.experts.197.w1", "model.layers.55.block_sparse_moe.experts.198.w1", "model.layers.55.block_sparse_moe.experts.199.w1", "model.layers.55.block_sparse_moe.experts.200.w1", "model.layers.55.block_sparse_moe.experts.201.w1", "model.layers.55.block_sparse_moe.experts.202.w1", "model.layers.55.block_sparse_moe.experts.203.w1", "model.layers.55.block_sparse_moe.experts.204.w1", "model.layers.55.block_sparse_moe.experts.205.w1", "model.layers.55.block_sparse_moe.experts.206.w1", "model.layers.55.block_sparse_moe.experts.207.w1", "model.layers.55.block_sparse_moe.experts.208.w1", "model.layers.55.block_sparse_moe.experts.209.w1", "model.layers.55.block_sparse_moe.experts.210.w1", "model.layers.55.block_sparse_moe.experts.211.w1", "model.layers.55.block_sparse_moe.experts.212.w1", "model.layers.55.block_sparse_moe.experts.213.w1", "model.layers.55.block_sparse_moe.experts.214.w1", "model.layers.55.block_sparse_moe.experts.215.w1", "model.layers.55.block_sparse_moe.experts.216.w1", "model.layers.55.block_sparse_moe.experts.217.w1", "model.layers.55.block_sparse_moe.experts.218.w1", "model.layers.55.block_sparse_moe.experts.219.w1", "model.layers.55.block_sparse_moe.experts.220.w1", "model.layers.55.block_sparse_moe.experts.221.w1", "model.layers.55.block_sparse_moe.experts.222.w1", "model.layers.55.block_sparse_moe.experts.223.w1", "model.layers.55.block_sparse_moe.experts.224.w1", "model.layers.55.block_sparse_moe.experts.225.w1", "model.layers.55.block_sparse_moe.experts.226.w1", "model.layers.55.block_sparse_moe.experts.227.w1", "model.layers.55.block_sparse_moe.experts.228.w1", "model.layers.55.block_sparse_moe.experts.229.w1", "model.layers.55.block_sparse_moe.experts.230.w1", "model.layers.55.block_sparse_moe.experts.231.w1", "model.layers.55.block_sparse_moe.experts.232.w1", "model.layers.55.block_sparse_moe.experts.233.w1", "model.layers.55.block_sparse_moe.experts.234.w1", "model.layers.55.block_sparse_moe.experts.235.w1", "model.layers.55.block_sparse_moe.experts.236.w1", "model.layers.55.block_sparse_moe.experts.237.w1", "model.layers.55.block_sparse_moe.experts.238.w1", "model.layers.55.block_sparse_moe.experts.239.w1", "model.layers.55.block_sparse_moe.experts.240.w1", "model.layers.55.block_sparse_moe.experts.241.w1", "model.layers.55.block_sparse_moe.experts.242.w1", "model.layers.55.block_sparse_moe.experts.243.w1", "model.layers.55.block_sparse_moe.experts.244.w1", "model.layers.55.block_sparse_moe.experts.245.w1", "model.layers.55.block_sparse_moe.experts.246.w1", "model.layers.55.block_sparse_moe.experts.247.w1", "model.layers.55.block_sparse_moe.experts.248.w1", "model.layers.55.block_sparse_moe.experts.249.w1", "model.layers.55.block_sparse_moe.experts.250.w1", "model.layers.55.block_sparse_moe.experts.251.w1", "model.layers.55.block_sparse_moe.experts.252.w1", "model.layers.55.block_sparse_moe.experts.253.w1", "model.layers.55.block_sparse_moe.experts.254.w1", "model.layers.55.block_sparse_moe.experts.255.w1", "model.layers.55.block_sparse_moe.experts.0.w3", "model.layers.55.block_sparse_moe.experts.1.w3", "model.layers.55.block_sparse_moe.experts.2.w3", "model.layers.55.block_sparse_moe.experts.3.w3", "model.layers.55.block_sparse_moe.experts.4.w3", "model.layers.55.block_sparse_moe.experts.5.w3", "model.layers.55.block_sparse_moe.experts.6.w3", "model.layers.55.block_sparse_moe.experts.7.w3", "model.layers.55.block_sparse_moe.experts.8.w3", "model.layers.55.block_sparse_moe.experts.9.w3", "model.layers.55.block_sparse_moe.experts.10.w3", "model.layers.55.block_sparse_moe.experts.11.w3", "model.layers.55.block_sparse_moe.experts.12.w3", "model.layers.55.block_sparse_moe.experts.13.w3", "model.layers.55.block_sparse_moe.experts.14.w3", "model.layers.55.block_sparse_moe.experts.15.w3", "model.layers.55.block_sparse_moe.experts.16.w3", "model.layers.55.block_sparse_moe.experts.17.w3", "model.layers.55.block_sparse_moe.experts.18.w3", "model.layers.55.block_sparse_moe.experts.19.w3", "model.layers.55.block_sparse_moe.experts.20.w3", "model.layers.55.block_sparse_moe.experts.21.w3", "model.layers.55.block_sparse_moe.experts.22.w3", "model.layers.55.block_sparse_moe.experts.23.w3", "model.layers.55.block_sparse_moe.experts.24.w3", "model.layers.55.block_sparse_moe.experts.25.w3", "model.layers.55.block_sparse_moe.experts.26.w3", "model.layers.55.block_sparse_moe.experts.27.w3", "model.layers.55.block_sparse_moe.experts.28.w3", "model.layers.55.block_sparse_moe.experts.29.w3", "model.layers.55.block_sparse_moe.experts.30.w3", "model.layers.55.block_sparse_moe.experts.31.w3", "model.layers.55.block_sparse_moe.experts.32.w3", "model.layers.55.block_sparse_moe.experts.33.w3", "model.layers.55.block_sparse_moe.experts.34.w3", "model.layers.55.block_sparse_moe.experts.35.w3", "model.layers.55.block_sparse_moe.experts.36.w3", "model.layers.55.block_sparse_moe.experts.37.w3", "model.layers.55.block_sparse_moe.experts.38.w3", "model.layers.55.block_sparse_moe.experts.39.w3", "model.layers.55.block_sparse_moe.experts.40.w3", "model.layers.55.block_sparse_moe.experts.41.w3", "model.layers.55.block_sparse_moe.experts.42.w3", "model.layers.55.block_sparse_moe.experts.43.w3", "model.layers.55.block_sparse_moe.experts.44.w3", "model.layers.55.block_sparse_moe.experts.45.w3", "model.layers.55.block_sparse_moe.experts.46.w3", "model.layers.55.block_sparse_moe.experts.47.w3", "model.layers.55.block_sparse_moe.experts.48.w3", "model.layers.55.block_sparse_moe.experts.49.w3", "model.layers.55.block_sparse_moe.experts.50.w3", "model.layers.55.block_sparse_moe.experts.51.w3", "model.layers.55.block_sparse_moe.experts.52.w3", "model.layers.55.block_sparse_moe.experts.53.w3", "model.layers.55.block_sparse_moe.experts.54.w3", "model.layers.55.block_sparse_moe.experts.55.w3", "model.layers.55.block_sparse_moe.experts.56.w3", "model.layers.55.block_sparse_moe.experts.57.w3", "model.layers.55.block_sparse_moe.experts.58.w3", "model.layers.55.block_sparse_moe.experts.59.w3", "model.layers.55.block_sparse_moe.experts.60.w3", "model.layers.55.block_sparse_moe.experts.61.w3", "model.layers.55.block_sparse_moe.experts.62.w3", "model.layers.55.block_sparse_moe.experts.63.w3", "model.layers.55.block_sparse_moe.experts.64.w3", "model.layers.55.block_sparse_moe.experts.65.w3", "model.layers.55.block_sparse_moe.experts.66.w3", "model.layers.55.block_sparse_moe.experts.67.w3", "model.layers.55.block_sparse_moe.experts.68.w3", "model.layers.55.block_sparse_moe.experts.69.w3", "model.layers.55.block_sparse_moe.experts.70.w3", "model.layers.55.block_sparse_moe.experts.71.w3", "model.layers.55.block_sparse_moe.experts.72.w3", "model.layers.55.block_sparse_moe.experts.73.w3", "model.layers.55.block_sparse_moe.experts.74.w3", "model.layers.55.block_sparse_moe.experts.75.w3", "model.layers.55.block_sparse_moe.experts.76.w3", "model.layers.55.block_sparse_moe.experts.77.w3", "model.layers.55.block_sparse_moe.experts.78.w3", "model.layers.55.block_sparse_moe.experts.79.w3", "model.layers.55.block_sparse_moe.experts.80.w3", "model.layers.55.block_sparse_moe.experts.81.w3", "model.layers.55.block_sparse_moe.experts.82.w3", "model.layers.55.block_sparse_moe.experts.83.w3", "model.layers.55.block_sparse_moe.experts.84.w3", "model.layers.55.block_sparse_moe.experts.85.w3", "model.layers.55.block_sparse_moe.experts.86.w3", "model.layers.55.block_sparse_moe.experts.87.w3", "model.layers.55.block_sparse_moe.experts.88.w3", "model.layers.55.block_sparse_moe.experts.89.w3", "model.layers.55.block_sparse_moe.experts.90.w3", "model.layers.55.block_sparse_moe.experts.91.w3", "model.layers.55.block_sparse_moe.experts.92.w3", "model.layers.55.block_sparse_moe.experts.93.w3", "model.layers.55.block_sparse_moe.experts.94.w3", "model.layers.55.block_sparse_moe.experts.95.w3", "model.layers.55.block_sparse_moe.experts.96.w3", "model.layers.55.block_sparse_moe.experts.97.w3", "model.layers.55.block_sparse_moe.experts.98.w3", "model.layers.55.block_sparse_moe.experts.99.w3", "model.layers.55.block_sparse_moe.experts.100.w3", "model.layers.55.block_sparse_moe.experts.101.w3", "model.layers.55.block_sparse_moe.experts.102.w3", "model.layers.55.block_sparse_moe.experts.103.w3", "model.layers.55.block_sparse_moe.experts.104.w3", "model.layers.55.block_sparse_moe.experts.105.w3", "model.layers.55.block_sparse_moe.experts.106.w3", "model.layers.55.block_sparse_moe.experts.107.w3", "model.layers.55.block_sparse_moe.experts.108.w3", "model.layers.55.block_sparse_moe.experts.109.w3", "model.layers.55.block_sparse_moe.experts.110.w3", "model.layers.55.block_sparse_moe.experts.111.w3", "model.layers.55.block_sparse_moe.experts.112.w3", "model.layers.55.block_sparse_moe.experts.113.w3", "model.layers.55.block_sparse_moe.experts.114.w3", "model.layers.55.block_sparse_moe.experts.115.w3", "model.layers.55.block_sparse_moe.experts.116.w3", "model.layers.55.block_sparse_moe.experts.117.w3", "model.layers.55.block_sparse_moe.experts.118.w3", "model.layers.55.block_sparse_moe.experts.119.w3", "model.layers.55.block_sparse_moe.experts.120.w3", "model.layers.55.block_sparse_moe.experts.121.w3", "model.layers.55.block_sparse_moe.experts.122.w3", "model.layers.55.block_sparse_moe.experts.123.w3", "model.layers.55.block_sparse_moe.experts.124.w3", "model.layers.55.block_sparse_moe.experts.125.w3", "model.layers.55.block_sparse_moe.experts.126.w3", "model.layers.55.block_sparse_moe.experts.127.w3", "model.layers.55.block_sparse_moe.experts.128.w3", "model.layers.55.block_sparse_moe.experts.129.w3", "model.layers.55.block_sparse_moe.experts.130.w3", "model.layers.55.block_sparse_moe.experts.131.w3", "model.layers.55.block_sparse_moe.experts.132.w3", "model.layers.55.block_sparse_moe.experts.133.w3", "model.layers.55.block_sparse_moe.experts.134.w3", "model.layers.55.block_sparse_moe.experts.135.w3", "model.layers.55.block_sparse_moe.experts.136.w3", "model.layers.55.block_sparse_moe.experts.137.w3", "model.layers.55.block_sparse_moe.experts.138.w3", "model.layers.55.block_sparse_moe.experts.139.w3", "model.layers.55.block_sparse_moe.experts.140.w3", "model.layers.55.block_sparse_moe.experts.141.w3", "model.layers.55.block_sparse_moe.experts.142.w3", "model.layers.55.block_sparse_moe.experts.143.w3", "model.layers.55.block_sparse_moe.experts.144.w3", "model.layers.55.block_sparse_moe.experts.145.w3", "model.layers.55.block_sparse_moe.experts.146.w3", "model.layers.55.block_sparse_moe.experts.147.w3", "model.layers.55.block_sparse_moe.experts.148.w3", "model.layers.55.block_sparse_moe.experts.149.w3", "model.layers.55.block_sparse_moe.experts.150.w3", "model.layers.55.block_sparse_moe.experts.151.w3", "model.layers.55.block_sparse_moe.experts.152.w3", "model.layers.55.block_sparse_moe.experts.153.w3", "model.layers.55.block_sparse_moe.experts.154.w3", "model.layers.55.block_sparse_moe.experts.155.w3", "model.layers.55.block_sparse_moe.experts.156.w3", "model.layers.55.block_sparse_moe.experts.157.w3", "model.layers.55.block_sparse_moe.experts.158.w3", "model.layers.55.block_sparse_moe.experts.159.w3", "model.layers.55.block_sparse_moe.experts.160.w3", "model.layers.55.block_sparse_moe.experts.161.w3", "model.layers.55.block_sparse_moe.experts.162.w3", "model.layers.55.block_sparse_moe.experts.163.w3", "model.layers.55.block_sparse_moe.experts.164.w3", "model.layers.55.block_sparse_moe.experts.165.w3", "model.layers.55.block_sparse_moe.experts.166.w3", "model.layers.55.block_sparse_moe.experts.167.w3", "model.layers.55.block_sparse_moe.experts.168.w3", "model.layers.55.block_sparse_moe.experts.169.w3", "model.layers.55.block_sparse_moe.experts.170.w3", "model.layers.55.block_sparse_moe.experts.171.w3", "model.layers.55.block_sparse_moe.experts.172.w3", "model.layers.55.block_sparse_moe.experts.173.w3", "model.layers.55.block_sparse_moe.experts.174.w3", "model.layers.55.block_sparse_moe.experts.175.w3", "model.layers.55.block_sparse_moe.experts.176.w3", "model.layers.55.block_sparse_moe.experts.177.w3", "model.layers.55.block_sparse_moe.experts.178.w3", "model.layers.55.block_sparse_moe.experts.179.w3", "model.layers.55.block_sparse_moe.experts.180.w3", "model.layers.55.block_sparse_moe.experts.181.w3", "model.layers.55.block_sparse_moe.experts.182.w3", "model.layers.55.block_sparse_moe.experts.183.w3", "model.layers.55.block_sparse_moe.experts.184.w3", "model.layers.55.block_sparse_moe.experts.185.w3", "model.layers.55.block_sparse_moe.experts.186.w3", "model.layers.55.block_sparse_moe.experts.187.w3", "model.layers.55.block_sparse_moe.experts.188.w3", "model.layers.55.block_sparse_moe.experts.189.w3", "model.layers.55.block_sparse_moe.experts.190.w3", "model.layers.55.block_sparse_moe.experts.191.w3", "model.layers.55.block_sparse_moe.experts.192.w3", "model.layers.55.block_sparse_moe.experts.193.w3", "model.layers.55.block_sparse_moe.experts.194.w3", "model.layers.55.block_sparse_moe.experts.195.w3", "model.layers.55.block_sparse_moe.experts.196.w3", "model.layers.55.block_sparse_moe.experts.197.w3", "model.layers.55.block_sparse_moe.experts.198.w3", "model.layers.55.block_sparse_moe.experts.199.w3", "model.layers.55.block_sparse_moe.experts.200.w3", "model.layers.55.block_sparse_moe.experts.201.w3", "model.layers.55.block_sparse_moe.experts.202.w3", "model.layers.55.block_sparse_moe.experts.203.w3", "model.layers.55.block_sparse_moe.experts.204.w3", "model.layers.55.block_sparse_moe.experts.205.w3", "model.layers.55.block_sparse_moe.experts.206.w3", "model.layers.55.block_sparse_moe.experts.207.w3", "model.layers.55.block_sparse_moe.experts.208.w3", "model.layers.55.block_sparse_moe.experts.209.w3", "model.layers.55.block_sparse_moe.experts.210.w3", "model.layers.55.block_sparse_moe.experts.211.w3", "model.layers.55.block_sparse_moe.experts.212.w3", "model.layers.55.block_sparse_moe.experts.213.w3", "model.layers.55.block_sparse_moe.experts.214.w3", "model.layers.55.block_sparse_moe.experts.215.w3", "model.layers.55.block_sparse_moe.experts.216.w3", "model.layers.55.block_sparse_moe.experts.217.w3", "model.layers.55.block_sparse_moe.experts.218.w3", "model.layers.55.block_sparse_moe.experts.219.w3", "model.layers.55.block_sparse_moe.experts.220.w3", "model.layers.55.block_sparse_moe.experts.221.w3", "model.layers.55.block_sparse_moe.experts.222.w3", "model.layers.55.block_sparse_moe.experts.223.w3", "model.layers.55.block_sparse_moe.experts.224.w3", "model.layers.55.block_sparse_moe.experts.225.w3", "model.layers.55.block_sparse_moe.experts.226.w3", "model.layers.55.block_sparse_moe.experts.227.w3", "model.layers.55.block_sparse_moe.experts.228.w3", "model.layers.55.block_sparse_moe.experts.229.w3", "model.layers.55.block_sparse_moe.experts.230.w3", "model.layers.55.block_sparse_moe.experts.231.w3", "model.layers.55.block_sparse_moe.experts.232.w3", "model.layers.55.block_sparse_moe.experts.233.w3", "model.layers.55.block_sparse_moe.experts.234.w3", "model.layers.55.block_sparse_moe.experts.235.w3", "model.layers.55.block_sparse_moe.experts.236.w3", "model.layers.55.block_sparse_moe.experts.237.w3", "model.layers.55.block_sparse_moe.experts.238.w3", "model.layers.55.block_sparse_moe.experts.239.w3", "model.layers.55.block_sparse_moe.experts.240.w3", "model.layers.55.block_sparse_moe.experts.241.w3", "model.layers.55.block_sparse_moe.experts.242.w3", "model.layers.55.block_sparse_moe.experts.243.w3", "model.layers.55.block_sparse_moe.experts.244.w3", "model.layers.55.block_sparse_moe.experts.245.w3", "model.layers.55.block_sparse_moe.experts.246.w3", "model.layers.55.block_sparse_moe.experts.247.w3", "model.layers.55.block_sparse_moe.experts.248.w3", "model.layers.55.block_sparse_moe.experts.249.w3", "model.layers.55.block_sparse_moe.experts.250.w3", "model.layers.55.block_sparse_moe.experts.251.w3", "model.layers.55.block_sparse_moe.experts.252.w3", "model.layers.55.block_sparse_moe.experts.253.w3", "model.layers.55.block_sparse_moe.experts.254.w3", "model.layers.55.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00018701255321507126, "dbits": 2415919104 } ] }, { "idx": 279, "layers": [ "model.layers.55.block_sparse_moe.experts.0.w2", "model.layers.55.block_sparse_moe.experts.1.w2", "model.layers.55.block_sparse_moe.experts.2.w2", "model.layers.55.block_sparse_moe.experts.3.w2", "model.layers.55.block_sparse_moe.experts.4.w2", "model.layers.55.block_sparse_moe.experts.5.w2", "model.layers.55.block_sparse_moe.experts.6.w2", "model.layers.55.block_sparse_moe.experts.7.w2", "model.layers.55.block_sparse_moe.experts.8.w2", "model.layers.55.block_sparse_moe.experts.9.w2", "model.layers.55.block_sparse_moe.experts.10.w2", "model.layers.55.block_sparse_moe.experts.11.w2", "model.layers.55.block_sparse_moe.experts.12.w2", "model.layers.55.block_sparse_moe.experts.13.w2", "model.layers.55.block_sparse_moe.experts.14.w2", "model.layers.55.block_sparse_moe.experts.15.w2", "model.layers.55.block_sparse_moe.experts.16.w2", "model.layers.55.block_sparse_moe.experts.17.w2", "model.layers.55.block_sparse_moe.experts.18.w2", "model.layers.55.block_sparse_moe.experts.19.w2", "model.layers.55.block_sparse_moe.experts.20.w2", "model.layers.55.block_sparse_moe.experts.21.w2", "model.layers.55.block_sparse_moe.experts.22.w2", "model.layers.55.block_sparse_moe.experts.23.w2", "model.layers.55.block_sparse_moe.experts.24.w2", "model.layers.55.block_sparse_moe.experts.25.w2", "model.layers.55.block_sparse_moe.experts.26.w2", "model.layers.55.block_sparse_moe.experts.27.w2", "model.layers.55.block_sparse_moe.experts.28.w2", "model.layers.55.block_sparse_moe.experts.29.w2", "model.layers.55.block_sparse_moe.experts.30.w2", "model.layers.55.block_sparse_moe.experts.31.w2", "model.layers.55.block_sparse_moe.experts.32.w2", "model.layers.55.block_sparse_moe.experts.33.w2", "model.layers.55.block_sparse_moe.experts.34.w2", "model.layers.55.block_sparse_moe.experts.35.w2", "model.layers.55.block_sparse_moe.experts.36.w2", "model.layers.55.block_sparse_moe.experts.37.w2", "model.layers.55.block_sparse_moe.experts.38.w2", "model.layers.55.block_sparse_moe.experts.39.w2", "model.layers.55.block_sparse_moe.experts.40.w2", "model.layers.55.block_sparse_moe.experts.41.w2", "model.layers.55.block_sparse_moe.experts.42.w2", "model.layers.55.block_sparse_moe.experts.43.w2", "model.layers.55.block_sparse_moe.experts.44.w2", "model.layers.55.block_sparse_moe.experts.45.w2", "model.layers.55.block_sparse_moe.experts.46.w2", "model.layers.55.block_sparse_moe.experts.47.w2", "model.layers.55.block_sparse_moe.experts.48.w2", "model.layers.55.block_sparse_moe.experts.49.w2", "model.layers.55.block_sparse_moe.experts.50.w2", "model.layers.55.block_sparse_moe.experts.51.w2", "model.layers.55.block_sparse_moe.experts.52.w2", "model.layers.55.block_sparse_moe.experts.53.w2", "model.layers.55.block_sparse_moe.experts.54.w2", "model.layers.55.block_sparse_moe.experts.55.w2", "model.layers.55.block_sparse_moe.experts.56.w2", "model.layers.55.block_sparse_moe.experts.57.w2", "model.layers.55.block_sparse_moe.experts.58.w2", "model.layers.55.block_sparse_moe.experts.59.w2", "model.layers.55.block_sparse_moe.experts.60.w2", "model.layers.55.block_sparse_moe.experts.61.w2", "model.layers.55.block_sparse_moe.experts.62.w2", "model.layers.55.block_sparse_moe.experts.63.w2", "model.layers.55.block_sparse_moe.experts.64.w2", "model.layers.55.block_sparse_moe.experts.65.w2", "model.layers.55.block_sparse_moe.experts.66.w2", "model.layers.55.block_sparse_moe.experts.67.w2", "model.layers.55.block_sparse_moe.experts.68.w2", "model.layers.55.block_sparse_moe.experts.69.w2", "model.layers.55.block_sparse_moe.experts.70.w2", "model.layers.55.block_sparse_moe.experts.71.w2", "model.layers.55.block_sparse_moe.experts.72.w2", "model.layers.55.block_sparse_moe.experts.73.w2", "model.layers.55.block_sparse_moe.experts.74.w2", "model.layers.55.block_sparse_moe.experts.75.w2", "model.layers.55.block_sparse_moe.experts.76.w2", "model.layers.55.block_sparse_moe.experts.77.w2", "model.layers.55.block_sparse_moe.experts.78.w2", "model.layers.55.block_sparse_moe.experts.79.w2", "model.layers.55.block_sparse_moe.experts.80.w2", "model.layers.55.block_sparse_moe.experts.81.w2", "model.layers.55.block_sparse_moe.experts.82.w2", "model.layers.55.block_sparse_moe.experts.83.w2", "model.layers.55.block_sparse_moe.experts.84.w2", "model.layers.55.block_sparse_moe.experts.85.w2", "model.layers.55.block_sparse_moe.experts.86.w2", "model.layers.55.block_sparse_moe.experts.87.w2", "model.layers.55.block_sparse_moe.experts.88.w2", "model.layers.55.block_sparse_moe.experts.89.w2", "model.layers.55.block_sparse_moe.experts.90.w2", "model.layers.55.block_sparse_moe.experts.91.w2", "model.layers.55.block_sparse_moe.experts.92.w2", "model.layers.55.block_sparse_moe.experts.93.w2", "model.layers.55.block_sparse_moe.experts.94.w2", "model.layers.55.block_sparse_moe.experts.95.w2", "model.layers.55.block_sparse_moe.experts.96.w2", "model.layers.55.block_sparse_moe.experts.97.w2", "model.layers.55.block_sparse_moe.experts.98.w2", "model.layers.55.block_sparse_moe.experts.99.w2", "model.layers.55.block_sparse_moe.experts.100.w2", "model.layers.55.block_sparse_moe.experts.101.w2", "model.layers.55.block_sparse_moe.experts.102.w2", "model.layers.55.block_sparse_moe.experts.103.w2", "model.layers.55.block_sparse_moe.experts.104.w2", "model.layers.55.block_sparse_moe.experts.105.w2", "model.layers.55.block_sparse_moe.experts.106.w2", "model.layers.55.block_sparse_moe.experts.107.w2", "model.layers.55.block_sparse_moe.experts.108.w2", "model.layers.55.block_sparse_moe.experts.109.w2", "model.layers.55.block_sparse_moe.experts.110.w2", "model.layers.55.block_sparse_moe.experts.111.w2", "model.layers.55.block_sparse_moe.experts.112.w2", "model.layers.55.block_sparse_moe.experts.113.w2", "model.layers.55.block_sparse_moe.experts.114.w2", "model.layers.55.block_sparse_moe.experts.115.w2", "model.layers.55.block_sparse_moe.experts.116.w2", "model.layers.55.block_sparse_moe.experts.117.w2", "model.layers.55.block_sparse_moe.experts.118.w2", "model.layers.55.block_sparse_moe.experts.119.w2", "model.layers.55.block_sparse_moe.experts.120.w2", "model.layers.55.block_sparse_moe.experts.121.w2", "model.layers.55.block_sparse_moe.experts.122.w2", "model.layers.55.block_sparse_moe.experts.123.w2", "model.layers.55.block_sparse_moe.experts.124.w2", "model.layers.55.block_sparse_moe.experts.125.w2", "model.layers.55.block_sparse_moe.experts.126.w2", "model.layers.55.block_sparse_moe.experts.127.w2", "model.layers.55.block_sparse_moe.experts.128.w2", "model.layers.55.block_sparse_moe.experts.129.w2", "model.layers.55.block_sparse_moe.experts.130.w2", "model.layers.55.block_sparse_moe.experts.131.w2", "model.layers.55.block_sparse_moe.experts.132.w2", "model.layers.55.block_sparse_moe.experts.133.w2", "model.layers.55.block_sparse_moe.experts.134.w2", "model.layers.55.block_sparse_moe.experts.135.w2", "model.layers.55.block_sparse_moe.experts.136.w2", "model.layers.55.block_sparse_moe.experts.137.w2", "model.layers.55.block_sparse_moe.experts.138.w2", "model.layers.55.block_sparse_moe.experts.139.w2", "model.layers.55.block_sparse_moe.experts.140.w2", "model.layers.55.block_sparse_moe.experts.141.w2", "model.layers.55.block_sparse_moe.experts.142.w2", "model.layers.55.block_sparse_moe.experts.143.w2", "model.layers.55.block_sparse_moe.experts.144.w2", "model.layers.55.block_sparse_moe.experts.145.w2", "model.layers.55.block_sparse_moe.experts.146.w2", "model.layers.55.block_sparse_moe.experts.147.w2", "model.layers.55.block_sparse_moe.experts.148.w2", "model.layers.55.block_sparse_moe.experts.149.w2", "model.layers.55.block_sparse_moe.experts.150.w2", "model.layers.55.block_sparse_moe.experts.151.w2", "model.layers.55.block_sparse_moe.experts.152.w2", "model.layers.55.block_sparse_moe.experts.153.w2", "model.layers.55.block_sparse_moe.experts.154.w2", "model.layers.55.block_sparse_moe.experts.155.w2", "model.layers.55.block_sparse_moe.experts.156.w2", "model.layers.55.block_sparse_moe.experts.157.w2", "model.layers.55.block_sparse_moe.experts.158.w2", "model.layers.55.block_sparse_moe.experts.159.w2", "model.layers.55.block_sparse_moe.experts.160.w2", "model.layers.55.block_sparse_moe.experts.161.w2", "model.layers.55.block_sparse_moe.experts.162.w2", "model.layers.55.block_sparse_moe.experts.163.w2", "model.layers.55.block_sparse_moe.experts.164.w2", "model.layers.55.block_sparse_moe.experts.165.w2", "model.layers.55.block_sparse_moe.experts.166.w2", "model.layers.55.block_sparse_moe.experts.167.w2", "model.layers.55.block_sparse_moe.experts.168.w2", "model.layers.55.block_sparse_moe.experts.169.w2", "model.layers.55.block_sparse_moe.experts.170.w2", "model.layers.55.block_sparse_moe.experts.171.w2", "model.layers.55.block_sparse_moe.experts.172.w2", "model.layers.55.block_sparse_moe.experts.173.w2", "model.layers.55.block_sparse_moe.experts.174.w2", "model.layers.55.block_sparse_moe.experts.175.w2", "model.layers.55.block_sparse_moe.experts.176.w2", "model.layers.55.block_sparse_moe.experts.177.w2", "model.layers.55.block_sparse_moe.experts.178.w2", "model.layers.55.block_sparse_moe.experts.179.w2", "model.layers.55.block_sparse_moe.experts.180.w2", "model.layers.55.block_sparse_moe.experts.181.w2", "model.layers.55.block_sparse_moe.experts.182.w2", "model.layers.55.block_sparse_moe.experts.183.w2", "model.layers.55.block_sparse_moe.experts.184.w2", "model.layers.55.block_sparse_moe.experts.185.w2", "model.layers.55.block_sparse_moe.experts.186.w2", "model.layers.55.block_sparse_moe.experts.187.w2", "model.layers.55.block_sparse_moe.experts.188.w2", "model.layers.55.block_sparse_moe.experts.189.w2", "model.layers.55.block_sparse_moe.experts.190.w2", "model.layers.55.block_sparse_moe.experts.191.w2", "model.layers.55.block_sparse_moe.experts.192.w2", "model.layers.55.block_sparse_moe.experts.193.w2", "model.layers.55.block_sparse_moe.experts.194.w2", "model.layers.55.block_sparse_moe.experts.195.w2", "model.layers.55.block_sparse_moe.experts.196.w2", "model.layers.55.block_sparse_moe.experts.197.w2", "model.layers.55.block_sparse_moe.experts.198.w2", "model.layers.55.block_sparse_moe.experts.199.w2", "model.layers.55.block_sparse_moe.experts.200.w2", "model.layers.55.block_sparse_moe.experts.201.w2", "model.layers.55.block_sparse_moe.experts.202.w2", "model.layers.55.block_sparse_moe.experts.203.w2", "model.layers.55.block_sparse_moe.experts.204.w2", "model.layers.55.block_sparse_moe.experts.205.w2", "model.layers.55.block_sparse_moe.experts.206.w2", "model.layers.55.block_sparse_moe.experts.207.w2", "model.layers.55.block_sparse_moe.experts.208.w2", "model.layers.55.block_sparse_moe.experts.209.w2", "model.layers.55.block_sparse_moe.experts.210.w2", "model.layers.55.block_sparse_moe.experts.211.w2", "model.layers.55.block_sparse_moe.experts.212.w2", "model.layers.55.block_sparse_moe.experts.213.w2", "model.layers.55.block_sparse_moe.experts.214.w2", "model.layers.55.block_sparse_moe.experts.215.w2", "model.layers.55.block_sparse_moe.experts.216.w2", "model.layers.55.block_sparse_moe.experts.217.w2", "model.layers.55.block_sparse_moe.experts.218.w2", "model.layers.55.block_sparse_moe.experts.219.w2", "model.layers.55.block_sparse_moe.experts.220.w2", "model.layers.55.block_sparse_moe.experts.221.w2", "model.layers.55.block_sparse_moe.experts.222.w2", "model.layers.55.block_sparse_moe.experts.223.w2", "model.layers.55.block_sparse_moe.experts.224.w2", "model.layers.55.block_sparse_moe.experts.225.w2", "model.layers.55.block_sparse_moe.experts.226.w2", "model.layers.55.block_sparse_moe.experts.227.w2", "model.layers.55.block_sparse_moe.experts.228.w2", "model.layers.55.block_sparse_moe.experts.229.w2", "model.layers.55.block_sparse_moe.experts.230.w2", "model.layers.55.block_sparse_moe.experts.231.w2", "model.layers.55.block_sparse_moe.experts.232.w2", "model.layers.55.block_sparse_moe.experts.233.w2", "model.layers.55.block_sparse_moe.experts.234.w2", "model.layers.55.block_sparse_moe.experts.235.w2", "model.layers.55.block_sparse_moe.experts.236.w2", "model.layers.55.block_sparse_moe.experts.237.w2", "model.layers.55.block_sparse_moe.experts.238.w2", "model.layers.55.block_sparse_moe.experts.239.w2", "model.layers.55.block_sparse_moe.experts.240.w2", "model.layers.55.block_sparse_moe.experts.241.w2", "model.layers.55.block_sparse_moe.experts.242.w2", "model.layers.55.block_sparse_moe.experts.243.w2", "model.layers.55.block_sparse_moe.experts.244.w2", "model.layers.55.block_sparse_moe.experts.245.w2", "model.layers.55.block_sparse_moe.experts.246.w2", "model.layers.55.block_sparse_moe.experts.247.w2", "model.layers.55.block_sparse_moe.experts.248.w2", "model.layers.55.block_sparse_moe.experts.249.w2", "model.layers.55.block_sparse_moe.experts.250.w2", "model.layers.55.block_sparse_moe.experts.251.w2", "model.layers.55.block_sparse_moe.experts.252.w2", "model.layers.55.block_sparse_moe.experts.253.w2", "model.layers.55.block_sparse_moe.experts.254.w2", "model.layers.55.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00010454952716831833, "dbits": 1207959552 } ] }, { "idx": 280, "layers": [ "model.layers.56.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0012030243873595303, "dbits": 18874368 } ] }, { "idx": 281, "layers": [ "model.layers.56.self_attn.k_proj", "model.layers.56.self_attn.v_proj" ], "candidates": [ { "dkld": -0.018290507793426447, "dbits": 6291456 } ] }, { "idx": 282, "layers": [ "model.layers.56.self_attn.o_proj" ], "candidates": [ { "dkld": -0.008808362483978205, "dbits": 18874368 } ] }, { "idx": 283, "layers": [ "model.layers.56.block_sparse_moe.experts.0.w1", "model.layers.56.block_sparse_moe.experts.1.w1", "model.layers.56.block_sparse_moe.experts.2.w1", "model.layers.56.block_sparse_moe.experts.3.w1", "model.layers.56.block_sparse_moe.experts.4.w1", "model.layers.56.block_sparse_moe.experts.5.w1", "model.layers.56.block_sparse_moe.experts.6.w1", "model.layers.56.block_sparse_moe.experts.7.w1", "model.layers.56.block_sparse_moe.experts.8.w1", "model.layers.56.block_sparse_moe.experts.9.w1", "model.layers.56.block_sparse_moe.experts.10.w1", "model.layers.56.block_sparse_moe.experts.11.w1", "model.layers.56.block_sparse_moe.experts.12.w1", "model.layers.56.block_sparse_moe.experts.13.w1", "model.layers.56.block_sparse_moe.experts.14.w1", "model.layers.56.block_sparse_moe.experts.15.w1", "model.layers.56.block_sparse_moe.experts.16.w1", "model.layers.56.block_sparse_moe.experts.17.w1", "model.layers.56.block_sparse_moe.experts.18.w1", "model.layers.56.block_sparse_moe.experts.19.w1", "model.layers.56.block_sparse_moe.experts.20.w1", "model.layers.56.block_sparse_moe.experts.21.w1", "model.layers.56.block_sparse_moe.experts.22.w1", "model.layers.56.block_sparse_moe.experts.23.w1", "model.layers.56.block_sparse_moe.experts.24.w1", "model.layers.56.block_sparse_moe.experts.25.w1", "model.layers.56.block_sparse_moe.experts.26.w1", "model.layers.56.block_sparse_moe.experts.27.w1", "model.layers.56.block_sparse_moe.experts.28.w1", "model.layers.56.block_sparse_moe.experts.29.w1", "model.layers.56.block_sparse_moe.experts.30.w1", "model.layers.56.block_sparse_moe.experts.31.w1", "model.layers.56.block_sparse_moe.experts.32.w1", "model.layers.56.block_sparse_moe.experts.33.w1", "model.layers.56.block_sparse_moe.experts.34.w1", "model.layers.56.block_sparse_moe.experts.35.w1", "model.layers.56.block_sparse_moe.experts.36.w1", "model.layers.56.block_sparse_moe.experts.37.w1", "model.layers.56.block_sparse_moe.experts.38.w1", "model.layers.56.block_sparse_moe.experts.39.w1", "model.layers.56.block_sparse_moe.experts.40.w1", "model.layers.56.block_sparse_moe.experts.41.w1", "model.layers.56.block_sparse_moe.experts.42.w1", "model.layers.56.block_sparse_moe.experts.43.w1", "model.layers.56.block_sparse_moe.experts.44.w1", "model.layers.56.block_sparse_moe.experts.45.w1", "model.layers.56.block_sparse_moe.experts.46.w1", "model.layers.56.block_sparse_moe.experts.47.w1", "model.layers.56.block_sparse_moe.experts.48.w1", "model.layers.56.block_sparse_moe.experts.49.w1", "model.layers.56.block_sparse_moe.experts.50.w1", "model.layers.56.block_sparse_moe.experts.51.w1", "model.layers.56.block_sparse_moe.experts.52.w1", "model.layers.56.block_sparse_moe.experts.53.w1", "model.layers.56.block_sparse_moe.experts.54.w1", "model.layers.56.block_sparse_moe.experts.55.w1", "model.layers.56.block_sparse_moe.experts.56.w1", "model.layers.56.block_sparse_moe.experts.57.w1", "model.layers.56.block_sparse_moe.experts.58.w1", "model.layers.56.block_sparse_moe.experts.59.w1", "model.layers.56.block_sparse_moe.experts.60.w1", "model.layers.56.block_sparse_moe.experts.61.w1", "model.layers.56.block_sparse_moe.experts.62.w1", "model.layers.56.block_sparse_moe.experts.63.w1", "model.layers.56.block_sparse_moe.experts.64.w1", "model.layers.56.block_sparse_moe.experts.65.w1", "model.layers.56.block_sparse_moe.experts.66.w1", "model.layers.56.block_sparse_moe.experts.67.w1", "model.layers.56.block_sparse_moe.experts.68.w1", "model.layers.56.block_sparse_moe.experts.69.w1", "model.layers.56.block_sparse_moe.experts.70.w1", "model.layers.56.block_sparse_moe.experts.71.w1", "model.layers.56.block_sparse_moe.experts.72.w1", "model.layers.56.block_sparse_moe.experts.73.w1", "model.layers.56.block_sparse_moe.experts.74.w1", "model.layers.56.block_sparse_moe.experts.75.w1", "model.layers.56.block_sparse_moe.experts.76.w1", "model.layers.56.block_sparse_moe.experts.77.w1", "model.layers.56.block_sparse_moe.experts.78.w1", "model.layers.56.block_sparse_moe.experts.79.w1", "model.layers.56.block_sparse_moe.experts.80.w1", "model.layers.56.block_sparse_moe.experts.81.w1", "model.layers.56.block_sparse_moe.experts.82.w1", "model.layers.56.block_sparse_moe.experts.83.w1", "model.layers.56.block_sparse_moe.experts.84.w1", "model.layers.56.block_sparse_moe.experts.85.w1", "model.layers.56.block_sparse_moe.experts.86.w1", "model.layers.56.block_sparse_moe.experts.87.w1", "model.layers.56.block_sparse_moe.experts.88.w1", "model.layers.56.block_sparse_moe.experts.89.w1", "model.layers.56.block_sparse_moe.experts.90.w1", "model.layers.56.block_sparse_moe.experts.91.w1", "model.layers.56.block_sparse_moe.experts.92.w1", "model.layers.56.block_sparse_moe.experts.93.w1", "model.layers.56.block_sparse_moe.experts.94.w1", "model.layers.56.block_sparse_moe.experts.95.w1", "model.layers.56.block_sparse_moe.experts.96.w1", "model.layers.56.block_sparse_moe.experts.97.w1", "model.layers.56.block_sparse_moe.experts.98.w1", "model.layers.56.block_sparse_moe.experts.99.w1", "model.layers.56.block_sparse_moe.experts.100.w1", "model.layers.56.block_sparse_moe.experts.101.w1", "model.layers.56.block_sparse_moe.experts.102.w1", "model.layers.56.block_sparse_moe.experts.103.w1", "model.layers.56.block_sparse_moe.experts.104.w1", "model.layers.56.block_sparse_moe.experts.105.w1", "model.layers.56.block_sparse_moe.experts.106.w1", "model.layers.56.block_sparse_moe.experts.107.w1", "model.layers.56.block_sparse_moe.experts.108.w1", "model.layers.56.block_sparse_moe.experts.109.w1", "model.layers.56.block_sparse_moe.experts.110.w1", "model.layers.56.block_sparse_moe.experts.111.w1", "model.layers.56.block_sparse_moe.experts.112.w1", "model.layers.56.block_sparse_moe.experts.113.w1", "model.layers.56.block_sparse_moe.experts.114.w1", "model.layers.56.block_sparse_moe.experts.115.w1", "model.layers.56.block_sparse_moe.experts.116.w1", "model.layers.56.block_sparse_moe.experts.117.w1", "model.layers.56.block_sparse_moe.experts.118.w1", "model.layers.56.block_sparse_moe.experts.119.w1", "model.layers.56.block_sparse_moe.experts.120.w1", "model.layers.56.block_sparse_moe.experts.121.w1", "model.layers.56.block_sparse_moe.experts.122.w1", "model.layers.56.block_sparse_moe.experts.123.w1", "model.layers.56.block_sparse_moe.experts.124.w1", "model.layers.56.block_sparse_moe.experts.125.w1", "model.layers.56.block_sparse_moe.experts.126.w1", "model.layers.56.block_sparse_moe.experts.127.w1", "model.layers.56.block_sparse_moe.experts.128.w1", "model.layers.56.block_sparse_moe.experts.129.w1", "model.layers.56.block_sparse_moe.experts.130.w1", "model.layers.56.block_sparse_moe.experts.131.w1", "model.layers.56.block_sparse_moe.experts.132.w1", "model.layers.56.block_sparse_moe.experts.133.w1", "model.layers.56.block_sparse_moe.experts.134.w1", "model.layers.56.block_sparse_moe.experts.135.w1", "model.layers.56.block_sparse_moe.experts.136.w1", "model.layers.56.block_sparse_moe.experts.137.w1", "model.layers.56.block_sparse_moe.experts.138.w1", "model.layers.56.block_sparse_moe.experts.139.w1", "model.layers.56.block_sparse_moe.experts.140.w1", "model.layers.56.block_sparse_moe.experts.141.w1", "model.layers.56.block_sparse_moe.experts.142.w1", "model.layers.56.block_sparse_moe.experts.143.w1", "model.layers.56.block_sparse_moe.experts.144.w1", "model.layers.56.block_sparse_moe.experts.145.w1", "model.layers.56.block_sparse_moe.experts.146.w1", "model.layers.56.block_sparse_moe.experts.147.w1", "model.layers.56.block_sparse_moe.experts.148.w1", "model.layers.56.block_sparse_moe.experts.149.w1", "model.layers.56.block_sparse_moe.experts.150.w1", "model.layers.56.block_sparse_moe.experts.151.w1", "model.layers.56.block_sparse_moe.experts.152.w1", "model.layers.56.block_sparse_moe.experts.153.w1", "model.layers.56.block_sparse_moe.experts.154.w1", "model.layers.56.block_sparse_moe.experts.155.w1", "model.layers.56.block_sparse_moe.experts.156.w1", "model.layers.56.block_sparse_moe.experts.157.w1", "model.layers.56.block_sparse_moe.experts.158.w1", "model.layers.56.block_sparse_moe.experts.159.w1", "model.layers.56.block_sparse_moe.experts.160.w1", "model.layers.56.block_sparse_moe.experts.161.w1", "model.layers.56.block_sparse_moe.experts.162.w1", "model.layers.56.block_sparse_moe.experts.163.w1", "model.layers.56.block_sparse_moe.experts.164.w1", "model.layers.56.block_sparse_moe.experts.165.w1", "model.layers.56.block_sparse_moe.experts.166.w1", "model.layers.56.block_sparse_moe.experts.167.w1", "model.layers.56.block_sparse_moe.experts.168.w1", "model.layers.56.block_sparse_moe.experts.169.w1", "model.layers.56.block_sparse_moe.experts.170.w1", "model.layers.56.block_sparse_moe.experts.171.w1", "model.layers.56.block_sparse_moe.experts.172.w1", "model.layers.56.block_sparse_moe.experts.173.w1", "model.layers.56.block_sparse_moe.experts.174.w1", "model.layers.56.block_sparse_moe.experts.175.w1", "model.layers.56.block_sparse_moe.experts.176.w1", "model.layers.56.block_sparse_moe.experts.177.w1", "model.layers.56.block_sparse_moe.experts.178.w1", "model.layers.56.block_sparse_moe.experts.179.w1", "model.layers.56.block_sparse_moe.experts.180.w1", "model.layers.56.block_sparse_moe.experts.181.w1", "model.layers.56.block_sparse_moe.experts.182.w1", "model.layers.56.block_sparse_moe.experts.183.w1", "model.layers.56.block_sparse_moe.experts.184.w1", "model.layers.56.block_sparse_moe.experts.185.w1", "model.layers.56.block_sparse_moe.experts.186.w1", "model.layers.56.block_sparse_moe.experts.187.w1", "model.layers.56.block_sparse_moe.experts.188.w1", "model.layers.56.block_sparse_moe.experts.189.w1", "model.layers.56.block_sparse_moe.experts.190.w1", "model.layers.56.block_sparse_moe.experts.191.w1", "model.layers.56.block_sparse_moe.experts.192.w1", "model.layers.56.block_sparse_moe.experts.193.w1", "model.layers.56.block_sparse_moe.experts.194.w1", "model.layers.56.block_sparse_moe.experts.195.w1", "model.layers.56.block_sparse_moe.experts.196.w1", "model.layers.56.block_sparse_moe.experts.197.w1", "model.layers.56.block_sparse_moe.experts.198.w1", "model.layers.56.block_sparse_moe.experts.199.w1", "model.layers.56.block_sparse_moe.experts.200.w1", "model.layers.56.block_sparse_moe.experts.201.w1", "model.layers.56.block_sparse_moe.experts.202.w1", "model.layers.56.block_sparse_moe.experts.203.w1", "model.layers.56.block_sparse_moe.experts.204.w1", "model.layers.56.block_sparse_moe.experts.205.w1", "model.layers.56.block_sparse_moe.experts.206.w1", "model.layers.56.block_sparse_moe.experts.207.w1", "model.layers.56.block_sparse_moe.experts.208.w1", "model.layers.56.block_sparse_moe.experts.209.w1", "model.layers.56.block_sparse_moe.experts.210.w1", "model.layers.56.block_sparse_moe.experts.211.w1", "model.layers.56.block_sparse_moe.experts.212.w1", "model.layers.56.block_sparse_moe.experts.213.w1", "model.layers.56.block_sparse_moe.experts.214.w1", "model.layers.56.block_sparse_moe.experts.215.w1", "model.layers.56.block_sparse_moe.experts.216.w1", "model.layers.56.block_sparse_moe.experts.217.w1", "model.layers.56.block_sparse_moe.experts.218.w1", "model.layers.56.block_sparse_moe.experts.219.w1", "model.layers.56.block_sparse_moe.experts.220.w1", "model.layers.56.block_sparse_moe.experts.221.w1", "model.layers.56.block_sparse_moe.experts.222.w1", "model.layers.56.block_sparse_moe.experts.223.w1", "model.layers.56.block_sparse_moe.experts.224.w1", "model.layers.56.block_sparse_moe.experts.225.w1", "model.layers.56.block_sparse_moe.experts.226.w1", "model.layers.56.block_sparse_moe.experts.227.w1", "model.layers.56.block_sparse_moe.experts.228.w1", "model.layers.56.block_sparse_moe.experts.229.w1", "model.layers.56.block_sparse_moe.experts.230.w1", "model.layers.56.block_sparse_moe.experts.231.w1", "model.layers.56.block_sparse_moe.experts.232.w1", "model.layers.56.block_sparse_moe.experts.233.w1", "model.layers.56.block_sparse_moe.experts.234.w1", "model.layers.56.block_sparse_moe.experts.235.w1", "model.layers.56.block_sparse_moe.experts.236.w1", "model.layers.56.block_sparse_moe.experts.237.w1", "model.layers.56.block_sparse_moe.experts.238.w1", "model.layers.56.block_sparse_moe.experts.239.w1", "model.layers.56.block_sparse_moe.experts.240.w1", "model.layers.56.block_sparse_moe.experts.241.w1", "model.layers.56.block_sparse_moe.experts.242.w1", "model.layers.56.block_sparse_moe.experts.243.w1", "model.layers.56.block_sparse_moe.experts.244.w1", "model.layers.56.block_sparse_moe.experts.245.w1", "model.layers.56.block_sparse_moe.experts.246.w1", "model.layers.56.block_sparse_moe.experts.247.w1", "model.layers.56.block_sparse_moe.experts.248.w1", "model.layers.56.block_sparse_moe.experts.249.w1", "model.layers.56.block_sparse_moe.experts.250.w1", "model.layers.56.block_sparse_moe.experts.251.w1", "model.layers.56.block_sparse_moe.experts.252.w1", "model.layers.56.block_sparse_moe.experts.253.w1", "model.layers.56.block_sparse_moe.experts.254.w1", "model.layers.56.block_sparse_moe.experts.255.w1", "model.layers.56.block_sparse_moe.experts.0.w3", "model.layers.56.block_sparse_moe.experts.1.w3", "model.layers.56.block_sparse_moe.experts.2.w3", "model.layers.56.block_sparse_moe.experts.3.w3", "model.layers.56.block_sparse_moe.experts.4.w3", "model.layers.56.block_sparse_moe.experts.5.w3", "model.layers.56.block_sparse_moe.experts.6.w3", "model.layers.56.block_sparse_moe.experts.7.w3", "model.layers.56.block_sparse_moe.experts.8.w3", "model.layers.56.block_sparse_moe.experts.9.w3", "model.layers.56.block_sparse_moe.experts.10.w3", "model.layers.56.block_sparse_moe.experts.11.w3", "model.layers.56.block_sparse_moe.experts.12.w3", "model.layers.56.block_sparse_moe.experts.13.w3", "model.layers.56.block_sparse_moe.experts.14.w3", "model.layers.56.block_sparse_moe.experts.15.w3", "model.layers.56.block_sparse_moe.experts.16.w3", "model.layers.56.block_sparse_moe.experts.17.w3", "model.layers.56.block_sparse_moe.experts.18.w3", "model.layers.56.block_sparse_moe.experts.19.w3", "model.layers.56.block_sparse_moe.experts.20.w3", "model.layers.56.block_sparse_moe.experts.21.w3", "model.layers.56.block_sparse_moe.experts.22.w3", "model.layers.56.block_sparse_moe.experts.23.w3", "model.layers.56.block_sparse_moe.experts.24.w3", "model.layers.56.block_sparse_moe.experts.25.w3", "model.layers.56.block_sparse_moe.experts.26.w3", "model.layers.56.block_sparse_moe.experts.27.w3", "model.layers.56.block_sparse_moe.experts.28.w3", "model.layers.56.block_sparse_moe.experts.29.w3", "model.layers.56.block_sparse_moe.experts.30.w3", "model.layers.56.block_sparse_moe.experts.31.w3", "model.layers.56.block_sparse_moe.experts.32.w3", "model.layers.56.block_sparse_moe.experts.33.w3", "model.layers.56.block_sparse_moe.experts.34.w3", "model.layers.56.block_sparse_moe.experts.35.w3", "model.layers.56.block_sparse_moe.experts.36.w3", "model.layers.56.block_sparse_moe.experts.37.w3", "model.layers.56.block_sparse_moe.experts.38.w3", "model.layers.56.block_sparse_moe.experts.39.w3", "model.layers.56.block_sparse_moe.experts.40.w3", "model.layers.56.block_sparse_moe.experts.41.w3", "model.layers.56.block_sparse_moe.experts.42.w3", "model.layers.56.block_sparse_moe.experts.43.w3", "model.layers.56.block_sparse_moe.experts.44.w3", "model.layers.56.block_sparse_moe.experts.45.w3", "model.layers.56.block_sparse_moe.experts.46.w3", "model.layers.56.block_sparse_moe.experts.47.w3", "model.layers.56.block_sparse_moe.experts.48.w3", "model.layers.56.block_sparse_moe.experts.49.w3", "model.layers.56.block_sparse_moe.experts.50.w3", "model.layers.56.block_sparse_moe.experts.51.w3", "model.layers.56.block_sparse_moe.experts.52.w3", "model.layers.56.block_sparse_moe.experts.53.w3", "model.layers.56.block_sparse_moe.experts.54.w3", "model.layers.56.block_sparse_moe.experts.55.w3", "model.layers.56.block_sparse_moe.experts.56.w3", "model.layers.56.block_sparse_moe.experts.57.w3", "model.layers.56.block_sparse_moe.experts.58.w3", "model.layers.56.block_sparse_moe.experts.59.w3", "model.layers.56.block_sparse_moe.experts.60.w3", "model.layers.56.block_sparse_moe.experts.61.w3", "model.layers.56.block_sparse_moe.experts.62.w3", "model.layers.56.block_sparse_moe.experts.63.w3", "model.layers.56.block_sparse_moe.experts.64.w3", "model.layers.56.block_sparse_moe.experts.65.w3", "model.layers.56.block_sparse_moe.experts.66.w3", "model.layers.56.block_sparse_moe.experts.67.w3", "model.layers.56.block_sparse_moe.experts.68.w3", "model.layers.56.block_sparse_moe.experts.69.w3", "model.layers.56.block_sparse_moe.experts.70.w3", "model.layers.56.block_sparse_moe.experts.71.w3", "model.layers.56.block_sparse_moe.experts.72.w3", "model.layers.56.block_sparse_moe.experts.73.w3", "model.layers.56.block_sparse_moe.experts.74.w3", "model.layers.56.block_sparse_moe.experts.75.w3", "model.layers.56.block_sparse_moe.experts.76.w3", "model.layers.56.block_sparse_moe.experts.77.w3", "model.layers.56.block_sparse_moe.experts.78.w3", "model.layers.56.block_sparse_moe.experts.79.w3", "model.layers.56.block_sparse_moe.experts.80.w3", "model.layers.56.block_sparse_moe.experts.81.w3", "model.layers.56.block_sparse_moe.experts.82.w3", "model.layers.56.block_sparse_moe.experts.83.w3", "model.layers.56.block_sparse_moe.experts.84.w3", "model.layers.56.block_sparse_moe.experts.85.w3", "model.layers.56.block_sparse_moe.experts.86.w3", "model.layers.56.block_sparse_moe.experts.87.w3", "model.layers.56.block_sparse_moe.experts.88.w3", "model.layers.56.block_sparse_moe.experts.89.w3", "model.layers.56.block_sparse_moe.experts.90.w3", "model.layers.56.block_sparse_moe.experts.91.w3", "model.layers.56.block_sparse_moe.experts.92.w3", "model.layers.56.block_sparse_moe.experts.93.w3", "model.layers.56.block_sparse_moe.experts.94.w3", "model.layers.56.block_sparse_moe.experts.95.w3", "model.layers.56.block_sparse_moe.experts.96.w3", "model.layers.56.block_sparse_moe.experts.97.w3", "model.layers.56.block_sparse_moe.experts.98.w3", "model.layers.56.block_sparse_moe.experts.99.w3", "model.layers.56.block_sparse_moe.experts.100.w3", "model.layers.56.block_sparse_moe.experts.101.w3", "model.layers.56.block_sparse_moe.experts.102.w3", "model.layers.56.block_sparse_moe.experts.103.w3", "model.layers.56.block_sparse_moe.experts.104.w3", "model.layers.56.block_sparse_moe.experts.105.w3", "model.layers.56.block_sparse_moe.experts.106.w3", "model.layers.56.block_sparse_moe.experts.107.w3", "model.layers.56.block_sparse_moe.experts.108.w3", "model.layers.56.block_sparse_moe.experts.109.w3", "model.layers.56.block_sparse_moe.experts.110.w3", "model.layers.56.block_sparse_moe.experts.111.w3", "model.layers.56.block_sparse_moe.experts.112.w3", "model.layers.56.block_sparse_moe.experts.113.w3", "model.layers.56.block_sparse_moe.experts.114.w3", "model.layers.56.block_sparse_moe.experts.115.w3", "model.layers.56.block_sparse_moe.experts.116.w3", "model.layers.56.block_sparse_moe.experts.117.w3", "model.layers.56.block_sparse_moe.experts.118.w3", "model.layers.56.block_sparse_moe.experts.119.w3", "model.layers.56.block_sparse_moe.experts.120.w3", "model.layers.56.block_sparse_moe.experts.121.w3", "model.layers.56.block_sparse_moe.experts.122.w3", "model.layers.56.block_sparse_moe.experts.123.w3", "model.layers.56.block_sparse_moe.experts.124.w3", "model.layers.56.block_sparse_moe.experts.125.w3", "model.layers.56.block_sparse_moe.experts.126.w3", "model.layers.56.block_sparse_moe.experts.127.w3", "model.layers.56.block_sparse_moe.experts.128.w3", "model.layers.56.block_sparse_moe.experts.129.w3", "model.layers.56.block_sparse_moe.experts.130.w3", "model.layers.56.block_sparse_moe.experts.131.w3", "model.layers.56.block_sparse_moe.experts.132.w3", "model.layers.56.block_sparse_moe.experts.133.w3", "model.layers.56.block_sparse_moe.experts.134.w3", "model.layers.56.block_sparse_moe.experts.135.w3", "model.layers.56.block_sparse_moe.experts.136.w3", "model.layers.56.block_sparse_moe.experts.137.w3", "model.layers.56.block_sparse_moe.experts.138.w3", "model.layers.56.block_sparse_moe.experts.139.w3", "model.layers.56.block_sparse_moe.experts.140.w3", "model.layers.56.block_sparse_moe.experts.141.w3", "model.layers.56.block_sparse_moe.experts.142.w3", "model.layers.56.block_sparse_moe.experts.143.w3", "model.layers.56.block_sparse_moe.experts.144.w3", "model.layers.56.block_sparse_moe.experts.145.w3", "model.layers.56.block_sparse_moe.experts.146.w3", "model.layers.56.block_sparse_moe.experts.147.w3", "model.layers.56.block_sparse_moe.experts.148.w3", "model.layers.56.block_sparse_moe.experts.149.w3", "model.layers.56.block_sparse_moe.experts.150.w3", "model.layers.56.block_sparse_moe.experts.151.w3", "model.layers.56.block_sparse_moe.experts.152.w3", "model.layers.56.block_sparse_moe.experts.153.w3", "model.layers.56.block_sparse_moe.experts.154.w3", "model.layers.56.block_sparse_moe.experts.155.w3", "model.layers.56.block_sparse_moe.experts.156.w3", "model.layers.56.block_sparse_moe.experts.157.w3", "model.layers.56.block_sparse_moe.experts.158.w3", "model.layers.56.block_sparse_moe.experts.159.w3", "model.layers.56.block_sparse_moe.experts.160.w3", "model.layers.56.block_sparse_moe.experts.161.w3", "model.layers.56.block_sparse_moe.experts.162.w3", "model.layers.56.block_sparse_moe.experts.163.w3", "model.layers.56.block_sparse_moe.experts.164.w3", "model.layers.56.block_sparse_moe.experts.165.w3", "model.layers.56.block_sparse_moe.experts.166.w3", "model.layers.56.block_sparse_moe.experts.167.w3", "model.layers.56.block_sparse_moe.experts.168.w3", "model.layers.56.block_sparse_moe.experts.169.w3", "model.layers.56.block_sparse_moe.experts.170.w3", "model.layers.56.block_sparse_moe.experts.171.w3", "model.layers.56.block_sparse_moe.experts.172.w3", "model.layers.56.block_sparse_moe.experts.173.w3", "model.layers.56.block_sparse_moe.experts.174.w3", "model.layers.56.block_sparse_moe.experts.175.w3", "model.layers.56.block_sparse_moe.experts.176.w3", "model.layers.56.block_sparse_moe.experts.177.w3", "model.layers.56.block_sparse_moe.experts.178.w3", "model.layers.56.block_sparse_moe.experts.179.w3", "model.layers.56.block_sparse_moe.experts.180.w3", "model.layers.56.block_sparse_moe.experts.181.w3", "model.layers.56.block_sparse_moe.experts.182.w3", "model.layers.56.block_sparse_moe.experts.183.w3", "model.layers.56.block_sparse_moe.experts.184.w3", "model.layers.56.block_sparse_moe.experts.185.w3", "model.layers.56.block_sparse_moe.experts.186.w3", "model.layers.56.block_sparse_moe.experts.187.w3", "model.layers.56.block_sparse_moe.experts.188.w3", "model.layers.56.block_sparse_moe.experts.189.w3", "model.layers.56.block_sparse_moe.experts.190.w3", "model.layers.56.block_sparse_moe.experts.191.w3", "model.layers.56.block_sparse_moe.experts.192.w3", "model.layers.56.block_sparse_moe.experts.193.w3", "model.layers.56.block_sparse_moe.experts.194.w3", "model.layers.56.block_sparse_moe.experts.195.w3", "model.layers.56.block_sparse_moe.experts.196.w3", "model.layers.56.block_sparse_moe.experts.197.w3", "model.layers.56.block_sparse_moe.experts.198.w3", "model.layers.56.block_sparse_moe.experts.199.w3", "model.layers.56.block_sparse_moe.experts.200.w3", "model.layers.56.block_sparse_moe.experts.201.w3", "model.layers.56.block_sparse_moe.experts.202.w3", "model.layers.56.block_sparse_moe.experts.203.w3", "model.layers.56.block_sparse_moe.experts.204.w3", "model.layers.56.block_sparse_moe.experts.205.w3", "model.layers.56.block_sparse_moe.experts.206.w3", "model.layers.56.block_sparse_moe.experts.207.w3", "model.layers.56.block_sparse_moe.experts.208.w3", "model.layers.56.block_sparse_moe.experts.209.w3", "model.layers.56.block_sparse_moe.experts.210.w3", "model.layers.56.block_sparse_moe.experts.211.w3", "model.layers.56.block_sparse_moe.experts.212.w3", "model.layers.56.block_sparse_moe.experts.213.w3", "model.layers.56.block_sparse_moe.experts.214.w3", "model.layers.56.block_sparse_moe.experts.215.w3", "model.layers.56.block_sparse_moe.experts.216.w3", "model.layers.56.block_sparse_moe.experts.217.w3", "model.layers.56.block_sparse_moe.experts.218.w3", "model.layers.56.block_sparse_moe.experts.219.w3", "model.layers.56.block_sparse_moe.experts.220.w3", "model.layers.56.block_sparse_moe.experts.221.w3", "model.layers.56.block_sparse_moe.experts.222.w3", "model.layers.56.block_sparse_moe.experts.223.w3", "model.layers.56.block_sparse_moe.experts.224.w3", "model.layers.56.block_sparse_moe.experts.225.w3", "model.layers.56.block_sparse_moe.experts.226.w3", "model.layers.56.block_sparse_moe.experts.227.w3", "model.layers.56.block_sparse_moe.experts.228.w3", "model.layers.56.block_sparse_moe.experts.229.w3", "model.layers.56.block_sparse_moe.experts.230.w3", "model.layers.56.block_sparse_moe.experts.231.w3", "model.layers.56.block_sparse_moe.experts.232.w3", "model.layers.56.block_sparse_moe.experts.233.w3", "model.layers.56.block_sparse_moe.experts.234.w3", "model.layers.56.block_sparse_moe.experts.235.w3", "model.layers.56.block_sparse_moe.experts.236.w3", "model.layers.56.block_sparse_moe.experts.237.w3", "model.layers.56.block_sparse_moe.experts.238.w3", "model.layers.56.block_sparse_moe.experts.239.w3", "model.layers.56.block_sparse_moe.experts.240.w3", "model.layers.56.block_sparse_moe.experts.241.w3", "model.layers.56.block_sparse_moe.experts.242.w3", "model.layers.56.block_sparse_moe.experts.243.w3", "model.layers.56.block_sparse_moe.experts.244.w3", "model.layers.56.block_sparse_moe.experts.245.w3", "model.layers.56.block_sparse_moe.experts.246.w3", "model.layers.56.block_sparse_moe.experts.247.w3", "model.layers.56.block_sparse_moe.experts.248.w3", "model.layers.56.block_sparse_moe.experts.249.w3", "model.layers.56.block_sparse_moe.experts.250.w3", "model.layers.56.block_sparse_moe.experts.251.w3", "model.layers.56.block_sparse_moe.experts.252.w3", "model.layers.56.block_sparse_moe.experts.253.w3", "model.layers.56.block_sparse_moe.experts.254.w3", "model.layers.56.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00023097991943354934, "dbits": 2415919104 } ] }, { "idx": 284, "layers": [ "model.layers.56.block_sparse_moe.experts.0.w2", "model.layers.56.block_sparse_moe.experts.1.w2", "model.layers.56.block_sparse_moe.experts.2.w2", "model.layers.56.block_sparse_moe.experts.3.w2", "model.layers.56.block_sparse_moe.experts.4.w2", "model.layers.56.block_sparse_moe.experts.5.w2", "model.layers.56.block_sparse_moe.experts.6.w2", "model.layers.56.block_sparse_moe.experts.7.w2", "model.layers.56.block_sparse_moe.experts.8.w2", "model.layers.56.block_sparse_moe.experts.9.w2", "model.layers.56.block_sparse_moe.experts.10.w2", "model.layers.56.block_sparse_moe.experts.11.w2", "model.layers.56.block_sparse_moe.experts.12.w2", "model.layers.56.block_sparse_moe.experts.13.w2", "model.layers.56.block_sparse_moe.experts.14.w2", "model.layers.56.block_sparse_moe.experts.15.w2", "model.layers.56.block_sparse_moe.experts.16.w2", "model.layers.56.block_sparse_moe.experts.17.w2", "model.layers.56.block_sparse_moe.experts.18.w2", "model.layers.56.block_sparse_moe.experts.19.w2", "model.layers.56.block_sparse_moe.experts.20.w2", "model.layers.56.block_sparse_moe.experts.21.w2", "model.layers.56.block_sparse_moe.experts.22.w2", "model.layers.56.block_sparse_moe.experts.23.w2", "model.layers.56.block_sparse_moe.experts.24.w2", "model.layers.56.block_sparse_moe.experts.25.w2", "model.layers.56.block_sparse_moe.experts.26.w2", "model.layers.56.block_sparse_moe.experts.27.w2", "model.layers.56.block_sparse_moe.experts.28.w2", "model.layers.56.block_sparse_moe.experts.29.w2", "model.layers.56.block_sparse_moe.experts.30.w2", "model.layers.56.block_sparse_moe.experts.31.w2", "model.layers.56.block_sparse_moe.experts.32.w2", "model.layers.56.block_sparse_moe.experts.33.w2", "model.layers.56.block_sparse_moe.experts.34.w2", "model.layers.56.block_sparse_moe.experts.35.w2", "model.layers.56.block_sparse_moe.experts.36.w2", "model.layers.56.block_sparse_moe.experts.37.w2", "model.layers.56.block_sparse_moe.experts.38.w2", "model.layers.56.block_sparse_moe.experts.39.w2", "model.layers.56.block_sparse_moe.experts.40.w2", "model.layers.56.block_sparse_moe.experts.41.w2", "model.layers.56.block_sparse_moe.experts.42.w2", "model.layers.56.block_sparse_moe.experts.43.w2", "model.layers.56.block_sparse_moe.experts.44.w2", "model.layers.56.block_sparse_moe.experts.45.w2", "model.layers.56.block_sparse_moe.experts.46.w2", "model.layers.56.block_sparse_moe.experts.47.w2", "model.layers.56.block_sparse_moe.experts.48.w2", "model.layers.56.block_sparse_moe.experts.49.w2", "model.layers.56.block_sparse_moe.experts.50.w2", "model.layers.56.block_sparse_moe.experts.51.w2", "model.layers.56.block_sparse_moe.experts.52.w2", "model.layers.56.block_sparse_moe.experts.53.w2", "model.layers.56.block_sparse_moe.experts.54.w2", "model.layers.56.block_sparse_moe.experts.55.w2", "model.layers.56.block_sparse_moe.experts.56.w2", "model.layers.56.block_sparse_moe.experts.57.w2", "model.layers.56.block_sparse_moe.experts.58.w2", "model.layers.56.block_sparse_moe.experts.59.w2", "model.layers.56.block_sparse_moe.experts.60.w2", "model.layers.56.block_sparse_moe.experts.61.w2", "model.layers.56.block_sparse_moe.experts.62.w2", "model.layers.56.block_sparse_moe.experts.63.w2", "model.layers.56.block_sparse_moe.experts.64.w2", "model.layers.56.block_sparse_moe.experts.65.w2", "model.layers.56.block_sparse_moe.experts.66.w2", "model.layers.56.block_sparse_moe.experts.67.w2", "model.layers.56.block_sparse_moe.experts.68.w2", "model.layers.56.block_sparse_moe.experts.69.w2", "model.layers.56.block_sparse_moe.experts.70.w2", "model.layers.56.block_sparse_moe.experts.71.w2", "model.layers.56.block_sparse_moe.experts.72.w2", "model.layers.56.block_sparse_moe.experts.73.w2", "model.layers.56.block_sparse_moe.experts.74.w2", "model.layers.56.block_sparse_moe.experts.75.w2", "model.layers.56.block_sparse_moe.experts.76.w2", "model.layers.56.block_sparse_moe.experts.77.w2", "model.layers.56.block_sparse_moe.experts.78.w2", "model.layers.56.block_sparse_moe.experts.79.w2", "model.layers.56.block_sparse_moe.experts.80.w2", "model.layers.56.block_sparse_moe.experts.81.w2", "model.layers.56.block_sparse_moe.experts.82.w2", "model.layers.56.block_sparse_moe.experts.83.w2", "model.layers.56.block_sparse_moe.experts.84.w2", "model.layers.56.block_sparse_moe.experts.85.w2", "model.layers.56.block_sparse_moe.experts.86.w2", "model.layers.56.block_sparse_moe.experts.87.w2", "model.layers.56.block_sparse_moe.experts.88.w2", "model.layers.56.block_sparse_moe.experts.89.w2", "model.layers.56.block_sparse_moe.experts.90.w2", "model.layers.56.block_sparse_moe.experts.91.w2", "model.layers.56.block_sparse_moe.experts.92.w2", "model.layers.56.block_sparse_moe.experts.93.w2", "model.layers.56.block_sparse_moe.experts.94.w2", "model.layers.56.block_sparse_moe.experts.95.w2", "model.layers.56.block_sparse_moe.experts.96.w2", "model.layers.56.block_sparse_moe.experts.97.w2", "model.layers.56.block_sparse_moe.experts.98.w2", "model.layers.56.block_sparse_moe.experts.99.w2", "model.layers.56.block_sparse_moe.experts.100.w2", "model.layers.56.block_sparse_moe.experts.101.w2", "model.layers.56.block_sparse_moe.experts.102.w2", "model.layers.56.block_sparse_moe.experts.103.w2", "model.layers.56.block_sparse_moe.experts.104.w2", "model.layers.56.block_sparse_moe.experts.105.w2", "model.layers.56.block_sparse_moe.experts.106.w2", "model.layers.56.block_sparse_moe.experts.107.w2", "model.layers.56.block_sparse_moe.experts.108.w2", "model.layers.56.block_sparse_moe.experts.109.w2", "model.layers.56.block_sparse_moe.experts.110.w2", "model.layers.56.block_sparse_moe.experts.111.w2", "model.layers.56.block_sparse_moe.experts.112.w2", "model.layers.56.block_sparse_moe.experts.113.w2", "model.layers.56.block_sparse_moe.experts.114.w2", "model.layers.56.block_sparse_moe.experts.115.w2", "model.layers.56.block_sparse_moe.experts.116.w2", "model.layers.56.block_sparse_moe.experts.117.w2", "model.layers.56.block_sparse_moe.experts.118.w2", "model.layers.56.block_sparse_moe.experts.119.w2", "model.layers.56.block_sparse_moe.experts.120.w2", "model.layers.56.block_sparse_moe.experts.121.w2", "model.layers.56.block_sparse_moe.experts.122.w2", "model.layers.56.block_sparse_moe.experts.123.w2", "model.layers.56.block_sparse_moe.experts.124.w2", "model.layers.56.block_sparse_moe.experts.125.w2", "model.layers.56.block_sparse_moe.experts.126.w2", "model.layers.56.block_sparse_moe.experts.127.w2", "model.layers.56.block_sparse_moe.experts.128.w2", "model.layers.56.block_sparse_moe.experts.129.w2", "model.layers.56.block_sparse_moe.experts.130.w2", "model.layers.56.block_sparse_moe.experts.131.w2", "model.layers.56.block_sparse_moe.experts.132.w2", "model.layers.56.block_sparse_moe.experts.133.w2", "model.layers.56.block_sparse_moe.experts.134.w2", "model.layers.56.block_sparse_moe.experts.135.w2", "model.layers.56.block_sparse_moe.experts.136.w2", "model.layers.56.block_sparse_moe.experts.137.w2", "model.layers.56.block_sparse_moe.experts.138.w2", "model.layers.56.block_sparse_moe.experts.139.w2", "model.layers.56.block_sparse_moe.experts.140.w2", "model.layers.56.block_sparse_moe.experts.141.w2", "model.layers.56.block_sparse_moe.experts.142.w2", "model.layers.56.block_sparse_moe.experts.143.w2", "model.layers.56.block_sparse_moe.experts.144.w2", "model.layers.56.block_sparse_moe.experts.145.w2", "model.layers.56.block_sparse_moe.experts.146.w2", "model.layers.56.block_sparse_moe.experts.147.w2", "model.layers.56.block_sparse_moe.experts.148.w2", "model.layers.56.block_sparse_moe.experts.149.w2", "model.layers.56.block_sparse_moe.experts.150.w2", "model.layers.56.block_sparse_moe.experts.151.w2", "model.layers.56.block_sparse_moe.experts.152.w2", "model.layers.56.block_sparse_moe.experts.153.w2", "model.layers.56.block_sparse_moe.experts.154.w2", "model.layers.56.block_sparse_moe.experts.155.w2", "model.layers.56.block_sparse_moe.experts.156.w2", "model.layers.56.block_sparse_moe.experts.157.w2", "model.layers.56.block_sparse_moe.experts.158.w2", "model.layers.56.block_sparse_moe.experts.159.w2", "model.layers.56.block_sparse_moe.experts.160.w2", "model.layers.56.block_sparse_moe.experts.161.w2", "model.layers.56.block_sparse_moe.experts.162.w2", "model.layers.56.block_sparse_moe.experts.163.w2", "model.layers.56.block_sparse_moe.experts.164.w2", "model.layers.56.block_sparse_moe.experts.165.w2", "model.layers.56.block_sparse_moe.experts.166.w2", "model.layers.56.block_sparse_moe.experts.167.w2", "model.layers.56.block_sparse_moe.experts.168.w2", "model.layers.56.block_sparse_moe.experts.169.w2", "model.layers.56.block_sparse_moe.experts.170.w2", "model.layers.56.block_sparse_moe.experts.171.w2", "model.layers.56.block_sparse_moe.experts.172.w2", "model.layers.56.block_sparse_moe.experts.173.w2", "model.layers.56.block_sparse_moe.experts.174.w2", "model.layers.56.block_sparse_moe.experts.175.w2", "model.layers.56.block_sparse_moe.experts.176.w2", "model.layers.56.block_sparse_moe.experts.177.w2", "model.layers.56.block_sparse_moe.experts.178.w2", "model.layers.56.block_sparse_moe.experts.179.w2", "model.layers.56.block_sparse_moe.experts.180.w2", "model.layers.56.block_sparse_moe.experts.181.w2", "model.layers.56.block_sparse_moe.experts.182.w2", "model.layers.56.block_sparse_moe.experts.183.w2", "model.layers.56.block_sparse_moe.experts.184.w2", "model.layers.56.block_sparse_moe.experts.185.w2", "model.layers.56.block_sparse_moe.experts.186.w2", "model.layers.56.block_sparse_moe.experts.187.w2", "model.layers.56.block_sparse_moe.experts.188.w2", "model.layers.56.block_sparse_moe.experts.189.w2", "model.layers.56.block_sparse_moe.experts.190.w2", "model.layers.56.block_sparse_moe.experts.191.w2", "model.layers.56.block_sparse_moe.experts.192.w2", "model.layers.56.block_sparse_moe.experts.193.w2", "model.layers.56.block_sparse_moe.experts.194.w2", "model.layers.56.block_sparse_moe.experts.195.w2", "model.layers.56.block_sparse_moe.experts.196.w2", "model.layers.56.block_sparse_moe.experts.197.w2", "model.layers.56.block_sparse_moe.experts.198.w2", "model.layers.56.block_sparse_moe.experts.199.w2", "model.layers.56.block_sparse_moe.experts.200.w2", "model.layers.56.block_sparse_moe.experts.201.w2", "model.layers.56.block_sparse_moe.experts.202.w2", "model.layers.56.block_sparse_moe.experts.203.w2", "model.layers.56.block_sparse_moe.experts.204.w2", "model.layers.56.block_sparse_moe.experts.205.w2", "model.layers.56.block_sparse_moe.experts.206.w2", "model.layers.56.block_sparse_moe.experts.207.w2", "model.layers.56.block_sparse_moe.experts.208.w2", "model.layers.56.block_sparse_moe.experts.209.w2", "model.layers.56.block_sparse_moe.experts.210.w2", "model.layers.56.block_sparse_moe.experts.211.w2", "model.layers.56.block_sparse_moe.experts.212.w2", "model.layers.56.block_sparse_moe.experts.213.w2", "model.layers.56.block_sparse_moe.experts.214.w2", "model.layers.56.block_sparse_moe.experts.215.w2", "model.layers.56.block_sparse_moe.experts.216.w2", "model.layers.56.block_sparse_moe.experts.217.w2", "model.layers.56.block_sparse_moe.experts.218.w2", "model.layers.56.block_sparse_moe.experts.219.w2", "model.layers.56.block_sparse_moe.experts.220.w2", "model.layers.56.block_sparse_moe.experts.221.w2", "model.layers.56.block_sparse_moe.experts.222.w2", "model.layers.56.block_sparse_moe.experts.223.w2", "model.layers.56.block_sparse_moe.experts.224.w2", "model.layers.56.block_sparse_moe.experts.225.w2", "model.layers.56.block_sparse_moe.experts.226.w2", "model.layers.56.block_sparse_moe.experts.227.w2", "model.layers.56.block_sparse_moe.experts.228.w2", "model.layers.56.block_sparse_moe.experts.229.w2", "model.layers.56.block_sparse_moe.experts.230.w2", "model.layers.56.block_sparse_moe.experts.231.w2", "model.layers.56.block_sparse_moe.experts.232.w2", "model.layers.56.block_sparse_moe.experts.233.w2", "model.layers.56.block_sparse_moe.experts.234.w2", "model.layers.56.block_sparse_moe.experts.235.w2", "model.layers.56.block_sparse_moe.experts.236.w2", "model.layers.56.block_sparse_moe.experts.237.w2", "model.layers.56.block_sparse_moe.experts.238.w2", "model.layers.56.block_sparse_moe.experts.239.w2", "model.layers.56.block_sparse_moe.experts.240.w2", "model.layers.56.block_sparse_moe.experts.241.w2", "model.layers.56.block_sparse_moe.experts.242.w2", "model.layers.56.block_sparse_moe.experts.243.w2", "model.layers.56.block_sparse_moe.experts.244.w2", "model.layers.56.block_sparse_moe.experts.245.w2", "model.layers.56.block_sparse_moe.experts.246.w2", "model.layers.56.block_sparse_moe.experts.247.w2", "model.layers.56.block_sparse_moe.experts.248.w2", "model.layers.56.block_sparse_moe.experts.249.w2", "model.layers.56.block_sparse_moe.experts.250.w2", "model.layers.56.block_sparse_moe.experts.251.w2", "model.layers.56.block_sparse_moe.experts.252.w2", "model.layers.56.block_sparse_moe.experts.253.w2", "model.layers.56.block_sparse_moe.experts.254.w2", "model.layers.56.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0001925915479659146, "dbits": 1207959552 } ] }, { "idx": 285, "layers": [ "model.layers.57.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0013524740934371948, "dbits": 18874368 } ] }, { "idx": 286, "layers": [ "model.layers.57.self_attn.k_proj", "model.layers.57.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0016640216112135953, "dbits": 6291456 } ] }, { "idx": 287, "layers": [ "model.layers.57.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00042847096920006766, "dbits": 18874368 } ] }, { "idx": 288, "layers": [ "model.layers.57.block_sparse_moe.experts.0.w1", "model.layers.57.block_sparse_moe.experts.1.w1", "model.layers.57.block_sparse_moe.experts.2.w1", "model.layers.57.block_sparse_moe.experts.3.w1", "model.layers.57.block_sparse_moe.experts.4.w1", "model.layers.57.block_sparse_moe.experts.5.w1", "model.layers.57.block_sparse_moe.experts.6.w1", "model.layers.57.block_sparse_moe.experts.7.w1", "model.layers.57.block_sparse_moe.experts.8.w1", "model.layers.57.block_sparse_moe.experts.9.w1", "model.layers.57.block_sparse_moe.experts.10.w1", "model.layers.57.block_sparse_moe.experts.11.w1", "model.layers.57.block_sparse_moe.experts.12.w1", "model.layers.57.block_sparse_moe.experts.13.w1", "model.layers.57.block_sparse_moe.experts.14.w1", "model.layers.57.block_sparse_moe.experts.15.w1", "model.layers.57.block_sparse_moe.experts.16.w1", "model.layers.57.block_sparse_moe.experts.17.w1", "model.layers.57.block_sparse_moe.experts.18.w1", "model.layers.57.block_sparse_moe.experts.19.w1", "model.layers.57.block_sparse_moe.experts.20.w1", "model.layers.57.block_sparse_moe.experts.21.w1", "model.layers.57.block_sparse_moe.experts.22.w1", "model.layers.57.block_sparse_moe.experts.23.w1", "model.layers.57.block_sparse_moe.experts.24.w1", "model.layers.57.block_sparse_moe.experts.25.w1", "model.layers.57.block_sparse_moe.experts.26.w1", "model.layers.57.block_sparse_moe.experts.27.w1", "model.layers.57.block_sparse_moe.experts.28.w1", "model.layers.57.block_sparse_moe.experts.29.w1", "model.layers.57.block_sparse_moe.experts.30.w1", "model.layers.57.block_sparse_moe.experts.31.w1", "model.layers.57.block_sparse_moe.experts.32.w1", "model.layers.57.block_sparse_moe.experts.33.w1", "model.layers.57.block_sparse_moe.experts.34.w1", "model.layers.57.block_sparse_moe.experts.35.w1", "model.layers.57.block_sparse_moe.experts.36.w1", "model.layers.57.block_sparse_moe.experts.37.w1", "model.layers.57.block_sparse_moe.experts.38.w1", "model.layers.57.block_sparse_moe.experts.39.w1", "model.layers.57.block_sparse_moe.experts.40.w1", "model.layers.57.block_sparse_moe.experts.41.w1", "model.layers.57.block_sparse_moe.experts.42.w1", "model.layers.57.block_sparse_moe.experts.43.w1", "model.layers.57.block_sparse_moe.experts.44.w1", "model.layers.57.block_sparse_moe.experts.45.w1", "model.layers.57.block_sparse_moe.experts.46.w1", "model.layers.57.block_sparse_moe.experts.47.w1", "model.layers.57.block_sparse_moe.experts.48.w1", "model.layers.57.block_sparse_moe.experts.49.w1", "model.layers.57.block_sparse_moe.experts.50.w1", "model.layers.57.block_sparse_moe.experts.51.w1", "model.layers.57.block_sparse_moe.experts.52.w1", "model.layers.57.block_sparse_moe.experts.53.w1", "model.layers.57.block_sparse_moe.experts.54.w1", "model.layers.57.block_sparse_moe.experts.55.w1", "model.layers.57.block_sparse_moe.experts.56.w1", "model.layers.57.block_sparse_moe.experts.57.w1", "model.layers.57.block_sparse_moe.experts.58.w1", "model.layers.57.block_sparse_moe.experts.59.w1", "model.layers.57.block_sparse_moe.experts.60.w1", "model.layers.57.block_sparse_moe.experts.61.w1", "model.layers.57.block_sparse_moe.experts.62.w1", "model.layers.57.block_sparse_moe.experts.63.w1", "model.layers.57.block_sparse_moe.experts.64.w1", "model.layers.57.block_sparse_moe.experts.65.w1", "model.layers.57.block_sparse_moe.experts.66.w1", "model.layers.57.block_sparse_moe.experts.67.w1", "model.layers.57.block_sparse_moe.experts.68.w1", "model.layers.57.block_sparse_moe.experts.69.w1", "model.layers.57.block_sparse_moe.experts.70.w1", "model.layers.57.block_sparse_moe.experts.71.w1", "model.layers.57.block_sparse_moe.experts.72.w1", "model.layers.57.block_sparse_moe.experts.73.w1", "model.layers.57.block_sparse_moe.experts.74.w1", "model.layers.57.block_sparse_moe.experts.75.w1", "model.layers.57.block_sparse_moe.experts.76.w1", "model.layers.57.block_sparse_moe.experts.77.w1", "model.layers.57.block_sparse_moe.experts.78.w1", "model.layers.57.block_sparse_moe.experts.79.w1", "model.layers.57.block_sparse_moe.experts.80.w1", "model.layers.57.block_sparse_moe.experts.81.w1", "model.layers.57.block_sparse_moe.experts.82.w1", "model.layers.57.block_sparse_moe.experts.83.w1", "model.layers.57.block_sparse_moe.experts.84.w1", "model.layers.57.block_sparse_moe.experts.85.w1", "model.layers.57.block_sparse_moe.experts.86.w1", "model.layers.57.block_sparse_moe.experts.87.w1", "model.layers.57.block_sparse_moe.experts.88.w1", "model.layers.57.block_sparse_moe.experts.89.w1", "model.layers.57.block_sparse_moe.experts.90.w1", "model.layers.57.block_sparse_moe.experts.91.w1", "model.layers.57.block_sparse_moe.experts.92.w1", "model.layers.57.block_sparse_moe.experts.93.w1", "model.layers.57.block_sparse_moe.experts.94.w1", "model.layers.57.block_sparse_moe.experts.95.w1", "model.layers.57.block_sparse_moe.experts.96.w1", "model.layers.57.block_sparse_moe.experts.97.w1", "model.layers.57.block_sparse_moe.experts.98.w1", "model.layers.57.block_sparse_moe.experts.99.w1", "model.layers.57.block_sparse_moe.experts.100.w1", "model.layers.57.block_sparse_moe.experts.101.w1", "model.layers.57.block_sparse_moe.experts.102.w1", "model.layers.57.block_sparse_moe.experts.103.w1", "model.layers.57.block_sparse_moe.experts.104.w1", "model.layers.57.block_sparse_moe.experts.105.w1", "model.layers.57.block_sparse_moe.experts.106.w1", "model.layers.57.block_sparse_moe.experts.107.w1", "model.layers.57.block_sparse_moe.experts.108.w1", "model.layers.57.block_sparse_moe.experts.109.w1", "model.layers.57.block_sparse_moe.experts.110.w1", "model.layers.57.block_sparse_moe.experts.111.w1", "model.layers.57.block_sparse_moe.experts.112.w1", "model.layers.57.block_sparse_moe.experts.113.w1", "model.layers.57.block_sparse_moe.experts.114.w1", "model.layers.57.block_sparse_moe.experts.115.w1", "model.layers.57.block_sparse_moe.experts.116.w1", "model.layers.57.block_sparse_moe.experts.117.w1", "model.layers.57.block_sparse_moe.experts.118.w1", "model.layers.57.block_sparse_moe.experts.119.w1", "model.layers.57.block_sparse_moe.experts.120.w1", "model.layers.57.block_sparse_moe.experts.121.w1", "model.layers.57.block_sparse_moe.experts.122.w1", "model.layers.57.block_sparse_moe.experts.123.w1", "model.layers.57.block_sparse_moe.experts.124.w1", "model.layers.57.block_sparse_moe.experts.125.w1", "model.layers.57.block_sparse_moe.experts.126.w1", "model.layers.57.block_sparse_moe.experts.127.w1", "model.layers.57.block_sparse_moe.experts.128.w1", "model.layers.57.block_sparse_moe.experts.129.w1", "model.layers.57.block_sparse_moe.experts.130.w1", "model.layers.57.block_sparse_moe.experts.131.w1", "model.layers.57.block_sparse_moe.experts.132.w1", "model.layers.57.block_sparse_moe.experts.133.w1", "model.layers.57.block_sparse_moe.experts.134.w1", "model.layers.57.block_sparse_moe.experts.135.w1", "model.layers.57.block_sparse_moe.experts.136.w1", "model.layers.57.block_sparse_moe.experts.137.w1", "model.layers.57.block_sparse_moe.experts.138.w1", "model.layers.57.block_sparse_moe.experts.139.w1", "model.layers.57.block_sparse_moe.experts.140.w1", "model.layers.57.block_sparse_moe.experts.141.w1", "model.layers.57.block_sparse_moe.experts.142.w1", "model.layers.57.block_sparse_moe.experts.143.w1", "model.layers.57.block_sparse_moe.experts.144.w1", "model.layers.57.block_sparse_moe.experts.145.w1", "model.layers.57.block_sparse_moe.experts.146.w1", "model.layers.57.block_sparse_moe.experts.147.w1", "model.layers.57.block_sparse_moe.experts.148.w1", "model.layers.57.block_sparse_moe.experts.149.w1", "model.layers.57.block_sparse_moe.experts.150.w1", "model.layers.57.block_sparse_moe.experts.151.w1", "model.layers.57.block_sparse_moe.experts.152.w1", "model.layers.57.block_sparse_moe.experts.153.w1", "model.layers.57.block_sparse_moe.experts.154.w1", "model.layers.57.block_sparse_moe.experts.155.w1", "model.layers.57.block_sparse_moe.experts.156.w1", "model.layers.57.block_sparse_moe.experts.157.w1", "model.layers.57.block_sparse_moe.experts.158.w1", "model.layers.57.block_sparse_moe.experts.159.w1", "model.layers.57.block_sparse_moe.experts.160.w1", "model.layers.57.block_sparse_moe.experts.161.w1", "model.layers.57.block_sparse_moe.experts.162.w1", "model.layers.57.block_sparse_moe.experts.163.w1", "model.layers.57.block_sparse_moe.experts.164.w1", "model.layers.57.block_sparse_moe.experts.165.w1", "model.layers.57.block_sparse_moe.experts.166.w1", "model.layers.57.block_sparse_moe.experts.167.w1", "model.layers.57.block_sparse_moe.experts.168.w1", "model.layers.57.block_sparse_moe.experts.169.w1", "model.layers.57.block_sparse_moe.experts.170.w1", "model.layers.57.block_sparse_moe.experts.171.w1", "model.layers.57.block_sparse_moe.experts.172.w1", "model.layers.57.block_sparse_moe.experts.173.w1", "model.layers.57.block_sparse_moe.experts.174.w1", "model.layers.57.block_sparse_moe.experts.175.w1", "model.layers.57.block_sparse_moe.experts.176.w1", "model.layers.57.block_sparse_moe.experts.177.w1", "model.layers.57.block_sparse_moe.experts.178.w1", "model.layers.57.block_sparse_moe.experts.179.w1", "model.layers.57.block_sparse_moe.experts.180.w1", "model.layers.57.block_sparse_moe.experts.181.w1", "model.layers.57.block_sparse_moe.experts.182.w1", "model.layers.57.block_sparse_moe.experts.183.w1", "model.layers.57.block_sparse_moe.experts.184.w1", "model.layers.57.block_sparse_moe.experts.185.w1", "model.layers.57.block_sparse_moe.experts.186.w1", "model.layers.57.block_sparse_moe.experts.187.w1", "model.layers.57.block_sparse_moe.experts.188.w1", "model.layers.57.block_sparse_moe.experts.189.w1", "model.layers.57.block_sparse_moe.experts.190.w1", "model.layers.57.block_sparse_moe.experts.191.w1", "model.layers.57.block_sparse_moe.experts.192.w1", "model.layers.57.block_sparse_moe.experts.193.w1", "model.layers.57.block_sparse_moe.experts.194.w1", "model.layers.57.block_sparse_moe.experts.195.w1", "model.layers.57.block_sparse_moe.experts.196.w1", "model.layers.57.block_sparse_moe.experts.197.w1", "model.layers.57.block_sparse_moe.experts.198.w1", "model.layers.57.block_sparse_moe.experts.199.w1", "model.layers.57.block_sparse_moe.experts.200.w1", "model.layers.57.block_sparse_moe.experts.201.w1", "model.layers.57.block_sparse_moe.experts.202.w1", "model.layers.57.block_sparse_moe.experts.203.w1", "model.layers.57.block_sparse_moe.experts.204.w1", "model.layers.57.block_sparse_moe.experts.205.w1", "model.layers.57.block_sparse_moe.experts.206.w1", "model.layers.57.block_sparse_moe.experts.207.w1", "model.layers.57.block_sparse_moe.experts.208.w1", "model.layers.57.block_sparse_moe.experts.209.w1", "model.layers.57.block_sparse_moe.experts.210.w1", "model.layers.57.block_sparse_moe.experts.211.w1", "model.layers.57.block_sparse_moe.experts.212.w1", "model.layers.57.block_sparse_moe.experts.213.w1", "model.layers.57.block_sparse_moe.experts.214.w1", "model.layers.57.block_sparse_moe.experts.215.w1", "model.layers.57.block_sparse_moe.experts.216.w1", "model.layers.57.block_sparse_moe.experts.217.w1", "model.layers.57.block_sparse_moe.experts.218.w1", "model.layers.57.block_sparse_moe.experts.219.w1", "model.layers.57.block_sparse_moe.experts.220.w1", "model.layers.57.block_sparse_moe.experts.221.w1", "model.layers.57.block_sparse_moe.experts.222.w1", "model.layers.57.block_sparse_moe.experts.223.w1", "model.layers.57.block_sparse_moe.experts.224.w1", "model.layers.57.block_sparse_moe.experts.225.w1", "model.layers.57.block_sparse_moe.experts.226.w1", "model.layers.57.block_sparse_moe.experts.227.w1", "model.layers.57.block_sparse_moe.experts.228.w1", "model.layers.57.block_sparse_moe.experts.229.w1", "model.layers.57.block_sparse_moe.experts.230.w1", "model.layers.57.block_sparse_moe.experts.231.w1", "model.layers.57.block_sparse_moe.experts.232.w1", "model.layers.57.block_sparse_moe.experts.233.w1", "model.layers.57.block_sparse_moe.experts.234.w1", "model.layers.57.block_sparse_moe.experts.235.w1", "model.layers.57.block_sparse_moe.experts.236.w1", "model.layers.57.block_sparse_moe.experts.237.w1", "model.layers.57.block_sparse_moe.experts.238.w1", "model.layers.57.block_sparse_moe.experts.239.w1", "model.layers.57.block_sparse_moe.experts.240.w1", "model.layers.57.block_sparse_moe.experts.241.w1", "model.layers.57.block_sparse_moe.experts.242.w1", "model.layers.57.block_sparse_moe.experts.243.w1", "model.layers.57.block_sparse_moe.experts.244.w1", "model.layers.57.block_sparse_moe.experts.245.w1", "model.layers.57.block_sparse_moe.experts.246.w1", "model.layers.57.block_sparse_moe.experts.247.w1", "model.layers.57.block_sparse_moe.experts.248.w1", "model.layers.57.block_sparse_moe.experts.249.w1", "model.layers.57.block_sparse_moe.experts.250.w1", "model.layers.57.block_sparse_moe.experts.251.w1", "model.layers.57.block_sparse_moe.experts.252.w1", "model.layers.57.block_sparse_moe.experts.253.w1", "model.layers.57.block_sparse_moe.experts.254.w1", "model.layers.57.block_sparse_moe.experts.255.w1", "model.layers.57.block_sparse_moe.experts.0.w3", "model.layers.57.block_sparse_moe.experts.1.w3", "model.layers.57.block_sparse_moe.experts.2.w3", "model.layers.57.block_sparse_moe.experts.3.w3", "model.layers.57.block_sparse_moe.experts.4.w3", "model.layers.57.block_sparse_moe.experts.5.w3", "model.layers.57.block_sparse_moe.experts.6.w3", "model.layers.57.block_sparse_moe.experts.7.w3", "model.layers.57.block_sparse_moe.experts.8.w3", "model.layers.57.block_sparse_moe.experts.9.w3", "model.layers.57.block_sparse_moe.experts.10.w3", "model.layers.57.block_sparse_moe.experts.11.w3", "model.layers.57.block_sparse_moe.experts.12.w3", "model.layers.57.block_sparse_moe.experts.13.w3", "model.layers.57.block_sparse_moe.experts.14.w3", "model.layers.57.block_sparse_moe.experts.15.w3", "model.layers.57.block_sparse_moe.experts.16.w3", "model.layers.57.block_sparse_moe.experts.17.w3", "model.layers.57.block_sparse_moe.experts.18.w3", "model.layers.57.block_sparse_moe.experts.19.w3", "model.layers.57.block_sparse_moe.experts.20.w3", "model.layers.57.block_sparse_moe.experts.21.w3", "model.layers.57.block_sparse_moe.experts.22.w3", "model.layers.57.block_sparse_moe.experts.23.w3", "model.layers.57.block_sparse_moe.experts.24.w3", "model.layers.57.block_sparse_moe.experts.25.w3", "model.layers.57.block_sparse_moe.experts.26.w3", "model.layers.57.block_sparse_moe.experts.27.w3", "model.layers.57.block_sparse_moe.experts.28.w3", "model.layers.57.block_sparse_moe.experts.29.w3", "model.layers.57.block_sparse_moe.experts.30.w3", "model.layers.57.block_sparse_moe.experts.31.w3", "model.layers.57.block_sparse_moe.experts.32.w3", "model.layers.57.block_sparse_moe.experts.33.w3", "model.layers.57.block_sparse_moe.experts.34.w3", "model.layers.57.block_sparse_moe.experts.35.w3", "model.layers.57.block_sparse_moe.experts.36.w3", "model.layers.57.block_sparse_moe.experts.37.w3", "model.layers.57.block_sparse_moe.experts.38.w3", "model.layers.57.block_sparse_moe.experts.39.w3", "model.layers.57.block_sparse_moe.experts.40.w3", "model.layers.57.block_sparse_moe.experts.41.w3", "model.layers.57.block_sparse_moe.experts.42.w3", "model.layers.57.block_sparse_moe.experts.43.w3", "model.layers.57.block_sparse_moe.experts.44.w3", "model.layers.57.block_sparse_moe.experts.45.w3", "model.layers.57.block_sparse_moe.experts.46.w3", "model.layers.57.block_sparse_moe.experts.47.w3", "model.layers.57.block_sparse_moe.experts.48.w3", "model.layers.57.block_sparse_moe.experts.49.w3", "model.layers.57.block_sparse_moe.experts.50.w3", "model.layers.57.block_sparse_moe.experts.51.w3", "model.layers.57.block_sparse_moe.experts.52.w3", "model.layers.57.block_sparse_moe.experts.53.w3", "model.layers.57.block_sparse_moe.experts.54.w3", "model.layers.57.block_sparse_moe.experts.55.w3", "model.layers.57.block_sparse_moe.experts.56.w3", "model.layers.57.block_sparse_moe.experts.57.w3", "model.layers.57.block_sparse_moe.experts.58.w3", "model.layers.57.block_sparse_moe.experts.59.w3", "model.layers.57.block_sparse_moe.experts.60.w3", "model.layers.57.block_sparse_moe.experts.61.w3", "model.layers.57.block_sparse_moe.experts.62.w3", "model.layers.57.block_sparse_moe.experts.63.w3", "model.layers.57.block_sparse_moe.experts.64.w3", "model.layers.57.block_sparse_moe.experts.65.w3", "model.layers.57.block_sparse_moe.experts.66.w3", "model.layers.57.block_sparse_moe.experts.67.w3", "model.layers.57.block_sparse_moe.experts.68.w3", "model.layers.57.block_sparse_moe.experts.69.w3", "model.layers.57.block_sparse_moe.experts.70.w3", "model.layers.57.block_sparse_moe.experts.71.w3", "model.layers.57.block_sparse_moe.experts.72.w3", "model.layers.57.block_sparse_moe.experts.73.w3", "model.layers.57.block_sparse_moe.experts.74.w3", "model.layers.57.block_sparse_moe.experts.75.w3", "model.layers.57.block_sparse_moe.experts.76.w3", "model.layers.57.block_sparse_moe.experts.77.w3", "model.layers.57.block_sparse_moe.experts.78.w3", "model.layers.57.block_sparse_moe.experts.79.w3", "model.layers.57.block_sparse_moe.experts.80.w3", "model.layers.57.block_sparse_moe.experts.81.w3", "model.layers.57.block_sparse_moe.experts.82.w3", "model.layers.57.block_sparse_moe.experts.83.w3", "model.layers.57.block_sparse_moe.experts.84.w3", "model.layers.57.block_sparse_moe.experts.85.w3", "model.layers.57.block_sparse_moe.experts.86.w3", "model.layers.57.block_sparse_moe.experts.87.w3", "model.layers.57.block_sparse_moe.experts.88.w3", "model.layers.57.block_sparse_moe.experts.89.w3", "model.layers.57.block_sparse_moe.experts.90.w3", "model.layers.57.block_sparse_moe.experts.91.w3", "model.layers.57.block_sparse_moe.experts.92.w3", "model.layers.57.block_sparse_moe.experts.93.w3", "model.layers.57.block_sparse_moe.experts.94.w3", "model.layers.57.block_sparse_moe.experts.95.w3", "model.layers.57.block_sparse_moe.experts.96.w3", "model.layers.57.block_sparse_moe.experts.97.w3", "model.layers.57.block_sparse_moe.experts.98.w3", "model.layers.57.block_sparse_moe.experts.99.w3", "model.layers.57.block_sparse_moe.experts.100.w3", "model.layers.57.block_sparse_moe.experts.101.w3", "model.layers.57.block_sparse_moe.experts.102.w3", "model.layers.57.block_sparse_moe.experts.103.w3", "model.layers.57.block_sparse_moe.experts.104.w3", "model.layers.57.block_sparse_moe.experts.105.w3", "model.layers.57.block_sparse_moe.experts.106.w3", "model.layers.57.block_sparse_moe.experts.107.w3", "model.layers.57.block_sparse_moe.experts.108.w3", "model.layers.57.block_sparse_moe.experts.109.w3", "model.layers.57.block_sparse_moe.experts.110.w3", "model.layers.57.block_sparse_moe.experts.111.w3", "model.layers.57.block_sparse_moe.experts.112.w3", "model.layers.57.block_sparse_moe.experts.113.w3", "model.layers.57.block_sparse_moe.experts.114.w3", "model.layers.57.block_sparse_moe.experts.115.w3", "model.layers.57.block_sparse_moe.experts.116.w3", "model.layers.57.block_sparse_moe.experts.117.w3", "model.layers.57.block_sparse_moe.experts.118.w3", "model.layers.57.block_sparse_moe.experts.119.w3", "model.layers.57.block_sparse_moe.experts.120.w3", "model.layers.57.block_sparse_moe.experts.121.w3", "model.layers.57.block_sparse_moe.experts.122.w3", "model.layers.57.block_sparse_moe.experts.123.w3", "model.layers.57.block_sparse_moe.experts.124.w3", "model.layers.57.block_sparse_moe.experts.125.w3", "model.layers.57.block_sparse_moe.experts.126.w3", "model.layers.57.block_sparse_moe.experts.127.w3", "model.layers.57.block_sparse_moe.experts.128.w3", "model.layers.57.block_sparse_moe.experts.129.w3", "model.layers.57.block_sparse_moe.experts.130.w3", "model.layers.57.block_sparse_moe.experts.131.w3", "model.layers.57.block_sparse_moe.experts.132.w3", "model.layers.57.block_sparse_moe.experts.133.w3", "model.layers.57.block_sparse_moe.experts.134.w3", "model.layers.57.block_sparse_moe.experts.135.w3", "model.layers.57.block_sparse_moe.experts.136.w3", "model.layers.57.block_sparse_moe.experts.137.w3", "model.layers.57.block_sparse_moe.experts.138.w3", "model.layers.57.block_sparse_moe.experts.139.w3", "model.layers.57.block_sparse_moe.experts.140.w3", "model.layers.57.block_sparse_moe.experts.141.w3", "model.layers.57.block_sparse_moe.experts.142.w3", "model.layers.57.block_sparse_moe.experts.143.w3", "model.layers.57.block_sparse_moe.experts.144.w3", "model.layers.57.block_sparse_moe.experts.145.w3", "model.layers.57.block_sparse_moe.experts.146.w3", "model.layers.57.block_sparse_moe.experts.147.w3", "model.layers.57.block_sparse_moe.experts.148.w3", "model.layers.57.block_sparse_moe.experts.149.w3", "model.layers.57.block_sparse_moe.experts.150.w3", "model.layers.57.block_sparse_moe.experts.151.w3", "model.layers.57.block_sparse_moe.experts.152.w3", "model.layers.57.block_sparse_moe.experts.153.w3", "model.layers.57.block_sparse_moe.experts.154.w3", "model.layers.57.block_sparse_moe.experts.155.w3", "model.layers.57.block_sparse_moe.experts.156.w3", "model.layers.57.block_sparse_moe.experts.157.w3", "model.layers.57.block_sparse_moe.experts.158.w3", "model.layers.57.block_sparse_moe.experts.159.w3", "model.layers.57.block_sparse_moe.experts.160.w3", "model.layers.57.block_sparse_moe.experts.161.w3", "model.layers.57.block_sparse_moe.experts.162.w3", "model.layers.57.block_sparse_moe.experts.163.w3", "model.layers.57.block_sparse_moe.experts.164.w3", "model.layers.57.block_sparse_moe.experts.165.w3", "model.layers.57.block_sparse_moe.experts.166.w3", "model.layers.57.block_sparse_moe.experts.167.w3", "model.layers.57.block_sparse_moe.experts.168.w3", "model.layers.57.block_sparse_moe.experts.169.w3", "model.layers.57.block_sparse_moe.experts.170.w3", "model.layers.57.block_sparse_moe.experts.171.w3", "model.layers.57.block_sparse_moe.experts.172.w3", "model.layers.57.block_sparse_moe.experts.173.w3", "model.layers.57.block_sparse_moe.experts.174.w3", "model.layers.57.block_sparse_moe.experts.175.w3", "model.layers.57.block_sparse_moe.experts.176.w3", "model.layers.57.block_sparse_moe.experts.177.w3", "model.layers.57.block_sparse_moe.experts.178.w3", "model.layers.57.block_sparse_moe.experts.179.w3", "model.layers.57.block_sparse_moe.experts.180.w3", "model.layers.57.block_sparse_moe.experts.181.w3", "model.layers.57.block_sparse_moe.experts.182.w3", "model.layers.57.block_sparse_moe.experts.183.w3", "model.layers.57.block_sparse_moe.experts.184.w3", "model.layers.57.block_sparse_moe.experts.185.w3", "model.layers.57.block_sparse_moe.experts.186.w3", "model.layers.57.block_sparse_moe.experts.187.w3", "model.layers.57.block_sparse_moe.experts.188.w3", "model.layers.57.block_sparse_moe.experts.189.w3", "model.layers.57.block_sparse_moe.experts.190.w3", "model.layers.57.block_sparse_moe.experts.191.w3", "model.layers.57.block_sparse_moe.experts.192.w3", "model.layers.57.block_sparse_moe.experts.193.w3", "model.layers.57.block_sparse_moe.experts.194.w3", "model.layers.57.block_sparse_moe.experts.195.w3", "model.layers.57.block_sparse_moe.experts.196.w3", "model.layers.57.block_sparse_moe.experts.197.w3", "model.layers.57.block_sparse_moe.experts.198.w3", "model.layers.57.block_sparse_moe.experts.199.w3", "model.layers.57.block_sparse_moe.experts.200.w3", "model.layers.57.block_sparse_moe.experts.201.w3", "model.layers.57.block_sparse_moe.experts.202.w3", "model.layers.57.block_sparse_moe.experts.203.w3", "model.layers.57.block_sparse_moe.experts.204.w3", "model.layers.57.block_sparse_moe.experts.205.w3", "model.layers.57.block_sparse_moe.experts.206.w3", "model.layers.57.block_sparse_moe.experts.207.w3", "model.layers.57.block_sparse_moe.experts.208.w3", "model.layers.57.block_sparse_moe.experts.209.w3", "model.layers.57.block_sparse_moe.experts.210.w3", "model.layers.57.block_sparse_moe.experts.211.w3", "model.layers.57.block_sparse_moe.experts.212.w3", "model.layers.57.block_sparse_moe.experts.213.w3", "model.layers.57.block_sparse_moe.experts.214.w3", "model.layers.57.block_sparse_moe.experts.215.w3", "model.layers.57.block_sparse_moe.experts.216.w3", "model.layers.57.block_sparse_moe.experts.217.w3", "model.layers.57.block_sparse_moe.experts.218.w3", "model.layers.57.block_sparse_moe.experts.219.w3", "model.layers.57.block_sparse_moe.experts.220.w3", "model.layers.57.block_sparse_moe.experts.221.w3", "model.layers.57.block_sparse_moe.experts.222.w3", "model.layers.57.block_sparse_moe.experts.223.w3", "model.layers.57.block_sparse_moe.experts.224.w3", "model.layers.57.block_sparse_moe.experts.225.w3", "model.layers.57.block_sparse_moe.experts.226.w3", "model.layers.57.block_sparse_moe.experts.227.w3", "model.layers.57.block_sparse_moe.experts.228.w3", "model.layers.57.block_sparse_moe.experts.229.w3", "model.layers.57.block_sparse_moe.experts.230.w3", "model.layers.57.block_sparse_moe.experts.231.w3", "model.layers.57.block_sparse_moe.experts.232.w3", "model.layers.57.block_sparse_moe.experts.233.w3", "model.layers.57.block_sparse_moe.experts.234.w3", "model.layers.57.block_sparse_moe.experts.235.w3", "model.layers.57.block_sparse_moe.experts.236.w3", "model.layers.57.block_sparse_moe.experts.237.w3", "model.layers.57.block_sparse_moe.experts.238.w3", "model.layers.57.block_sparse_moe.experts.239.w3", "model.layers.57.block_sparse_moe.experts.240.w3", "model.layers.57.block_sparse_moe.experts.241.w3", "model.layers.57.block_sparse_moe.experts.242.w3", "model.layers.57.block_sparse_moe.experts.243.w3", "model.layers.57.block_sparse_moe.experts.244.w3", "model.layers.57.block_sparse_moe.experts.245.w3", "model.layers.57.block_sparse_moe.experts.246.w3", "model.layers.57.block_sparse_moe.experts.247.w3", "model.layers.57.block_sparse_moe.experts.248.w3", "model.layers.57.block_sparse_moe.experts.249.w3", "model.layers.57.block_sparse_moe.experts.250.w3", "model.layers.57.block_sparse_moe.experts.251.w3", "model.layers.57.block_sparse_moe.experts.252.w3", "model.layers.57.block_sparse_moe.experts.253.w3", "model.layers.57.block_sparse_moe.experts.254.w3", "model.layers.57.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0001337975263595581, "dbits": 2415919104 } ] }, { "idx": 289, "layers": [ "model.layers.57.block_sparse_moe.experts.0.w2", "model.layers.57.block_sparse_moe.experts.1.w2", "model.layers.57.block_sparse_moe.experts.2.w2", "model.layers.57.block_sparse_moe.experts.3.w2", "model.layers.57.block_sparse_moe.experts.4.w2", "model.layers.57.block_sparse_moe.experts.5.w2", "model.layers.57.block_sparse_moe.experts.6.w2", "model.layers.57.block_sparse_moe.experts.7.w2", "model.layers.57.block_sparse_moe.experts.8.w2", "model.layers.57.block_sparse_moe.experts.9.w2", "model.layers.57.block_sparse_moe.experts.10.w2", "model.layers.57.block_sparse_moe.experts.11.w2", "model.layers.57.block_sparse_moe.experts.12.w2", "model.layers.57.block_sparse_moe.experts.13.w2", "model.layers.57.block_sparse_moe.experts.14.w2", "model.layers.57.block_sparse_moe.experts.15.w2", "model.layers.57.block_sparse_moe.experts.16.w2", "model.layers.57.block_sparse_moe.experts.17.w2", "model.layers.57.block_sparse_moe.experts.18.w2", "model.layers.57.block_sparse_moe.experts.19.w2", "model.layers.57.block_sparse_moe.experts.20.w2", "model.layers.57.block_sparse_moe.experts.21.w2", "model.layers.57.block_sparse_moe.experts.22.w2", "model.layers.57.block_sparse_moe.experts.23.w2", "model.layers.57.block_sparse_moe.experts.24.w2", "model.layers.57.block_sparse_moe.experts.25.w2", "model.layers.57.block_sparse_moe.experts.26.w2", "model.layers.57.block_sparse_moe.experts.27.w2", "model.layers.57.block_sparse_moe.experts.28.w2", "model.layers.57.block_sparse_moe.experts.29.w2", "model.layers.57.block_sparse_moe.experts.30.w2", "model.layers.57.block_sparse_moe.experts.31.w2", "model.layers.57.block_sparse_moe.experts.32.w2", "model.layers.57.block_sparse_moe.experts.33.w2", "model.layers.57.block_sparse_moe.experts.34.w2", "model.layers.57.block_sparse_moe.experts.35.w2", "model.layers.57.block_sparse_moe.experts.36.w2", "model.layers.57.block_sparse_moe.experts.37.w2", "model.layers.57.block_sparse_moe.experts.38.w2", "model.layers.57.block_sparse_moe.experts.39.w2", "model.layers.57.block_sparse_moe.experts.40.w2", "model.layers.57.block_sparse_moe.experts.41.w2", "model.layers.57.block_sparse_moe.experts.42.w2", "model.layers.57.block_sparse_moe.experts.43.w2", "model.layers.57.block_sparse_moe.experts.44.w2", "model.layers.57.block_sparse_moe.experts.45.w2", "model.layers.57.block_sparse_moe.experts.46.w2", "model.layers.57.block_sparse_moe.experts.47.w2", "model.layers.57.block_sparse_moe.experts.48.w2", "model.layers.57.block_sparse_moe.experts.49.w2", "model.layers.57.block_sparse_moe.experts.50.w2", "model.layers.57.block_sparse_moe.experts.51.w2", "model.layers.57.block_sparse_moe.experts.52.w2", "model.layers.57.block_sparse_moe.experts.53.w2", "model.layers.57.block_sparse_moe.experts.54.w2", "model.layers.57.block_sparse_moe.experts.55.w2", "model.layers.57.block_sparse_moe.experts.56.w2", "model.layers.57.block_sparse_moe.experts.57.w2", "model.layers.57.block_sparse_moe.experts.58.w2", "model.layers.57.block_sparse_moe.experts.59.w2", "model.layers.57.block_sparse_moe.experts.60.w2", "model.layers.57.block_sparse_moe.experts.61.w2", "model.layers.57.block_sparse_moe.experts.62.w2", "model.layers.57.block_sparse_moe.experts.63.w2", "model.layers.57.block_sparse_moe.experts.64.w2", "model.layers.57.block_sparse_moe.experts.65.w2", "model.layers.57.block_sparse_moe.experts.66.w2", "model.layers.57.block_sparse_moe.experts.67.w2", "model.layers.57.block_sparse_moe.experts.68.w2", "model.layers.57.block_sparse_moe.experts.69.w2", "model.layers.57.block_sparse_moe.experts.70.w2", "model.layers.57.block_sparse_moe.experts.71.w2", "model.layers.57.block_sparse_moe.experts.72.w2", "model.layers.57.block_sparse_moe.experts.73.w2", "model.layers.57.block_sparse_moe.experts.74.w2", "model.layers.57.block_sparse_moe.experts.75.w2", "model.layers.57.block_sparse_moe.experts.76.w2", "model.layers.57.block_sparse_moe.experts.77.w2", "model.layers.57.block_sparse_moe.experts.78.w2", "model.layers.57.block_sparse_moe.experts.79.w2", "model.layers.57.block_sparse_moe.experts.80.w2", "model.layers.57.block_sparse_moe.experts.81.w2", "model.layers.57.block_sparse_moe.experts.82.w2", "model.layers.57.block_sparse_moe.experts.83.w2", "model.layers.57.block_sparse_moe.experts.84.w2", "model.layers.57.block_sparse_moe.experts.85.w2", "model.layers.57.block_sparse_moe.experts.86.w2", "model.layers.57.block_sparse_moe.experts.87.w2", "model.layers.57.block_sparse_moe.experts.88.w2", "model.layers.57.block_sparse_moe.experts.89.w2", "model.layers.57.block_sparse_moe.experts.90.w2", "model.layers.57.block_sparse_moe.experts.91.w2", "model.layers.57.block_sparse_moe.experts.92.w2", "model.layers.57.block_sparse_moe.experts.93.w2", "model.layers.57.block_sparse_moe.experts.94.w2", "model.layers.57.block_sparse_moe.experts.95.w2", "model.layers.57.block_sparse_moe.experts.96.w2", "model.layers.57.block_sparse_moe.experts.97.w2", "model.layers.57.block_sparse_moe.experts.98.w2", "model.layers.57.block_sparse_moe.experts.99.w2", "model.layers.57.block_sparse_moe.experts.100.w2", "model.layers.57.block_sparse_moe.experts.101.w2", "model.layers.57.block_sparse_moe.experts.102.w2", "model.layers.57.block_sparse_moe.experts.103.w2", "model.layers.57.block_sparse_moe.experts.104.w2", "model.layers.57.block_sparse_moe.experts.105.w2", "model.layers.57.block_sparse_moe.experts.106.w2", "model.layers.57.block_sparse_moe.experts.107.w2", "model.layers.57.block_sparse_moe.experts.108.w2", "model.layers.57.block_sparse_moe.experts.109.w2", "model.layers.57.block_sparse_moe.experts.110.w2", "model.layers.57.block_sparse_moe.experts.111.w2", "model.layers.57.block_sparse_moe.experts.112.w2", "model.layers.57.block_sparse_moe.experts.113.w2", "model.layers.57.block_sparse_moe.experts.114.w2", "model.layers.57.block_sparse_moe.experts.115.w2", "model.layers.57.block_sparse_moe.experts.116.w2", "model.layers.57.block_sparse_moe.experts.117.w2", "model.layers.57.block_sparse_moe.experts.118.w2", "model.layers.57.block_sparse_moe.experts.119.w2", "model.layers.57.block_sparse_moe.experts.120.w2", "model.layers.57.block_sparse_moe.experts.121.w2", "model.layers.57.block_sparse_moe.experts.122.w2", "model.layers.57.block_sparse_moe.experts.123.w2", "model.layers.57.block_sparse_moe.experts.124.w2", "model.layers.57.block_sparse_moe.experts.125.w2", "model.layers.57.block_sparse_moe.experts.126.w2", "model.layers.57.block_sparse_moe.experts.127.w2", "model.layers.57.block_sparse_moe.experts.128.w2", "model.layers.57.block_sparse_moe.experts.129.w2", "model.layers.57.block_sparse_moe.experts.130.w2", "model.layers.57.block_sparse_moe.experts.131.w2", "model.layers.57.block_sparse_moe.experts.132.w2", "model.layers.57.block_sparse_moe.experts.133.w2", "model.layers.57.block_sparse_moe.experts.134.w2", "model.layers.57.block_sparse_moe.experts.135.w2", "model.layers.57.block_sparse_moe.experts.136.w2", "model.layers.57.block_sparse_moe.experts.137.w2", "model.layers.57.block_sparse_moe.experts.138.w2", "model.layers.57.block_sparse_moe.experts.139.w2", "model.layers.57.block_sparse_moe.experts.140.w2", "model.layers.57.block_sparse_moe.experts.141.w2", "model.layers.57.block_sparse_moe.experts.142.w2", "model.layers.57.block_sparse_moe.experts.143.w2", "model.layers.57.block_sparse_moe.experts.144.w2", "model.layers.57.block_sparse_moe.experts.145.w2", "model.layers.57.block_sparse_moe.experts.146.w2", "model.layers.57.block_sparse_moe.experts.147.w2", "model.layers.57.block_sparse_moe.experts.148.w2", "model.layers.57.block_sparse_moe.experts.149.w2", "model.layers.57.block_sparse_moe.experts.150.w2", "model.layers.57.block_sparse_moe.experts.151.w2", "model.layers.57.block_sparse_moe.experts.152.w2", "model.layers.57.block_sparse_moe.experts.153.w2", "model.layers.57.block_sparse_moe.experts.154.w2", "model.layers.57.block_sparse_moe.experts.155.w2", "model.layers.57.block_sparse_moe.experts.156.w2", "model.layers.57.block_sparse_moe.experts.157.w2", "model.layers.57.block_sparse_moe.experts.158.w2", "model.layers.57.block_sparse_moe.experts.159.w2", "model.layers.57.block_sparse_moe.experts.160.w2", "model.layers.57.block_sparse_moe.experts.161.w2", "model.layers.57.block_sparse_moe.experts.162.w2", "model.layers.57.block_sparse_moe.experts.163.w2", "model.layers.57.block_sparse_moe.experts.164.w2", "model.layers.57.block_sparse_moe.experts.165.w2", "model.layers.57.block_sparse_moe.experts.166.w2", "model.layers.57.block_sparse_moe.experts.167.w2", "model.layers.57.block_sparse_moe.experts.168.w2", "model.layers.57.block_sparse_moe.experts.169.w2", "model.layers.57.block_sparse_moe.experts.170.w2", "model.layers.57.block_sparse_moe.experts.171.w2", "model.layers.57.block_sparse_moe.experts.172.w2", "model.layers.57.block_sparse_moe.experts.173.w2", "model.layers.57.block_sparse_moe.experts.174.w2", "model.layers.57.block_sparse_moe.experts.175.w2", "model.layers.57.block_sparse_moe.experts.176.w2", "model.layers.57.block_sparse_moe.experts.177.w2", "model.layers.57.block_sparse_moe.experts.178.w2", "model.layers.57.block_sparse_moe.experts.179.w2", "model.layers.57.block_sparse_moe.experts.180.w2", "model.layers.57.block_sparse_moe.experts.181.w2", "model.layers.57.block_sparse_moe.experts.182.w2", "model.layers.57.block_sparse_moe.experts.183.w2", "model.layers.57.block_sparse_moe.experts.184.w2", "model.layers.57.block_sparse_moe.experts.185.w2", "model.layers.57.block_sparse_moe.experts.186.w2", "model.layers.57.block_sparse_moe.experts.187.w2", "model.layers.57.block_sparse_moe.experts.188.w2", "model.layers.57.block_sparse_moe.experts.189.w2", "model.layers.57.block_sparse_moe.experts.190.w2", "model.layers.57.block_sparse_moe.experts.191.w2", "model.layers.57.block_sparse_moe.experts.192.w2", "model.layers.57.block_sparse_moe.experts.193.w2", "model.layers.57.block_sparse_moe.experts.194.w2", "model.layers.57.block_sparse_moe.experts.195.w2", "model.layers.57.block_sparse_moe.experts.196.w2", "model.layers.57.block_sparse_moe.experts.197.w2", "model.layers.57.block_sparse_moe.experts.198.w2", "model.layers.57.block_sparse_moe.experts.199.w2", "model.layers.57.block_sparse_moe.experts.200.w2", "model.layers.57.block_sparse_moe.experts.201.w2", "model.layers.57.block_sparse_moe.experts.202.w2", "model.layers.57.block_sparse_moe.experts.203.w2", "model.layers.57.block_sparse_moe.experts.204.w2", "model.layers.57.block_sparse_moe.experts.205.w2", "model.layers.57.block_sparse_moe.experts.206.w2", "model.layers.57.block_sparse_moe.experts.207.w2", "model.layers.57.block_sparse_moe.experts.208.w2", "model.layers.57.block_sparse_moe.experts.209.w2", "model.layers.57.block_sparse_moe.experts.210.w2", "model.layers.57.block_sparse_moe.experts.211.w2", "model.layers.57.block_sparse_moe.experts.212.w2", "model.layers.57.block_sparse_moe.experts.213.w2", "model.layers.57.block_sparse_moe.experts.214.w2", "model.layers.57.block_sparse_moe.experts.215.w2", "model.layers.57.block_sparse_moe.experts.216.w2", "model.layers.57.block_sparse_moe.experts.217.w2", "model.layers.57.block_sparse_moe.experts.218.w2", "model.layers.57.block_sparse_moe.experts.219.w2", "model.layers.57.block_sparse_moe.experts.220.w2", "model.layers.57.block_sparse_moe.experts.221.w2", "model.layers.57.block_sparse_moe.experts.222.w2", "model.layers.57.block_sparse_moe.experts.223.w2", "model.layers.57.block_sparse_moe.experts.224.w2", "model.layers.57.block_sparse_moe.experts.225.w2", "model.layers.57.block_sparse_moe.experts.226.w2", "model.layers.57.block_sparse_moe.experts.227.w2", "model.layers.57.block_sparse_moe.experts.228.w2", "model.layers.57.block_sparse_moe.experts.229.w2", "model.layers.57.block_sparse_moe.experts.230.w2", "model.layers.57.block_sparse_moe.experts.231.w2", "model.layers.57.block_sparse_moe.experts.232.w2", "model.layers.57.block_sparse_moe.experts.233.w2", "model.layers.57.block_sparse_moe.experts.234.w2", "model.layers.57.block_sparse_moe.experts.235.w2", "model.layers.57.block_sparse_moe.experts.236.w2", "model.layers.57.block_sparse_moe.experts.237.w2", "model.layers.57.block_sparse_moe.experts.238.w2", "model.layers.57.block_sparse_moe.experts.239.w2", "model.layers.57.block_sparse_moe.experts.240.w2", "model.layers.57.block_sparse_moe.experts.241.w2", "model.layers.57.block_sparse_moe.experts.242.w2", "model.layers.57.block_sparse_moe.experts.243.w2", "model.layers.57.block_sparse_moe.experts.244.w2", "model.layers.57.block_sparse_moe.experts.245.w2", "model.layers.57.block_sparse_moe.experts.246.w2", "model.layers.57.block_sparse_moe.experts.247.w2", "model.layers.57.block_sparse_moe.experts.248.w2", "model.layers.57.block_sparse_moe.experts.249.w2", "model.layers.57.block_sparse_moe.experts.250.w2", "model.layers.57.block_sparse_moe.experts.251.w2", "model.layers.57.block_sparse_moe.experts.252.w2", "model.layers.57.block_sparse_moe.experts.253.w2", "model.layers.57.block_sparse_moe.experts.254.w2", "model.layers.57.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0004136711359024048, "dbits": 1207959552 } ] }, { "idx": 290, "layers": [ "model.layers.58.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0003737509250640869, "dbits": 18874368 } ] }, { "idx": 291, "layers": [ "model.layers.58.self_attn.k_proj", "model.layers.58.self_attn.v_proj" ], "candidates": [ { "dkld": 0.006245505809784002, "dbits": 6291456 } ] }, { "idx": 292, "layers": [ "model.layers.58.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00500967204570768, "dbits": 18874368 } ] }, { "idx": 293, "layers": [ "model.layers.58.block_sparse_moe.experts.0.w1", "model.layers.58.block_sparse_moe.experts.1.w1", "model.layers.58.block_sparse_moe.experts.2.w1", "model.layers.58.block_sparse_moe.experts.3.w1", "model.layers.58.block_sparse_moe.experts.4.w1", "model.layers.58.block_sparse_moe.experts.5.w1", "model.layers.58.block_sparse_moe.experts.6.w1", "model.layers.58.block_sparse_moe.experts.7.w1", "model.layers.58.block_sparse_moe.experts.8.w1", "model.layers.58.block_sparse_moe.experts.9.w1", "model.layers.58.block_sparse_moe.experts.10.w1", "model.layers.58.block_sparse_moe.experts.11.w1", "model.layers.58.block_sparse_moe.experts.12.w1", "model.layers.58.block_sparse_moe.experts.13.w1", "model.layers.58.block_sparse_moe.experts.14.w1", "model.layers.58.block_sparse_moe.experts.15.w1", "model.layers.58.block_sparse_moe.experts.16.w1", "model.layers.58.block_sparse_moe.experts.17.w1", "model.layers.58.block_sparse_moe.experts.18.w1", "model.layers.58.block_sparse_moe.experts.19.w1", "model.layers.58.block_sparse_moe.experts.20.w1", "model.layers.58.block_sparse_moe.experts.21.w1", "model.layers.58.block_sparse_moe.experts.22.w1", "model.layers.58.block_sparse_moe.experts.23.w1", "model.layers.58.block_sparse_moe.experts.24.w1", "model.layers.58.block_sparse_moe.experts.25.w1", "model.layers.58.block_sparse_moe.experts.26.w1", "model.layers.58.block_sparse_moe.experts.27.w1", "model.layers.58.block_sparse_moe.experts.28.w1", "model.layers.58.block_sparse_moe.experts.29.w1", "model.layers.58.block_sparse_moe.experts.30.w1", "model.layers.58.block_sparse_moe.experts.31.w1", "model.layers.58.block_sparse_moe.experts.32.w1", "model.layers.58.block_sparse_moe.experts.33.w1", "model.layers.58.block_sparse_moe.experts.34.w1", "model.layers.58.block_sparse_moe.experts.35.w1", "model.layers.58.block_sparse_moe.experts.36.w1", "model.layers.58.block_sparse_moe.experts.37.w1", "model.layers.58.block_sparse_moe.experts.38.w1", "model.layers.58.block_sparse_moe.experts.39.w1", "model.layers.58.block_sparse_moe.experts.40.w1", "model.layers.58.block_sparse_moe.experts.41.w1", "model.layers.58.block_sparse_moe.experts.42.w1", "model.layers.58.block_sparse_moe.experts.43.w1", "model.layers.58.block_sparse_moe.experts.44.w1", "model.layers.58.block_sparse_moe.experts.45.w1", "model.layers.58.block_sparse_moe.experts.46.w1", "model.layers.58.block_sparse_moe.experts.47.w1", "model.layers.58.block_sparse_moe.experts.48.w1", "model.layers.58.block_sparse_moe.experts.49.w1", "model.layers.58.block_sparse_moe.experts.50.w1", "model.layers.58.block_sparse_moe.experts.51.w1", "model.layers.58.block_sparse_moe.experts.52.w1", "model.layers.58.block_sparse_moe.experts.53.w1", "model.layers.58.block_sparse_moe.experts.54.w1", "model.layers.58.block_sparse_moe.experts.55.w1", "model.layers.58.block_sparse_moe.experts.56.w1", "model.layers.58.block_sparse_moe.experts.57.w1", "model.layers.58.block_sparse_moe.experts.58.w1", "model.layers.58.block_sparse_moe.experts.59.w1", "model.layers.58.block_sparse_moe.experts.60.w1", "model.layers.58.block_sparse_moe.experts.61.w1", "model.layers.58.block_sparse_moe.experts.62.w1", "model.layers.58.block_sparse_moe.experts.63.w1", "model.layers.58.block_sparse_moe.experts.64.w1", "model.layers.58.block_sparse_moe.experts.65.w1", "model.layers.58.block_sparse_moe.experts.66.w1", "model.layers.58.block_sparse_moe.experts.67.w1", "model.layers.58.block_sparse_moe.experts.68.w1", "model.layers.58.block_sparse_moe.experts.69.w1", "model.layers.58.block_sparse_moe.experts.70.w1", "model.layers.58.block_sparse_moe.experts.71.w1", "model.layers.58.block_sparse_moe.experts.72.w1", "model.layers.58.block_sparse_moe.experts.73.w1", "model.layers.58.block_sparse_moe.experts.74.w1", "model.layers.58.block_sparse_moe.experts.75.w1", "model.layers.58.block_sparse_moe.experts.76.w1", "model.layers.58.block_sparse_moe.experts.77.w1", "model.layers.58.block_sparse_moe.experts.78.w1", "model.layers.58.block_sparse_moe.experts.79.w1", "model.layers.58.block_sparse_moe.experts.80.w1", "model.layers.58.block_sparse_moe.experts.81.w1", "model.layers.58.block_sparse_moe.experts.82.w1", "model.layers.58.block_sparse_moe.experts.83.w1", "model.layers.58.block_sparse_moe.experts.84.w1", "model.layers.58.block_sparse_moe.experts.85.w1", "model.layers.58.block_sparse_moe.experts.86.w1", "model.layers.58.block_sparse_moe.experts.87.w1", "model.layers.58.block_sparse_moe.experts.88.w1", "model.layers.58.block_sparse_moe.experts.89.w1", "model.layers.58.block_sparse_moe.experts.90.w1", "model.layers.58.block_sparse_moe.experts.91.w1", "model.layers.58.block_sparse_moe.experts.92.w1", "model.layers.58.block_sparse_moe.experts.93.w1", "model.layers.58.block_sparse_moe.experts.94.w1", "model.layers.58.block_sparse_moe.experts.95.w1", "model.layers.58.block_sparse_moe.experts.96.w1", "model.layers.58.block_sparse_moe.experts.97.w1", "model.layers.58.block_sparse_moe.experts.98.w1", "model.layers.58.block_sparse_moe.experts.99.w1", "model.layers.58.block_sparse_moe.experts.100.w1", "model.layers.58.block_sparse_moe.experts.101.w1", "model.layers.58.block_sparse_moe.experts.102.w1", "model.layers.58.block_sparse_moe.experts.103.w1", "model.layers.58.block_sparse_moe.experts.104.w1", "model.layers.58.block_sparse_moe.experts.105.w1", "model.layers.58.block_sparse_moe.experts.106.w1", "model.layers.58.block_sparse_moe.experts.107.w1", "model.layers.58.block_sparse_moe.experts.108.w1", "model.layers.58.block_sparse_moe.experts.109.w1", "model.layers.58.block_sparse_moe.experts.110.w1", "model.layers.58.block_sparse_moe.experts.111.w1", "model.layers.58.block_sparse_moe.experts.112.w1", "model.layers.58.block_sparse_moe.experts.113.w1", "model.layers.58.block_sparse_moe.experts.114.w1", "model.layers.58.block_sparse_moe.experts.115.w1", "model.layers.58.block_sparse_moe.experts.116.w1", "model.layers.58.block_sparse_moe.experts.117.w1", "model.layers.58.block_sparse_moe.experts.118.w1", "model.layers.58.block_sparse_moe.experts.119.w1", "model.layers.58.block_sparse_moe.experts.120.w1", "model.layers.58.block_sparse_moe.experts.121.w1", "model.layers.58.block_sparse_moe.experts.122.w1", "model.layers.58.block_sparse_moe.experts.123.w1", "model.layers.58.block_sparse_moe.experts.124.w1", "model.layers.58.block_sparse_moe.experts.125.w1", "model.layers.58.block_sparse_moe.experts.126.w1", "model.layers.58.block_sparse_moe.experts.127.w1", "model.layers.58.block_sparse_moe.experts.128.w1", "model.layers.58.block_sparse_moe.experts.129.w1", "model.layers.58.block_sparse_moe.experts.130.w1", "model.layers.58.block_sparse_moe.experts.131.w1", "model.layers.58.block_sparse_moe.experts.132.w1", "model.layers.58.block_sparse_moe.experts.133.w1", "model.layers.58.block_sparse_moe.experts.134.w1", "model.layers.58.block_sparse_moe.experts.135.w1", "model.layers.58.block_sparse_moe.experts.136.w1", "model.layers.58.block_sparse_moe.experts.137.w1", "model.layers.58.block_sparse_moe.experts.138.w1", "model.layers.58.block_sparse_moe.experts.139.w1", "model.layers.58.block_sparse_moe.experts.140.w1", "model.layers.58.block_sparse_moe.experts.141.w1", "model.layers.58.block_sparse_moe.experts.142.w1", "model.layers.58.block_sparse_moe.experts.143.w1", "model.layers.58.block_sparse_moe.experts.144.w1", "model.layers.58.block_sparse_moe.experts.145.w1", "model.layers.58.block_sparse_moe.experts.146.w1", "model.layers.58.block_sparse_moe.experts.147.w1", "model.layers.58.block_sparse_moe.experts.148.w1", "model.layers.58.block_sparse_moe.experts.149.w1", "model.layers.58.block_sparse_moe.experts.150.w1", "model.layers.58.block_sparse_moe.experts.151.w1", "model.layers.58.block_sparse_moe.experts.152.w1", "model.layers.58.block_sparse_moe.experts.153.w1", "model.layers.58.block_sparse_moe.experts.154.w1", "model.layers.58.block_sparse_moe.experts.155.w1", "model.layers.58.block_sparse_moe.experts.156.w1", "model.layers.58.block_sparse_moe.experts.157.w1", "model.layers.58.block_sparse_moe.experts.158.w1", "model.layers.58.block_sparse_moe.experts.159.w1", "model.layers.58.block_sparse_moe.experts.160.w1", "model.layers.58.block_sparse_moe.experts.161.w1", "model.layers.58.block_sparse_moe.experts.162.w1", "model.layers.58.block_sparse_moe.experts.163.w1", "model.layers.58.block_sparse_moe.experts.164.w1", "model.layers.58.block_sparse_moe.experts.165.w1", "model.layers.58.block_sparse_moe.experts.166.w1", "model.layers.58.block_sparse_moe.experts.167.w1", "model.layers.58.block_sparse_moe.experts.168.w1", "model.layers.58.block_sparse_moe.experts.169.w1", "model.layers.58.block_sparse_moe.experts.170.w1", "model.layers.58.block_sparse_moe.experts.171.w1", "model.layers.58.block_sparse_moe.experts.172.w1", "model.layers.58.block_sparse_moe.experts.173.w1", "model.layers.58.block_sparse_moe.experts.174.w1", "model.layers.58.block_sparse_moe.experts.175.w1", "model.layers.58.block_sparse_moe.experts.176.w1", "model.layers.58.block_sparse_moe.experts.177.w1", "model.layers.58.block_sparse_moe.experts.178.w1", "model.layers.58.block_sparse_moe.experts.179.w1", "model.layers.58.block_sparse_moe.experts.180.w1", "model.layers.58.block_sparse_moe.experts.181.w1", "model.layers.58.block_sparse_moe.experts.182.w1", "model.layers.58.block_sparse_moe.experts.183.w1", "model.layers.58.block_sparse_moe.experts.184.w1", "model.layers.58.block_sparse_moe.experts.185.w1", "model.layers.58.block_sparse_moe.experts.186.w1", "model.layers.58.block_sparse_moe.experts.187.w1", "model.layers.58.block_sparse_moe.experts.188.w1", "model.layers.58.block_sparse_moe.experts.189.w1", "model.layers.58.block_sparse_moe.experts.190.w1", "model.layers.58.block_sparse_moe.experts.191.w1", "model.layers.58.block_sparse_moe.experts.192.w1", "model.layers.58.block_sparse_moe.experts.193.w1", "model.layers.58.block_sparse_moe.experts.194.w1", "model.layers.58.block_sparse_moe.experts.195.w1", "model.layers.58.block_sparse_moe.experts.196.w1", "model.layers.58.block_sparse_moe.experts.197.w1", "model.layers.58.block_sparse_moe.experts.198.w1", "model.layers.58.block_sparse_moe.experts.199.w1", "model.layers.58.block_sparse_moe.experts.200.w1", "model.layers.58.block_sparse_moe.experts.201.w1", "model.layers.58.block_sparse_moe.experts.202.w1", "model.layers.58.block_sparse_moe.experts.203.w1", "model.layers.58.block_sparse_moe.experts.204.w1", "model.layers.58.block_sparse_moe.experts.205.w1", "model.layers.58.block_sparse_moe.experts.206.w1", "model.layers.58.block_sparse_moe.experts.207.w1", "model.layers.58.block_sparse_moe.experts.208.w1", "model.layers.58.block_sparse_moe.experts.209.w1", "model.layers.58.block_sparse_moe.experts.210.w1", "model.layers.58.block_sparse_moe.experts.211.w1", "model.layers.58.block_sparse_moe.experts.212.w1", "model.layers.58.block_sparse_moe.experts.213.w1", "model.layers.58.block_sparse_moe.experts.214.w1", "model.layers.58.block_sparse_moe.experts.215.w1", "model.layers.58.block_sparse_moe.experts.216.w1", "model.layers.58.block_sparse_moe.experts.217.w1", "model.layers.58.block_sparse_moe.experts.218.w1", "model.layers.58.block_sparse_moe.experts.219.w1", "model.layers.58.block_sparse_moe.experts.220.w1", "model.layers.58.block_sparse_moe.experts.221.w1", "model.layers.58.block_sparse_moe.experts.222.w1", "model.layers.58.block_sparse_moe.experts.223.w1", "model.layers.58.block_sparse_moe.experts.224.w1", "model.layers.58.block_sparse_moe.experts.225.w1", "model.layers.58.block_sparse_moe.experts.226.w1", "model.layers.58.block_sparse_moe.experts.227.w1", "model.layers.58.block_sparse_moe.experts.228.w1", "model.layers.58.block_sparse_moe.experts.229.w1", "model.layers.58.block_sparse_moe.experts.230.w1", "model.layers.58.block_sparse_moe.experts.231.w1", "model.layers.58.block_sparse_moe.experts.232.w1", "model.layers.58.block_sparse_moe.experts.233.w1", "model.layers.58.block_sparse_moe.experts.234.w1", "model.layers.58.block_sparse_moe.experts.235.w1", "model.layers.58.block_sparse_moe.experts.236.w1", "model.layers.58.block_sparse_moe.experts.237.w1", "model.layers.58.block_sparse_moe.experts.238.w1", "model.layers.58.block_sparse_moe.experts.239.w1", "model.layers.58.block_sparse_moe.experts.240.w1", "model.layers.58.block_sparse_moe.experts.241.w1", "model.layers.58.block_sparse_moe.experts.242.w1", "model.layers.58.block_sparse_moe.experts.243.w1", "model.layers.58.block_sparse_moe.experts.244.w1", "model.layers.58.block_sparse_moe.experts.245.w1", "model.layers.58.block_sparse_moe.experts.246.w1", "model.layers.58.block_sparse_moe.experts.247.w1", "model.layers.58.block_sparse_moe.experts.248.w1", "model.layers.58.block_sparse_moe.experts.249.w1", "model.layers.58.block_sparse_moe.experts.250.w1", "model.layers.58.block_sparse_moe.experts.251.w1", "model.layers.58.block_sparse_moe.experts.252.w1", "model.layers.58.block_sparse_moe.experts.253.w1", "model.layers.58.block_sparse_moe.experts.254.w1", "model.layers.58.block_sparse_moe.experts.255.w1", "model.layers.58.block_sparse_moe.experts.0.w3", "model.layers.58.block_sparse_moe.experts.1.w3", "model.layers.58.block_sparse_moe.experts.2.w3", "model.layers.58.block_sparse_moe.experts.3.w3", "model.layers.58.block_sparse_moe.experts.4.w3", "model.layers.58.block_sparse_moe.experts.5.w3", "model.layers.58.block_sparse_moe.experts.6.w3", "model.layers.58.block_sparse_moe.experts.7.w3", "model.layers.58.block_sparse_moe.experts.8.w3", "model.layers.58.block_sparse_moe.experts.9.w3", "model.layers.58.block_sparse_moe.experts.10.w3", "model.layers.58.block_sparse_moe.experts.11.w3", "model.layers.58.block_sparse_moe.experts.12.w3", "model.layers.58.block_sparse_moe.experts.13.w3", "model.layers.58.block_sparse_moe.experts.14.w3", "model.layers.58.block_sparse_moe.experts.15.w3", "model.layers.58.block_sparse_moe.experts.16.w3", "model.layers.58.block_sparse_moe.experts.17.w3", "model.layers.58.block_sparse_moe.experts.18.w3", "model.layers.58.block_sparse_moe.experts.19.w3", "model.layers.58.block_sparse_moe.experts.20.w3", "model.layers.58.block_sparse_moe.experts.21.w3", "model.layers.58.block_sparse_moe.experts.22.w3", "model.layers.58.block_sparse_moe.experts.23.w3", "model.layers.58.block_sparse_moe.experts.24.w3", "model.layers.58.block_sparse_moe.experts.25.w3", "model.layers.58.block_sparse_moe.experts.26.w3", "model.layers.58.block_sparse_moe.experts.27.w3", "model.layers.58.block_sparse_moe.experts.28.w3", "model.layers.58.block_sparse_moe.experts.29.w3", "model.layers.58.block_sparse_moe.experts.30.w3", "model.layers.58.block_sparse_moe.experts.31.w3", "model.layers.58.block_sparse_moe.experts.32.w3", "model.layers.58.block_sparse_moe.experts.33.w3", "model.layers.58.block_sparse_moe.experts.34.w3", "model.layers.58.block_sparse_moe.experts.35.w3", "model.layers.58.block_sparse_moe.experts.36.w3", "model.layers.58.block_sparse_moe.experts.37.w3", "model.layers.58.block_sparse_moe.experts.38.w3", "model.layers.58.block_sparse_moe.experts.39.w3", "model.layers.58.block_sparse_moe.experts.40.w3", "model.layers.58.block_sparse_moe.experts.41.w3", "model.layers.58.block_sparse_moe.experts.42.w3", "model.layers.58.block_sparse_moe.experts.43.w3", "model.layers.58.block_sparse_moe.experts.44.w3", "model.layers.58.block_sparse_moe.experts.45.w3", "model.layers.58.block_sparse_moe.experts.46.w3", "model.layers.58.block_sparse_moe.experts.47.w3", "model.layers.58.block_sparse_moe.experts.48.w3", "model.layers.58.block_sparse_moe.experts.49.w3", "model.layers.58.block_sparse_moe.experts.50.w3", "model.layers.58.block_sparse_moe.experts.51.w3", "model.layers.58.block_sparse_moe.experts.52.w3", "model.layers.58.block_sparse_moe.experts.53.w3", "model.layers.58.block_sparse_moe.experts.54.w3", "model.layers.58.block_sparse_moe.experts.55.w3", "model.layers.58.block_sparse_moe.experts.56.w3", "model.layers.58.block_sparse_moe.experts.57.w3", "model.layers.58.block_sparse_moe.experts.58.w3", "model.layers.58.block_sparse_moe.experts.59.w3", "model.layers.58.block_sparse_moe.experts.60.w3", "model.layers.58.block_sparse_moe.experts.61.w3", "model.layers.58.block_sparse_moe.experts.62.w3", "model.layers.58.block_sparse_moe.experts.63.w3", "model.layers.58.block_sparse_moe.experts.64.w3", "model.layers.58.block_sparse_moe.experts.65.w3", "model.layers.58.block_sparse_moe.experts.66.w3", "model.layers.58.block_sparse_moe.experts.67.w3", "model.layers.58.block_sparse_moe.experts.68.w3", "model.layers.58.block_sparse_moe.experts.69.w3", "model.layers.58.block_sparse_moe.experts.70.w3", "model.layers.58.block_sparse_moe.experts.71.w3", "model.layers.58.block_sparse_moe.experts.72.w3", "model.layers.58.block_sparse_moe.experts.73.w3", "model.layers.58.block_sparse_moe.experts.74.w3", "model.layers.58.block_sparse_moe.experts.75.w3", "model.layers.58.block_sparse_moe.experts.76.w3", "model.layers.58.block_sparse_moe.experts.77.w3", "model.layers.58.block_sparse_moe.experts.78.w3", "model.layers.58.block_sparse_moe.experts.79.w3", "model.layers.58.block_sparse_moe.experts.80.w3", "model.layers.58.block_sparse_moe.experts.81.w3", "model.layers.58.block_sparse_moe.experts.82.w3", "model.layers.58.block_sparse_moe.experts.83.w3", "model.layers.58.block_sparse_moe.experts.84.w3", "model.layers.58.block_sparse_moe.experts.85.w3", "model.layers.58.block_sparse_moe.experts.86.w3", "model.layers.58.block_sparse_moe.experts.87.w3", "model.layers.58.block_sparse_moe.experts.88.w3", "model.layers.58.block_sparse_moe.experts.89.w3", "model.layers.58.block_sparse_moe.experts.90.w3", "model.layers.58.block_sparse_moe.experts.91.w3", "model.layers.58.block_sparse_moe.experts.92.w3", "model.layers.58.block_sparse_moe.experts.93.w3", "model.layers.58.block_sparse_moe.experts.94.w3", "model.layers.58.block_sparse_moe.experts.95.w3", "model.layers.58.block_sparse_moe.experts.96.w3", "model.layers.58.block_sparse_moe.experts.97.w3", "model.layers.58.block_sparse_moe.experts.98.w3", "model.layers.58.block_sparse_moe.experts.99.w3", "model.layers.58.block_sparse_moe.experts.100.w3", "model.layers.58.block_sparse_moe.experts.101.w3", "model.layers.58.block_sparse_moe.experts.102.w3", "model.layers.58.block_sparse_moe.experts.103.w3", "model.layers.58.block_sparse_moe.experts.104.w3", "model.layers.58.block_sparse_moe.experts.105.w3", "model.layers.58.block_sparse_moe.experts.106.w3", "model.layers.58.block_sparse_moe.experts.107.w3", "model.layers.58.block_sparse_moe.experts.108.w3", "model.layers.58.block_sparse_moe.experts.109.w3", "model.layers.58.block_sparse_moe.experts.110.w3", "model.layers.58.block_sparse_moe.experts.111.w3", "model.layers.58.block_sparse_moe.experts.112.w3", "model.layers.58.block_sparse_moe.experts.113.w3", "model.layers.58.block_sparse_moe.experts.114.w3", "model.layers.58.block_sparse_moe.experts.115.w3", "model.layers.58.block_sparse_moe.experts.116.w3", "model.layers.58.block_sparse_moe.experts.117.w3", "model.layers.58.block_sparse_moe.experts.118.w3", "model.layers.58.block_sparse_moe.experts.119.w3", "model.layers.58.block_sparse_moe.experts.120.w3", "model.layers.58.block_sparse_moe.experts.121.w3", "model.layers.58.block_sparse_moe.experts.122.w3", "model.layers.58.block_sparse_moe.experts.123.w3", "model.layers.58.block_sparse_moe.experts.124.w3", "model.layers.58.block_sparse_moe.experts.125.w3", "model.layers.58.block_sparse_moe.experts.126.w3", "model.layers.58.block_sparse_moe.experts.127.w3", "model.layers.58.block_sparse_moe.experts.128.w3", "model.layers.58.block_sparse_moe.experts.129.w3", "model.layers.58.block_sparse_moe.experts.130.w3", "model.layers.58.block_sparse_moe.experts.131.w3", "model.layers.58.block_sparse_moe.experts.132.w3", "model.layers.58.block_sparse_moe.experts.133.w3", "model.layers.58.block_sparse_moe.experts.134.w3", "model.layers.58.block_sparse_moe.experts.135.w3", "model.layers.58.block_sparse_moe.experts.136.w3", "model.layers.58.block_sparse_moe.experts.137.w3", "model.layers.58.block_sparse_moe.experts.138.w3", "model.layers.58.block_sparse_moe.experts.139.w3", "model.layers.58.block_sparse_moe.experts.140.w3", "model.layers.58.block_sparse_moe.experts.141.w3", "model.layers.58.block_sparse_moe.experts.142.w3", "model.layers.58.block_sparse_moe.experts.143.w3", "model.layers.58.block_sparse_moe.experts.144.w3", "model.layers.58.block_sparse_moe.experts.145.w3", "model.layers.58.block_sparse_moe.experts.146.w3", "model.layers.58.block_sparse_moe.experts.147.w3", "model.layers.58.block_sparse_moe.experts.148.w3", "model.layers.58.block_sparse_moe.experts.149.w3", "model.layers.58.block_sparse_moe.experts.150.w3", "model.layers.58.block_sparse_moe.experts.151.w3", "model.layers.58.block_sparse_moe.experts.152.w3", "model.layers.58.block_sparse_moe.experts.153.w3", "model.layers.58.block_sparse_moe.experts.154.w3", "model.layers.58.block_sparse_moe.experts.155.w3", "model.layers.58.block_sparse_moe.experts.156.w3", "model.layers.58.block_sparse_moe.experts.157.w3", "model.layers.58.block_sparse_moe.experts.158.w3", "model.layers.58.block_sparse_moe.experts.159.w3", "model.layers.58.block_sparse_moe.experts.160.w3", "model.layers.58.block_sparse_moe.experts.161.w3", "model.layers.58.block_sparse_moe.experts.162.w3", "model.layers.58.block_sparse_moe.experts.163.w3", "model.layers.58.block_sparse_moe.experts.164.w3", "model.layers.58.block_sparse_moe.experts.165.w3", "model.layers.58.block_sparse_moe.experts.166.w3", "model.layers.58.block_sparse_moe.experts.167.w3", "model.layers.58.block_sparse_moe.experts.168.w3", "model.layers.58.block_sparse_moe.experts.169.w3", "model.layers.58.block_sparse_moe.experts.170.w3", "model.layers.58.block_sparse_moe.experts.171.w3", "model.layers.58.block_sparse_moe.experts.172.w3", "model.layers.58.block_sparse_moe.experts.173.w3", "model.layers.58.block_sparse_moe.experts.174.w3", "model.layers.58.block_sparse_moe.experts.175.w3", "model.layers.58.block_sparse_moe.experts.176.w3", "model.layers.58.block_sparse_moe.experts.177.w3", "model.layers.58.block_sparse_moe.experts.178.w3", "model.layers.58.block_sparse_moe.experts.179.w3", "model.layers.58.block_sparse_moe.experts.180.w3", "model.layers.58.block_sparse_moe.experts.181.w3", "model.layers.58.block_sparse_moe.experts.182.w3", "model.layers.58.block_sparse_moe.experts.183.w3", "model.layers.58.block_sparse_moe.experts.184.w3", "model.layers.58.block_sparse_moe.experts.185.w3", "model.layers.58.block_sparse_moe.experts.186.w3", "model.layers.58.block_sparse_moe.experts.187.w3", "model.layers.58.block_sparse_moe.experts.188.w3", "model.layers.58.block_sparse_moe.experts.189.w3", "model.layers.58.block_sparse_moe.experts.190.w3", "model.layers.58.block_sparse_moe.experts.191.w3", "model.layers.58.block_sparse_moe.experts.192.w3", "model.layers.58.block_sparse_moe.experts.193.w3", "model.layers.58.block_sparse_moe.experts.194.w3", "model.layers.58.block_sparse_moe.experts.195.w3", "model.layers.58.block_sparse_moe.experts.196.w3", "model.layers.58.block_sparse_moe.experts.197.w3", "model.layers.58.block_sparse_moe.experts.198.w3", "model.layers.58.block_sparse_moe.experts.199.w3", "model.layers.58.block_sparse_moe.experts.200.w3", "model.layers.58.block_sparse_moe.experts.201.w3", "model.layers.58.block_sparse_moe.experts.202.w3", "model.layers.58.block_sparse_moe.experts.203.w3", "model.layers.58.block_sparse_moe.experts.204.w3", "model.layers.58.block_sparse_moe.experts.205.w3", "model.layers.58.block_sparse_moe.experts.206.w3", "model.layers.58.block_sparse_moe.experts.207.w3", "model.layers.58.block_sparse_moe.experts.208.w3", "model.layers.58.block_sparse_moe.experts.209.w3", "model.layers.58.block_sparse_moe.experts.210.w3", "model.layers.58.block_sparse_moe.experts.211.w3", "model.layers.58.block_sparse_moe.experts.212.w3", "model.layers.58.block_sparse_moe.experts.213.w3", "model.layers.58.block_sparse_moe.experts.214.w3", "model.layers.58.block_sparse_moe.experts.215.w3", "model.layers.58.block_sparse_moe.experts.216.w3", "model.layers.58.block_sparse_moe.experts.217.w3", "model.layers.58.block_sparse_moe.experts.218.w3", "model.layers.58.block_sparse_moe.experts.219.w3", "model.layers.58.block_sparse_moe.experts.220.w3", "model.layers.58.block_sparse_moe.experts.221.w3", "model.layers.58.block_sparse_moe.experts.222.w3", "model.layers.58.block_sparse_moe.experts.223.w3", "model.layers.58.block_sparse_moe.experts.224.w3", "model.layers.58.block_sparse_moe.experts.225.w3", "model.layers.58.block_sparse_moe.experts.226.w3", "model.layers.58.block_sparse_moe.experts.227.w3", "model.layers.58.block_sparse_moe.experts.228.w3", "model.layers.58.block_sparse_moe.experts.229.w3", "model.layers.58.block_sparse_moe.experts.230.w3", "model.layers.58.block_sparse_moe.experts.231.w3", "model.layers.58.block_sparse_moe.experts.232.w3", "model.layers.58.block_sparse_moe.experts.233.w3", "model.layers.58.block_sparse_moe.experts.234.w3", "model.layers.58.block_sparse_moe.experts.235.w3", "model.layers.58.block_sparse_moe.experts.236.w3", "model.layers.58.block_sparse_moe.experts.237.w3", "model.layers.58.block_sparse_moe.experts.238.w3", "model.layers.58.block_sparse_moe.experts.239.w3", "model.layers.58.block_sparse_moe.experts.240.w3", "model.layers.58.block_sparse_moe.experts.241.w3", "model.layers.58.block_sparse_moe.experts.242.w3", "model.layers.58.block_sparse_moe.experts.243.w3", "model.layers.58.block_sparse_moe.experts.244.w3", "model.layers.58.block_sparse_moe.experts.245.w3", "model.layers.58.block_sparse_moe.experts.246.w3", "model.layers.58.block_sparse_moe.experts.247.w3", "model.layers.58.block_sparse_moe.experts.248.w3", "model.layers.58.block_sparse_moe.experts.249.w3", "model.layers.58.block_sparse_moe.experts.250.w3", "model.layers.58.block_sparse_moe.experts.251.w3", "model.layers.58.block_sparse_moe.experts.252.w3", "model.layers.58.block_sparse_moe.experts.253.w3", "model.layers.58.block_sparse_moe.experts.254.w3", "model.layers.58.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0003254503011703713, "dbits": 2415919104 } ] }, { "idx": 294, "layers": [ "model.layers.58.block_sparse_moe.experts.0.w2", "model.layers.58.block_sparse_moe.experts.1.w2", "model.layers.58.block_sparse_moe.experts.2.w2", "model.layers.58.block_sparse_moe.experts.3.w2", "model.layers.58.block_sparse_moe.experts.4.w2", "model.layers.58.block_sparse_moe.experts.5.w2", "model.layers.58.block_sparse_moe.experts.6.w2", "model.layers.58.block_sparse_moe.experts.7.w2", "model.layers.58.block_sparse_moe.experts.8.w2", "model.layers.58.block_sparse_moe.experts.9.w2", "model.layers.58.block_sparse_moe.experts.10.w2", "model.layers.58.block_sparse_moe.experts.11.w2", "model.layers.58.block_sparse_moe.experts.12.w2", "model.layers.58.block_sparse_moe.experts.13.w2", "model.layers.58.block_sparse_moe.experts.14.w2", "model.layers.58.block_sparse_moe.experts.15.w2", "model.layers.58.block_sparse_moe.experts.16.w2", "model.layers.58.block_sparse_moe.experts.17.w2", "model.layers.58.block_sparse_moe.experts.18.w2", "model.layers.58.block_sparse_moe.experts.19.w2", "model.layers.58.block_sparse_moe.experts.20.w2", "model.layers.58.block_sparse_moe.experts.21.w2", "model.layers.58.block_sparse_moe.experts.22.w2", "model.layers.58.block_sparse_moe.experts.23.w2", "model.layers.58.block_sparse_moe.experts.24.w2", "model.layers.58.block_sparse_moe.experts.25.w2", "model.layers.58.block_sparse_moe.experts.26.w2", "model.layers.58.block_sparse_moe.experts.27.w2", "model.layers.58.block_sparse_moe.experts.28.w2", "model.layers.58.block_sparse_moe.experts.29.w2", "model.layers.58.block_sparse_moe.experts.30.w2", "model.layers.58.block_sparse_moe.experts.31.w2", "model.layers.58.block_sparse_moe.experts.32.w2", "model.layers.58.block_sparse_moe.experts.33.w2", "model.layers.58.block_sparse_moe.experts.34.w2", "model.layers.58.block_sparse_moe.experts.35.w2", "model.layers.58.block_sparse_moe.experts.36.w2", "model.layers.58.block_sparse_moe.experts.37.w2", "model.layers.58.block_sparse_moe.experts.38.w2", "model.layers.58.block_sparse_moe.experts.39.w2", "model.layers.58.block_sparse_moe.experts.40.w2", "model.layers.58.block_sparse_moe.experts.41.w2", "model.layers.58.block_sparse_moe.experts.42.w2", "model.layers.58.block_sparse_moe.experts.43.w2", "model.layers.58.block_sparse_moe.experts.44.w2", "model.layers.58.block_sparse_moe.experts.45.w2", "model.layers.58.block_sparse_moe.experts.46.w2", "model.layers.58.block_sparse_moe.experts.47.w2", "model.layers.58.block_sparse_moe.experts.48.w2", "model.layers.58.block_sparse_moe.experts.49.w2", "model.layers.58.block_sparse_moe.experts.50.w2", "model.layers.58.block_sparse_moe.experts.51.w2", "model.layers.58.block_sparse_moe.experts.52.w2", "model.layers.58.block_sparse_moe.experts.53.w2", "model.layers.58.block_sparse_moe.experts.54.w2", "model.layers.58.block_sparse_moe.experts.55.w2", "model.layers.58.block_sparse_moe.experts.56.w2", "model.layers.58.block_sparse_moe.experts.57.w2", "model.layers.58.block_sparse_moe.experts.58.w2", "model.layers.58.block_sparse_moe.experts.59.w2", "model.layers.58.block_sparse_moe.experts.60.w2", "model.layers.58.block_sparse_moe.experts.61.w2", "model.layers.58.block_sparse_moe.experts.62.w2", "model.layers.58.block_sparse_moe.experts.63.w2", "model.layers.58.block_sparse_moe.experts.64.w2", "model.layers.58.block_sparse_moe.experts.65.w2", "model.layers.58.block_sparse_moe.experts.66.w2", "model.layers.58.block_sparse_moe.experts.67.w2", "model.layers.58.block_sparse_moe.experts.68.w2", "model.layers.58.block_sparse_moe.experts.69.w2", "model.layers.58.block_sparse_moe.experts.70.w2", "model.layers.58.block_sparse_moe.experts.71.w2", "model.layers.58.block_sparse_moe.experts.72.w2", "model.layers.58.block_sparse_moe.experts.73.w2", "model.layers.58.block_sparse_moe.experts.74.w2", "model.layers.58.block_sparse_moe.experts.75.w2", "model.layers.58.block_sparse_moe.experts.76.w2", "model.layers.58.block_sparse_moe.experts.77.w2", "model.layers.58.block_sparse_moe.experts.78.w2", "model.layers.58.block_sparse_moe.experts.79.w2", "model.layers.58.block_sparse_moe.experts.80.w2", "model.layers.58.block_sparse_moe.experts.81.w2", "model.layers.58.block_sparse_moe.experts.82.w2", "model.layers.58.block_sparse_moe.experts.83.w2", "model.layers.58.block_sparse_moe.experts.84.w2", "model.layers.58.block_sparse_moe.experts.85.w2", "model.layers.58.block_sparse_moe.experts.86.w2", "model.layers.58.block_sparse_moe.experts.87.w2", "model.layers.58.block_sparse_moe.experts.88.w2", "model.layers.58.block_sparse_moe.experts.89.w2", "model.layers.58.block_sparse_moe.experts.90.w2", "model.layers.58.block_sparse_moe.experts.91.w2", "model.layers.58.block_sparse_moe.experts.92.w2", "model.layers.58.block_sparse_moe.experts.93.w2", "model.layers.58.block_sparse_moe.experts.94.w2", "model.layers.58.block_sparse_moe.experts.95.w2", "model.layers.58.block_sparse_moe.experts.96.w2", "model.layers.58.block_sparse_moe.experts.97.w2", "model.layers.58.block_sparse_moe.experts.98.w2", "model.layers.58.block_sparse_moe.experts.99.w2", "model.layers.58.block_sparse_moe.experts.100.w2", "model.layers.58.block_sparse_moe.experts.101.w2", "model.layers.58.block_sparse_moe.experts.102.w2", "model.layers.58.block_sparse_moe.experts.103.w2", "model.layers.58.block_sparse_moe.experts.104.w2", "model.layers.58.block_sparse_moe.experts.105.w2", "model.layers.58.block_sparse_moe.experts.106.w2", "model.layers.58.block_sparse_moe.experts.107.w2", "model.layers.58.block_sparse_moe.experts.108.w2", "model.layers.58.block_sparse_moe.experts.109.w2", "model.layers.58.block_sparse_moe.experts.110.w2", "model.layers.58.block_sparse_moe.experts.111.w2", "model.layers.58.block_sparse_moe.experts.112.w2", "model.layers.58.block_sparse_moe.experts.113.w2", "model.layers.58.block_sparse_moe.experts.114.w2", "model.layers.58.block_sparse_moe.experts.115.w2", "model.layers.58.block_sparse_moe.experts.116.w2", "model.layers.58.block_sparse_moe.experts.117.w2", "model.layers.58.block_sparse_moe.experts.118.w2", "model.layers.58.block_sparse_moe.experts.119.w2", "model.layers.58.block_sparse_moe.experts.120.w2", "model.layers.58.block_sparse_moe.experts.121.w2", "model.layers.58.block_sparse_moe.experts.122.w2", "model.layers.58.block_sparse_moe.experts.123.w2", "model.layers.58.block_sparse_moe.experts.124.w2", "model.layers.58.block_sparse_moe.experts.125.w2", "model.layers.58.block_sparse_moe.experts.126.w2", "model.layers.58.block_sparse_moe.experts.127.w2", "model.layers.58.block_sparse_moe.experts.128.w2", "model.layers.58.block_sparse_moe.experts.129.w2", "model.layers.58.block_sparse_moe.experts.130.w2", "model.layers.58.block_sparse_moe.experts.131.w2", "model.layers.58.block_sparse_moe.experts.132.w2", "model.layers.58.block_sparse_moe.experts.133.w2", "model.layers.58.block_sparse_moe.experts.134.w2", "model.layers.58.block_sparse_moe.experts.135.w2", "model.layers.58.block_sparse_moe.experts.136.w2", "model.layers.58.block_sparse_moe.experts.137.w2", "model.layers.58.block_sparse_moe.experts.138.w2", "model.layers.58.block_sparse_moe.experts.139.w2", "model.layers.58.block_sparse_moe.experts.140.w2", "model.layers.58.block_sparse_moe.experts.141.w2", "model.layers.58.block_sparse_moe.experts.142.w2", "model.layers.58.block_sparse_moe.experts.143.w2", "model.layers.58.block_sparse_moe.experts.144.w2", "model.layers.58.block_sparse_moe.experts.145.w2", "model.layers.58.block_sparse_moe.experts.146.w2", "model.layers.58.block_sparse_moe.experts.147.w2", "model.layers.58.block_sparse_moe.experts.148.w2", "model.layers.58.block_sparse_moe.experts.149.w2", "model.layers.58.block_sparse_moe.experts.150.w2", "model.layers.58.block_sparse_moe.experts.151.w2", "model.layers.58.block_sparse_moe.experts.152.w2", "model.layers.58.block_sparse_moe.experts.153.w2", "model.layers.58.block_sparse_moe.experts.154.w2", "model.layers.58.block_sparse_moe.experts.155.w2", "model.layers.58.block_sparse_moe.experts.156.w2", "model.layers.58.block_sparse_moe.experts.157.w2", "model.layers.58.block_sparse_moe.experts.158.w2", "model.layers.58.block_sparse_moe.experts.159.w2", "model.layers.58.block_sparse_moe.experts.160.w2", "model.layers.58.block_sparse_moe.experts.161.w2", "model.layers.58.block_sparse_moe.experts.162.w2", "model.layers.58.block_sparse_moe.experts.163.w2", "model.layers.58.block_sparse_moe.experts.164.w2", "model.layers.58.block_sparse_moe.experts.165.w2", "model.layers.58.block_sparse_moe.experts.166.w2", "model.layers.58.block_sparse_moe.experts.167.w2", "model.layers.58.block_sparse_moe.experts.168.w2", "model.layers.58.block_sparse_moe.experts.169.w2", "model.layers.58.block_sparse_moe.experts.170.w2", "model.layers.58.block_sparse_moe.experts.171.w2", "model.layers.58.block_sparse_moe.experts.172.w2", "model.layers.58.block_sparse_moe.experts.173.w2", "model.layers.58.block_sparse_moe.experts.174.w2", "model.layers.58.block_sparse_moe.experts.175.w2", "model.layers.58.block_sparse_moe.experts.176.w2", "model.layers.58.block_sparse_moe.experts.177.w2", "model.layers.58.block_sparse_moe.experts.178.w2", "model.layers.58.block_sparse_moe.experts.179.w2", "model.layers.58.block_sparse_moe.experts.180.w2", "model.layers.58.block_sparse_moe.experts.181.w2", "model.layers.58.block_sparse_moe.experts.182.w2", "model.layers.58.block_sparse_moe.experts.183.w2", "model.layers.58.block_sparse_moe.experts.184.w2", "model.layers.58.block_sparse_moe.experts.185.w2", "model.layers.58.block_sparse_moe.experts.186.w2", "model.layers.58.block_sparse_moe.experts.187.w2", "model.layers.58.block_sparse_moe.experts.188.w2", "model.layers.58.block_sparse_moe.experts.189.w2", "model.layers.58.block_sparse_moe.experts.190.w2", "model.layers.58.block_sparse_moe.experts.191.w2", "model.layers.58.block_sparse_moe.experts.192.w2", "model.layers.58.block_sparse_moe.experts.193.w2", "model.layers.58.block_sparse_moe.experts.194.w2", "model.layers.58.block_sparse_moe.experts.195.w2", "model.layers.58.block_sparse_moe.experts.196.w2", "model.layers.58.block_sparse_moe.experts.197.w2", "model.layers.58.block_sparse_moe.experts.198.w2", "model.layers.58.block_sparse_moe.experts.199.w2", "model.layers.58.block_sparse_moe.experts.200.w2", "model.layers.58.block_sparse_moe.experts.201.w2", "model.layers.58.block_sparse_moe.experts.202.w2", "model.layers.58.block_sparse_moe.experts.203.w2", "model.layers.58.block_sparse_moe.experts.204.w2", "model.layers.58.block_sparse_moe.experts.205.w2", "model.layers.58.block_sparse_moe.experts.206.w2", "model.layers.58.block_sparse_moe.experts.207.w2", "model.layers.58.block_sparse_moe.experts.208.w2", "model.layers.58.block_sparse_moe.experts.209.w2", "model.layers.58.block_sparse_moe.experts.210.w2", "model.layers.58.block_sparse_moe.experts.211.w2", "model.layers.58.block_sparse_moe.experts.212.w2", "model.layers.58.block_sparse_moe.experts.213.w2", "model.layers.58.block_sparse_moe.experts.214.w2", "model.layers.58.block_sparse_moe.experts.215.w2", "model.layers.58.block_sparse_moe.experts.216.w2", "model.layers.58.block_sparse_moe.experts.217.w2", "model.layers.58.block_sparse_moe.experts.218.w2", "model.layers.58.block_sparse_moe.experts.219.w2", "model.layers.58.block_sparse_moe.experts.220.w2", "model.layers.58.block_sparse_moe.experts.221.w2", "model.layers.58.block_sparse_moe.experts.222.w2", "model.layers.58.block_sparse_moe.experts.223.w2", "model.layers.58.block_sparse_moe.experts.224.w2", "model.layers.58.block_sparse_moe.experts.225.w2", "model.layers.58.block_sparse_moe.experts.226.w2", "model.layers.58.block_sparse_moe.experts.227.w2", "model.layers.58.block_sparse_moe.experts.228.w2", "model.layers.58.block_sparse_moe.experts.229.w2", "model.layers.58.block_sparse_moe.experts.230.w2", "model.layers.58.block_sparse_moe.experts.231.w2", "model.layers.58.block_sparse_moe.experts.232.w2", "model.layers.58.block_sparse_moe.experts.233.w2", "model.layers.58.block_sparse_moe.experts.234.w2", "model.layers.58.block_sparse_moe.experts.235.w2", "model.layers.58.block_sparse_moe.experts.236.w2", "model.layers.58.block_sparse_moe.experts.237.w2", "model.layers.58.block_sparse_moe.experts.238.w2", "model.layers.58.block_sparse_moe.experts.239.w2", "model.layers.58.block_sparse_moe.experts.240.w2", "model.layers.58.block_sparse_moe.experts.241.w2", "model.layers.58.block_sparse_moe.experts.242.w2", "model.layers.58.block_sparse_moe.experts.243.w2", "model.layers.58.block_sparse_moe.experts.244.w2", "model.layers.58.block_sparse_moe.experts.245.w2", "model.layers.58.block_sparse_moe.experts.246.w2", "model.layers.58.block_sparse_moe.experts.247.w2", "model.layers.58.block_sparse_moe.experts.248.w2", "model.layers.58.block_sparse_moe.experts.249.w2", "model.layers.58.block_sparse_moe.experts.250.w2", "model.layers.58.block_sparse_moe.experts.251.w2", "model.layers.58.block_sparse_moe.experts.252.w2", "model.layers.58.block_sparse_moe.experts.253.w2", "model.layers.58.block_sparse_moe.experts.254.w2", "model.layers.58.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 4.0194392204373486e-05, "dbits": 1207959552 } ] }, { "idx": 295, "layers": [ "model.layers.59.self_attn.q_proj" ], "candidates": [ { "dkld": 0.000569954514503479, "dbits": 18874368 } ] }, { "idx": 296, "layers": [ "model.layers.59.self_attn.k_proj", "model.layers.59.self_attn.v_proj" ], "candidates": [ { "dkld": -0.009022125601768405, "dbits": 6291456 } ] }, { "idx": 297, "layers": [ "model.layers.59.self_attn.o_proj" ], "candidates": [ { "dkld": -0.001246106624603227, "dbits": 18874368 } ] }, { "idx": 298, "layers": [ "model.layers.59.block_sparse_moe.experts.0.w1", "model.layers.59.block_sparse_moe.experts.1.w1", "model.layers.59.block_sparse_moe.experts.2.w1", "model.layers.59.block_sparse_moe.experts.3.w1", "model.layers.59.block_sparse_moe.experts.4.w1", "model.layers.59.block_sparse_moe.experts.5.w1", "model.layers.59.block_sparse_moe.experts.6.w1", "model.layers.59.block_sparse_moe.experts.7.w1", "model.layers.59.block_sparse_moe.experts.8.w1", "model.layers.59.block_sparse_moe.experts.9.w1", "model.layers.59.block_sparse_moe.experts.10.w1", "model.layers.59.block_sparse_moe.experts.11.w1", "model.layers.59.block_sparse_moe.experts.12.w1", "model.layers.59.block_sparse_moe.experts.13.w1", "model.layers.59.block_sparse_moe.experts.14.w1", "model.layers.59.block_sparse_moe.experts.15.w1", "model.layers.59.block_sparse_moe.experts.16.w1", "model.layers.59.block_sparse_moe.experts.17.w1", "model.layers.59.block_sparse_moe.experts.18.w1", "model.layers.59.block_sparse_moe.experts.19.w1", "model.layers.59.block_sparse_moe.experts.20.w1", "model.layers.59.block_sparse_moe.experts.21.w1", "model.layers.59.block_sparse_moe.experts.22.w1", "model.layers.59.block_sparse_moe.experts.23.w1", "model.layers.59.block_sparse_moe.experts.24.w1", "model.layers.59.block_sparse_moe.experts.25.w1", "model.layers.59.block_sparse_moe.experts.26.w1", "model.layers.59.block_sparse_moe.experts.27.w1", "model.layers.59.block_sparse_moe.experts.28.w1", "model.layers.59.block_sparse_moe.experts.29.w1", "model.layers.59.block_sparse_moe.experts.30.w1", "model.layers.59.block_sparse_moe.experts.31.w1", "model.layers.59.block_sparse_moe.experts.32.w1", "model.layers.59.block_sparse_moe.experts.33.w1", "model.layers.59.block_sparse_moe.experts.34.w1", "model.layers.59.block_sparse_moe.experts.35.w1", "model.layers.59.block_sparse_moe.experts.36.w1", "model.layers.59.block_sparse_moe.experts.37.w1", "model.layers.59.block_sparse_moe.experts.38.w1", "model.layers.59.block_sparse_moe.experts.39.w1", "model.layers.59.block_sparse_moe.experts.40.w1", "model.layers.59.block_sparse_moe.experts.41.w1", "model.layers.59.block_sparse_moe.experts.42.w1", "model.layers.59.block_sparse_moe.experts.43.w1", "model.layers.59.block_sparse_moe.experts.44.w1", "model.layers.59.block_sparse_moe.experts.45.w1", "model.layers.59.block_sparse_moe.experts.46.w1", "model.layers.59.block_sparse_moe.experts.47.w1", "model.layers.59.block_sparse_moe.experts.48.w1", "model.layers.59.block_sparse_moe.experts.49.w1", "model.layers.59.block_sparse_moe.experts.50.w1", "model.layers.59.block_sparse_moe.experts.51.w1", "model.layers.59.block_sparse_moe.experts.52.w1", "model.layers.59.block_sparse_moe.experts.53.w1", "model.layers.59.block_sparse_moe.experts.54.w1", "model.layers.59.block_sparse_moe.experts.55.w1", "model.layers.59.block_sparse_moe.experts.56.w1", "model.layers.59.block_sparse_moe.experts.57.w1", "model.layers.59.block_sparse_moe.experts.58.w1", "model.layers.59.block_sparse_moe.experts.59.w1", "model.layers.59.block_sparse_moe.experts.60.w1", "model.layers.59.block_sparse_moe.experts.61.w1", "model.layers.59.block_sparse_moe.experts.62.w1", "model.layers.59.block_sparse_moe.experts.63.w1", "model.layers.59.block_sparse_moe.experts.64.w1", "model.layers.59.block_sparse_moe.experts.65.w1", "model.layers.59.block_sparse_moe.experts.66.w1", "model.layers.59.block_sparse_moe.experts.67.w1", "model.layers.59.block_sparse_moe.experts.68.w1", "model.layers.59.block_sparse_moe.experts.69.w1", "model.layers.59.block_sparse_moe.experts.70.w1", "model.layers.59.block_sparse_moe.experts.71.w1", "model.layers.59.block_sparse_moe.experts.72.w1", "model.layers.59.block_sparse_moe.experts.73.w1", "model.layers.59.block_sparse_moe.experts.74.w1", "model.layers.59.block_sparse_moe.experts.75.w1", "model.layers.59.block_sparse_moe.experts.76.w1", "model.layers.59.block_sparse_moe.experts.77.w1", "model.layers.59.block_sparse_moe.experts.78.w1", "model.layers.59.block_sparse_moe.experts.79.w1", "model.layers.59.block_sparse_moe.experts.80.w1", "model.layers.59.block_sparse_moe.experts.81.w1", "model.layers.59.block_sparse_moe.experts.82.w1", "model.layers.59.block_sparse_moe.experts.83.w1", "model.layers.59.block_sparse_moe.experts.84.w1", "model.layers.59.block_sparse_moe.experts.85.w1", "model.layers.59.block_sparse_moe.experts.86.w1", "model.layers.59.block_sparse_moe.experts.87.w1", "model.layers.59.block_sparse_moe.experts.88.w1", "model.layers.59.block_sparse_moe.experts.89.w1", "model.layers.59.block_sparse_moe.experts.90.w1", "model.layers.59.block_sparse_moe.experts.91.w1", "model.layers.59.block_sparse_moe.experts.92.w1", "model.layers.59.block_sparse_moe.experts.93.w1", "model.layers.59.block_sparse_moe.experts.94.w1", "model.layers.59.block_sparse_moe.experts.95.w1", "model.layers.59.block_sparse_moe.experts.96.w1", "model.layers.59.block_sparse_moe.experts.97.w1", "model.layers.59.block_sparse_moe.experts.98.w1", "model.layers.59.block_sparse_moe.experts.99.w1", "model.layers.59.block_sparse_moe.experts.100.w1", "model.layers.59.block_sparse_moe.experts.101.w1", "model.layers.59.block_sparse_moe.experts.102.w1", "model.layers.59.block_sparse_moe.experts.103.w1", "model.layers.59.block_sparse_moe.experts.104.w1", "model.layers.59.block_sparse_moe.experts.105.w1", "model.layers.59.block_sparse_moe.experts.106.w1", "model.layers.59.block_sparse_moe.experts.107.w1", "model.layers.59.block_sparse_moe.experts.108.w1", "model.layers.59.block_sparse_moe.experts.109.w1", "model.layers.59.block_sparse_moe.experts.110.w1", "model.layers.59.block_sparse_moe.experts.111.w1", "model.layers.59.block_sparse_moe.experts.112.w1", "model.layers.59.block_sparse_moe.experts.113.w1", "model.layers.59.block_sparse_moe.experts.114.w1", "model.layers.59.block_sparse_moe.experts.115.w1", "model.layers.59.block_sparse_moe.experts.116.w1", "model.layers.59.block_sparse_moe.experts.117.w1", "model.layers.59.block_sparse_moe.experts.118.w1", "model.layers.59.block_sparse_moe.experts.119.w1", "model.layers.59.block_sparse_moe.experts.120.w1", "model.layers.59.block_sparse_moe.experts.121.w1", "model.layers.59.block_sparse_moe.experts.122.w1", "model.layers.59.block_sparse_moe.experts.123.w1", "model.layers.59.block_sparse_moe.experts.124.w1", "model.layers.59.block_sparse_moe.experts.125.w1", "model.layers.59.block_sparse_moe.experts.126.w1", "model.layers.59.block_sparse_moe.experts.127.w1", "model.layers.59.block_sparse_moe.experts.128.w1", "model.layers.59.block_sparse_moe.experts.129.w1", "model.layers.59.block_sparse_moe.experts.130.w1", "model.layers.59.block_sparse_moe.experts.131.w1", "model.layers.59.block_sparse_moe.experts.132.w1", "model.layers.59.block_sparse_moe.experts.133.w1", "model.layers.59.block_sparse_moe.experts.134.w1", "model.layers.59.block_sparse_moe.experts.135.w1", "model.layers.59.block_sparse_moe.experts.136.w1", "model.layers.59.block_sparse_moe.experts.137.w1", "model.layers.59.block_sparse_moe.experts.138.w1", "model.layers.59.block_sparse_moe.experts.139.w1", "model.layers.59.block_sparse_moe.experts.140.w1", "model.layers.59.block_sparse_moe.experts.141.w1", "model.layers.59.block_sparse_moe.experts.142.w1", "model.layers.59.block_sparse_moe.experts.143.w1", "model.layers.59.block_sparse_moe.experts.144.w1", "model.layers.59.block_sparse_moe.experts.145.w1", "model.layers.59.block_sparse_moe.experts.146.w1", "model.layers.59.block_sparse_moe.experts.147.w1", "model.layers.59.block_sparse_moe.experts.148.w1", "model.layers.59.block_sparse_moe.experts.149.w1", "model.layers.59.block_sparse_moe.experts.150.w1", "model.layers.59.block_sparse_moe.experts.151.w1", "model.layers.59.block_sparse_moe.experts.152.w1", "model.layers.59.block_sparse_moe.experts.153.w1", "model.layers.59.block_sparse_moe.experts.154.w1", "model.layers.59.block_sparse_moe.experts.155.w1", "model.layers.59.block_sparse_moe.experts.156.w1", "model.layers.59.block_sparse_moe.experts.157.w1", "model.layers.59.block_sparse_moe.experts.158.w1", "model.layers.59.block_sparse_moe.experts.159.w1", "model.layers.59.block_sparse_moe.experts.160.w1", "model.layers.59.block_sparse_moe.experts.161.w1", "model.layers.59.block_sparse_moe.experts.162.w1", "model.layers.59.block_sparse_moe.experts.163.w1", "model.layers.59.block_sparse_moe.experts.164.w1", "model.layers.59.block_sparse_moe.experts.165.w1", "model.layers.59.block_sparse_moe.experts.166.w1", "model.layers.59.block_sparse_moe.experts.167.w1", "model.layers.59.block_sparse_moe.experts.168.w1", "model.layers.59.block_sparse_moe.experts.169.w1", "model.layers.59.block_sparse_moe.experts.170.w1", "model.layers.59.block_sparse_moe.experts.171.w1", "model.layers.59.block_sparse_moe.experts.172.w1", "model.layers.59.block_sparse_moe.experts.173.w1", "model.layers.59.block_sparse_moe.experts.174.w1", "model.layers.59.block_sparse_moe.experts.175.w1", "model.layers.59.block_sparse_moe.experts.176.w1", "model.layers.59.block_sparse_moe.experts.177.w1", "model.layers.59.block_sparse_moe.experts.178.w1", "model.layers.59.block_sparse_moe.experts.179.w1", "model.layers.59.block_sparse_moe.experts.180.w1", "model.layers.59.block_sparse_moe.experts.181.w1", "model.layers.59.block_sparse_moe.experts.182.w1", "model.layers.59.block_sparse_moe.experts.183.w1", "model.layers.59.block_sparse_moe.experts.184.w1", "model.layers.59.block_sparse_moe.experts.185.w1", "model.layers.59.block_sparse_moe.experts.186.w1", "model.layers.59.block_sparse_moe.experts.187.w1", "model.layers.59.block_sparse_moe.experts.188.w1", "model.layers.59.block_sparse_moe.experts.189.w1", "model.layers.59.block_sparse_moe.experts.190.w1", "model.layers.59.block_sparse_moe.experts.191.w1", "model.layers.59.block_sparse_moe.experts.192.w1", "model.layers.59.block_sparse_moe.experts.193.w1", "model.layers.59.block_sparse_moe.experts.194.w1", "model.layers.59.block_sparse_moe.experts.195.w1", "model.layers.59.block_sparse_moe.experts.196.w1", "model.layers.59.block_sparse_moe.experts.197.w1", "model.layers.59.block_sparse_moe.experts.198.w1", "model.layers.59.block_sparse_moe.experts.199.w1", "model.layers.59.block_sparse_moe.experts.200.w1", "model.layers.59.block_sparse_moe.experts.201.w1", "model.layers.59.block_sparse_moe.experts.202.w1", "model.layers.59.block_sparse_moe.experts.203.w1", "model.layers.59.block_sparse_moe.experts.204.w1", "model.layers.59.block_sparse_moe.experts.205.w1", "model.layers.59.block_sparse_moe.experts.206.w1", "model.layers.59.block_sparse_moe.experts.207.w1", "model.layers.59.block_sparse_moe.experts.208.w1", "model.layers.59.block_sparse_moe.experts.209.w1", "model.layers.59.block_sparse_moe.experts.210.w1", "model.layers.59.block_sparse_moe.experts.211.w1", "model.layers.59.block_sparse_moe.experts.212.w1", "model.layers.59.block_sparse_moe.experts.213.w1", "model.layers.59.block_sparse_moe.experts.214.w1", "model.layers.59.block_sparse_moe.experts.215.w1", "model.layers.59.block_sparse_moe.experts.216.w1", "model.layers.59.block_sparse_moe.experts.217.w1", "model.layers.59.block_sparse_moe.experts.218.w1", "model.layers.59.block_sparse_moe.experts.219.w1", "model.layers.59.block_sparse_moe.experts.220.w1", "model.layers.59.block_sparse_moe.experts.221.w1", "model.layers.59.block_sparse_moe.experts.222.w1", "model.layers.59.block_sparse_moe.experts.223.w1", "model.layers.59.block_sparse_moe.experts.224.w1", "model.layers.59.block_sparse_moe.experts.225.w1", "model.layers.59.block_sparse_moe.experts.226.w1", "model.layers.59.block_sparse_moe.experts.227.w1", "model.layers.59.block_sparse_moe.experts.228.w1", "model.layers.59.block_sparse_moe.experts.229.w1", "model.layers.59.block_sparse_moe.experts.230.w1", "model.layers.59.block_sparse_moe.experts.231.w1", "model.layers.59.block_sparse_moe.experts.232.w1", "model.layers.59.block_sparse_moe.experts.233.w1", "model.layers.59.block_sparse_moe.experts.234.w1", "model.layers.59.block_sparse_moe.experts.235.w1", "model.layers.59.block_sparse_moe.experts.236.w1", "model.layers.59.block_sparse_moe.experts.237.w1", "model.layers.59.block_sparse_moe.experts.238.w1", "model.layers.59.block_sparse_moe.experts.239.w1", "model.layers.59.block_sparse_moe.experts.240.w1", "model.layers.59.block_sparse_moe.experts.241.w1", "model.layers.59.block_sparse_moe.experts.242.w1", "model.layers.59.block_sparse_moe.experts.243.w1", "model.layers.59.block_sparse_moe.experts.244.w1", "model.layers.59.block_sparse_moe.experts.245.w1", "model.layers.59.block_sparse_moe.experts.246.w1", "model.layers.59.block_sparse_moe.experts.247.w1", "model.layers.59.block_sparse_moe.experts.248.w1", "model.layers.59.block_sparse_moe.experts.249.w1", "model.layers.59.block_sparse_moe.experts.250.w1", "model.layers.59.block_sparse_moe.experts.251.w1", "model.layers.59.block_sparse_moe.experts.252.w1", "model.layers.59.block_sparse_moe.experts.253.w1", "model.layers.59.block_sparse_moe.experts.254.w1", "model.layers.59.block_sparse_moe.experts.255.w1", "model.layers.59.block_sparse_moe.experts.0.w3", "model.layers.59.block_sparse_moe.experts.1.w3", "model.layers.59.block_sparse_moe.experts.2.w3", "model.layers.59.block_sparse_moe.experts.3.w3", "model.layers.59.block_sparse_moe.experts.4.w3", "model.layers.59.block_sparse_moe.experts.5.w3", "model.layers.59.block_sparse_moe.experts.6.w3", "model.layers.59.block_sparse_moe.experts.7.w3", "model.layers.59.block_sparse_moe.experts.8.w3", "model.layers.59.block_sparse_moe.experts.9.w3", "model.layers.59.block_sparse_moe.experts.10.w3", "model.layers.59.block_sparse_moe.experts.11.w3", "model.layers.59.block_sparse_moe.experts.12.w3", "model.layers.59.block_sparse_moe.experts.13.w3", "model.layers.59.block_sparse_moe.experts.14.w3", "model.layers.59.block_sparse_moe.experts.15.w3", "model.layers.59.block_sparse_moe.experts.16.w3", "model.layers.59.block_sparse_moe.experts.17.w3", "model.layers.59.block_sparse_moe.experts.18.w3", "model.layers.59.block_sparse_moe.experts.19.w3", "model.layers.59.block_sparse_moe.experts.20.w3", "model.layers.59.block_sparse_moe.experts.21.w3", "model.layers.59.block_sparse_moe.experts.22.w3", "model.layers.59.block_sparse_moe.experts.23.w3", "model.layers.59.block_sparse_moe.experts.24.w3", "model.layers.59.block_sparse_moe.experts.25.w3", "model.layers.59.block_sparse_moe.experts.26.w3", "model.layers.59.block_sparse_moe.experts.27.w3", "model.layers.59.block_sparse_moe.experts.28.w3", "model.layers.59.block_sparse_moe.experts.29.w3", "model.layers.59.block_sparse_moe.experts.30.w3", "model.layers.59.block_sparse_moe.experts.31.w3", "model.layers.59.block_sparse_moe.experts.32.w3", "model.layers.59.block_sparse_moe.experts.33.w3", "model.layers.59.block_sparse_moe.experts.34.w3", "model.layers.59.block_sparse_moe.experts.35.w3", "model.layers.59.block_sparse_moe.experts.36.w3", "model.layers.59.block_sparse_moe.experts.37.w3", "model.layers.59.block_sparse_moe.experts.38.w3", "model.layers.59.block_sparse_moe.experts.39.w3", "model.layers.59.block_sparse_moe.experts.40.w3", "model.layers.59.block_sparse_moe.experts.41.w3", "model.layers.59.block_sparse_moe.experts.42.w3", "model.layers.59.block_sparse_moe.experts.43.w3", "model.layers.59.block_sparse_moe.experts.44.w3", "model.layers.59.block_sparse_moe.experts.45.w3", "model.layers.59.block_sparse_moe.experts.46.w3", "model.layers.59.block_sparse_moe.experts.47.w3", "model.layers.59.block_sparse_moe.experts.48.w3", "model.layers.59.block_sparse_moe.experts.49.w3", "model.layers.59.block_sparse_moe.experts.50.w3", "model.layers.59.block_sparse_moe.experts.51.w3", "model.layers.59.block_sparse_moe.experts.52.w3", "model.layers.59.block_sparse_moe.experts.53.w3", "model.layers.59.block_sparse_moe.experts.54.w3", "model.layers.59.block_sparse_moe.experts.55.w3", "model.layers.59.block_sparse_moe.experts.56.w3", "model.layers.59.block_sparse_moe.experts.57.w3", "model.layers.59.block_sparse_moe.experts.58.w3", "model.layers.59.block_sparse_moe.experts.59.w3", "model.layers.59.block_sparse_moe.experts.60.w3", "model.layers.59.block_sparse_moe.experts.61.w3", "model.layers.59.block_sparse_moe.experts.62.w3", "model.layers.59.block_sparse_moe.experts.63.w3", "model.layers.59.block_sparse_moe.experts.64.w3", "model.layers.59.block_sparse_moe.experts.65.w3", "model.layers.59.block_sparse_moe.experts.66.w3", "model.layers.59.block_sparse_moe.experts.67.w3", "model.layers.59.block_sparse_moe.experts.68.w3", "model.layers.59.block_sparse_moe.experts.69.w3", "model.layers.59.block_sparse_moe.experts.70.w3", "model.layers.59.block_sparse_moe.experts.71.w3", "model.layers.59.block_sparse_moe.experts.72.w3", "model.layers.59.block_sparse_moe.experts.73.w3", "model.layers.59.block_sparse_moe.experts.74.w3", "model.layers.59.block_sparse_moe.experts.75.w3", "model.layers.59.block_sparse_moe.experts.76.w3", "model.layers.59.block_sparse_moe.experts.77.w3", "model.layers.59.block_sparse_moe.experts.78.w3", "model.layers.59.block_sparse_moe.experts.79.w3", "model.layers.59.block_sparse_moe.experts.80.w3", "model.layers.59.block_sparse_moe.experts.81.w3", "model.layers.59.block_sparse_moe.experts.82.w3", "model.layers.59.block_sparse_moe.experts.83.w3", "model.layers.59.block_sparse_moe.experts.84.w3", "model.layers.59.block_sparse_moe.experts.85.w3", "model.layers.59.block_sparse_moe.experts.86.w3", "model.layers.59.block_sparse_moe.experts.87.w3", "model.layers.59.block_sparse_moe.experts.88.w3", "model.layers.59.block_sparse_moe.experts.89.w3", "model.layers.59.block_sparse_moe.experts.90.w3", "model.layers.59.block_sparse_moe.experts.91.w3", "model.layers.59.block_sparse_moe.experts.92.w3", "model.layers.59.block_sparse_moe.experts.93.w3", "model.layers.59.block_sparse_moe.experts.94.w3", "model.layers.59.block_sparse_moe.experts.95.w3", "model.layers.59.block_sparse_moe.experts.96.w3", "model.layers.59.block_sparse_moe.experts.97.w3", "model.layers.59.block_sparse_moe.experts.98.w3", "model.layers.59.block_sparse_moe.experts.99.w3", "model.layers.59.block_sparse_moe.experts.100.w3", "model.layers.59.block_sparse_moe.experts.101.w3", "model.layers.59.block_sparse_moe.experts.102.w3", "model.layers.59.block_sparse_moe.experts.103.w3", "model.layers.59.block_sparse_moe.experts.104.w3", "model.layers.59.block_sparse_moe.experts.105.w3", "model.layers.59.block_sparse_moe.experts.106.w3", "model.layers.59.block_sparse_moe.experts.107.w3", "model.layers.59.block_sparse_moe.experts.108.w3", "model.layers.59.block_sparse_moe.experts.109.w3", "model.layers.59.block_sparse_moe.experts.110.w3", "model.layers.59.block_sparse_moe.experts.111.w3", "model.layers.59.block_sparse_moe.experts.112.w3", "model.layers.59.block_sparse_moe.experts.113.w3", "model.layers.59.block_sparse_moe.experts.114.w3", "model.layers.59.block_sparse_moe.experts.115.w3", "model.layers.59.block_sparse_moe.experts.116.w3", "model.layers.59.block_sparse_moe.experts.117.w3", "model.layers.59.block_sparse_moe.experts.118.w3", "model.layers.59.block_sparse_moe.experts.119.w3", "model.layers.59.block_sparse_moe.experts.120.w3", "model.layers.59.block_sparse_moe.experts.121.w3", "model.layers.59.block_sparse_moe.experts.122.w3", "model.layers.59.block_sparse_moe.experts.123.w3", "model.layers.59.block_sparse_moe.experts.124.w3", "model.layers.59.block_sparse_moe.experts.125.w3", "model.layers.59.block_sparse_moe.experts.126.w3", "model.layers.59.block_sparse_moe.experts.127.w3", "model.layers.59.block_sparse_moe.experts.128.w3", "model.layers.59.block_sparse_moe.experts.129.w3", "model.layers.59.block_sparse_moe.experts.130.w3", "model.layers.59.block_sparse_moe.experts.131.w3", "model.layers.59.block_sparse_moe.experts.132.w3", "model.layers.59.block_sparse_moe.experts.133.w3", "model.layers.59.block_sparse_moe.experts.134.w3", "model.layers.59.block_sparse_moe.experts.135.w3", "model.layers.59.block_sparse_moe.experts.136.w3", "model.layers.59.block_sparse_moe.experts.137.w3", "model.layers.59.block_sparse_moe.experts.138.w3", "model.layers.59.block_sparse_moe.experts.139.w3", "model.layers.59.block_sparse_moe.experts.140.w3", "model.layers.59.block_sparse_moe.experts.141.w3", "model.layers.59.block_sparse_moe.experts.142.w3", "model.layers.59.block_sparse_moe.experts.143.w3", "model.layers.59.block_sparse_moe.experts.144.w3", "model.layers.59.block_sparse_moe.experts.145.w3", "model.layers.59.block_sparse_moe.experts.146.w3", "model.layers.59.block_sparse_moe.experts.147.w3", "model.layers.59.block_sparse_moe.experts.148.w3", "model.layers.59.block_sparse_moe.experts.149.w3", "model.layers.59.block_sparse_moe.experts.150.w3", "model.layers.59.block_sparse_moe.experts.151.w3", "model.layers.59.block_sparse_moe.experts.152.w3", "model.layers.59.block_sparse_moe.experts.153.w3", "model.layers.59.block_sparse_moe.experts.154.w3", "model.layers.59.block_sparse_moe.experts.155.w3", "model.layers.59.block_sparse_moe.experts.156.w3", "model.layers.59.block_sparse_moe.experts.157.w3", "model.layers.59.block_sparse_moe.experts.158.w3", "model.layers.59.block_sparse_moe.experts.159.w3", "model.layers.59.block_sparse_moe.experts.160.w3", "model.layers.59.block_sparse_moe.experts.161.w3", "model.layers.59.block_sparse_moe.experts.162.w3", "model.layers.59.block_sparse_moe.experts.163.w3", "model.layers.59.block_sparse_moe.experts.164.w3", "model.layers.59.block_sparse_moe.experts.165.w3", "model.layers.59.block_sparse_moe.experts.166.w3", "model.layers.59.block_sparse_moe.experts.167.w3", "model.layers.59.block_sparse_moe.experts.168.w3", "model.layers.59.block_sparse_moe.experts.169.w3", "model.layers.59.block_sparse_moe.experts.170.w3", "model.layers.59.block_sparse_moe.experts.171.w3", "model.layers.59.block_sparse_moe.experts.172.w3", "model.layers.59.block_sparse_moe.experts.173.w3", "model.layers.59.block_sparse_moe.experts.174.w3", "model.layers.59.block_sparse_moe.experts.175.w3", "model.layers.59.block_sparse_moe.experts.176.w3", "model.layers.59.block_sparse_moe.experts.177.w3", "model.layers.59.block_sparse_moe.experts.178.w3", "model.layers.59.block_sparse_moe.experts.179.w3", "model.layers.59.block_sparse_moe.experts.180.w3", "model.layers.59.block_sparse_moe.experts.181.w3", "model.layers.59.block_sparse_moe.experts.182.w3", "model.layers.59.block_sparse_moe.experts.183.w3", "model.layers.59.block_sparse_moe.experts.184.w3", "model.layers.59.block_sparse_moe.experts.185.w3", "model.layers.59.block_sparse_moe.experts.186.w3", "model.layers.59.block_sparse_moe.experts.187.w3", "model.layers.59.block_sparse_moe.experts.188.w3", "model.layers.59.block_sparse_moe.experts.189.w3", "model.layers.59.block_sparse_moe.experts.190.w3", "model.layers.59.block_sparse_moe.experts.191.w3", "model.layers.59.block_sparse_moe.experts.192.w3", "model.layers.59.block_sparse_moe.experts.193.w3", "model.layers.59.block_sparse_moe.experts.194.w3", "model.layers.59.block_sparse_moe.experts.195.w3", "model.layers.59.block_sparse_moe.experts.196.w3", "model.layers.59.block_sparse_moe.experts.197.w3", "model.layers.59.block_sparse_moe.experts.198.w3", "model.layers.59.block_sparse_moe.experts.199.w3", "model.layers.59.block_sparse_moe.experts.200.w3", "model.layers.59.block_sparse_moe.experts.201.w3", "model.layers.59.block_sparse_moe.experts.202.w3", "model.layers.59.block_sparse_moe.experts.203.w3", "model.layers.59.block_sparse_moe.experts.204.w3", "model.layers.59.block_sparse_moe.experts.205.w3", "model.layers.59.block_sparse_moe.experts.206.w3", "model.layers.59.block_sparse_moe.experts.207.w3", "model.layers.59.block_sparse_moe.experts.208.w3", "model.layers.59.block_sparse_moe.experts.209.w3", "model.layers.59.block_sparse_moe.experts.210.w3", "model.layers.59.block_sparse_moe.experts.211.w3", "model.layers.59.block_sparse_moe.experts.212.w3", "model.layers.59.block_sparse_moe.experts.213.w3", "model.layers.59.block_sparse_moe.experts.214.w3", "model.layers.59.block_sparse_moe.experts.215.w3", "model.layers.59.block_sparse_moe.experts.216.w3", "model.layers.59.block_sparse_moe.experts.217.w3", "model.layers.59.block_sparse_moe.experts.218.w3", "model.layers.59.block_sparse_moe.experts.219.w3", "model.layers.59.block_sparse_moe.experts.220.w3", "model.layers.59.block_sparse_moe.experts.221.w3", "model.layers.59.block_sparse_moe.experts.222.w3", "model.layers.59.block_sparse_moe.experts.223.w3", "model.layers.59.block_sparse_moe.experts.224.w3", "model.layers.59.block_sparse_moe.experts.225.w3", "model.layers.59.block_sparse_moe.experts.226.w3", "model.layers.59.block_sparse_moe.experts.227.w3", "model.layers.59.block_sparse_moe.experts.228.w3", "model.layers.59.block_sparse_moe.experts.229.w3", "model.layers.59.block_sparse_moe.experts.230.w3", "model.layers.59.block_sparse_moe.experts.231.w3", "model.layers.59.block_sparse_moe.experts.232.w3", "model.layers.59.block_sparse_moe.experts.233.w3", "model.layers.59.block_sparse_moe.experts.234.w3", "model.layers.59.block_sparse_moe.experts.235.w3", "model.layers.59.block_sparse_moe.experts.236.w3", "model.layers.59.block_sparse_moe.experts.237.w3", "model.layers.59.block_sparse_moe.experts.238.w3", "model.layers.59.block_sparse_moe.experts.239.w3", "model.layers.59.block_sparse_moe.experts.240.w3", "model.layers.59.block_sparse_moe.experts.241.w3", "model.layers.59.block_sparse_moe.experts.242.w3", "model.layers.59.block_sparse_moe.experts.243.w3", "model.layers.59.block_sparse_moe.experts.244.w3", "model.layers.59.block_sparse_moe.experts.245.w3", "model.layers.59.block_sparse_moe.experts.246.w3", "model.layers.59.block_sparse_moe.experts.247.w3", "model.layers.59.block_sparse_moe.experts.248.w3", "model.layers.59.block_sparse_moe.experts.249.w3", "model.layers.59.block_sparse_moe.experts.250.w3", "model.layers.59.block_sparse_moe.experts.251.w3", "model.layers.59.block_sparse_moe.experts.252.w3", "model.layers.59.block_sparse_moe.experts.253.w3", "model.layers.59.block_sparse_moe.experts.254.w3", "model.layers.59.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0005333334207535678, "dbits": 2415919104 } ] }, { "idx": 299, "layers": [ "model.layers.59.block_sparse_moe.experts.0.w2", "model.layers.59.block_sparse_moe.experts.1.w2", "model.layers.59.block_sparse_moe.experts.2.w2", "model.layers.59.block_sparse_moe.experts.3.w2", "model.layers.59.block_sparse_moe.experts.4.w2", "model.layers.59.block_sparse_moe.experts.5.w2", "model.layers.59.block_sparse_moe.experts.6.w2", "model.layers.59.block_sparse_moe.experts.7.w2", "model.layers.59.block_sparse_moe.experts.8.w2", "model.layers.59.block_sparse_moe.experts.9.w2", "model.layers.59.block_sparse_moe.experts.10.w2", "model.layers.59.block_sparse_moe.experts.11.w2", "model.layers.59.block_sparse_moe.experts.12.w2", "model.layers.59.block_sparse_moe.experts.13.w2", "model.layers.59.block_sparse_moe.experts.14.w2", "model.layers.59.block_sparse_moe.experts.15.w2", "model.layers.59.block_sparse_moe.experts.16.w2", "model.layers.59.block_sparse_moe.experts.17.w2", "model.layers.59.block_sparse_moe.experts.18.w2", "model.layers.59.block_sparse_moe.experts.19.w2", "model.layers.59.block_sparse_moe.experts.20.w2", "model.layers.59.block_sparse_moe.experts.21.w2", "model.layers.59.block_sparse_moe.experts.22.w2", "model.layers.59.block_sparse_moe.experts.23.w2", "model.layers.59.block_sparse_moe.experts.24.w2", "model.layers.59.block_sparse_moe.experts.25.w2", "model.layers.59.block_sparse_moe.experts.26.w2", "model.layers.59.block_sparse_moe.experts.27.w2", "model.layers.59.block_sparse_moe.experts.28.w2", "model.layers.59.block_sparse_moe.experts.29.w2", "model.layers.59.block_sparse_moe.experts.30.w2", "model.layers.59.block_sparse_moe.experts.31.w2", "model.layers.59.block_sparse_moe.experts.32.w2", "model.layers.59.block_sparse_moe.experts.33.w2", "model.layers.59.block_sparse_moe.experts.34.w2", "model.layers.59.block_sparse_moe.experts.35.w2", "model.layers.59.block_sparse_moe.experts.36.w2", "model.layers.59.block_sparse_moe.experts.37.w2", "model.layers.59.block_sparse_moe.experts.38.w2", "model.layers.59.block_sparse_moe.experts.39.w2", "model.layers.59.block_sparse_moe.experts.40.w2", "model.layers.59.block_sparse_moe.experts.41.w2", "model.layers.59.block_sparse_moe.experts.42.w2", "model.layers.59.block_sparse_moe.experts.43.w2", "model.layers.59.block_sparse_moe.experts.44.w2", "model.layers.59.block_sparse_moe.experts.45.w2", "model.layers.59.block_sparse_moe.experts.46.w2", "model.layers.59.block_sparse_moe.experts.47.w2", "model.layers.59.block_sparse_moe.experts.48.w2", "model.layers.59.block_sparse_moe.experts.49.w2", "model.layers.59.block_sparse_moe.experts.50.w2", "model.layers.59.block_sparse_moe.experts.51.w2", "model.layers.59.block_sparse_moe.experts.52.w2", "model.layers.59.block_sparse_moe.experts.53.w2", "model.layers.59.block_sparse_moe.experts.54.w2", "model.layers.59.block_sparse_moe.experts.55.w2", "model.layers.59.block_sparse_moe.experts.56.w2", "model.layers.59.block_sparse_moe.experts.57.w2", "model.layers.59.block_sparse_moe.experts.58.w2", "model.layers.59.block_sparse_moe.experts.59.w2", "model.layers.59.block_sparse_moe.experts.60.w2", "model.layers.59.block_sparse_moe.experts.61.w2", "model.layers.59.block_sparse_moe.experts.62.w2", "model.layers.59.block_sparse_moe.experts.63.w2", "model.layers.59.block_sparse_moe.experts.64.w2", "model.layers.59.block_sparse_moe.experts.65.w2", "model.layers.59.block_sparse_moe.experts.66.w2", "model.layers.59.block_sparse_moe.experts.67.w2", "model.layers.59.block_sparse_moe.experts.68.w2", "model.layers.59.block_sparse_moe.experts.69.w2", "model.layers.59.block_sparse_moe.experts.70.w2", "model.layers.59.block_sparse_moe.experts.71.w2", "model.layers.59.block_sparse_moe.experts.72.w2", "model.layers.59.block_sparse_moe.experts.73.w2", "model.layers.59.block_sparse_moe.experts.74.w2", "model.layers.59.block_sparse_moe.experts.75.w2", "model.layers.59.block_sparse_moe.experts.76.w2", "model.layers.59.block_sparse_moe.experts.77.w2", "model.layers.59.block_sparse_moe.experts.78.w2", "model.layers.59.block_sparse_moe.experts.79.w2", "model.layers.59.block_sparse_moe.experts.80.w2", "model.layers.59.block_sparse_moe.experts.81.w2", "model.layers.59.block_sparse_moe.experts.82.w2", "model.layers.59.block_sparse_moe.experts.83.w2", "model.layers.59.block_sparse_moe.experts.84.w2", "model.layers.59.block_sparse_moe.experts.85.w2", "model.layers.59.block_sparse_moe.experts.86.w2", "model.layers.59.block_sparse_moe.experts.87.w2", "model.layers.59.block_sparse_moe.experts.88.w2", "model.layers.59.block_sparse_moe.experts.89.w2", "model.layers.59.block_sparse_moe.experts.90.w2", "model.layers.59.block_sparse_moe.experts.91.w2", "model.layers.59.block_sparse_moe.experts.92.w2", "model.layers.59.block_sparse_moe.experts.93.w2", "model.layers.59.block_sparse_moe.experts.94.w2", "model.layers.59.block_sparse_moe.experts.95.w2", "model.layers.59.block_sparse_moe.experts.96.w2", "model.layers.59.block_sparse_moe.experts.97.w2", "model.layers.59.block_sparse_moe.experts.98.w2", "model.layers.59.block_sparse_moe.experts.99.w2", "model.layers.59.block_sparse_moe.experts.100.w2", "model.layers.59.block_sparse_moe.experts.101.w2", "model.layers.59.block_sparse_moe.experts.102.w2", "model.layers.59.block_sparse_moe.experts.103.w2", "model.layers.59.block_sparse_moe.experts.104.w2", "model.layers.59.block_sparse_moe.experts.105.w2", "model.layers.59.block_sparse_moe.experts.106.w2", "model.layers.59.block_sparse_moe.experts.107.w2", "model.layers.59.block_sparse_moe.experts.108.w2", "model.layers.59.block_sparse_moe.experts.109.w2", "model.layers.59.block_sparse_moe.experts.110.w2", "model.layers.59.block_sparse_moe.experts.111.w2", "model.layers.59.block_sparse_moe.experts.112.w2", "model.layers.59.block_sparse_moe.experts.113.w2", "model.layers.59.block_sparse_moe.experts.114.w2", "model.layers.59.block_sparse_moe.experts.115.w2", "model.layers.59.block_sparse_moe.experts.116.w2", "model.layers.59.block_sparse_moe.experts.117.w2", "model.layers.59.block_sparse_moe.experts.118.w2", "model.layers.59.block_sparse_moe.experts.119.w2", "model.layers.59.block_sparse_moe.experts.120.w2", "model.layers.59.block_sparse_moe.experts.121.w2", "model.layers.59.block_sparse_moe.experts.122.w2", "model.layers.59.block_sparse_moe.experts.123.w2", "model.layers.59.block_sparse_moe.experts.124.w2", "model.layers.59.block_sparse_moe.experts.125.w2", "model.layers.59.block_sparse_moe.experts.126.w2", "model.layers.59.block_sparse_moe.experts.127.w2", "model.layers.59.block_sparse_moe.experts.128.w2", "model.layers.59.block_sparse_moe.experts.129.w2", "model.layers.59.block_sparse_moe.experts.130.w2", "model.layers.59.block_sparse_moe.experts.131.w2", "model.layers.59.block_sparse_moe.experts.132.w2", "model.layers.59.block_sparse_moe.experts.133.w2", "model.layers.59.block_sparse_moe.experts.134.w2", "model.layers.59.block_sparse_moe.experts.135.w2", "model.layers.59.block_sparse_moe.experts.136.w2", "model.layers.59.block_sparse_moe.experts.137.w2", "model.layers.59.block_sparse_moe.experts.138.w2", "model.layers.59.block_sparse_moe.experts.139.w2", "model.layers.59.block_sparse_moe.experts.140.w2", "model.layers.59.block_sparse_moe.experts.141.w2", "model.layers.59.block_sparse_moe.experts.142.w2", "model.layers.59.block_sparse_moe.experts.143.w2", "model.layers.59.block_sparse_moe.experts.144.w2", "model.layers.59.block_sparse_moe.experts.145.w2", "model.layers.59.block_sparse_moe.experts.146.w2", "model.layers.59.block_sparse_moe.experts.147.w2", "model.layers.59.block_sparse_moe.experts.148.w2", "model.layers.59.block_sparse_moe.experts.149.w2", "model.layers.59.block_sparse_moe.experts.150.w2", "model.layers.59.block_sparse_moe.experts.151.w2", "model.layers.59.block_sparse_moe.experts.152.w2", "model.layers.59.block_sparse_moe.experts.153.w2", "model.layers.59.block_sparse_moe.experts.154.w2", "model.layers.59.block_sparse_moe.experts.155.w2", "model.layers.59.block_sparse_moe.experts.156.w2", "model.layers.59.block_sparse_moe.experts.157.w2", "model.layers.59.block_sparse_moe.experts.158.w2", "model.layers.59.block_sparse_moe.experts.159.w2", "model.layers.59.block_sparse_moe.experts.160.w2", "model.layers.59.block_sparse_moe.experts.161.w2", "model.layers.59.block_sparse_moe.experts.162.w2", "model.layers.59.block_sparse_moe.experts.163.w2", "model.layers.59.block_sparse_moe.experts.164.w2", "model.layers.59.block_sparse_moe.experts.165.w2", "model.layers.59.block_sparse_moe.experts.166.w2", "model.layers.59.block_sparse_moe.experts.167.w2", "model.layers.59.block_sparse_moe.experts.168.w2", "model.layers.59.block_sparse_moe.experts.169.w2", "model.layers.59.block_sparse_moe.experts.170.w2", "model.layers.59.block_sparse_moe.experts.171.w2", "model.layers.59.block_sparse_moe.experts.172.w2", "model.layers.59.block_sparse_moe.experts.173.w2", "model.layers.59.block_sparse_moe.experts.174.w2", "model.layers.59.block_sparse_moe.experts.175.w2", "model.layers.59.block_sparse_moe.experts.176.w2", "model.layers.59.block_sparse_moe.experts.177.w2", "model.layers.59.block_sparse_moe.experts.178.w2", "model.layers.59.block_sparse_moe.experts.179.w2", "model.layers.59.block_sparse_moe.experts.180.w2", "model.layers.59.block_sparse_moe.experts.181.w2", "model.layers.59.block_sparse_moe.experts.182.w2", "model.layers.59.block_sparse_moe.experts.183.w2", "model.layers.59.block_sparse_moe.experts.184.w2", "model.layers.59.block_sparse_moe.experts.185.w2", "model.layers.59.block_sparse_moe.experts.186.w2", "model.layers.59.block_sparse_moe.experts.187.w2", "model.layers.59.block_sparse_moe.experts.188.w2", "model.layers.59.block_sparse_moe.experts.189.w2", "model.layers.59.block_sparse_moe.experts.190.w2", "model.layers.59.block_sparse_moe.experts.191.w2", "model.layers.59.block_sparse_moe.experts.192.w2", "model.layers.59.block_sparse_moe.experts.193.w2", "model.layers.59.block_sparse_moe.experts.194.w2", "model.layers.59.block_sparse_moe.experts.195.w2", "model.layers.59.block_sparse_moe.experts.196.w2", "model.layers.59.block_sparse_moe.experts.197.w2", "model.layers.59.block_sparse_moe.experts.198.w2", "model.layers.59.block_sparse_moe.experts.199.w2", "model.layers.59.block_sparse_moe.experts.200.w2", "model.layers.59.block_sparse_moe.experts.201.w2", "model.layers.59.block_sparse_moe.experts.202.w2", "model.layers.59.block_sparse_moe.experts.203.w2", "model.layers.59.block_sparse_moe.experts.204.w2", "model.layers.59.block_sparse_moe.experts.205.w2", "model.layers.59.block_sparse_moe.experts.206.w2", "model.layers.59.block_sparse_moe.experts.207.w2", "model.layers.59.block_sparse_moe.experts.208.w2", "model.layers.59.block_sparse_moe.experts.209.w2", "model.layers.59.block_sparse_moe.experts.210.w2", "model.layers.59.block_sparse_moe.experts.211.w2", "model.layers.59.block_sparse_moe.experts.212.w2", "model.layers.59.block_sparse_moe.experts.213.w2", "model.layers.59.block_sparse_moe.experts.214.w2", "model.layers.59.block_sparse_moe.experts.215.w2", "model.layers.59.block_sparse_moe.experts.216.w2", "model.layers.59.block_sparse_moe.experts.217.w2", "model.layers.59.block_sparse_moe.experts.218.w2", "model.layers.59.block_sparse_moe.experts.219.w2", "model.layers.59.block_sparse_moe.experts.220.w2", "model.layers.59.block_sparse_moe.experts.221.w2", "model.layers.59.block_sparse_moe.experts.222.w2", "model.layers.59.block_sparse_moe.experts.223.w2", "model.layers.59.block_sparse_moe.experts.224.w2", "model.layers.59.block_sparse_moe.experts.225.w2", "model.layers.59.block_sparse_moe.experts.226.w2", "model.layers.59.block_sparse_moe.experts.227.w2", "model.layers.59.block_sparse_moe.experts.228.w2", "model.layers.59.block_sparse_moe.experts.229.w2", "model.layers.59.block_sparse_moe.experts.230.w2", "model.layers.59.block_sparse_moe.experts.231.w2", "model.layers.59.block_sparse_moe.experts.232.w2", "model.layers.59.block_sparse_moe.experts.233.w2", "model.layers.59.block_sparse_moe.experts.234.w2", "model.layers.59.block_sparse_moe.experts.235.w2", "model.layers.59.block_sparse_moe.experts.236.w2", "model.layers.59.block_sparse_moe.experts.237.w2", "model.layers.59.block_sparse_moe.experts.238.w2", "model.layers.59.block_sparse_moe.experts.239.w2", "model.layers.59.block_sparse_moe.experts.240.w2", "model.layers.59.block_sparse_moe.experts.241.w2", "model.layers.59.block_sparse_moe.experts.242.w2", "model.layers.59.block_sparse_moe.experts.243.w2", "model.layers.59.block_sparse_moe.experts.244.w2", "model.layers.59.block_sparse_moe.experts.245.w2", "model.layers.59.block_sparse_moe.experts.246.w2", "model.layers.59.block_sparse_moe.experts.247.w2", "model.layers.59.block_sparse_moe.experts.248.w2", "model.layers.59.block_sparse_moe.experts.249.w2", "model.layers.59.block_sparse_moe.experts.250.w2", "model.layers.59.block_sparse_moe.experts.251.w2", "model.layers.59.block_sparse_moe.experts.252.w2", "model.layers.59.block_sparse_moe.experts.253.w2", "model.layers.59.block_sparse_moe.experts.254.w2", "model.layers.59.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0001548290252686435, "dbits": 1207959552 } ] }, { "idx": 300, "layers": [ "model.layers.60.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0020405590534209317, "dbits": 18874368 } ] }, { "idx": 301, "layers": [ "model.layers.60.self_attn.k_proj", "model.layers.60.self_attn.v_proj" ], "candidates": [ { "dkld": -0.009816020727157593, "dbits": 6291456 } ] }, { "idx": 302, "layers": [ "model.layers.60.self_attn.o_proj" ], "candidates": [ { "dkld": -0.008454018831252963, "dbits": 18874368 } ] }, { "idx": 303, "layers": [ "model.layers.60.block_sparse_moe.experts.0.w1", "model.layers.60.block_sparse_moe.experts.1.w1", "model.layers.60.block_sparse_moe.experts.2.w1", "model.layers.60.block_sparse_moe.experts.3.w1", "model.layers.60.block_sparse_moe.experts.4.w1", "model.layers.60.block_sparse_moe.experts.5.w1", "model.layers.60.block_sparse_moe.experts.6.w1", "model.layers.60.block_sparse_moe.experts.7.w1", "model.layers.60.block_sparse_moe.experts.8.w1", "model.layers.60.block_sparse_moe.experts.9.w1", "model.layers.60.block_sparse_moe.experts.10.w1", "model.layers.60.block_sparse_moe.experts.11.w1", "model.layers.60.block_sparse_moe.experts.12.w1", "model.layers.60.block_sparse_moe.experts.13.w1", "model.layers.60.block_sparse_moe.experts.14.w1", "model.layers.60.block_sparse_moe.experts.15.w1", "model.layers.60.block_sparse_moe.experts.16.w1", "model.layers.60.block_sparse_moe.experts.17.w1", "model.layers.60.block_sparse_moe.experts.18.w1", "model.layers.60.block_sparse_moe.experts.19.w1", "model.layers.60.block_sparse_moe.experts.20.w1", "model.layers.60.block_sparse_moe.experts.21.w1", "model.layers.60.block_sparse_moe.experts.22.w1", "model.layers.60.block_sparse_moe.experts.23.w1", "model.layers.60.block_sparse_moe.experts.24.w1", "model.layers.60.block_sparse_moe.experts.25.w1", "model.layers.60.block_sparse_moe.experts.26.w1", "model.layers.60.block_sparse_moe.experts.27.w1", "model.layers.60.block_sparse_moe.experts.28.w1", "model.layers.60.block_sparse_moe.experts.29.w1", "model.layers.60.block_sparse_moe.experts.30.w1", "model.layers.60.block_sparse_moe.experts.31.w1", "model.layers.60.block_sparse_moe.experts.32.w1", "model.layers.60.block_sparse_moe.experts.33.w1", "model.layers.60.block_sparse_moe.experts.34.w1", "model.layers.60.block_sparse_moe.experts.35.w1", "model.layers.60.block_sparse_moe.experts.36.w1", "model.layers.60.block_sparse_moe.experts.37.w1", "model.layers.60.block_sparse_moe.experts.38.w1", "model.layers.60.block_sparse_moe.experts.39.w1", "model.layers.60.block_sparse_moe.experts.40.w1", "model.layers.60.block_sparse_moe.experts.41.w1", "model.layers.60.block_sparse_moe.experts.42.w1", "model.layers.60.block_sparse_moe.experts.43.w1", "model.layers.60.block_sparse_moe.experts.44.w1", "model.layers.60.block_sparse_moe.experts.45.w1", "model.layers.60.block_sparse_moe.experts.46.w1", "model.layers.60.block_sparse_moe.experts.47.w1", "model.layers.60.block_sparse_moe.experts.48.w1", "model.layers.60.block_sparse_moe.experts.49.w1", "model.layers.60.block_sparse_moe.experts.50.w1", "model.layers.60.block_sparse_moe.experts.51.w1", "model.layers.60.block_sparse_moe.experts.52.w1", "model.layers.60.block_sparse_moe.experts.53.w1", "model.layers.60.block_sparse_moe.experts.54.w1", "model.layers.60.block_sparse_moe.experts.55.w1", "model.layers.60.block_sparse_moe.experts.56.w1", "model.layers.60.block_sparse_moe.experts.57.w1", "model.layers.60.block_sparse_moe.experts.58.w1", "model.layers.60.block_sparse_moe.experts.59.w1", "model.layers.60.block_sparse_moe.experts.60.w1", "model.layers.60.block_sparse_moe.experts.61.w1", "model.layers.60.block_sparse_moe.experts.62.w1", "model.layers.60.block_sparse_moe.experts.63.w1", "model.layers.60.block_sparse_moe.experts.64.w1", "model.layers.60.block_sparse_moe.experts.65.w1", "model.layers.60.block_sparse_moe.experts.66.w1", "model.layers.60.block_sparse_moe.experts.67.w1", "model.layers.60.block_sparse_moe.experts.68.w1", "model.layers.60.block_sparse_moe.experts.69.w1", "model.layers.60.block_sparse_moe.experts.70.w1", "model.layers.60.block_sparse_moe.experts.71.w1", "model.layers.60.block_sparse_moe.experts.72.w1", "model.layers.60.block_sparse_moe.experts.73.w1", "model.layers.60.block_sparse_moe.experts.74.w1", "model.layers.60.block_sparse_moe.experts.75.w1", "model.layers.60.block_sparse_moe.experts.76.w1", "model.layers.60.block_sparse_moe.experts.77.w1", "model.layers.60.block_sparse_moe.experts.78.w1", "model.layers.60.block_sparse_moe.experts.79.w1", "model.layers.60.block_sparse_moe.experts.80.w1", "model.layers.60.block_sparse_moe.experts.81.w1", "model.layers.60.block_sparse_moe.experts.82.w1", "model.layers.60.block_sparse_moe.experts.83.w1", "model.layers.60.block_sparse_moe.experts.84.w1", "model.layers.60.block_sparse_moe.experts.85.w1", "model.layers.60.block_sparse_moe.experts.86.w1", "model.layers.60.block_sparse_moe.experts.87.w1", "model.layers.60.block_sparse_moe.experts.88.w1", "model.layers.60.block_sparse_moe.experts.89.w1", "model.layers.60.block_sparse_moe.experts.90.w1", "model.layers.60.block_sparse_moe.experts.91.w1", "model.layers.60.block_sparse_moe.experts.92.w1", "model.layers.60.block_sparse_moe.experts.93.w1", "model.layers.60.block_sparse_moe.experts.94.w1", "model.layers.60.block_sparse_moe.experts.95.w1", "model.layers.60.block_sparse_moe.experts.96.w1", "model.layers.60.block_sparse_moe.experts.97.w1", "model.layers.60.block_sparse_moe.experts.98.w1", "model.layers.60.block_sparse_moe.experts.99.w1", "model.layers.60.block_sparse_moe.experts.100.w1", "model.layers.60.block_sparse_moe.experts.101.w1", "model.layers.60.block_sparse_moe.experts.102.w1", "model.layers.60.block_sparse_moe.experts.103.w1", "model.layers.60.block_sparse_moe.experts.104.w1", "model.layers.60.block_sparse_moe.experts.105.w1", "model.layers.60.block_sparse_moe.experts.106.w1", "model.layers.60.block_sparse_moe.experts.107.w1", "model.layers.60.block_sparse_moe.experts.108.w1", "model.layers.60.block_sparse_moe.experts.109.w1", "model.layers.60.block_sparse_moe.experts.110.w1", "model.layers.60.block_sparse_moe.experts.111.w1", "model.layers.60.block_sparse_moe.experts.112.w1", "model.layers.60.block_sparse_moe.experts.113.w1", "model.layers.60.block_sparse_moe.experts.114.w1", "model.layers.60.block_sparse_moe.experts.115.w1", "model.layers.60.block_sparse_moe.experts.116.w1", "model.layers.60.block_sparse_moe.experts.117.w1", "model.layers.60.block_sparse_moe.experts.118.w1", "model.layers.60.block_sparse_moe.experts.119.w1", "model.layers.60.block_sparse_moe.experts.120.w1", "model.layers.60.block_sparse_moe.experts.121.w1", "model.layers.60.block_sparse_moe.experts.122.w1", "model.layers.60.block_sparse_moe.experts.123.w1", "model.layers.60.block_sparse_moe.experts.124.w1", "model.layers.60.block_sparse_moe.experts.125.w1", "model.layers.60.block_sparse_moe.experts.126.w1", "model.layers.60.block_sparse_moe.experts.127.w1", "model.layers.60.block_sparse_moe.experts.128.w1", "model.layers.60.block_sparse_moe.experts.129.w1", "model.layers.60.block_sparse_moe.experts.130.w1", "model.layers.60.block_sparse_moe.experts.131.w1", "model.layers.60.block_sparse_moe.experts.132.w1", "model.layers.60.block_sparse_moe.experts.133.w1", "model.layers.60.block_sparse_moe.experts.134.w1", "model.layers.60.block_sparse_moe.experts.135.w1", "model.layers.60.block_sparse_moe.experts.136.w1", "model.layers.60.block_sparse_moe.experts.137.w1", "model.layers.60.block_sparse_moe.experts.138.w1", "model.layers.60.block_sparse_moe.experts.139.w1", "model.layers.60.block_sparse_moe.experts.140.w1", "model.layers.60.block_sparse_moe.experts.141.w1", "model.layers.60.block_sparse_moe.experts.142.w1", "model.layers.60.block_sparse_moe.experts.143.w1", "model.layers.60.block_sparse_moe.experts.144.w1", "model.layers.60.block_sparse_moe.experts.145.w1", "model.layers.60.block_sparse_moe.experts.146.w1", "model.layers.60.block_sparse_moe.experts.147.w1", "model.layers.60.block_sparse_moe.experts.148.w1", "model.layers.60.block_sparse_moe.experts.149.w1", "model.layers.60.block_sparse_moe.experts.150.w1", "model.layers.60.block_sparse_moe.experts.151.w1", "model.layers.60.block_sparse_moe.experts.152.w1", "model.layers.60.block_sparse_moe.experts.153.w1", "model.layers.60.block_sparse_moe.experts.154.w1", "model.layers.60.block_sparse_moe.experts.155.w1", "model.layers.60.block_sparse_moe.experts.156.w1", "model.layers.60.block_sparse_moe.experts.157.w1", "model.layers.60.block_sparse_moe.experts.158.w1", "model.layers.60.block_sparse_moe.experts.159.w1", "model.layers.60.block_sparse_moe.experts.160.w1", "model.layers.60.block_sparse_moe.experts.161.w1", "model.layers.60.block_sparse_moe.experts.162.w1", "model.layers.60.block_sparse_moe.experts.163.w1", "model.layers.60.block_sparse_moe.experts.164.w1", "model.layers.60.block_sparse_moe.experts.165.w1", "model.layers.60.block_sparse_moe.experts.166.w1", "model.layers.60.block_sparse_moe.experts.167.w1", "model.layers.60.block_sparse_moe.experts.168.w1", "model.layers.60.block_sparse_moe.experts.169.w1", "model.layers.60.block_sparse_moe.experts.170.w1", "model.layers.60.block_sparse_moe.experts.171.w1", "model.layers.60.block_sparse_moe.experts.172.w1", "model.layers.60.block_sparse_moe.experts.173.w1", "model.layers.60.block_sparse_moe.experts.174.w1", "model.layers.60.block_sparse_moe.experts.175.w1", "model.layers.60.block_sparse_moe.experts.176.w1", "model.layers.60.block_sparse_moe.experts.177.w1", "model.layers.60.block_sparse_moe.experts.178.w1", "model.layers.60.block_sparse_moe.experts.179.w1", "model.layers.60.block_sparse_moe.experts.180.w1", "model.layers.60.block_sparse_moe.experts.181.w1", "model.layers.60.block_sparse_moe.experts.182.w1", "model.layers.60.block_sparse_moe.experts.183.w1", "model.layers.60.block_sparse_moe.experts.184.w1", "model.layers.60.block_sparse_moe.experts.185.w1", "model.layers.60.block_sparse_moe.experts.186.w1", "model.layers.60.block_sparse_moe.experts.187.w1", "model.layers.60.block_sparse_moe.experts.188.w1", "model.layers.60.block_sparse_moe.experts.189.w1", "model.layers.60.block_sparse_moe.experts.190.w1", "model.layers.60.block_sparse_moe.experts.191.w1", "model.layers.60.block_sparse_moe.experts.192.w1", "model.layers.60.block_sparse_moe.experts.193.w1", "model.layers.60.block_sparse_moe.experts.194.w1", "model.layers.60.block_sparse_moe.experts.195.w1", "model.layers.60.block_sparse_moe.experts.196.w1", "model.layers.60.block_sparse_moe.experts.197.w1", "model.layers.60.block_sparse_moe.experts.198.w1", "model.layers.60.block_sparse_moe.experts.199.w1", "model.layers.60.block_sparse_moe.experts.200.w1", "model.layers.60.block_sparse_moe.experts.201.w1", "model.layers.60.block_sparse_moe.experts.202.w1", "model.layers.60.block_sparse_moe.experts.203.w1", "model.layers.60.block_sparse_moe.experts.204.w1", "model.layers.60.block_sparse_moe.experts.205.w1", "model.layers.60.block_sparse_moe.experts.206.w1", "model.layers.60.block_sparse_moe.experts.207.w1", "model.layers.60.block_sparse_moe.experts.208.w1", "model.layers.60.block_sparse_moe.experts.209.w1", "model.layers.60.block_sparse_moe.experts.210.w1", "model.layers.60.block_sparse_moe.experts.211.w1", "model.layers.60.block_sparse_moe.experts.212.w1", "model.layers.60.block_sparse_moe.experts.213.w1", "model.layers.60.block_sparse_moe.experts.214.w1", "model.layers.60.block_sparse_moe.experts.215.w1", "model.layers.60.block_sparse_moe.experts.216.w1", "model.layers.60.block_sparse_moe.experts.217.w1", "model.layers.60.block_sparse_moe.experts.218.w1", "model.layers.60.block_sparse_moe.experts.219.w1", "model.layers.60.block_sparse_moe.experts.220.w1", "model.layers.60.block_sparse_moe.experts.221.w1", "model.layers.60.block_sparse_moe.experts.222.w1", "model.layers.60.block_sparse_moe.experts.223.w1", "model.layers.60.block_sparse_moe.experts.224.w1", "model.layers.60.block_sparse_moe.experts.225.w1", "model.layers.60.block_sparse_moe.experts.226.w1", "model.layers.60.block_sparse_moe.experts.227.w1", "model.layers.60.block_sparse_moe.experts.228.w1", "model.layers.60.block_sparse_moe.experts.229.w1", "model.layers.60.block_sparse_moe.experts.230.w1", "model.layers.60.block_sparse_moe.experts.231.w1", "model.layers.60.block_sparse_moe.experts.232.w1", "model.layers.60.block_sparse_moe.experts.233.w1", "model.layers.60.block_sparse_moe.experts.234.w1", "model.layers.60.block_sparse_moe.experts.235.w1", "model.layers.60.block_sparse_moe.experts.236.w1", "model.layers.60.block_sparse_moe.experts.237.w1", "model.layers.60.block_sparse_moe.experts.238.w1", "model.layers.60.block_sparse_moe.experts.239.w1", "model.layers.60.block_sparse_moe.experts.240.w1", "model.layers.60.block_sparse_moe.experts.241.w1", "model.layers.60.block_sparse_moe.experts.242.w1", "model.layers.60.block_sparse_moe.experts.243.w1", "model.layers.60.block_sparse_moe.experts.244.w1", "model.layers.60.block_sparse_moe.experts.245.w1", "model.layers.60.block_sparse_moe.experts.246.w1", "model.layers.60.block_sparse_moe.experts.247.w1", "model.layers.60.block_sparse_moe.experts.248.w1", "model.layers.60.block_sparse_moe.experts.249.w1", "model.layers.60.block_sparse_moe.experts.250.w1", "model.layers.60.block_sparse_moe.experts.251.w1", "model.layers.60.block_sparse_moe.experts.252.w1", "model.layers.60.block_sparse_moe.experts.253.w1", "model.layers.60.block_sparse_moe.experts.254.w1", "model.layers.60.block_sparse_moe.experts.255.w1", "model.layers.60.block_sparse_moe.experts.0.w3", "model.layers.60.block_sparse_moe.experts.1.w3", "model.layers.60.block_sparse_moe.experts.2.w3", "model.layers.60.block_sparse_moe.experts.3.w3", "model.layers.60.block_sparse_moe.experts.4.w3", "model.layers.60.block_sparse_moe.experts.5.w3", "model.layers.60.block_sparse_moe.experts.6.w3", "model.layers.60.block_sparse_moe.experts.7.w3", "model.layers.60.block_sparse_moe.experts.8.w3", "model.layers.60.block_sparse_moe.experts.9.w3", "model.layers.60.block_sparse_moe.experts.10.w3", "model.layers.60.block_sparse_moe.experts.11.w3", "model.layers.60.block_sparse_moe.experts.12.w3", "model.layers.60.block_sparse_moe.experts.13.w3", "model.layers.60.block_sparse_moe.experts.14.w3", "model.layers.60.block_sparse_moe.experts.15.w3", "model.layers.60.block_sparse_moe.experts.16.w3", "model.layers.60.block_sparse_moe.experts.17.w3", "model.layers.60.block_sparse_moe.experts.18.w3", "model.layers.60.block_sparse_moe.experts.19.w3", "model.layers.60.block_sparse_moe.experts.20.w3", "model.layers.60.block_sparse_moe.experts.21.w3", "model.layers.60.block_sparse_moe.experts.22.w3", "model.layers.60.block_sparse_moe.experts.23.w3", "model.layers.60.block_sparse_moe.experts.24.w3", "model.layers.60.block_sparse_moe.experts.25.w3", "model.layers.60.block_sparse_moe.experts.26.w3", "model.layers.60.block_sparse_moe.experts.27.w3", "model.layers.60.block_sparse_moe.experts.28.w3", "model.layers.60.block_sparse_moe.experts.29.w3", "model.layers.60.block_sparse_moe.experts.30.w3", "model.layers.60.block_sparse_moe.experts.31.w3", "model.layers.60.block_sparse_moe.experts.32.w3", "model.layers.60.block_sparse_moe.experts.33.w3", "model.layers.60.block_sparse_moe.experts.34.w3", "model.layers.60.block_sparse_moe.experts.35.w3", "model.layers.60.block_sparse_moe.experts.36.w3", "model.layers.60.block_sparse_moe.experts.37.w3", "model.layers.60.block_sparse_moe.experts.38.w3", "model.layers.60.block_sparse_moe.experts.39.w3", "model.layers.60.block_sparse_moe.experts.40.w3", "model.layers.60.block_sparse_moe.experts.41.w3", "model.layers.60.block_sparse_moe.experts.42.w3", "model.layers.60.block_sparse_moe.experts.43.w3", "model.layers.60.block_sparse_moe.experts.44.w3", "model.layers.60.block_sparse_moe.experts.45.w3", "model.layers.60.block_sparse_moe.experts.46.w3", "model.layers.60.block_sparse_moe.experts.47.w3", "model.layers.60.block_sparse_moe.experts.48.w3", "model.layers.60.block_sparse_moe.experts.49.w3", "model.layers.60.block_sparse_moe.experts.50.w3", "model.layers.60.block_sparse_moe.experts.51.w3", "model.layers.60.block_sparse_moe.experts.52.w3", "model.layers.60.block_sparse_moe.experts.53.w3", "model.layers.60.block_sparse_moe.experts.54.w3", "model.layers.60.block_sparse_moe.experts.55.w3", "model.layers.60.block_sparse_moe.experts.56.w3", "model.layers.60.block_sparse_moe.experts.57.w3", "model.layers.60.block_sparse_moe.experts.58.w3", "model.layers.60.block_sparse_moe.experts.59.w3", "model.layers.60.block_sparse_moe.experts.60.w3", "model.layers.60.block_sparse_moe.experts.61.w3", "model.layers.60.block_sparse_moe.experts.62.w3", "model.layers.60.block_sparse_moe.experts.63.w3", "model.layers.60.block_sparse_moe.experts.64.w3", "model.layers.60.block_sparse_moe.experts.65.w3", "model.layers.60.block_sparse_moe.experts.66.w3", "model.layers.60.block_sparse_moe.experts.67.w3", "model.layers.60.block_sparse_moe.experts.68.w3", "model.layers.60.block_sparse_moe.experts.69.w3", "model.layers.60.block_sparse_moe.experts.70.w3", "model.layers.60.block_sparse_moe.experts.71.w3", "model.layers.60.block_sparse_moe.experts.72.w3", "model.layers.60.block_sparse_moe.experts.73.w3", "model.layers.60.block_sparse_moe.experts.74.w3", "model.layers.60.block_sparse_moe.experts.75.w3", "model.layers.60.block_sparse_moe.experts.76.w3", "model.layers.60.block_sparse_moe.experts.77.w3", "model.layers.60.block_sparse_moe.experts.78.w3", "model.layers.60.block_sparse_moe.experts.79.w3", "model.layers.60.block_sparse_moe.experts.80.w3", "model.layers.60.block_sparse_moe.experts.81.w3", "model.layers.60.block_sparse_moe.experts.82.w3", "model.layers.60.block_sparse_moe.experts.83.w3", "model.layers.60.block_sparse_moe.experts.84.w3", "model.layers.60.block_sparse_moe.experts.85.w3", "model.layers.60.block_sparse_moe.experts.86.w3", "model.layers.60.block_sparse_moe.experts.87.w3", "model.layers.60.block_sparse_moe.experts.88.w3", "model.layers.60.block_sparse_moe.experts.89.w3", "model.layers.60.block_sparse_moe.experts.90.w3", "model.layers.60.block_sparse_moe.experts.91.w3", "model.layers.60.block_sparse_moe.experts.92.w3", "model.layers.60.block_sparse_moe.experts.93.w3", "model.layers.60.block_sparse_moe.experts.94.w3", "model.layers.60.block_sparse_moe.experts.95.w3", "model.layers.60.block_sparse_moe.experts.96.w3", "model.layers.60.block_sparse_moe.experts.97.w3", "model.layers.60.block_sparse_moe.experts.98.w3", "model.layers.60.block_sparse_moe.experts.99.w3", "model.layers.60.block_sparse_moe.experts.100.w3", "model.layers.60.block_sparse_moe.experts.101.w3", "model.layers.60.block_sparse_moe.experts.102.w3", "model.layers.60.block_sparse_moe.experts.103.w3", "model.layers.60.block_sparse_moe.experts.104.w3", "model.layers.60.block_sparse_moe.experts.105.w3", "model.layers.60.block_sparse_moe.experts.106.w3", "model.layers.60.block_sparse_moe.experts.107.w3", "model.layers.60.block_sparse_moe.experts.108.w3", "model.layers.60.block_sparse_moe.experts.109.w3", "model.layers.60.block_sparse_moe.experts.110.w3", "model.layers.60.block_sparse_moe.experts.111.w3", "model.layers.60.block_sparse_moe.experts.112.w3", "model.layers.60.block_sparse_moe.experts.113.w3", "model.layers.60.block_sparse_moe.experts.114.w3", "model.layers.60.block_sparse_moe.experts.115.w3", "model.layers.60.block_sparse_moe.experts.116.w3", "model.layers.60.block_sparse_moe.experts.117.w3", "model.layers.60.block_sparse_moe.experts.118.w3", "model.layers.60.block_sparse_moe.experts.119.w3", "model.layers.60.block_sparse_moe.experts.120.w3", "model.layers.60.block_sparse_moe.experts.121.w3", "model.layers.60.block_sparse_moe.experts.122.w3", "model.layers.60.block_sparse_moe.experts.123.w3", "model.layers.60.block_sparse_moe.experts.124.w3", "model.layers.60.block_sparse_moe.experts.125.w3", "model.layers.60.block_sparse_moe.experts.126.w3", "model.layers.60.block_sparse_moe.experts.127.w3", "model.layers.60.block_sparse_moe.experts.128.w3", "model.layers.60.block_sparse_moe.experts.129.w3", "model.layers.60.block_sparse_moe.experts.130.w3", "model.layers.60.block_sparse_moe.experts.131.w3", "model.layers.60.block_sparse_moe.experts.132.w3", "model.layers.60.block_sparse_moe.experts.133.w3", "model.layers.60.block_sparse_moe.experts.134.w3", "model.layers.60.block_sparse_moe.experts.135.w3", "model.layers.60.block_sparse_moe.experts.136.w3", "model.layers.60.block_sparse_moe.experts.137.w3", "model.layers.60.block_sparse_moe.experts.138.w3", "model.layers.60.block_sparse_moe.experts.139.w3", "model.layers.60.block_sparse_moe.experts.140.w3", "model.layers.60.block_sparse_moe.experts.141.w3", "model.layers.60.block_sparse_moe.experts.142.w3", "model.layers.60.block_sparse_moe.experts.143.w3", "model.layers.60.block_sparse_moe.experts.144.w3", "model.layers.60.block_sparse_moe.experts.145.w3", "model.layers.60.block_sparse_moe.experts.146.w3", "model.layers.60.block_sparse_moe.experts.147.w3", "model.layers.60.block_sparse_moe.experts.148.w3", "model.layers.60.block_sparse_moe.experts.149.w3", "model.layers.60.block_sparse_moe.experts.150.w3", "model.layers.60.block_sparse_moe.experts.151.w3", "model.layers.60.block_sparse_moe.experts.152.w3", "model.layers.60.block_sparse_moe.experts.153.w3", "model.layers.60.block_sparse_moe.experts.154.w3", "model.layers.60.block_sparse_moe.experts.155.w3", "model.layers.60.block_sparse_moe.experts.156.w3", "model.layers.60.block_sparse_moe.experts.157.w3", "model.layers.60.block_sparse_moe.experts.158.w3", "model.layers.60.block_sparse_moe.experts.159.w3", "model.layers.60.block_sparse_moe.experts.160.w3", "model.layers.60.block_sparse_moe.experts.161.w3", "model.layers.60.block_sparse_moe.experts.162.w3", "model.layers.60.block_sparse_moe.experts.163.w3", "model.layers.60.block_sparse_moe.experts.164.w3", "model.layers.60.block_sparse_moe.experts.165.w3", "model.layers.60.block_sparse_moe.experts.166.w3", "model.layers.60.block_sparse_moe.experts.167.w3", "model.layers.60.block_sparse_moe.experts.168.w3", "model.layers.60.block_sparse_moe.experts.169.w3", "model.layers.60.block_sparse_moe.experts.170.w3", "model.layers.60.block_sparse_moe.experts.171.w3", "model.layers.60.block_sparse_moe.experts.172.w3", "model.layers.60.block_sparse_moe.experts.173.w3", "model.layers.60.block_sparse_moe.experts.174.w3", "model.layers.60.block_sparse_moe.experts.175.w3", "model.layers.60.block_sparse_moe.experts.176.w3", "model.layers.60.block_sparse_moe.experts.177.w3", "model.layers.60.block_sparse_moe.experts.178.w3", "model.layers.60.block_sparse_moe.experts.179.w3", "model.layers.60.block_sparse_moe.experts.180.w3", "model.layers.60.block_sparse_moe.experts.181.w3", "model.layers.60.block_sparse_moe.experts.182.w3", "model.layers.60.block_sparse_moe.experts.183.w3", "model.layers.60.block_sparse_moe.experts.184.w3", "model.layers.60.block_sparse_moe.experts.185.w3", "model.layers.60.block_sparse_moe.experts.186.w3", "model.layers.60.block_sparse_moe.experts.187.w3", "model.layers.60.block_sparse_moe.experts.188.w3", "model.layers.60.block_sparse_moe.experts.189.w3", "model.layers.60.block_sparse_moe.experts.190.w3", "model.layers.60.block_sparse_moe.experts.191.w3", "model.layers.60.block_sparse_moe.experts.192.w3", "model.layers.60.block_sparse_moe.experts.193.w3", "model.layers.60.block_sparse_moe.experts.194.w3", "model.layers.60.block_sparse_moe.experts.195.w3", "model.layers.60.block_sparse_moe.experts.196.w3", "model.layers.60.block_sparse_moe.experts.197.w3", "model.layers.60.block_sparse_moe.experts.198.w3", "model.layers.60.block_sparse_moe.experts.199.w3", "model.layers.60.block_sparse_moe.experts.200.w3", "model.layers.60.block_sparse_moe.experts.201.w3", "model.layers.60.block_sparse_moe.experts.202.w3", "model.layers.60.block_sparse_moe.experts.203.w3", "model.layers.60.block_sparse_moe.experts.204.w3", "model.layers.60.block_sparse_moe.experts.205.w3", "model.layers.60.block_sparse_moe.experts.206.w3", "model.layers.60.block_sparse_moe.experts.207.w3", "model.layers.60.block_sparse_moe.experts.208.w3", "model.layers.60.block_sparse_moe.experts.209.w3", "model.layers.60.block_sparse_moe.experts.210.w3", "model.layers.60.block_sparse_moe.experts.211.w3", "model.layers.60.block_sparse_moe.experts.212.w3", "model.layers.60.block_sparse_moe.experts.213.w3", "model.layers.60.block_sparse_moe.experts.214.w3", "model.layers.60.block_sparse_moe.experts.215.w3", "model.layers.60.block_sparse_moe.experts.216.w3", "model.layers.60.block_sparse_moe.experts.217.w3", "model.layers.60.block_sparse_moe.experts.218.w3", "model.layers.60.block_sparse_moe.experts.219.w3", "model.layers.60.block_sparse_moe.experts.220.w3", "model.layers.60.block_sparse_moe.experts.221.w3", "model.layers.60.block_sparse_moe.experts.222.w3", "model.layers.60.block_sparse_moe.experts.223.w3", "model.layers.60.block_sparse_moe.experts.224.w3", "model.layers.60.block_sparse_moe.experts.225.w3", "model.layers.60.block_sparse_moe.experts.226.w3", "model.layers.60.block_sparse_moe.experts.227.w3", "model.layers.60.block_sparse_moe.experts.228.w3", "model.layers.60.block_sparse_moe.experts.229.w3", "model.layers.60.block_sparse_moe.experts.230.w3", "model.layers.60.block_sparse_moe.experts.231.w3", "model.layers.60.block_sparse_moe.experts.232.w3", "model.layers.60.block_sparse_moe.experts.233.w3", "model.layers.60.block_sparse_moe.experts.234.w3", "model.layers.60.block_sparse_moe.experts.235.w3", "model.layers.60.block_sparse_moe.experts.236.w3", "model.layers.60.block_sparse_moe.experts.237.w3", "model.layers.60.block_sparse_moe.experts.238.w3", "model.layers.60.block_sparse_moe.experts.239.w3", "model.layers.60.block_sparse_moe.experts.240.w3", "model.layers.60.block_sparse_moe.experts.241.w3", "model.layers.60.block_sparse_moe.experts.242.w3", "model.layers.60.block_sparse_moe.experts.243.w3", "model.layers.60.block_sparse_moe.experts.244.w3", "model.layers.60.block_sparse_moe.experts.245.w3", "model.layers.60.block_sparse_moe.experts.246.w3", "model.layers.60.block_sparse_moe.experts.247.w3", "model.layers.60.block_sparse_moe.experts.248.w3", "model.layers.60.block_sparse_moe.experts.249.w3", "model.layers.60.block_sparse_moe.experts.250.w3", "model.layers.60.block_sparse_moe.experts.251.w3", "model.layers.60.block_sparse_moe.experts.252.w3", "model.layers.60.block_sparse_moe.experts.253.w3", "model.layers.60.block_sparse_moe.experts.254.w3", "model.layers.60.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0005261033773421575, "dbits": 2415919104 } ] }, { "idx": 304, "layers": [ "model.layers.60.block_sparse_moe.experts.0.w2", "model.layers.60.block_sparse_moe.experts.1.w2", "model.layers.60.block_sparse_moe.experts.2.w2", "model.layers.60.block_sparse_moe.experts.3.w2", "model.layers.60.block_sparse_moe.experts.4.w2", "model.layers.60.block_sparse_moe.experts.5.w2", "model.layers.60.block_sparse_moe.experts.6.w2", "model.layers.60.block_sparse_moe.experts.7.w2", "model.layers.60.block_sparse_moe.experts.8.w2", "model.layers.60.block_sparse_moe.experts.9.w2", "model.layers.60.block_sparse_moe.experts.10.w2", "model.layers.60.block_sparse_moe.experts.11.w2", "model.layers.60.block_sparse_moe.experts.12.w2", "model.layers.60.block_sparse_moe.experts.13.w2", "model.layers.60.block_sparse_moe.experts.14.w2", "model.layers.60.block_sparse_moe.experts.15.w2", "model.layers.60.block_sparse_moe.experts.16.w2", "model.layers.60.block_sparse_moe.experts.17.w2", "model.layers.60.block_sparse_moe.experts.18.w2", "model.layers.60.block_sparse_moe.experts.19.w2", "model.layers.60.block_sparse_moe.experts.20.w2", "model.layers.60.block_sparse_moe.experts.21.w2", "model.layers.60.block_sparse_moe.experts.22.w2", "model.layers.60.block_sparse_moe.experts.23.w2", "model.layers.60.block_sparse_moe.experts.24.w2", "model.layers.60.block_sparse_moe.experts.25.w2", "model.layers.60.block_sparse_moe.experts.26.w2", "model.layers.60.block_sparse_moe.experts.27.w2", "model.layers.60.block_sparse_moe.experts.28.w2", "model.layers.60.block_sparse_moe.experts.29.w2", "model.layers.60.block_sparse_moe.experts.30.w2", "model.layers.60.block_sparse_moe.experts.31.w2", "model.layers.60.block_sparse_moe.experts.32.w2", "model.layers.60.block_sparse_moe.experts.33.w2", "model.layers.60.block_sparse_moe.experts.34.w2", "model.layers.60.block_sparse_moe.experts.35.w2", "model.layers.60.block_sparse_moe.experts.36.w2", "model.layers.60.block_sparse_moe.experts.37.w2", "model.layers.60.block_sparse_moe.experts.38.w2", "model.layers.60.block_sparse_moe.experts.39.w2", "model.layers.60.block_sparse_moe.experts.40.w2", "model.layers.60.block_sparse_moe.experts.41.w2", "model.layers.60.block_sparse_moe.experts.42.w2", "model.layers.60.block_sparse_moe.experts.43.w2", "model.layers.60.block_sparse_moe.experts.44.w2", "model.layers.60.block_sparse_moe.experts.45.w2", "model.layers.60.block_sparse_moe.experts.46.w2", "model.layers.60.block_sparse_moe.experts.47.w2", "model.layers.60.block_sparse_moe.experts.48.w2", "model.layers.60.block_sparse_moe.experts.49.w2", "model.layers.60.block_sparse_moe.experts.50.w2", "model.layers.60.block_sparse_moe.experts.51.w2", "model.layers.60.block_sparse_moe.experts.52.w2", "model.layers.60.block_sparse_moe.experts.53.w2", "model.layers.60.block_sparse_moe.experts.54.w2", "model.layers.60.block_sparse_moe.experts.55.w2", "model.layers.60.block_sparse_moe.experts.56.w2", "model.layers.60.block_sparse_moe.experts.57.w2", "model.layers.60.block_sparse_moe.experts.58.w2", "model.layers.60.block_sparse_moe.experts.59.w2", "model.layers.60.block_sparse_moe.experts.60.w2", "model.layers.60.block_sparse_moe.experts.61.w2", "model.layers.60.block_sparse_moe.experts.62.w2", "model.layers.60.block_sparse_moe.experts.63.w2", "model.layers.60.block_sparse_moe.experts.64.w2", "model.layers.60.block_sparse_moe.experts.65.w2", "model.layers.60.block_sparse_moe.experts.66.w2", "model.layers.60.block_sparse_moe.experts.67.w2", "model.layers.60.block_sparse_moe.experts.68.w2", "model.layers.60.block_sparse_moe.experts.69.w2", "model.layers.60.block_sparse_moe.experts.70.w2", "model.layers.60.block_sparse_moe.experts.71.w2", "model.layers.60.block_sparse_moe.experts.72.w2", "model.layers.60.block_sparse_moe.experts.73.w2", "model.layers.60.block_sparse_moe.experts.74.w2", "model.layers.60.block_sparse_moe.experts.75.w2", "model.layers.60.block_sparse_moe.experts.76.w2", "model.layers.60.block_sparse_moe.experts.77.w2", "model.layers.60.block_sparse_moe.experts.78.w2", "model.layers.60.block_sparse_moe.experts.79.w2", "model.layers.60.block_sparse_moe.experts.80.w2", "model.layers.60.block_sparse_moe.experts.81.w2", "model.layers.60.block_sparse_moe.experts.82.w2", "model.layers.60.block_sparse_moe.experts.83.w2", "model.layers.60.block_sparse_moe.experts.84.w2", "model.layers.60.block_sparse_moe.experts.85.w2", "model.layers.60.block_sparse_moe.experts.86.w2", "model.layers.60.block_sparse_moe.experts.87.w2", "model.layers.60.block_sparse_moe.experts.88.w2", "model.layers.60.block_sparse_moe.experts.89.w2", "model.layers.60.block_sparse_moe.experts.90.w2", "model.layers.60.block_sparse_moe.experts.91.w2", "model.layers.60.block_sparse_moe.experts.92.w2", "model.layers.60.block_sparse_moe.experts.93.w2", "model.layers.60.block_sparse_moe.experts.94.w2", "model.layers.60.block_sparse_moe.experts.95.w2", "model.layers.60.block_sparse_moe.experts.96.w2", "model.layers.60.block_sparse_moe.experts.97.w2", "model.layers.60.block_sparse_moe.experts.98.w2", "model.layers.60.block_sparse_moe.experts.99.w2", "model.layers.60.block_sparse_moe.experts.100.w2", "model.layers.60.block_sparse_moe.experts.101.w2", "model.layers.60.block_sparse_moe.experts.102.w2", "model.layers.60.block_sparse_moe.experts.103.w2", "model.layers.60.block_sparse_moe.experts.104.w2", "model.layers.60.block_sparse_moe.experts.105.w2", "model.layers.60.block_sparse_moe.experts.106.w2", "model.layers.60.block_sparse_moe.experts.107.w2", "model.layers.60.block_sparse_moe.experts.108.w2", "model.layers.60.block_sparse_moe.experts.109.w2", "model.layers.60.block_sparse_moe.experts.110.w2", "model.layers.60.block_sparse_moe.experts.111.w2", "model.layers.60.block_sparse_moe.experts.112.w2", "model.layers.60.block_sparse_moe.experts.113.w2", "model.layers.60.block_sparse_moe.experts.114.w2", "model.layers.60.block_sparse_moe.experts.115.w2", "model.layers.60.block_sparse_moe.experts.116.w2", "model.layers.60.block_sparse_moe.experts.117.w2", "model.layers.60.block_sparse_moe.experts.118.w2", "model.layers.60.block_sparse_moe.experts.119.w2", "model.layers.60.block_sparse_moe.experts.120.w2", "model.layers.60.block_sparse_moe.experts.121.w2", "model.layers.60.block_sparse_moe.experts.122.w2", "model.layers.60.block_sparse_moe.experts.123.w2", "model.layers.60.block_sparse_moe.experts.124.w2", "model.layers.60.block_sparse_moe.experts.125.w2", "model.layers.60.block_sparse_moe.experts.126.w2", "model.layers.60.block_sparse_moe.experts.127.w2", "model.layers.60.block_sparse_moe.experts.128.w2", "model.layers.60.block_sparse_moe.experts.129.w2", "model.layers.60.block_sparse_moe.experts.130.w2", "model.layers.60.block_sparse_moe.experts.131.w2", "model.layers.60.block_sparse_moe.experts.132.w2", "model.layers.60.block_sparse_moe.experts.133.w2", "model.layers.60.block_sparse_moe.experts.134.w2", "model.layers.60.block_sparse_moe.experts.135.w2", "model.layers.60.block_sparse_moe.experts.136.w2", "model.layers.60.block_sparse_moe.experts.137.w2", "model.layers.60.block_sparse_moe.experts.138.w2", "model.layers.60.block_sparse_moe.experts.139.w2", "model.layers.60.block_sparse_moe.experts.140.w2", "model.layers.60.block_sparse_moe.experts.141.w2", "model.layers.60.block_sparse_moe.experts.142.w2", "model.layers.60.block_sparse_moe.experts.143.w2", "model.layers.60.block_sparse_moe.experts.144.w2", "model.layers.60.block_sparse_moe.experts.145.w2", "model.layers.60.block_sparse_moe.experts.146.w2", "model.layers.60.block_sparse_moe.experts.147.w2", "model.layers.60.block_sparse_moe.experts.148.w2", "model.layers.60.block_sparse_moe.experts.149.w2", "model.layers.60.block_sparse_moe.experts.150.w2", "model.layers.60.block_sparse_moe.experts.151.w2", "model.layers.60.block_sparse_moe.experts.152.w2", "model.layers.60.block_sparse_moe.experts.153.w2", "model.layers.60.block_sparse_moe.experts.154.w2", "model.layers.60.block_sparse_moe.experts.155.w2", "model.layers.60.block_sparse_moe.experts.156.w2", "model.layers.60.block_sparse_moe.experts.157.w2", "model.layers.60.block_sparse_moe.experts.158.w2", "model.layers.60.block_sparse_moe.experts.159.w2", "model.layers.60.block_sparse_moe.experts.160.w2", "model.layers.60.block_sparse_moe.experts.161.w2", "model.layers.60.block_sparse_moe.experts.162.w2", "model.layers.60.block_sparse_moe.experts.163.w2", "model.layers.60.block_sparse_moe.experts.164.w2", "model.layers.60.block_sparse_moe.experts.165.w2", "model.layers.60.block_sparse_moe.experts.166.w2", "model.layers.60.block_sparse_moe.experts.167.w2", "model.layers.60.block_sparse_moe.experts.168.w2", "model.layers.60.block_sparse_moe.experts.169.w2", "model.layers.60.block_sparse_moe.experts.170.w2", "model.layers.60.block_sparse_moe.experts.171.w2", "model.layers.60.block_sparse_moe.experts.172.w2", "model.layers.60.block_sparse_moe.experts.173.w2", "model.layers.60.block_sparse_moe.experts.174.w2", "model.layers.60.block_sparse_moe.experts.175.w2", "model.layers.60.block_sparse_moe.experts.176.w2", "model.layers.60.block_sparse_moe.experts.177.w2", "model.layers.60.block_sparse_moe.experts.178.w2", "model.layers.60.block_sparse_moe.experts.179.w2", "model.layers.60.block_sparse_moe.experts.180.w2", "model.layers.60.block_sparse_moe.experts.181.w2", "model.layers.60.block_sparse_moe.experts.182.w2", "model.layers.60.block_sparse_moe.experts.183.w2", "model.layers.60.block_sparse_moe.experts.184.w2", "model.layers.60.block_sparse_moe.experts.185.w2", "model.layers.60.block_sparse_moe.experts.186.w2", "model.layers.60.block_sparse_moe.experts.187.w2", "model.layers.60.block_sparse_moe.experts.188.w2", "model.layers.60.block_sparse_moe.experts.189.w2", "model.layers.60.block_sparse_moe.experts.190.w2", "model.layers.60.block_sparse_moe.experts.191.w2", "model.layers.60.block_sparse_moe.experts.192.w2", "model.layers.60.block_sparse_moe.experts.193.w2", "model.layers.60.block_sparse_moe.experts.194.w2", "model.layers.60.block_sparse_moe.experts.195.w2", "model.layers.60.block_sparse_moe.experts.196.w2", "model.layers.60.block_sparse_moe.experts.197.w2", "model.layers.60.block_sparse_moe.experts.198.w2", "model.layers.60.block_sparse_moe.experts.199.w2", "model.layers.60.block_sparse_moe.experts.200.w2", "model.layers.60.block_sparse_moe.experts.201.w2", "model.layers.60.block_sparse_moe.experts.202.w2", "model.layers.60.block_sparse_moe.experts.203.w2", "model.layers.60.block_sparse_moe.experts.204.w2", "model.layers.60.block_sparse_moe.experts.205.w2", "model.layers.60.block_sparse_moe.experts.206.w2", "model.layers.60.block_sparse_moe.experts.207.w2", "model.layers.60.block_sparse_moe.experts.208.w2", "model.layers.60.block_sparse_moe.experts.209.w2", "model.layers.60.block_sparse_moe.experts.210.w2", "model.layers.60.block_sparse_moe.experts.211.w2", "model.layers.60.block_sparse_moe.experts.212.w2", "model.layers.60.block_sparse_moe.experts.213.w2", "model.layers.60.block_sparse_moe.experts.214.w2", "model.layers.60.block_sparse_moe.experts.215.w2", "model.layers.60.block_sparse_moe.experts.216.w2", "model.layers.60.block_sparse_moe.experts.217.w2", "model.layers.60.block_sparse_moe.experts.218.w2", "model.layers.60.block_sparse_moe.experts.219.w2", "model.layers.60.block_sparse_moe.experts.220.w2", "model.layers.60.block_sparse_moe.experts.221.w2", "model.layers.60.block_sparse_moe.experts.222.w2", "model.layers.60.block_sparse_moe.experts.223.w2", "model.layers.60.block_sparse_moe.experts.224.w2", "model.layers.60.block_sparse_moe.experts.225.w2", "model.layers.60.block_sparse_moe.experts.226.w2", "model.layers.60.block_sparse_moe.experts.227.w2", "model.layers.60.block_sparse_moe.experts.228.w2", "model.layers.60.block_sparse_moe.experts.229.w2", "model.layers.60.block_sparse_moe.experts.230.w2", "model.layers.60.block_sparse_moe.experts.231.w2", "model.layers.60.block_sparse_moe.experts.232.w2", "model.layers.60.block_sparse_moe.experts.233.w2", "model.layers.60.block_sparse_moe.experts.234.w2", "model.layers.60.block_sparse_moe.experts.235.w2", "model.layers.60.block_sparse_moe.experts.236.w2", "model.layers.60.block_sparse_moe.experts.237.w2", "model.layers.60.block_sparse_moe.experts.238.w2", "model.layers.60.block_sparse_moe.experts.239.w2", "model.layers.60.block_sparse_moe.experts.240.w2", "model.layers.60.block_sparse_moe.experts.241.w2", "model.layers.60.block_sparse_moe.experts.242.w2", "model.layers.60.block_sparse_moe.experts.243.w2", "model.layers.60.block_sparse_moe.experts.244.w2", "model.layers.60.block_sparse_moe.experts.245.w2", "model.layers.60.block_sparse_moe.experts.246.w2", "model.layers.60.block_sparse_moe.experts.247.w2", "model.layers.60.block_sparse_moe.experts.248.w2", "model.layers.60.block_sparse_moe.experts.249.w2", "model.layers.60.block_sparse_moe.experts.250.w2", "model.layers.60.block_sparse_moe.experts.251.w2", "model.layers.60.block_sparse_moe.experts.252.w2", "model.layers.60.block_sparse_moe.experts.253.w2", "model.layers.60.block_sparse_moe.experts.254.w2", "model.layers.60.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 5.81890344619751e-05, "dbits": 1207959552 } ] }, { "idx": 305, "layers": [ "model.layers.61.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0005801558494568093, "dbits": 18874368 } ] }, { "idx": 306, "layers": [ "model.layers.61.self_attn.k_proj", "model.layers.61.self_attn.v_proj" ], "candidates": [ { "dkld": 0.008925920724868863, "dbits": 6291456 } ] }, { "idx": 307, "layers": [ "model.layers.61.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0018916219472885354, "dbits": 18874368 } ] }, { "idx": 308, "layers": [ "model.layers.61.block_sparse_moe.experts.0.w1", "model.layers.61.block_sparse_moe.experts.1.w1", "model.layers.61.block_sparse_moe.experts.2.w1", "model.layers.61.block_sparse_moe.experts.3.w1", "model.layers.61.block_sparse_moe.experts.4.w1", "model.layers.61.block_sparse_moe.experts.5.w1", "model.layers.61.block_sparse_moe.experts.6.w1", "model.layers.61.block_sparse_moe.experts.7.w1", "model.layers.61.block_sparse_moe.experts.8.w1", "model.layers.61.block_sparse_moe.experts.9.w1", "model.layers.61.block_sparse_moe.experts.10.w1", "model.layers.61.block_sparse_moe.experts.11.w1", "model.layers.61.block_sparse_moe.experts.12.w1", "model.layers.61.block_sparse_moe.experts.13.w1", "model.layers.61.block_sparse_moe.experts.14.w1", "model.layers.61.block_sparse_moe.experts.15.w1", "model.layers.61.block_sparse_moe.experts.16.w1", "model.layers.61.block_sparse_moe.experts.17.w1", "model.layers.61.block_sparse_moe.experts.18.w1", "model.layers.61.block_sparse_moe.experts.19.w1", "model.layers.61.block_sparse_moe.experts.20.w1", "model.layers.61.block_sparse_moe.experts.21.w1", "model.layers.61.block_sparse_moe.experts.22.w1", "model.layers.61.block_sparse_moe.experts.23.w1", "model.layers.61.block_sparse_moe.experts.24.w1", "model.layers.61.block_sparse_moe.experts.25.w1", "model.layers.61.block_sparse_moe.experts.26.w1", "model.layers.61.block_sparse_moe.experts.27.w1", "model.layers.61.block_sparse_moe.experts.28.w1", "model.layers.61.block_sparse_moe.experts.29.w1", "model.layers.61.block_sparse_moe.experts.30.w1", "model.layers.61.block_sparse_moe.experts.31.w1", "model.layers.61.block_sparse_moe.experts.32.w1", "model.layers.61.block_sparse_moe.experts.33.w1", "model.layers.61.block_sparse_moe.experts.34.w1", "model.layers.61.block_sparse_moe.experts.35.w1", "model.layers.61.block_sparse_moe.experts.36.w1", "model.layers.61.block_sparse_moe.experts.37.w1", "model.layers.61.block_sparse_moe.experts.38.w1", "model.layers.61.block_sparse_moe.experts.39.w1", "model.layers.61.block_sparse_moe.experts.40.w1", "model.layers.61.block_sparse_moe.experts.41.w1", "model.layers.61.block_sparse_moe.experts.42.w1", "model.layers.61.block_sparse_moe.experts.43.w1", "model.layers.61.block_sparse_moe.experts.44.w1", "model.layers.61.block_sparse_moe.experts.45.w1", "model.layers.61.block_sparse_moe.experts.46.w1", "model.layers.61.block_sparse_moe.experts.47.w1", "model.layers.61.block_sparse_moe.experts.48.w1", "model.layers.61.block_sparse_moe.experts.49.w1", "model.layers.61.block_sparse_moe.experts.50.w1", "model.layers.61.block_sparse_moe.experts.51.w1", "model.layers.61.block_sparse_moe.experts.52.w1", "model.layers.61.block_sparse_moe.experts.53.w1", "model.layers.61.block_sparse_moe.experts.54.w1", "model.layers.61.block_sparse_moe.experts.55.w1", "model.layers.61.block_sparse_moe.experts.56.w1", "model.layers.61.block_sparse_moe.experts.57.w1", "model.layers.61.block_sparse_moe.experts.58.w1", "model.layers.61.block_sparse_moe.experts.59.w1", "model.layers.61.block_sparse_moe.experts.60.w1", "model.layers.61.block_sparse_moe.experts.61.w1", "model.layers.61.block_sparse_moe.experts.62.w1", "model.layers.61.block_sparse_moe.experts.63.w1", "model.layers.61.block_sparse_moe.experts.64.w1", "model.layers.61.block_sparse_moe.experts.65.w1", "model.layers.61.block_sparse_moe.experts.66.w1", "model.layers.61.block_sparse_moe.experts.67.w1", "model.layers.61.block_sparse_moe.experts.68.w1", "model.layers.61.block_sparse_moe.experts.69.w1", "model.layers.61.block_sparse_moe.experts.70.w1", "model.layers.61.block_sparse_moe.experts.71.w1", "model.layers.61.block_sparse_moe.experts.72.w1", "model.layers.61.block_sparse_moe.experts.73.w1", "model.layers.61.block_sparse_moe.experts.74.w1", "model.layers.61.block_sparse_moe.experts.75.w1", "model.layers.61.block_sparse_moe.experts.76.w1", "model.layers.61.block_sparse_moe.experts.77.w1", "model.layers.61.block_sparse_moe.experts.78.w1", "model.layers.61.block_sparse_moe.experts.79.w1", "model.layers.61.block_sparse_moe.experts.80.w1", "model.layers.61.block_sparse_moe.experts.81.w1", "model.layers.61.block_sparse_moe.experts.82.w1", "model.layers.61.block_sparse_moe.experts.83.w1", "model.layers.61.block_sparse_moe.experts.84.w1", "model.layers.61.block_sparse_moe.experts.85.w1", "model.layers.61.block_sparse_moe.experts.86.w1", "model.layers.61.block_sparse_moe.experts.87.w1", "model.layers.61.block_sparse_moe.experts.88.w1", "model.layers.61.block_sparse_moe.experts.89.w1", "model.layers.61.block_sparse_moe.experts.90.w1", "model.layers.61.block_sparse_moe.experts.91.w1", "model.layers.61.block_sparse_moe.experts.92.w1", "model.layers.61.block_sparse_moe.experts.93.w1", "model.layers.61.block_sparse_moe.experts.94.w1", "model.layers.61.block_sparse_moe.experts.95.w1", "model.layers.61.block_sparse_moe.experts.96.w1", "model.layers.61.block_sparse_moe.experts.97.w1", "model.layers.61.block_sparse_moe.experts.98.w1", "model.layers.61.block_sparse_moe.experts.99.w1", "model.layers.61.block_sparse_moe.experts.100.w1", "model.layers.61.block_sparse_moe.experts.101.w1", "model.layers.61.block_sparse_moe.experts.102.w1", "model.layers.61.block_sparse_moe.experts.103.w1", "model.layers.61.block_sparse_moe.experts.104.w1", "model.layers.61.block_sparse_moe.experts.105.w1", "model.layers.61.block_sparse_moe.experts.106.w1", "model.layers.61.block_sparse_moe.experts.107.w1", "model.layers.61.block_sparse_moe.experts.108.w1", "model.layers.61.block_sparse_moe.experts.109.w1", "model.layers.61.block_sparse_moe.experts.110.w1", "model.layers.61.block_sparse_moe.experts.111.w1", "model.layers.61.block_sparse_moe.experts.112.w1", "model.layers.61.block_sparse_moe.experts.113.w1", "model.layers.61.block_sparse_moe.experts.114.w1", "model.layers.61.block_sparse_moe.experts.115.w1", "model.layers.61.block_sparse_moe.experts.116.w1", "model.layers.61.block_sparse_moe.experts.117.w1", "model.layers.61.block_sparse_moe.experts.118.w1", "model.layers.61.block_sparse_moe.experts.119.w1", "model.layers.61.block_sparse_moe.experts.120.w1", "model.layers.61.block_sparse_moe.experts.121.w1", "model.layers.61.block_sparse_moe.experts.122.w1", "model.layers.61.block_sparse_moe.experts.123.w1", "model.layers.61.block_sparse_moe.experts.124.w1", "model.layers.61.block_sparse_moe.experts.125.w1", "model.layers.61.block_sparse_moe.experts.126.w1", "model.layers.61.block_sparse_moe.experts.127.w1", "model.layers.61.block_sparse_moe.experts.128.w1", "model.layers.61.block_sparse_moe.experts.129.w1", "model.layers.61.block_sparse_moe.experts.130.w1", "model.layers.61.block_sparse_moe.experts.131.w1", "model.layers.61.block_sparse_moe.experts.132.w1", "model.layers.61.block_sparse_moe.experts.133.w1", "model.layers.61.block_sparse_moe.experts.134.w1", "model.layers.61.block_sparse_moe.experts.135.w1", "model.layers.61.block_sparse_moe.experts.136.w1", "model.layers.61.block_sparse_moe.experts.137.w1", "model.layers.61.block_sparse_moe.experts.138.w1", "model.layers.61.block_sparse_moe.experts.139.w1", "model.layers.61.block_sparse_moe.experts.140.w1", "model.layers.61.block_sparse_moe.experts.141.w1", "model.layers.61.block_sparse_moe.experts.142.w1", "model.layers.61.block_sparse_moe.experts.143.w1", "model.layers.61.block_sparse_moe.experts.144.w1", "model.layers.61.block_sparse_moe.experts.145.w1", "model.layers.61.block_sparse_moe.experts.146.w1", "model.layers.61.block_sparse_moe.experts.147.w1", "model.layers.61.block_sparse_moe.experts.148.w1", "model.layers.61.block_sparse_moe.experts.149.w1", "model.layers.61.block_sparse_moe.experts.150.w1", "model.layers.61.block_sparse_moe.experts.151.w1", "model.layers.61.block_sparse_moe.experts.152.w1", "model.layers.61.block_sparse_moe.experts.153.w1", "model.layers.61.block_sparse_moe.experts.154.w1", "model.layers.61.block_sparse_moe.experts.155.w1", "model.layers.61.block_sparse_moe.experts.156.w1", "model.layers.61.block_sparse_moe.experts.157.w1", "model.layers.61.block_sparse_moe.experts.158.w1", "model.layers.61.block_sparse_moe.experts.159.w1", "model.layers.61.block_sparse_moe.experts.160.w1", "model.layers.61.block_sparse_moe.experts.161.w1", "model.layers.61.block_sparse_moe.experts.162.w1", "model.layers.61.block_sparse_moe.experts.163.w1", "model.layers.61.block_sparse_moe.experts.164.w1", "model.layers.61.block_sparse_moe.experts.165.w1", "model.layers.61.block_sparse_moe.experts.166.w1", "model.layers.61.block_sparse_moe.experts.167.w1", "model.layers.61.block_sparse_moe.experts.168.w1", "model.layers.61.block_sparse_moe.experts.169.w1", "model.layers.61.block_sparse_moe.experts.170.w1", "model.layers.61.block_sparse_moe.experts.171.w1", "model.layers.61.block_sparse_moe.experts.172.w1", "model.layers.61.block_sparse_moe.experts.173.w1", "model.layers.61.block_sparse_moe.experts.174.w1", "model.layers.61.block_sparse_moe.experts.175.w1", "model.layers.61.block_sparse_moe.experts.176.w1", "model.layers.61.block_sparse_moe.experts.177.w1", "model.layers.61.block_sparse_moe.experts.178.w1", "model.layers.61.block_sparse_moe.experts.179.w1", "model.layers.61.block_sparse_moe.experts.180.w1", "model.layers.61.block_sparse_moe.experts.181.w1", "model.layers.61.block_sparse_moe.experts.182.w1", "model.layers.61.block_sparse_moe.experts.183.w1", "model.layers.61.block_sparse_moe.experts.184.w1", "model.layers.61.block_sparse_moe.experts.185.w1", "model.layers.61.block_sparse_moe.experts.186.w1", "model.layers.61.block_sparse_moe.experts.187.w1", "model.layers.61.block_sparse_moe.experts.188.w1", "model.layers.61.block_sparse_moe.experts.189.w1", "model.layers.61.block_sparse_moe.experts.190.w1", "model.layers.61.block_sparse_moe.experts.191.w1", "model.layers.61.block_sparse_moe.experts.192.w1", "model.layers.61.block_sparse_moe.experts.193.w1", "model.layers.61.block_sparse_moe.experts.194.w1", "model.layers.61.block_sparse_moe.experts.195.w1", "model.layers.61.block_sparse_moe.experts.196.w1", "model.layers.61.block_sparse_moe.experts.197.w1", "model.layers.61.block_sparse_moe.experts.198.w1", "model.layers.61.block_sparse_moe.experts.199.w1", "model.layers.61.block_sparse_moe.experts.200.w1", "model.layers.61.block_sparse_moe.experts.201.w1", "model.layers.61.block_sparse_moe.experts.202.w1", "model.layers.61.block_sparse_moe.experts.203.w1", "model.layers.61.block_sparse_moe.experts.204.w1", "model.layers.61.block_sparse_moe.experts.205.w1", "model.layers.61.block_sparse_moe.experts.206.w1", "model.layers.61.block_sparse_moe.experts.207.w1", "model.layers.61.block_sparse_moe.experts.208.w1", "model.layers.61.block_sparse_moe.experts.209.w1", "model.layers.61.block_sparse_moe.experts.210.w1", "model.layers.61.block_sparse_moe.experts.211.w1", "model.layers.61.block_sparse_moe.experts.212.w1", "model.layers.61.block_sparse_moe.experts.213.w1", "model.layers.61.block_sparse_moe.experts.214.w1", "model.layers.61.block_sparse_moe.experts.215.w1", "model.layers.61.block_sparse_moe.experts.216.w1", "model.layers.61.block_sparse_moe.experts.217.w1", "model.layers.61.block_sparse_moe.experts.218.w1", "model.layers.61.block_sparse_moe.experts.219.w1", "model.layers.61.block_sparse_moe.experts.220.w1", "model.layers.61.block_sparse_moe.experts.221.w1", "model.layers.61.block_sparse_moe.experts.222.w1", "model.layers.61.block_sparse_moe.experts.223.w1", "model.layers.61.block_sparse_moe.experts.224.w1", "model.layers.61.block_sparse_moe.experts.225.w1", "model.layers.61.block_sparse_moe.experts.226.w1", "model.layers.61.block_sparse_moe.experts.227.w1", "model.layers.61.block_sparse_moe.experts.228.w1", "model.layers.61.block_sparse_moe.experts.229.w1", "model.layers.61.block_sparse_moe.experts.230.w1", "model.layers.61.block_sparse_moe.experts.231.w1", "model.layers.61.block_sparse_moe.experts.232.w1", "model.layers.61.block_sparse_moe.experts.233.w1", "model.layers.61.block_sparse_moe.experts.234.w1", "model.layers.61.block_sparse_moe.experts.235.w1", "model.layers.61.block_sparse_moe.experts.236.w1", "model.layers.61.block_sparse_moe.experts.237.w1", "model.layers.61.block_sparse_moe.experts.238.w1", "model.layers.61.block_sparse_moe.experts.239.w1", "model.layers.61.block_sparse_moe.experts.240.w1", "model.layers.61.block_sparse_moe.experts.241.w1", "model.layers.61.block_sparse_moe.experts.242.w1", "model.layers.61.block_sparse_moe.experts.243.w1", "model.layers.61.block_sparse_moe.experts.244.w1", "model.layers.61.block_sparse_moe.experts.245.w1", "model.layers.61.block_sparse_moe.experts.246.w1", "model.layers.61.block_sparse_moe.experts.247.w1", "model.layers.61.block_sparse_moe.experts.248.w1", "model.layers.61.block_sparse_moe.experts.249.w1", "model.layers.61.block_sparse_moe.experts.250.w1", "model.layers.61.block_sparse_moe.experts.251.w1", "model.layers.61.block_sparse_moe.experts.252.w1", "model.layers.61.block_sparse_moe.experts.253.w1", "model.layers.61.block_sparse_moe.experts.254.w1", "model.layers.61.block_sparse_moe.experts.255.w1", "model.layers.61.block_sparse_moe.experts.0.w3", "model.layers.61.block_sparse_moe.experts.1.w3", "model.layers.61.block_sparse_moe.experts.2.w3", "model.layers.61.block_sparse_moe.experts.3.w3", "model.layers.61.block_sparse_moe.experts.4.w3", "model.layers.61.block_sparse_moe.experts.5.w3", "model.layers.61.block_sparse_moe.experts.6.w3", "model.layers.61.block_sparse_moe.experts.7.w3", "model.layers.61.block_sparse_moe.experts.8.w3", "model.layers.61.block_sparse_moe.experts.9.w3", "model.layers.61.block_sparse_moe.experts.10.w3", "model.layers.61.block_sparse_moe.experts.11.w3", "model.layers.61.block_sparse_moe.experts.12.w3", "model.layers.61.block_sparse_moe.experts.13.w3", "model.layers.61.block_sparse_moe.experts.14.w3", "model.layers.61.block_sparse_moe.experts.15.w3", "model.layers.61.block_sparse_moe.experts.16.w3", "model.layers.61.block_sparse_moe.experts.17.w3", "model.layers.61.block_sparse_moe.experts.18.w3", "model.layers.61.block_sparse_moe.experts.19.w3", "model.layers.61.block_sparse_moe.experts.20.w3", "model.layers.61.block_sparse_moe.experts.21.w3", "model.layers.61.block_sparse_moe.experts.22.w3", "model.layers.61.block_sparse_moe.experts.23.w3", "model.layers.61.block_sparse_moe.experts.24.w3", "model.layers.61.block_sparse_moe.experts.25.w3", "model.layers.61.block_sparse_moe.experts.26.w3", "model.layers.61.block_sparse_moe.experts.27.w3", "model.layers.61.block_sparse_moe.experts.28.w3", "model.layers.61.block_sparse_moe.experts.29.w3", "model.layers.61.block_sparse_moe.experts.30.w3", "model.layers.61.block_sparse_moe.experts.31.w3", "model.layers.61.block_sparse_moe.experts.32.w3", "model.layers.61.block_sparse_moe.experts.33.w3", "model.layers.61.block_sparse_moe.experts.34.w3", "model.layers.61.block_sparse_moe.experts.35.w3", "model.layers.61.block_sparse_moe.experts.36.w3", "model.layers.61.block_sparse_moe.experts.37.w3", "model.layers.61.block_sparse_moe.experts.38.w3", "model.layers.61.block_sparse_moe.experts.39.w3", "model.layers.61.block_sparse_moe.experts.40.w3", "model.layers.61.block_sparse_moe.experts.41.w3", "model.layers.61.block_sparse_moe.experts.42.w3", "model.layers.61.block_sparse_moe.experts.43.w3", "model.layers.61.block_sparse_moe.experts.44.w3", "model.layers.61.block_sparse_moe.experts.45.w3", "model.layers.61.block_sparse_moe.experts.46.w3", "model.layers.61.block_sparse_moe.experts.47.w3", "model.layers.61.block_sparse_moe.experts.48.w3", "model.layers.61.block_sparse_moe.experts.49.w3", "model.layers.61.block_sparse_moe.experts.50.w3", "model.layers.61.block_sparse_moe.experts.51.w3", "model.layers.61.block_sparse_moe.experts.52.w3", "model.layers.61.block_sparse_moe.experts.53.w3", "model.layers.61.block_sparse_moe.experts.54.w3", "model.layers.61.block_sparse_moe.experts.55.w3", "model.layers.61.block_sparse_moe.experts.56.w3", "model.layers.61.block_sparse_moe.experts.57.w3", "model.layers.61.block_sparse_moe.experts.58.w3", "model.layers.61.block_sparse_moe.experts.59.w3", "model.layers.61.block_sparse_moe.experts.60.w3", "model.layers.61.block_sparse_moe.experts.61.w3", "model.layers.61.block_sparse_moe.experts.62.w3", "model.layers.61.block_sparse_moe.experts.63.w3", "model.layers.61.block_sparse_moe.experts.64.w3", "model.layers.61.block_sparse_moe.experts.65.w3", "model.layers.61.block_sparse_moe.experts.66.w3", "model.layers.61.block_sparse_moe.experts.67.w3", "model.layers.61.block_sparse_moe.experts.68.w3", "model.layers.61.block_sparse_moe.experts.69.w3", "model.layers.61.block_sparse_moe.experts.70.w3", "model.layers.61.block_sparse_moe.experts.71.w3", "model.layers.61.block_sparse_moe.experts.72.w3", "model.layers.61.block_sparse_moe.experts.73.w3", "model.layers.61.block_sparse_moe.experts.74.w3", "model.layers.61.block_sparse_moe.experts.75.w3", "model.layers.61.block_sparse_moe.experts.76.w3", "model.layers.61.block_sparse_moe.experts.77.w3", "model.layers.61.block_sparse_moe.experts.78.w3", "model.layers.61.block_sparse_moe.experts.79.w3", "model.layers.61.block_sparse_moe.experts.80.w3", "model.layers.61.block_sparse_moe.experts.81.w3", "model.layers.61.block_sparse_moe.experts.82.w3", "model.layers.61.block_sparse_moe.experts.83.w3", "model.layers.61.block_sparse_moe.experts.84.w3", "model.layers.61.block_sparse_moe.experts.85.w3", "model.layers.61.block_sparse_moe.experts.86.w3", "model.layers.61.block_sparse_moe.experts.87.w3", "model.layers.61.block_sparse_moe.experts.88.w3", "model.layers.61.block_sparse_moe.experts.89.w3", "model.layers.61.block_sparse_moe.experts.90.w3", "model.layers.61.block_sparse_moe.experts.91.w3", "model.layers.61.block_sparse_moe.experts.92.w3", "model.layers.61.block_sparse_moe.experts.93.w3", "model.layers.61.block_sparse_moe.experts.94.w3", "model.layers.61.block_sparse_moe.experts.95.w3", "model.layers.61.block_sparse_moe.experts.96.w3", "model.layers.61.block_sparse_moe.experts.97.w3", "model.layers.61.block_sparse_moe.experts.98.w3", "model.layers.61.block_sparse_moe.experts.99.w3", "model.layers.61.block_sparse_moe.experts.100.w3", "model.layers.61.block_sparse_moe.experts.101.w3", "model.layers.61.block_sparse_moe.experts.102.w3", "model.layers.61.block_sparse_moe.experts.103.w3", "model.layers.61.block_sparse_moe.experts.104.w3", "model.layers.61.block_sparse_moe.experts.105.w3", "model.layers.61.block_sparse_moe.experts.106.w3", "model.layers.61.block_sparse_moe.experts.107.w3", "model.layers.61.block_sparse_moe.experts.108.w3", "model.layers.61.block_sparse_moe.experts.109.w3", "model.layers.61.block_sparse_moe.experts.110.w3", "model.layers.61.block_sparse_moe.experts.111.w3", "model.layers.61.block_sparse_moe.experts.112.w3", "model.layers.61.block_sparse_moe.experts.113.w3", "model.layers.61.block_sparse_moe.experts.114.w3", "model.layers.61.block_sparse_moe.experts.115.w3", "model.layers.61.block_sparse_moe.experts.116.w3", "model.layers.61.block_sparse_moe.experts.117.w3", "model.layers.61.block_sparse_moe.experts.118.w3", "model.layers.61.block_sparse_moe.experts.119.w3", "model.layers.61.block_sparse_moe.experts.120.w3", "model.layers.61.block_sparse_moe.experts.121.w3", "model.layers.61.block_sparse_moe.experts.122.w3", "model.layers.61.block_sparse_moe.experts.123.w3", "model.layers.61.block_sparse_moe.experts.124.w3", "model.layers.61.block_sparse_moe.experts.125.w3", "model.layers.61.block_sparse_moe.experts.126.w3", "model.layers.61.block_sparse_moe.experts.127.w3", "model.layers.61.block_sparse_moe.experts.128.w3", "model.layers.61.block_sparse_moe.experts.129.w3", "model.layers.61.block_sparse_moe.experts.130.w3", "model.layers.61.block_sparse_moe.experts.131.w3", "model.layers.61.block_sparse_moe.experts.132.w3", "model.layers.61.block_sparse_moe.experts.133.w3", "model.layers.61.block_sparse_moe.experts.134.w3", "model.layers.61.block_sparse_moe.experts.135.w3", "model.layers.61.block_sparse_moe.experts.136.w3", "model.layers.61.block_sparse_moe.experts.137.w3", "model.layers.61.block_sparse_moe.experts.138.w3", "model.layers.61.block_sparse_moe.experts.139.w3", "model.layers.61.block_sparse_moe.experts.140.w3", "model.layers.61.block_sparse_moe.experts.141.w3", "model.layers.61.block_sparse_moe.experts.142.w3", "model.layers.61.block_sparse_moe.experts.143.w3", "model.layers.61.block_sparse_moe.experts.144.w3", "model.layers.61.block_sparse_moe.experts.145.w3", "model.layers.61.block_sparse_moe.experts.146.w3", "model.layers.61.block_sparse_moe.experts.147.w3", "model.layers.61.block_sparse_moe.experts.148.w3", "model.layers.61.block_sparse_moe.experts.149.w3", "model.layers.61.block_sparse_moe.experts.150.w3", "model.layers.61.block_sparse_moe.experts.151.w3", "model.layers.61.block_sparse_moe.experts.152.w3", "model.layers.61.block_sparse_moe.experts.153.w3", "model.layers.61.block_sparse_moe.experts.154.w3", "model.layers.61.block_sparse_moe.experts.155.w3", "model.layers.61.block_sparse_moe.experts.156.w3", "model.layers.61.block_sparse_moe.experts.157.w3", "model.layers.61.block_sparse_moe.experts.158.w3", "model.layers.61.block_sparse_moe.experts.159.w3", "model.layers.61.block_sparse_moe.experts.160.w3", "model.layers.61.block_sparse_moe.experts.161.w3", "model.layers.61.block_sparse_moe.experts.162.w3", "model.layers.61.block_sparse_moe.experts.163.w3", "model.layers.61.block_sparse_moe.experts.164.w3", "model.layers.61.block_sparse_moe.experts.165.w3", "model.layers.61.block_sparse_moe.experts.166.w3", "model.layers.61.block_sparse_moe.experts.167.w3", "model.layers.61.block_sparse_moe.experts.168.w3", "model.layers.61.block_sparse_moe.experts.169.w3", "model.layers.61.block_sparse_moe.experts.170.w3", "model.layers.61.block_sparse_moe.experts.171.w3", "model.layers.61.block_sparse_moe.experts.172.w3", "model.layers.61.block_sparse_moe.experts.173.w3", "model.layers.61.block_sparse_moe.experts.174.w3", "model.layers.61.block_sparse_moe.experts.175.w3", "model.layers.61.block_sparse_moe.experts.176.w3", "model.layers.61.block_sparse_moe.experts.177.w3", "model.layers.61.block_sparse_moe.experts.178.w3", "model.layers.61.block_sparse_moe.experts.179.w3", "model.layers.61.block_sparse_moe.experts.180.w3", "model.layers.61.block_sparse_moe.experts.181.w3", "model.layers.61.block_sparse_moe.experts.182.w3", "model.layers.61.block_sparse_moe.experts.183.w3", "model.layers.61.block_sparse_moe.experts.184.w3", "model.layers.61.block_sparse_moe.experts.185.w3", "model.layers.61.block_sparse_moe.experts.186.w3", "model.layers.61.block_sparse_moe.experts.187.w3", "model.layers.61.block_sparse_moe.experts.188.w3", "model.layers.61.block_sparse_moe.experts.189.w3", "model.layers.61.block_sparse_moe.experts.190.w3", "model.layers.61.block_sparse_moe.experts.191.w3", "model.layers.61.block_sparse_moe.experts.192.w3", "model.layers.61.block_sparse_moe.experts.193.w3", "model.layers.61.block_sparse_moe.experts.194.w3", "model.layers.61.block_sparse_moe.experts.195.w3", "model.layers.61.block_sparse_moe.experts.196.w3", "model.layers.61.block_sparse_moe.experts.197.w3", "model.layers.61.block_sparse_moe.experts.198.w3", "model.layers.61.block_sparse_moe.experts.199.w3", "model.layers.61.block_sparse_moe.experts.200.w3", "model.layers.61.block_sparse_moe.experts.201.w3", "model.layers.61.block_sparse_moe.experts.202.w3", "model.layers.61.block_sparse_moe.experts.203.w3", "model.layers.61.block_sparse_moe.experts.204.w3", "model.layers.61.block_sparse_moe.experts.205.w3", "model.layers.61.block_sparse_moe.experts.206.w3", "model.layers.61.block_sparse_moe.experts.207.w3", "model.layers.61.block_sparse_moe.experts.208.w3", "model.layers.61.block_sparse_moe.experts.209.w3", "model.layers.61.block_sparse_moe.experts.210.w3", "model.layers.61.block_sparse_moe.experts.211.w3", "model.layers.61.block_sparse_moe.experts.212.w3", "model.layers.61.block_sparse_moe.experts.213.w3", "model.layers.61.block_sparse_moe.experts.214.w3", "model.layers.61.block_sparse_moe.experts.215.w3", "model.layers.61.block_sparse_moe.experts.216.w3", "model.layers.61.block_sparse_moe.experts.217.w3", "model.layers.61.block_sparse_moe.experts.218.w3", "model.layers.61.block_sparse_moe.experts.219.w3", "model.layers.61.block_sparse_moe.experts.220.w3", "model.layers.61.block_sparse_moe.experts.221.w3", "model.layers.61.block_sparse_moe.experts.222.w3", "model.layers.61.block_sparse_moe.experts.223.w3", "model.layers.61.block_sparse_moe.experts.224.w3", "model.layers.61.block_sparse_moe.experts.225.w3", "model.layers.61.block_sparse_moe.experts.226.w3", "model.layers.61.block_sparse_moe.experts.227.w3", "model.layers.61.block_sparse_moe.experts.228.w3", "model.layers.61.block_sparse_moe.experts.229.w3", "model.layers.61.block_sparse_moe.experts.230.w3", "model.layers.61.block_sparse_moe.experts.231.w3", "model.layers.61.block_sparse_moe.experts.232.w3", "model.layers.61.block_sparse_moe.experts.233.w3", "model.layers.61.block_sparse_moe.experts.234.w3", "model.layers.61.block_sparse_moe.experts.235.w3", "model.layers.61.block_sparse_moe.experts.236.w3", "model.layers.61.block_sparse_moe.experts.237.w3", "model.layers.61.block_sparse_moe.experts.238.w3", "model.layers.61.block_sparse_moe.experts.239.w3", "model.layers.61.block_sparse_moe.experts.240.w3", "model.layers.61.block_sparse_moe.experts.241.w3", "model.layers.61.block_sparse_moe.experts.242.w3", "model.layers.61.block_sparse_moe.experts.243.w3", "model.layers.61.block_sparse_moe.experts.244.w3", "model.layers.61.block_sparse_moe.experts.245.w3", "model.layers.61.block_sparse_moe.experts.246.w3", "model.layers.61.block_sparse_moe.experts.247.w3", "model.layers.61.block_sparse_moe.experts.248.w3", "model.layers.61.block_sparse_moe.experts.249.w3", "model.layers.61.block_sparse_moe.experts.250.w3", "model.layers.61.block_sparse_moe.experts.251.w3", "model.layers.61.block_sparse_moe.experts.252.w3", "model.layers.61.block_sparse_moe.experts.253.w3", "model.layers.61.block_sparse_moe.experts.254.w3", "model.layers.61.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.000686433911323503, "dbits": 2415919104 } ] }, { "idx": 309, "layers": [ "model.layers.61.block_sparse_moe.experts.0.w2", "model.layers.61.block_sparse_moe.experts.1.w2", "model.layers.61.block_sparse_moe.experts.2.w2", "model.layers.61.block_sparse_moe.experts.3.w2", "model.layers.61.block_sparse_moe.experts.4.w2", "model.layers.61.block_sparse_moe.experts.5.w2", "model.layers.61.block_sparse_moe.experts.6.w2", "model.layers.61.block_sparse_moe.experts.7.w2", "model.layers.61.block_sparse_moe.experts.8.w2", "model.layers.61.block_sparse_moe.experts.9.w2", "model.layers.61.block_sparse_moe.experts.10.w2", "model.layers.61.block_sparse_moe.experts.11.w2", "model.layers.61.block_sparse_moe.experts.12.w2", "model.layers.61.block_sparse_moe.experts.13.w2", "model.layers.61.block_sparse_moe.experts.14.w2", "model.layers.61.block_sparse_moe.experts.15.w2", "model.layers.61.block_sparse_moe.experts.16.w2", "model.layers.61.block_sparse_moe.experts.17.w2", "model.layers.61.block_sparse_moe.experts.18.w2", "model.layers.61.block_sparse_moe.experts.19.w2", "model.layers.61.block_sparse_moe.experts.20.w2", "model.layers.61.block_sparse_moe.experts.21.w2", "model.layers.61.block_sparse_moe.experts.22.w2", "model.layers.61.block_sparse_moe.experts.23.w2", "model.layers.61.block_sparse_moe.experts.24.w2", "model.layers.61.block_sparse_moe.experts.25.w2", "model.layers.61.block_sparse_moe.experts.26.w2", "model.layers.61.block_sparse_moe.experts.27.w2", "model.layers.61.block_sparse_moe.experts.28.w2", "model.layers.61.block_sparse_moe.experts.29.w2", "model.layers.61.block_sparse_moe.experts.30.w2", "model.layers.61.block_sparse_moe.experts.31.w2", "model.layers.61.block_sparse_moe.experts.32.w2", "model.layers.61.block_sparse_moe.experts.33.w2", "model.layers.61.block_sparse_moe.experts.34.w2", "model.layers.61.block_sparse_moe.experts.35.w2", "model.layers.61.block_sparse_moe.experts.36.w2", "model.layers.61.block_sparse_moe.experts.37.w2", "model.layers.61.block_sparse_moe.experts.38.w2", "model.layers.61.block_sparse_moe.experts.39.w2", "model.layers.61.block_sparse_moe.experts.40.w2", "model.layers.61.block_sparse_moe.experts.41.w2", "model.layers.61.block_sparse_moe.experts.42.w2", "model.layers.61.block_sparse_moe.experts.43.w2", "model.layers.61.block_sparse_moe.experts.44.w2", "model.layers.61.block_sparse_moe.experts.45.w2", "model.layers.61.block_sparse_moe.experts.46.w2", "model.layers.61.block_sparse_moe.experts.47.w2", "model.layers.61.block_sparse_moe.experts.48.w2", "model.layers.61.block_sparse_moe.experts.49.w2", "model.layers.61.block_sparse_moe.experts.50.w2", "model.layers.61.block_sparse_moe.experts.51.w2", "model.layers.61.block_sparse_moe.experts.52.w2", "model.layers.61.block_sparse_moe.experts.53.w2", "model.layers.61.block_sparse_moe.experts.54.w2", "model.layers.61.block_sparse_moe.experts.55.w2", "model.layers.61.block_sparse_moe.experts.56.w2", "model.layers.61.block_sparse_moe.experts.57.w2", "model.layers.61.block_sparse_moe.experts.58.w2", "model.layers.61.block_sparse_moe.experts.59.w2", "model.layers.61.block_sparse_moe.experts.60.w2", "model.layers.61.block_sparse_moe.experts.61.w2", "model.layers.61.block_sparse_moe.experts.62.w2", "model.layers.61.block_sparse_moe.experts.63.w2", "model.layers.61.block_sparse_moe.experts.64.w2", "model.layers.61.block_sparse_moe.experts.65.w2", "model.layers.61.block_sparse_moe.experts.66.w2", "model.layers.61.block_sparse_moe.experts.67.w2", "model.layers.61.block_sparse_moe.experts.68.w2", "model.layers.61.block_sparse_moe.experts.69.w2", "model.layers.61.block_sparse_moe.experts.70.w2", "model.layers.61.block_sparse_moe.experts.71.w2", "model.layers.61.block_sparse_moe.experts.72.w2", "model.layers.61.block_sparse_moe.experts.73.w2", "model.layers.61.block_sparse_moe.experts.74.w2", "model.layers.61.block_sparse_moe.experts.75.w2", "model.layers.61.block_sparse_moe.experts.76.w2", "model.layers.61.block_sparse_moe.experts.77.w2", "model.layers.61.block_sparse_moe.experts.78.w2", "model.layers.61.block_sparse_moe.experts.79.w2", "model.layers.61.block_sparse_moe.experts.80.w2", "model.layers.61.block_sparse_moe.experts.81.w2", "model.layers.61.block_sparse_moe.experts.82.w2", "model.layers.61.block_sparse_moe.experts.83.w2", "model.layers.61.block_sparse_moe.experts.84.w2", "model.layers.61.block_sparse_moe.experts.85.w2", "model.layers.61.block_sparse_moe.experts.86.w2", "model.layers.61.block_sparse_moe.experts.87.w2", "model.layers.61.block_sparse_moe.experts.88.w2", "model.layers.61.block_sparse_moe.experts.89.w2", "model.layers.61.block_sparse_moe.experts.90.w2", "model.layers.61.block_sparse_moe.experts.91.w2", "model.layers.61.block_sparse_moe.experts.92.w2", "model.layers.61.block_sparse_moe.experts.93.w2", "model.layers.61.block_sparse_moe.experts.94.w2", "model.layers.61.block_sparse_moe.experts.95.w2", "model.layers.61.block_sparse_moe.experts.96.w2", "model.layers.61.block_sparse_moe.experts.97.w2", "model.layers.61.block_sparse_moe.experts.98.w2", "model.layers.61.block_sparse_moe.experts.99.w2", "model.layers.61.block_sparse_moe.experts.100.w2", "model.layers.61.block_sparse_moe.experts.101.w2", "model.layers.61.block_sparse_moe.experts.102.w2", "model.layers.61.block_sparse_moe.experts.103.w2", "model.layers.61.block_sparse_moe.experts.104.w2", "model.layers.61.block_sparse_moe.experts.105.w2", "model.layers.61.block_sparse_moe.experts.106.w2", "model.layers.61.block_sparse_moe.experts.107.w2", "model.layers.61.block_sparse_moe.experts.108.w2", "model.layers.61.block_sparse_moe.experts.109.w2", "model.layers.61.block_sparse_moe.experts.110.w2", "model.layers.61.block_sparse_moe.experts.111.w2", "model.layers.61.block_sparse_moe.experts.112.w2", "model.layers.61.block_sparse_moe.experts.113.w2", "model.layers.61.block_sparse_moe.experts.114.w2", "model.layers.61.block_sparse_moe.experts.115.w2", "model.layers.61.block_sparse_moe.experts.116.w2", "model.layers.61.block_sparse_moe.experts.117.w2", "model.layers.61.block_sparse_moe.experts.118.w2", "model.layers.61.block_sparse_moe.experts.119.w2", "model.layers.61.block_sparse_moe.experts.120.w2", "model.layers.61.block_sparse_moe.experts.121.w2", "model.layers.61.block_sparse_moe.experts.122.w2", "model.layers.61.block_sparse_moe.experts.123.w2", "model.layers.61.block_sparse_moe.experts.124.w2", "model.layers.61.block_sparse_moe.experts.125.w2", "model.layers.61.block_sparse_moe.experts.126.w2", "model.layers.61.block_sparse_moe.experts.127.w2", "model.layers.61.block_sparse_moe.experts.128.w2", "model.layers.61.block_sparse_moe.experts.129.w2", "model.layers.61.block_sparse_moe.experts.130.w2", "model.layers.61.block_sparse_moe.experts.131.w2", "model.layers.61.block_sparse_moe.experts.132.w2", "model.layers.61.block_sparse_moe.experts.133.w2", "model.layers.61.block_sparse_moe.experts.134.w2", "model.layers.61.block_sparse_moe.experts.135.w2", "model.layers.61.block_sparse_moe.experts.136.w2", "model.layers.61.block_sparse_moe.experts.137.w2", "model.layers.61.block_sparse_moe.experts.138.w2", "model.layers.61.block_sparse_moe.experts.139.w2", "model.layers.61.block_sparse_moe.experts.140.w2", "model.layers.61.block_sparse_moe.experts.141.w2", "model.layers.61.block_sparse_moe.experts.142.w2", "model.layers.61.block_sparse_moe.experts.143.w2", "model.layers.61.block_sparse_moe.experts.144.w2", "model.layers.61.block_sparse_moe.experts.145.w2", "model.layers.61.block_sparse_moe.experts.146.w2", "model.layers.61.block_sparse_moe.experts.147.w2", "model.layers.61.block_sparse_moe.experts.148.w2", "model.layers.61.block_sparse_moe.experts.149.w2", "model.layers.61.block_sparse_moe.experts.150.w2", "model.layers.61.block_sparse_moe.experts.151.w2", "model.layers.61.block_sparse_moe.experts.152.w2", "model.layers.61.block_sparse_moe.experts.153.w2", "model.layers.61.block_sparse_moe.experts.154.w2", "model.layers.61.block_sparse_moe.experts.155.w2", "model.layers.61.block_sparse_moe.experts.156.w2", "model.layers.61.block_sparse_moe.experts.157.w2", "model.layers.61.block_sparse_moe.experts.158.w2", "model.layers.61.block_sparse_moe.experts.159.w2", "model.layers.61.block_sparse_moe.experts.160.w2", "model.layers.61.block_sparse_moe.experts.161.w2", "model.layers.61.block_sparse_moe.experts.162.w2", "model.layers.61.block_sparse_moe.experts.163.w2", "model.layers.61.block_sparse_moe.experts.164.w2", "model.layers.61.block_sparse_moe.experts.165.w2", "model.layers.61.block_sparse_moe.experts.166.w2", "model.layers.61.block_sparse_moe.experts.167.w2", "model.layers.61.block_sparse_moe.experts.168.w2", "model.layers.61.block_sparse_moe.experts.169.w2", "model.layers.61.block_sparse_moe.experts.170.w2", "model.layers.61.block_sparse_moe.experts.171.w2", "model.layers.61.block_sparse_moe.experts.172.w2", "model.layers.61.block_sparse_moe.experts.173.w2", "model.layers.61.block_sparse_moe.experts.174.w2", "model.layers.61.block_sparse_moe.experts.175.w2", "model.layers.61.block_sparse_moe.experts.176.w2", "model.layers.61.block_sparse_moe.experts.177.w2", "model.layers.61.block_sparse_moe.experts.178.w2", "model.layers.61.block_sparse_moe.experts.179.w2", "model.layers.61.block_sparse_moe.experts.180.w2", "model.layers.61.block_sparse_moe.experts.181.w2", "model.layers.61.block_sparse_moe.experts.182.w2", "model.layers.61.block_sparse_moe.experts.183.w2", "model.layers.61.block_sparse_moe.experts.184.w2", "model.layers.61.block_sparse_moe.experts.185.w2", "model.layers.61.block_sparse_moe.experts.186.w2", "model.layers.61.block_sparse_moe.experts.187.w2", "model.layers.61.block_sparse_moe.experts.188.w2", "model.layers.61.block_sparse_moe.experts.189.w2", "model.layers.61.block_sparse_moe.experts.190.w2", "model.layers.61.block_sparse_moe.experts.191.w2", "model.layers.61.block_sparse_moe.experts.192.w2", "model.layers.61.block_sparse_moe.experts.193.w2", "model.layers.61.block_sparse_moe.experts.194.w2", "model.layers.61.block_sparse_moe.experts.195.w2", "model.layers.61.block_sparse_moe.experts.196.w2", "model.layers.61.block_sparse_moe.experts.197.w2", "model.layers.61.block_sparse_moe.experts.198.w2", "model.layers.61.block_sparse_moe.experts.199.w2", "model.layers.61.block_sparse_moe.experts.200.w2", "model.layers.61.block_sparse_moe.experts.201.w2", "model.layers.61.block_sparse_moe.experts.202.w2", "model.layers.61.block_sparse_moe.experts.203.w2", "model.layers.61.block_sparse_moe.experts.204.w2", "model.layers.61.block_sparse_moe.experts.205.w2", "model.layers.61.block_sparse_moe.experts.206.w2", "model.layers.61.block_sparse_moe.experts.207.w2", "model.layers.61.block_sparse_moe.experts.208.w2", "model.layers.61.block_sparse_moe.experts.209.w2", "model.layers.61.block_sparse_moe.experts.210.w2", "model.layers.61.block_sparse_moe.experts.211.w2", "model.layers.61.block_sparse_moe.experts.212.w2", "model.layers.61.block_sparse_moe.experts.213.w2", "model.layers.61.block_sparse_moe.experts.214.w2", "model.layers.61.block_sparse_moe.experts.215.w2", "model.layers.61.block_sparse_moe.experts.216.w2", "model.layers.61.block_sparse_moe.experts.217.w2", "model.layers.61.block_sparse_moe.experts.218.w2", "model.layers.61.block_sparse_moe.experts.219.w2", "model.layers.61.block_sparse_moe.experts.220.w2", "model.layers.61.block_sparse_moe.experts.221.w2", "model.layers.61.block_sparse_moe.experts.222.w2", "model.layers.61.block_sparse_moe.experts.223.w2", "model.layers.61.block_sparse_moe.experts.224.w2", "model.layers.61.block_sparse_moe.experts.225.w2", "model.layers.61.block_sparse_moe.experts.226.w2", "model.layers.61.block_sparse_moe.experts.227.w2", "model.layers.61.block_sparse_moe.experts.228.w2", "model.layers.61.block_sparse_moe.experts.229.w2", "model.layers.61.block_sparse_moe.experts.230.w2", "model.layers.61.block_sparse_moe.experts.231.w2", "model.layers.61.block_sparse_moe.experts.232.w2", "model.layers.61.block_sparse_moe.experts.233.w2", "model.layers.61.block_sparse_moe.experts.234.w2", "model.layers.61.block_sparse_moe.experts.235.w2", "model.layers.61.block_sparse_moe.experts.236.w2", "model.layers.61.block_sparse_moe.experts.237.w2", "model.layers.61.block_sparse_moe.experts.238.w2", "model.layers.61.block_sparse_moe.experts.239.w2", "model.layers.61.block_sparse_moe.experts.240.w2", "model.layers.61.block_sparse_moe.experts.241.w2", "model.layers.61.block_sparse_moe.experts.242.w2", "model.layers.61.block_sparse_moe.experts.243.w2", "model.layers.61.block_sparse_moe.experts.244.w2", "model.layers.61.block_sparse_moe.experts.245.w2", "model.layers.61.block_sparse_moe.experts.246.w2", "model.layers.61.block_sparse_moe.experts.247.w2", "model.layers.61.block_sparse_moe.experts.248.w2", "model.layers.61.block_sparse_moe.experts.249.w2", "model.layers.61.block_sparse_moe.experts.250.w2", "model.layers.61.block_sparse_moe.experts.251.w2", "model.layers.61.block_sparse_moe.experts.252.w2", "model.layers.61.block_sparse_moe.experts.253.w2", "model.layers.61.block_sparse_moe.experts.254.w2", "model.layers.61.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0005220770835876465, "dbits": 1207959552 } ] } ], "base_kld": 0.8732701063156127, "arch_string": "MiniMaxM2ForCausalLM" }